diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java index dc7c629fa228e244a4d0054496048e9c20dd61da..7d2f0a1f7404c61eeaaa31e56cfa23eb070a47fd 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java @@ -102,7 +102,24 @@ private static void addDeprecatedKeys() { YarnConfiguration.NM_PREFIX + "log-container-debug-info.enabled"; public static final boolean DEFAULT_NM_LOG_CONTAINER_DEBUG_INFO = false; - + + /** Command to run on container killed event. + * This can be used to collect and save debug information to find the root + * cause of the issue. + * Example: jmap -heap {{PID}} 1>{{LOG_DIR}}/heap.out + */ + public static final String NM_SAVE_DEBUG_INFO_COMMAND = + YarnConfiguration.NM_PREFIX + "save-debug-info.command"; + + public static final String DEFAULT_NM_SAVE_DEBUG_INFO_COMMAND = + ""; + + /** Timeout to save container debug information on container killed event. 
*/ + public static final String NM_SAVE_DEBUG_INFO_TIMEOUT_SEC = + YarnConfiguration.NM_PREFIX + "save-debug-info.timeout-sec"; + + public static final Integer DEFAULT_NM_SAVE_DEBUG_INFO_TIMEOUT_SEC = 60; + //////////////////////////////// // IPC Configs //////////////////////////////// diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml index c8c4edd7713afcef4c28b9d9b3253403c389f23f..9ddb21d2af6c6fafb60a8a02a90b46765590fe77 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml @@ -1027,6 +1027,36 @@ + + This property specifies a command to run if the container is killed for + exceeding its virtual or physical memory limit. + This can be used to collect and save debug information to find the root + cause of the issue. If Linux container executor is enabled, the script + has to run as root or as the user who executes the container. It has to + have the SUID bit set in these cases to be called by the Yarn user. + See yarn.nodemanager.linux-container-executor.nonsecure-mode.local-user + for details. {{PID}} is replaced with the container process id and {{LOG_DIR}} with the container log directory. + Examples: + jmap -heap {{PID}} 2>{{LOG_DIR}}/heap.err 1>{{LOG_DIR}}/heap.out + gcore -o {{LOG_DIR}}/heap.core {{PID}} 2>{{LOG_DIR}}/heap.err + 1>{{LOG_DIR}}/heap.out + run_gcore_with_suid.sh {{LOG_DIR}} {{PID}} + + yarn.nodemanager.save-debug-info.command + + + + + + This property tells the node manager to kill the save debug information + command if it runs longer than this timeout. The unit is seconds. + The command is specified by yarn.nodemanager.save-debug-info.command. + + yarn.nodemanager.save-debug-info.timeout-sec + 60 + + + Keytab for NM.
yarn.nodemanager.keytab /etc/krb5.keytab diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/Container.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/Container.java index 8004f3393aab23953abf303c38216201a8fb83bd..4c1ca99768bc83bdd1d4dcbdb48c0e0d495da410 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/Container.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/Container.java @@ -91,5 +91,7 @@ void sendLaunchEvent(); + void sendCleanupEvent(); + void sendKillEvent(int exitStatus, String description); } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java index 4a6be32976ff243056da2fae682c44f46b451567..931b8d452695c9d8502193ec42a5d1e3434c836c 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java @@ -721,6 +721,14 @@ public void sendLaunchEvent() { } @SuppressWarnings("unchecked") // dispatcher not typed + @Override + public void 
sendCleanupEvent() { + dispatcher.getEventHandler().handle( + new ContainersLauncherEvent(this, + ContainersLauncherEventType.CLEANUP_CONTAINER)); + } + + @SuppressWarnings("unchecked") // dispatcher not typed private void sendScheduleEvent() { dispatcher.getEventHandler().handle( new ContainerSchedulerEvent(this, @@ -1461,12 +1469,22 @@ public void transition(ContainerImpl container, ContainerEvent event) { @SuppressWarnings("unchecked") @Override public void transition(ContainerImpl container, ContainerEvent event) { + ContainerKillEvent killEvent = (ContainerKillEvent)event; + boolean isDebugInfoCollectionSupported = + killEvent.getContainerExitStatus() == ContainerExitStatus + .KILLED_EXCEEDED_VMEM || + killEvent.getContainerExitStatus() == ContainerExitStatus + .KILLED_EXCEEDED_PMEM; + ContainersLauncherEventType nextEvent = + isDebugInfoCollectionSupported ? + ContainersLauncherEventType.DEBUG_AND_CLEANUP_CONTAINER : + ContainersLauncherEventType.CLEANUP_CONTAINER; + // Kill the process/process-grp container.setIsReInitializing(false); container.dispatcher.getEventHandler().handle( new ContainersLauncherEvent(container, - ContainersLauncherEventType.CLEANUP_CONTAINER)); - ContainerKillEvent killEvent = (ContainerKillEvent) event; + nextEvent)); container.addDiagnostics(killEvent.getDiagnostic(), "\n"); container.exitCode = killEvent.getContainerExitStatus(); } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java index 823457ffc59b714202700cf3f0ec6cc8ea549450..158df59c42fc7161a075cf90774d46219a40b09b 100644 --- 
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java @@ -34,6 +34,7 @@ import java.util.Map; import java.util.Map.Entry; import java.util.concurrent.Callable; +import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import org.apache.commons.logging.Log; @@ -651,6 +652,77 @@ public void cleanupContainer() throws IOException { } /** + * Save debug information about a container that is about to be cleaned up. + * Runs the configured command to collect data such as a heap dump. + * This method does not clean up the container; the caller does that next. + */ + void cleanupSavingDebugInfo() { + // get process id from pid file or shell + String processId = null; + if (pidFilePath != null) { + try { + processId = getContainerPid(pidFilePath); + } catch (Exception e) { + LOG.warn("Could not collect PID information for debugging ", e); + } + } + + // Check if debug info collection is enabled + String command = null; + Integer timeoutSec = null; + if (conf != null) { + command = conf.get(YarnConfiguration.NM_SAVE_DEBUG_INFO_COMMAND, + YarnConfiguration.DEFAULT_NM_SAVE_DEBUG_INFO_COMMAND); + timeoutSec = + conf.getInt(YarnConfiguration.NM_SAVE_DEBUG_INFO_TIMEOUT_SEC, + YarnConfiguration.DEFAULT_NM_SAVE_DEBUG_INFO_TIMEOUT_SEC); + } + + if (command != null && + !command.isEmpty() && + processId != null && + !processId.isEmpty()) { + // Replace current PID and log directory in the command + final CharSequence pid = "{{PID}}"; + final CharSequence logDir = "{{LOG_DIR}}"; + String specializedCommand = command + .replace(pid, processId) + .replace(logDir, container.getLogDir()); + + // Build process as a shell script + String[] args = new String[3]; + args[0] = Shell.WINDOWS ? 
"cmd" : "sh"; + args[1] = Shell.WINDOWS ? "/c" : "-c"; + args[2] = specializedCommand; + + // Run collection command + ProcessBuilder builder = new ProcessBuilder(); + builder.command(args); + builder.directory(new File(container.getWorkDir())); + Process process = null; + try { + process = builder.start(); + } catch (IOException e) { + LOG.info("Could not start collecting debug information ", e); + } + + // Wait for collection command + try { + if (process != null && + !process.waitFor(timeoutSec, TimeUnit.SECONDS)) { + // Timeout expired + process.destroy(); + LOG.info("Collection of debug information did not finish in " + + timeoutSec.toString() + " seconds. Collection killed."); + } + } catch (InterruptedException e) { + LOG.info("Collection of debug information interrupted."); + process.destroy(); + } + } + } + + /** * Send a signal to the container. * * diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainersLauncher.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainersLauncher.java index d4a7bfdacf03d9dfbef87c0ee7c88e8f36d56bc3..092a0aac165f7216d069f30332d47eaec57b0c20 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainersLauncher.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainersLauncher.java @@ -107,69 +107,79 @@ public void handle(ContainersLauncherEvent event) { Container container = event.getContainer(); ContainerId containerId = container.getContainerId(); switch (event.getType()) { - case LAUNCH_CONTAINER: - Application app = + case 
LAUNCH_CONTAINER: + Application app = context.getApplications().get( + containerId.getApplicationAttemptId().getApplicationId()); + + ContainerLaunch launch = + new ContainerLaunch(context, getConfig(), dispatcher, exec, app, + event.getContainer(), dirsHandler, containerManager); + containerLauncher.submit(launch); + running.put(containerId, launch); + break; + case RELAUNCH_CONTAINER: + app = context.getApplications().get( containerId.getApplicationAttemptId().getApplicationId()); - ContainerLaunch launch = - new ContainerLaunch(context, getConfig(), dispatcher, exec, app, + ContainerRelaunch relaunch = + new ContainerRelaunch(context, getConfig(), dispatcher, exec, app, event.getContainer(), dirsHandler, containerManager); - containerLauncher.submit(launch); - running.put(containerId, launch); - break; - case RELAUNCH_CONTAINER: - app = context.getApplications().get( - containerId.getApplicationAttemptId().getApplicationId()); - - ContainerRelaunch relaunch = - new ContainerRelaunch(context, getConfig(), dispatcher, exec, app, - event.getContainer(), dirsHandler, containerManager); - containerLauncher.submit(relaunch); - running.put(containerId, relaunch); - break; - case RECOVER_CONTAINER: - app = context.getApplications().get( - containerId.getApplicationAttemptId().getApplicationId()); - launch = new RecoveredContainerLaunch(context, getConfig(), dispatcher, - exec, app, event.getContainer(), dirsHandler, containerManager); - containerLauncher.submit(launch); - running.put(containerId, launch); - break; - case CLEANUP_CONTAINER: - case CLEANUP_CONTAINER_FOR_REINIT: - ContainerLaunch launcher = running.remove(containerId); - if (launcher == null) { - // Container not launched. So nothing needs to be done. - return; - } - - // Cleanup a container whether it is running/killed/completed, so that - // no sub-processes are alive. 
- try { - launcher.cleanupContainer(); - } catch (IOException e) { - LOG.warn("Got exception while cleaning container " + containerId - + ". Ignoring."); - } - break; - case SIGNAL_CONTAINER: - SignalContainersLauncherEvent signalEvent = - (SignalContainersLauncherEvent) event; - ContainerLaunch runningContainer = running.get(containerId); - if (runningContainer == null) { - // Container not launched. So nothing needs to be done. - LOG.info("Container " + containerId + " not running, nothing to signal."); - return; - } - - try { - runningContainer.signalContainer(signalEvent.getCommand()); - } catch (IOException e) { - LOG.warn("Got exception while signaling container " + containerId - + " with command " + signalEvent.getCommand()); - } - break; + containerLauncher.submit(relaunch); + running.put(containerId, relaunch); + break; + case RECOVER_CONTAINER: + app = context.getApplications().get( + containerId.getApplicationAttemptId().getApplicationId()); + launch = new RecoveredContainerLaunch(context, getConfig(), dispatcher, + exec, app, event.getContainer(), dirsHandler, containerManager); + containerLauncher.submit(launch); + running.put(containerId, launch); + break; + case DEBUG_AND_CLEANUP_CONTAINER: + ContainerLaunch launcherToDebug = running.get(containerId); + if (launcherToDebug != null) { + // Save debug information like heap dump + launcherToDebug.cleanupSavingDebugInfo(); + } + // Clean up the container as the next step + container.sendCleanupEvent(); + break; + case CLEANUP_CONTAINER: + case CLEANUP_CONTAINER_FOR_REINIT: + ContainerLaunch launcher = running.remove(containerId); + if (launcher == null) { + // Container not launched. So nothing needs to be done. + return; + } + + // Cleanup a container whether it is running/killed/completed, so that + // no sub-processes are alive. + try { + launcher.cleanupContainer(); + } catch (IOException e) { + LOG.warn("Got exception while cleaning container " + containerId + + ". 
Ignoring."); + } + break; + case SIGNAL_CONTAINER: + SignalContainersLauncherEvent signalEvent = + (SignalContainersLauncherEvent) event; + ContainerLaunch runningContainer = running.get(containerId); + if (runningContainer == null) { + // Container not launched. So nothing needs to be done. + LOG.info( + "Container " + containerId + " not running, nothing to signal."); + return; + } + + try { + runningContainer.signalContainer(signalEvent.getCommand()); + } catch (IOException e) { + LOG.warn("Got exception while signaling container " + containerId + + " with command " + signalEvent.getCommand()); + } + break; } } } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainersLauncherEventType.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainersLauncherEventType.java index 380a032ca78f0d5c6e9d2a3c34a484348e932a87..4444edbf80c3022c6d0206d1fbcee94dad29432b 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainersLauncherEventType.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainersLauncherEventType.java @@ -23,6 +23,7 @@ RELAUNCH_CONTAINER, RECOVER_CONTAINER, CLEANUP_CONTAINER, // The process(grp) itself. + DEBUG_AND_CLEANUP_CONTAINER, // Save debug info, then clean up the process(grp). CLEANUP_CONTAINER_FOR_REINIT, // The process(grp) itself. 
SIGNAL_CONTAINER, } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/TestContainerLaunch.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/TestContainerLaunch.java index 4ce816a66f710d6d3adcfd7096559e7fb5110563..9e7c064195afe71c7c2004a158868515435912ab 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/TestContainerLaunch.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/TestContainerLaunch.java @@ -21,6 +21,7 @@ import static org.apache.hadoop.test.PlatformAssumptions.assumeWindows; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertThat; +import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; @@ -44,6 +45,7 @@ import java.util.jar.Manifest; import org.apache.commons.codec.binary.Base64; +import org.apache.commons.io.FileUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.fs.Path; @@ -909,9 +911,16 @@ public void testAuxiliaryServiceHelper() throws Exception { AuxiliaryServiceHelper.getServiceDataFromEnv(serviceName, env)); } - private void internalKillTest(boolean delayed) throws Exception { + /** + * Run a container kill test. 
+ * @param delayed Delayed stop + * @param limitViolation Simulate memory limit violation + * @throws Exception An exception has occurred + */ + private void internalKillTest(boolean delayed, boolean limitViolation) + throws Exception { conf.setLong(YarnConfiguration.NM_SLEEP_DELAY_BEFORE_SIGKILL_MS, - delayed ? 1000 : 0); + delayed ? 1000 : 0); containerManager.start(); // ////// Construct the Container-id @@ -987,17 +996,23 @@ private void internalKillTest(boolean delayed) throws Exception { Assert.assertTrue("ProcessStartFile doesn't exist!", processStartFile.exists()); + Container container = containerManager.getContext().getContainers() + .get(cId); NMContainerStatus nmContainerStatus = - containerManager.getContext().getContainers().get(cId) - .getNMContainerStatus(); + container.getNMContainerStatus(); Assert.assertEquals(priority, nmContainerStatus.getPriority()); // Now test the stop functionality. List containerIds = new ArrayList(); containerIds.add(cId); - StopContainersRequest stopRequest = - StopContainersRequest.newInstance(containerIds); - containerManager.stopContainers(stopRequest); + + if (limitViolation) { + container.sendKillEvent(ContainerExitStatus.KILLED_EXCEEDED_VMEM, ""); + } else { + StopContainersRequest stopRequest = + StopContainersRequest.newInstance(containerIds); + containerManager.stopContainers(stopRequest); + } BaseContainerManagerTest.waitForContainerState(containerManager, cId, ContainerState.COMPLETE); @@ -1010,7 +1025,10 @@ private void internalKillTest(boolean delayed) throws Exception { ContainerStatus containerStatus = containerManager.getContainerStatuses(gcsRequest) .getContainerStatuses().get(0); - Assert.assertEquals(ContainerExitStatus.KILLED_BY_APPMASTER, + Assert.assertEquals( + limitViolation + ? ContainerExitStatus.KILLED_EXCEEDED_VMEM + : ContainerExitStatus.KILLED_BY_APPMASTER, containerStatus.getExitStatus()); // Now verify the contents of the file. 
Script generates a message when it @@ -1043,12 +1061,96 @@ private void internalKillTest(boolean delayed) throws Exception { @Test (timeout = 30000) public void testDelayedKill() throws Exception { - internalKillTest(true); + internalKillTest(true, false); } @Test (timeout = 30000) public void testImmediateKill() throws Exception { - internalKillTest(false); + internalKillTest(false, false); + } + + /** + * Setup collection of debug information on container kill. + * @return the path to a canary file showing that the script ran. + */ + private File setupCollectDebugInformation(String command) { + File canary = new File("canary.txt"); + if (canary.exists()) { + assertTrue("Canary should be deleted", canary.delete()); + } + conf.set(YarnConfiguration.NM_SAVE_DEBUG_INFO_COMMAND, + command.replace("", canary.getAbsolutePath())); + return canary; + } + + /** + * Cleanup collection of debug information on container kill. + */ + private void cleanupCollectDebugInformation(File canary) { + if (canary.exists()) { + assertTrue("Canary should be deleted", canary.delete()); + } + } + + + /** + * Test, if the debug hook on container kill collects a PID. + * @throws Exception An error occurred + */ + @Test (timeout = 30000) + public void testImmediateKillWithDebugInfoCollectionPid() throws Exception { + File canary = setupCollectDebugInformation( + "echo {{PID}} >"); + + internalKillTest(false, true); + + Assert.assertTrue("The script should have run", canary.exists()); + String pidString = FileUtils.readFileToString(canary); + int pid = Integer.parseInt(pidString.trim()); + Assert.assertNotEquals(0, pid); + + cleanupCollectDebugInformation(canary); + } + + /** + * Test, if the debug hook on container kill collects a log dir. 
+ * @throws Exception An error occurred + */ + @Test (timeout = 30000) + public void testImmediateKillWithDebugInfoCollectionLogDir() + throws Exception { + conf.set(YarnConfiguration.NM_LOG_DIRS, + new File(".").getAbsolutePath()); + File canary = setupCollectDebugInformation( + "echo {{LOG_DIR}} >"); + + internalKillTest(false, true); + + Assert.assertTrue("The script should have run", canary.exists()); + String dirString = FileUtils.readFileToString(canary); + File logDir = new File(dirString.trim()); + Assert.assertTrue( + "We should get the expected log dir", + logDir.getAbsolutePath().startsWith(localLogDir.getAbsolutePath())); + + cleanupCollectDebugInformation(canary); + } + + /** + * Test, if the debug hook on container kill can be killed gracefully. + * @throws Exception An error occurred + */ + @Test (timeout = 30000) + public void testImmediateKillWithDebugInfoCollectionTimeout() + throws Exception { + File canary = setupCollectDebugInformation( + "sleep 100 && echo {{PID}} {{LOG_DIR}}>"); + conf.setInt(YarnConfiguration.NM_SAVE_DEBUG_INFO_TIMEOUT_SEC, 1); + internalKillTest(false, true); + Assert.assertFalse("The script should have been interrupted", + canary.exists()); + + cleanupCollectDebugInformation(canary); } @SuppressWarnings("rawtypes") diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/MockContainer.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/MockContainer.java index 686a0d9fdd5c2ca51ac81d4d3415ccf68149d5e6..92f48863275adc827cda2e3d6ab8be01b487c0cb 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/MockContainer.java +++ 
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/MockContainer.java @@ -227,6 +227,11 @@ public void sendLaunchEvent() { } @Override + public void sendCleanupEvent() { + + } + + @Override public void sendKillEvent(int exitStatus, String description) { }