diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ContainerExitStatus.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ContainerExitStatus.java index 7122578..62908ff 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ContainerExitStatus.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ContainerExitStatus.java @@ -46,4 +46,27 @@ * Containers preempted by the framework. */ public static final int PREEMPTED = -102; + + /** + * Containers killed by the framework for exceeding their virtual or + * physical memory limit. + */ + public static final int KILL_EXCEEDED_VMEM = -103; + public static final int KILL_EXCEEDED_PMEM = -104; + + /** + * Containers killed at the ApplicationMaster's request. + */ + public static final int KILL_BY_APPMASTER = -105; + + /** + * Containers killed at the ResourceManager's request (e.g. resync). + */ + public static final int KILL_BY_RESOURCEMANAGER = -106; + + /** + * Containers killed by the ResourceManager because the application has finished.
+ */ + public static final int KILL_FINISHED_APPMASTER = -107; + } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java index 750c11a..834f023 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java @@ -64,6 +64,7 @@ import org.apache.hadoop.yarn.api.protocolrecords.StopContainersRequest; import org.apache.hadoop.yarn.api.protocolrecords.StopContainersResponse; import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.api.records.ContainerExitStatus; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; import org.apache.hadoop.yarn.api.records.ContainerState; @@ -738,7 +739,8 @@ private void stopContainerInternal(NMTokenIdentifier nmTokenIdentifier, } else { dispatcher.getEventHandler().handle( new ContainerKillEvent(containerID, - "Container killed by the ApplicationMaster.")); + ContainerExitStatus.KILL_BY_APPMASTER, + "Container killed by the ApplicationMaster.")); NMAuditLogger.logSuccess(container.getUser(), AuditConstants.STOP_CONTAINER, "ContainerManageImpl", containerID @@ -887,6 +889,7 @@ public void handle(ContainerManagerEvent event) { .getContainersToCleanup()) { this.dispatcher.getEventHandler().handle( new ContainerKillEvent(container, + ContainerExitStatus.KILL_BY_RESOURCEMANAGER, "Container Killed by ResourceManager")); } 
break; diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/application/ApplicationImpl.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/application/ApplicationImpl.java index 21d2f91..3d459de 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/application/ApplicationImpl.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/application/ApplicationImpl.java @@ -30,6 +30,7 @@ import org.apache.hadoop.security.Credentials; import org.apache.hadoop.yarn.api.records.ApplicationAccessType; import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.api.records.ContainerExitStatus; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.event.Dispatcher; import org.apache.hadoop.yarn.logaggregation.ContainerLogsRetentionPolicy; @@ -375,6 +376,7 @@ public ApplicationState transition(ApplicationImpl app, for (ContainerId containerID : app.containers.keySet()) { app.dispatcher.getEventHandler().handle( new ContainerKillEvent(containerID, + ContainerExitStatus.KILL_FINISHED_APPMASTER, "Container killed on application-finish event: " + appEvent.getDiagnostic())); } return ApplicationState.FINISHING_CONTAINERS_WAIT; diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java 
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java index 1b683a1..cb5694e 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java @@ -48,7 +48,6 @@ import org.apache.hadoop.yarn.event.EventHandler; import org.apache.hadoop.yarn.security.ContainerTokenIdentifier; import org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus; -import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor.ExitCode; import org.apache.hadoop.yarn.server.nodemanager.NMAuditLogger; import org.apache.hadoop.yarn.server.nodemanager.NMAuditLogger.AuditConstants; import org.apache.hadoop.yarn.server.nodemanager.containermanager.AuxServicesEvent; @@ -773,7 +772,7 @@ public void transition(ContainerImpl container, ContainerEvent event) { container.cleanup(); container.metrics.endInitingContainer(); ContainerKillEvent killEvent = (ContainerKillEvent) event; - container.exitCode = ExitCode.TERMINATED.getExitCode(); + container.exitCode = killEvent.getReason(); container.diagnostics.append(killEvent.getDiagnostic()).append("\n"); container.diagnostics.append("Container is killed before being launched.\n"); } @@ -817,6 +816,7 @@ public void transition(ContainerImpl container, ContainerEvent event) { ContainersLauncherEventType.CLEANUP_CONTAINER)); ContainerKillEvent killEvent = (ContainerKillEvent) event; container.diagnostics.append(killEvent.getDiagnostic()).append("\n"); + container.exitCode = killEvent.getReason(); } } @@ -829,7 +829,6 @@ public void transition(ContainerImpl container, ContainerEvent event) { @Override public 
void transition(ContainerImpl container, ContainerEvent event) { ContainerExitEvent exitEvent = (ContainerExitEvent) event; - container.exitCode = exitEvent.getExitCode(); if (exitEvent.getDiagnosticInfo() != null) { container.diagnostics.append(exitEvent.getDiagnosticInfo()) .append('\n'); @@ -853,7 +852,7 @@ public void transition(ContainerImpl container, ContainerEvent event) { @SuppressWarnings("unchecked") public void transition(ContainerImpl container, ContainerEvent event) { container.finished(); - //if the current state is NEW it means the CONTAINER_INIT was never + //if the current state is NEW it means the CONTAINER_INIT was never // sent for the event, thus no need to send the CONTAINER_STOP if (container.getCurrentState() != org.apache.hadoop.yarn.api.records.ContainerState.NEW) { @@ -871,7 +870,7 @@ public void transition(ContainerImpl container, ContainerEvent event) { @Override public void transition(ContainerImpl container, ContainerEvent event) { ContainerKillEvent killEvent = (ContainerKillEvent) event; - container.exitCode = ExitCode.TERMINATED.getExitCode(); + container.exitCode = killEvent.getReason(); container.diagnostics.append(killEvent.getDiagnostic()).append("\n"); container.diagnostics.append("Container is killed before being launched.\n"); super.transition(container, event); diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerKillEvent.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerKillEvent.java index 313b6a8..f4f1a16 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerKillEvent.java +++ 
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerKillEvent.java @@ -23,13 +23,21 @@ public class ContainerKillEvent extends ContainerEvent { private final String diagnostic; + private final int reason; - public ContainerKillEvent(ContainerId cID, String diagnostic) { + public ContainerKillEvent(ContainerId cID, + int reason, String diagnostic) { super(cID, ContainerEventType.KILL_CONTAINER); + this.reason = reason; this.diagnostic = diagnostic; } public String getDiagnostic() { return this.diagnostic; } + + public int getReason() { + return this.reason; + } + } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java index b681b34..4940206 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java @@ -30,6 +30,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.service.AbstractService; import org.apache.hadoop.util.StringUtils.TraditionalBinaryPrefix; +import org.apache.hadoop.yarn.api.records.ContainerExitStatus; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.event.AsyncDispatcher; @@ -403,6 +404,7 @@ public void run() { boolean isMemoryOverLimit = false; String msg = ""; 
+ int containerExitStatus = ContainerExitStatus.INVALID; if (isVmemCheckEnabled() && isProcessTreeOverLimit(containerId.toString(), currentVmemUsage, curMemUsageOfAgedProcesses, vmemLimit)) { @@ -414,6 +416,7 @@ public void run() { currentPmemUsage, pmemLimit, pId, containerId, pTree); isMemoryOverLimit = true; + containerExitStatus = ContainerExitStatus.KILL_EXCEEDED_VMEM; } else if (isPmemCheckEnabled() && isProcessTreeOverLimit(containerId.toString(), currentPmemUsage, curRssMemUsageOfAgedProcesses, @@ -426,6 +429,7 @@ public void run() { currentPmemUsage, pmemLimit, pId, containerId, pTree); isMemoryOverLimit = true; + containerExitStatus = ContainerExitStatus.KILL_EXCEEDED_PMEM; } if (isMemoryOverLimit) { @@ -440,7 +444,8 @@ public void run() { } // kill the container eventDispatcher.getEventHandler().handle( - new ContainerKillEvent(containerId, msg)); + new ContainerKillEvent(containerId, + containerExitStatus, msg)); it.remove(); LOG.info("Removed ProcessTree with root " + pId); } else { diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java index 53a3c4b..4de12b5 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java @@ -31,6 +31,7 @@ import java.util.List; import java.util.Map; +import org.apache.hadoop.yarn.api.records.ContainerExitStatus; import org.junit.Assert; import org.apache.commons.logging.LogFactory; @@ -348,8 +349,7 
@@ public void testContainerLaunchAndStop() throws IOException, GetContainerStatusesRequest.newInstance(containerIds); ContainerStatus containerStatus = containerManager.getContainerStatuses(gcsRequest).getContainerStatuses().get(0); - int expectedExitCode = Shell.WINDOWS ? ExitCode.FORCE_KILLED.getExitCode() : - ExitCode.TERMINATED.getExitCode(); + int expectedExitCode = ContainerExitStatus.KILL_BY_APPMASTER; Assert.assertEquals(expectedExitCode, containerStatus.getExitStatus()); // Assert that the process is not alive anymore diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java index 8af9518..bef7105 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.yarn.server.nodemanager.containermanager.container; +import org.apache.hadoop.yarn.api.records.ContainerExitStatus; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNull; @@ -319,7 +320,7 @@ public void testKillOnNew() throws Exception { assertEquals(ContainerState.NEW, wc.c.getContainerState()); wc.killContainer(); assertEquals(ContainerState.DONE, wc.c.getContainerState()); - assertEquals(ExitCode.TERMINATED.getExitCode(), + assertEquals(ContainerExitStatus.KILL_BY_RESOURCEMANAGER, wc.c.cloneAndGetContainerStatus().getExitStatus()); 
assertTrue(wc.c.cloneAndGetContainerStatus().getDiagnostics() .contains("KillRequest")); @@ -339,7 +340,7 @@ public void testKillOnLocalizing() throws Exception { assertEquals(ContainerState.LOCALIZING, wc.c.getContainerState()); wc.killContainer(); assertEquals(ContainerState.KILLING, wc.c.getContainerState()); - assertEquals(ExitCode.TERMINATED.getExitCode(), + assertEquals(ContainerExitStatus.KILL_BY_RESOURCEMANAGER, wc.c.cloneAndGetContainerStatus().getExitStatus()); assertTrue(wc.c.cloneAndGetContainerStatus().getDiagnostics() .contains("KillRequest")); @@ -898,12 +899,14 @@ public void containerFailed(int exitCode) { } public void killContainer() { - c.handle(new ContainerKillEvent(cId, "KillRequest")); + c.handle(new ContainerKillEvent(cId, + ContainerExitStatus.KILL_BY_RESOURCEMANAGER, + "KillRequest")); drainDispatcherEvents(); } public void containerKilledOnRequest() { - int exitCode = ExitCode.FORCE_KILLED.getExitCode(); + int exitCode = ContainerExitStatus.KILL_BY_RESOURCEMANAGER; String diagnosticMsg = "Container completed with exit code " + exitCode; c.handle(new ContainerExitEvent(cId, ContainerEventType.CONTAINER_KILLED_ON_REQUEST, exitCode, diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/TestContainerLaunch.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/TestContainerLaunch.java index c8fc85a..88de4b9 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/TestContainerLaunch.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/TestContainerLaunch.java @@ -18,6 +18,7 @@ 
package org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher; +import org.apache.hadoop.yarn.api.records.ContainerExitStatus; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertThat; import static org.junit.Assert.fail; @@ -604,8 +605,7 @@ public void testContainerEnvVariables() throws Exception { GetContainerStatusesRequest.newInstance(containerIds); ContainerStatus containerStatus = containerManager.getContainerStatuses(gcsRequest).getContainerStatuses().get(0); - int expectedExitCode = Shell.WINDOWS ? ExitCode.FORCE_KILLED.getExitCode() : - ExitCode.TERMINATED.getExitCode(); + int expectedExitCode = ContainerExitStatus.KILL_BY_APPMASTER; Assert.assertEquals(expectedExitCode, containerStatus.getExitStatus()); // Assert that the process is not alive anymore @@ -717,7 +717,7 @@ private void internalKillTest(boolean delayed) throws Exception { ContainerStatus containerStatus = containerManager.getContainerStatuses(gcsRequest) .getContainerStatuses().get(0); - Assert.assertEquals(ExitCode.FORCE_KILLED.getExitCode(), + Assert.assertEquals(ContainerExitStatus.KILL_BY_APPMASTER, containerStatus.getExitStatus()); // Now verify the contents of the file. 
Script generates a message when it diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainersMonitor.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainersMonitor.java index 1102ebb..af110f8 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainersMonitor.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainersMonitor.java @@ -18,6 +18,7 @@ package org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor; +import org.apache.hadoop.yarn.api.records.ContainerExitStatus; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; @@ -60,7 +61,6 @@ import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.security.ContainerTokenIdentifier; import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor; -import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor.ExitCode; import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor.Signal; import org.apache.hadoop.yarn.server.nodemanager.Context; import org.apache.hadoop.yarn.server.nodemanager.containermanager.BaseContainerManagerTest; @@ -270,7 +270,7 @@ public void testContainerKillOnMemoryOverflow() throws IOException, GetContainerStatusesRequest.newInstance(containerIds); ContainerStatus containerStatus = containerManager.getContainerStatuses(gcsRequest).getContainerStatuses().get(0); - Assert.assertEquals(ExitCode.TERMINATED.getExitCode(), + 
Assert.assertEquals(ContainerExitStatus.KILL_EXCEEDED_VMEM, containerStatus.getExitStatus()); String expectedMsgPattern = "Container \\[pid=" + pid + ",containerID=" + cId