diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ContainerExitStatus.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ContainerExitStatus.java index 7122578..9afe0b4 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ContainerExitStatus.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ContainerExitStatus.java @@ -46,4 +46,18 @@ * Containers preempted by the framework. */ public static final int PREEMPTED = -102; + + /** + * Containers killed by the framework due to exceeded virtual or + * physical memory usage. + */ + public static final int KILL_EXCEEDED_VMEM = -103; + public static final int KILL_EXCEEDED_PMEM = -104; + + public static boolean isAMAware(int exitReason) { + return (exitReason == DISKS_FAILED) + || (exitReason == KILL_EXCEEDED_PMEM) + || (exitReason == KILL_EXCEEDED_VMEM); + } + } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java index 50653f5..8f708c5 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java @@ -761,7 +761,7 @@ public void transition(ContainerImpl container, ContainerEvent event) { container.cleanup(); container.metrics.endInitingContainer(); ContainerKillEvent killEvent = (ContainerKillEvent) event; - container.exitCode = ExitCode.TERMINATED.getExitCode(); + container.exitCode = killEvent.getReason(); container.diagnostics.append(killEvent.getDiagnostic()).append("\n"); container.diagnostics.append("Container is killed before being launched.\n"); } @@ -805,6 +805,7 @@ public void transition(ContainerImpl container, ContainerEvent event) { ContainersLauncherEventType.CLEANUP_CONTAINER)); ContainerKillEvent killEvent = (ContainerKillEvent) event; container.diagnostics.append(killEvent.getDiagnostic()).append("\n"); + container.exitCode = killEvent.getReason(); } } @@ -817,7 +818,12 @@ public void transition(ContainerImpl container, ContainerEvent event) { @Override public void transition(ContainerImpl container, ContainerEvent event) { ContainerExitEvent exitEvent = (ContainerExitEvent) event; - container.exitCode = exitEvent.getExitCode(); + // If the containers' exit code is AMAware, pass the exit code before + // this transition to AM directly for handling the event. + if (!ContainerExitStatus.isAMAware(container.exitCode)) { + container.exitCode = exitEvent.getExitCode(); + } + if (exitEvent.getDiagnosticInfo() != null) { container.diagnostics.append(exitEvent.getDiagnosticInfo()) .append('\n'); @@ -859,7 +865,7 @@ public void transition(ContainerImpl container, ContainerEvent event) { @Override public void transition(ContainerImpl container, ContainerEvent event) { ContainerKillEvent killEvent = (ContainerKillEvent) event; - container.exitCode = ExitCode.TERMINATED.getExitCode(); + container.exitCode = killEvent.getReason(); container.diagnostics.append(killEvent.getDiagnostic()).append("\n"); container.diagnostics.append("Container is killed before being launched.\n"); super.transition(container, event); diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerKillEvent.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerKillEvent.java index 313b6a8..0bbe6c85 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerKillEvent.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerKillEvent.java @@ -18,18 +18,37 @@ package org.apache.hadoop.yarn.server.nodemanager.containermanager.container; +import org.apache.hadoop.yarn.api.records.ContainerExitStatus; import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor; public class ContainerKillEvent extends ContainerEvent { private final String diagnostic; + private final int reason; public ContainerKillEvent(ContainerId cID, String diagnostic) { + this(cID, ContainerExitStatus.INVALID, diagnostic); + } + + public ContainerKillEvent(ContainerId cID, + int reason, String diagnostic) { super(cID, ContainerEventType.KILL_CONTAINER); + this.reason = reason; this.diagnostic = diagnostic; } public String getDiagnostic() { return this.diagnostic; } + + public int getReason() { + if (ContainerExitStatus.isAMAware(this.reason)) { + return this.reason; + } else { + // hack for backward compatibility. + return ContainerExecutor.ExitCode.TERMINATED.getExitCode(); + } + } + } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java index b681b34..4940206 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java @@ -30,6 +30,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.service.AbstractService; import org.apache.hadoop.util.StringUtils.TraditionalBinaryPrefix; +import org.apache.hadoop.yarn.api.records.ContainerExitStatus; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.event.AsyncDispatcher; @@ -403,6 +404,7 @@ public void run() { boolean isMemoryOverLimit = false; String msg = ""; + int containerExitStatus = ContainerExitStatus.INVALID; if (isVmemCheckEnabled() && isProcessTreeOverLimit(containerId.toString(), currentVmemUsage, curMemUsageOfAgedProcesses, vmemLimit)) { @@ -414,6 +416,7 @@ public void run() { currentPmemUsage, pmemLimit, pId, containerId, pTree); isMemoryOverLimit = true; + containerExitStatus = ContainerExitStatus.KILL_EXCEEDED_VMEM; } else if (isPmemCheckEnabled() && isProcessTreeOverLimit(containerId.toString(), currentPmemUsage, curRssMemUsageOfAgedProcesses, @@ -426,6 +429,7 @@ public void run() { currentPmemUsage, pmemLimit, pId, containerId, pTree); isMemoryOverLimit = true; + containerExitStatus = ContainerExitStatus.KILL_EXCEEDED_PMEM; } if (isMemoryOverLimit) { @@ -440,7 +444,8 @@ public void run() { } // kill the container eventDispatcher.getEventHandler().handle( - new ContainerKillEvent(containerId, msg)); + new ContainerKillEvent(containerId, + containerExitStatus, msg)); it.remove(); LOG.info("Removed ProcessTree with root " + pId); } else { diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainersMonitor.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainersMonitor.java index 1102ebb..7996f95 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainersMonitor.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainersMonitor.java @@ -18,6 +18,7 @@ package org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor; +import org.apache.hadoop.yarn.api.records.ContainerExitStatus; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; @@ -270,7 +271,7 @@ public void testContainerKillOnMemoryOverflow() throws IOException, GetContainerStatusesRequest.newInstance(containerIds); ContainerStatus containerStatus = containerManager.getContainerStatuses(gcsRequest).getContainerStatuses().get(0); - Assert.assertEquals(ExitCode.TERMINATED.getExitCode(), + Assert.assertEquals(ContainerExitStatus.KILL_EXCEEDED_VMEM, containerStatus.getExitStatus()); String expectedMsgPattern = "Container \\[pid=" + pid + ",containerID=" + cId