diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/Container.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/Container.java index 56b4fdd..c2430ed 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/Container.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/Container.java @@ -53,6 +53,12 @@ NMContainerStatus getNMContainerStatus(); + void setPendingKillEvent(ContainerKillEvent event); + + ContainerKillEvent getPendingKillEvent(); + + boolean isContainerRecovered(); + String toString(); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java index 3c76596..73d8afa 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java @@ -124,6 +124,9 @@ RecoveredContainerStatus.REQUESTED; // whether container was marked as killed after recovery private boolean recoveredAsKilled = false; + // pending kill event while container is In New State and to be recovered + private ContainerKillEvent pendingKillEvent; + private boolean isContainerRecovered = false; public ContainerImpl(Configuration conf, Dispatcher dispatcher, NMStateStoreService stateStore, ContainerLaunchContext launchContext, @@ -156,6 +159,7 @@ public ContainerImpl(Configuration conf, Dispatcher dispatcher, String diagnostics, boolean wasKilled) { this(conf, dispatcher, stateStore, launchContext, creds, metrics, containerTokenIdentifier); + this.isContainerRecovered = true; this.recoveredStatus = recoveredStatus; this.exitCode = exitCode; this.recoveredAsKilled = wasKilled; @@ -180,7 +184,8 @@ public ContainerImpl(Configuration conf, Dispatcher dispatcher, .addTransition(ContainerState.NEW, ContainerState.NEW, ContainerEventType.UPDATE_DIAGNOSTICS_MSG, UPDATE_DIAGNOSTICS_TRANSITION) - .addTransition(ContainerState.NEW, ContainerState.DONE, + .addTransition(ContainerState.NEW, + EnumSet.of(ContainerState.NEW, ContainerState.DONE), ContainerEventType.KILL_CONTAINER, new KillOnNewTransition()) // From LOCALIZING State @@ -574,8 +579,10 @@ public ContainerState transition(ContainerImpl container, if (container.recoveredStatus == RecoveredContainerStatus.COMPLETED) { container.sendFinishedEvents(); return ContainerState.DONE; - } else if (container.recoveredAsKilled && + } else if ((container.recoveredAsKilled || + container.getPendingKillEvent() != null) && container.recoveredStatus == RecoveredContainerStatus.REQUESTED) { + container.cleanup(); // container was killed but never launched container.metrics.killedContainer(); NMAuditLogger.logSuccess(container.user, @@ -771,6 +778,11 @@ public void transition(ContainerImpl container, ContainerEvent event) { container.dispatcher.getEventHandler().handle( new ContainersLauncherEvent(container, ContainersLauncherEventType.CLEANUP_CONTAINER)); + } else { + ContainerKillEvent killEvent = container.getPendingKillEvent(); + if (killEvent != null) { + container.dispatcher.getEventHandler().handle(killEvent); + } } } } @@ -983,21 +995,32 @@ public void transition(ContainerImpl container, ContainerEvent event) { /** * Handle the following transition: + * - NEW -> NEW if container is recovered * - NEW -> DONE upon KILL_CONTAINER */ - static class KillOnNewTransition extends ContainerDoneTransition { + static class KillOnNewTransition implements + MultipleArcTransition { @Override - public void transition(ContainerImpl container, ContainerEvent event) { - ContainerKillEvent killEvent = (ContainerKillEvent) event; - container.exitCode = killEvent.getContainerExitStatus(); - container.addDiagnostics(killEvent.getDiagnostic(), "\n"); - container.addDiagnostics("Container is killed before being launched.\n"); - container.metrics.killedContainer(); - NMAuditLogger.logSuccess(container.user, - AuditConstants.FINISH_KILLED_CONTAINER, "ContainerImpl", - container.containerId.getApplicationAttemptId().getApplicationId(), - container.containerId); - super.transition(container, event); + public ContainerState transition(ContainerImpl container, + ContainerEvent event) { + if (container.isContainerRecovered()) { + container.setPendingKillEvent((ContainerKillEvent) event); + return ContainerState.NEW; + } else { + ContainerKillEvent killEvent = (ContainerKillEvent) event; + container.exitCode = killEvent.getContainerExitStatus(); + container.addDiagnostics(killEvent.getDiagnostic(), "\n"); + container.addDiagnostics( + "Container is killed before being launched.\n"); + container.metrics.killedContainer(); + NMAuditLogger.logSuccess(container.user, + AuditConstants.FINISH_KILLED_CONTAINER, "ContainerImpl", + container.containerId.getApplicationAttemptId().getApplicationId(), + container.containerId); + container.metrics.releaseContainer(container.resource); + container.sendFinishedEvents(); + return ContainerState.DONE; + } } } @@ -1136,6 +1159,21 @@ public void handle(ContainerEvent event) { } @Override + public void setPendingKillEvent(ContainerKillEvent event){ + pendingKillEvent = event; + } + + @Override + public ContainerKillEvent getPendingKillEvent() { + return pendingKillEvent; + } + + @Override + public boolean isContainerRecovered() { + return isContainerRecovered; + } + + @Override public String toString() { this.readLock.lock(); try { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/MockContainer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/MockContainer.java index b2ccb61..68ab27a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/MockContainer.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/MockContainer.java @@ -39,6 +39,7 @@ import org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerEvent; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerKillEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerState; import org.apache.hadoop.yarn.server.utils.BuilderUtils; @@ -140,4 +141,19 @@ public ContainerTokenIdentifier getContainerTokenIdentifier() { public NMContainerStatus getNMContainerStatus() { return null; } + + @Override + public void setPendingKillEvent(ContainerKillEvent containerKillEvent) { + + } + + @Override + public ContainerKillEvent getPendingKillEvent() { + return null; + } + + @Override + public boolean isContainerRecovered() { + return false; + } }