diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java index 6b65a54..2c39169 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java @@ -155,9 +155,6 @@ public ContainerImpl(Configuration conf, Dispatcher dispatcher, this.diagnostics.append(diagnostics); } - private static final ContainerDoneTransition CONTAINER_DONE_TRANSITION = - new ContainerDoneTransition(); - private static final ContainerDiagnosticsUpdateTransition UPDATE_DIAGNOSTICS_TRANSITION = new ContainerDiagnosticsUpdateTransition(); @@ -198,7 +195,7 @@ public ContainerImpl(Configuration conf, Dispatcher dispatcher, .addTransition(ContainerState.LOCALIZATION_FAILED, ContainerState.DONE, ContainerEventType.CONTAINER_RESOURCES_CLEANEDUP, - CONTAINER_DONE_TRANSITION) + new LocalizationFailedToDoneTransition()) .addTransition(ContainerState.LOCALIZATION_FAILED, ContainerState.LOCALIZATION_FAILED, ContainerEventType.UPDATE_DIAGNOSTICS_MSG, @@ -250,7 +247,7 @@ public ContainerImpl(Configuration conf, Dispatcher dispatcher, // From CONTAINER_EXITED_WITH_SUCCESS State .addTransition(ContainerState.EXITED_WITH_SUCCESS, ContainerState.DONE, ContainerEventType.CONTAINER_RESOURCES_CLEANEDUP, - CONTAINER_DONE_TRANSITION) + new ExitedWithSuccessToDoneTransition()) .addTransition(ContainerState.EXITED_WITH_SUCCESS, ContainerState.EXITED_WITH_SUCCESS, ContainerEventType.UPDATE_DIAGNOSTICS_MSG, @@ -262,7 +259,7 @@ public ContainerImpl(Configuration conf, Dispatcher dispatcher, // From EXITED_WITH_FAILURE State .addTransition(ContainerState.EXITED_WITH_FAILURE, ContainerState.DONE, ContainerEventType.CONTAINER_RESOURCES_CLEANEDUP, - CONTAINER_DONE_TRANSITION) + new ExitedWithFailureToDoneTransition()) .addTransition(ContainerState.EXITED_WITH_FAILURE, ContainerState.EXITED_WITH_FAILURE, ContainerEventType.UPDATE_DIAGNOSTICS_MSG, @@ -297,7 +294,7 @@ public ContainerImpl(Configuration conf, Dispatcher dispatcher, .addTransition(ContainerState.KILLING, ContainerState.DONE, ContainerEventType.CONTAINER_RESOURCES_CLEANEDUP, - CONTAINER_DONE_TRANSITION) + new KillingToDoneTransition()) // Handle a launched container during killing stage is a no-op // as cleanup container is always handled after launch container event // in the container launcher @@ -309,7 +306,7 @@ public ContainerImpl(Configuration conf, Dispatcher dispatcher, .addTransition(ContainerState.CONTAINER_CLEANEDUP_AFTER_KILL, ContainerState.DONE, ContainerEventType.CONTAINER_RESOURCES_CLEANEDUP, - CONTAINER_DONE_TRANSITION) + new ContainerCleanedupAfterKillToDoneTransition()) .addTransition(ContainerState.CONTAINER_CLEANEDUP_AFTER_KILL, ContainerState.CONTAINER_CLEANEDUP_AFTER_KILL, ContainerEventType.UPDATE_DIAGNOSTICS_MSG, @@ -459,47 +456,6 @@ public ContainerTokenIdentifier getContainerTokenIdentifier() { } } - @SuppressWarnings("fallthrough") - private void finished() { - ApplicationId applicationId = - containerId.getApplicationAttemptId().getApplicationId(); - switch (getContainerState()) { - case EXITED_WITH_SUCCESS: - metrics.endRunningContainer(); - metrics.completedContainer(); - NMAuditLogger.logSuccess(user, - AuditConstants.FINISH_SUCCESS_CONTAINER, "ContainerImpl", - applicationId, containerId); - break; - case EXITED_WITH_FAILURE: - if (wasLaunched) { - metrics.endRunningContainer(); - } - // fall through - case LOCALIZATION_FAILED: - metrics.failedContainer(); - NMAuditLogger.logFailure(user, - AuditConstants.FINISH_FAILED_CONTAINER, "ContainerImpl", - "Container failed with state: " + getContainerState(), - applicationId, containerId); - break; - case CONTAINER_CLEANEDUP_AFTER_KILL: - if (wasLaunched) { - metrics.endRunningContainer(); - } - // fall through - case NEW: - metrics.killedContainer(); - NMAuditLogger.logSuccess(user, - AuditConstants.FINISH_KILLED_CONTAINER, "ContainerImpl", - applicationId, - containerId); - } - - metrics.releaseContainer(this.resource); - sendFinishedEvents(); - } - @SuppressWarnings("unchecked") private void sendFinishedEvents() { // Inform the application @@ -606,7 +562,13 @@ public ContainerState transition(ContainerImpl container, } else if (container.recoveredAsKilled && container.recoveredStatus == RecoveredContainerStatus.REQUESTED) { // container was killed but never launched - container.finished(); + container.metrics.killedContainer(); + NMAuditLogger.logSuccess(container.user, + AuditConstants.FINISH_KILLED_CONTAINER, "ContainerImpl", + container.containerId.getApplicationAttemptId().getApplicationId(), + container.containerId); + container.metrics.releaseContainer(container.resource); + container.sendFinishedEvents(); return ContainerState.DONE; } @@ -987,7 +949,8 @@ public void transition(ContainerImpl container, ContainerEvent event) { @Override @SuppressWarnings("unchecked") public void transition(ContainerImpl container, ContainerEvent event) { - container.finished(); + container.metrics.releaseContainer(container.resource); + container.sendFinishedEvents(); //if the current state is NEW it means the CONTAINER_INIT was never // sent for the event, thus no need to send the CONTAINER_STOP if (container.getCurrentState() @@ -1009,6 +972,105 @@ public void transition(ContainerImpl container, ContainerEvent event) { container.exitCode = killEvent.getContainerExitStatus(); container.addDiagnostics(killEvent.getDiagnostic(), "\n"); container.addDiagnostics("Container is killed before being launched.\n"); + container.metrics.killedContainer(); + NMAuditLogger.logSuccess(container.user, + AuditConstants.FINISH_KILLED_CONTAINER, "ContainerImpl", + container.containerId.getApplicationAttemptId().getApplicationId(), + container.containerId); + super.transition(container, event); + } + } + + /** + * Handle the following transition: + * - LOCALIZATION_FAILED -> DONE upon CONTAINER_RESOURCES_CLEANEDUP + */ + static class LocalizationFailedToDoneTransition extends + ContainerDoneTransition { + @Override + public void transition(ContainerImpl container, ContainerEvent event) { + container.metrics.failedContainer(); + NMAuditLogger.logFailure(container.user, + AuditConstants.FINISH_FAILED_CONTAINER, "ContainerImpl", + "Container failed with state: " + container.getContainerState(), + container.containerId.getApplicationAttemptId().getApplicationId(), + container.containerId); + super.transition(container, event); + } + } + + /** + * Handle the following transition: + * - EXITED_WITH_SUCCESS -> DONE upon CONTAINER_RESOURCES_CLEANEDUP + */ + static class ExitedWithSuccessToDoneTransition extends + ContainerDoneTransition { + @Override + public void transition(ContainerImpl container, ContainerEvent event) { + container.metrics.endRunningContainer(); + container.metrics.completedContainer(); + NMAuditLogger.logSuccess(container.user, + AuditConstants.FINISH_SUCCESS_CONTAINER, "ContainerImpl", + container.containerId.getApplicationAttemptId().getApplicationId(), + container.containerId); + super.transition(container, event); + } + } + + /** + * Handle the following transition: + * - EXITED_WITH_FAILURE -> DONE upon CONTAINER_RESOURCES_CLEANEDUP + */ + static class ExitedWithFailureToDoneTransition extends + ContainerDoneTransition { + @Override + public void transition(ContainerImpl container, ContainerEvent event) { + if (container.wasLaunched) { + container.metrics.endRunningContainer(); + } + container.metrics.failedContainer(); + NMAuditLogger.logFailure(container.user, + AuditConstants.FINISH_FAILED_CONTAINER, "ContainerImpl", + "Container failed with state: " + container.getContainerState(), + container.containerId.getApplicationAttemptId().getApplicationId(), + container.containerId); + super.transition(container, event); + } + } + + /** + * Handle the following transition: + * - KILLING -> DONE upon CONTAINER_RESOURCES_CLEANEDUP + */ + static class KillingToDoneTransition extends + ContainerDoneTransition { + @Override + public void transition(ContainerImpl container, ContainerEvent event) { + container.metrics.killedContainer(); + NMAuditLogger.logSuccess(container.user, + AuditConstants.FINISH_KILLED_CONTAINER, "ContainerImpl", + container.containerId.getApplicationAttemptId().getApplicationId(), + container.containerId); + super.transition(container, event); + } + } + + /** + * Handle the following transition: + * CONTAINER_CLEANEDUP_AFTER_KILL -> DONE upon CONTAINER_RESOURCES_CLEANEDUP + */ + static class ContainerCleanedupAfterKillToDoneTransition extends + ContainerDoneTransition { + @Override + public void transition(ContainerImpl container, ContainerEvent event) { + if (container.wasLaunched) { + container.metrics.endRunningContainer(); + } + container.metrics.killedContainer(); + NMAuditLogger.logSuccess(container.user, + AuditConstants.FINISH_KILLED_CONTAINER, "ContainerImpl", + container.containerId.getApplicationAttemptId().getApplicationId(), + container.containerId); super.transition(container, event); } } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/metrics/NodeManagerMetrics.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/metrics/NodeManagerMetrics.java index 3da21f0..4bcd32a 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/metrics/NodeManagerMetrics.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/metrics/NodeManagerMetrics.java @@ -26,6 +26,8 @@ import org.apache.hadoop.metrics2.source.JvmMetrics; import org.apache.hadoop.yarn.api.records.Resource; +import com.google.common.annotations.VisibleForTesting; + @Metrics(about="Metrics for node manager", context="yarn") public class NodeManagerMetrics { @Metric MutableCounterInt containersLaunched; @@ -111,4 +113,9 @@ public void addResource(Resource res) { public int getRunningContainers() { return containersRunning.value(); } + + @VisibleForTesting + public int getKilledContainers() { + return containersKilled.value(); + } } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java index c28d691..600dcc1 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java @@ -345,6 +345,10 @@ public void testKillOnLocalizing() throws Exception { wc.c.cloneAndGetContainerStatus().getExitStatus()); assertTrue(wc.c.cloneAndGetContainerStatus().getDiagnostics() .contains("KillRequest")); + int killed = metrics.getKilledContainers(); + wc.containerResourcesCleanup(); + assertEquals(ContainerState.DONE, wc.c.getContainerState()); + assertEquals(killed + 1, metrics.getKilledContainers()); } finally { if (wc != null) { wc.finished();