diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/instance/ComponentInstance.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/instance/ComponentInstance.java index 25aba77f539..25cc19da556 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/instance/ComponentInstance.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/instance/ComponentInstance.java @@ -152,10 +152,14 @@ REINITIALIZED), START, new StartedAfterUpgradeTransition()) .addTransition(CANCEL_UPGRADING, EnumSet.of(CANCEL_UPGRADING, INIT), STOP, new StoppedAfterCancelUpgradeTransition()) + + // FROM REINITIALIZED .addTransition(REINITIALIZED, CANCEL_UPGRADING, CANCEL_UPGRADE, new CancelledAfterReinitTransition()) .addTransition(REINITIALIZED, READY, BECOME_READY, new ContainerBecomeReadyTransition(true)) + .addTransition(REINITIALIZED, REINITIALIZED, STOP, + new StoppedAfterUpgradeTransition()) .installTopology(); public ComponentInstance(Component component, diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/component/instance/TestComponentInstance.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/component/instance/TestComponentInstance.java index 09652d7403b..f857353a629 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/component/instance/TestComponentInstance.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/component/instance/TestComponentInstance.java @@ -140,6 +140,42 @@ public void testContainerUpgradeFailed() throws Exception { .getId().toString()).getState()); } + @Test + public void testFailureAfterReinit() throws Exception { + ServiceContext context = TestComponent.createTestContext(rule, + "testContainerUpgradeFailed"); + Component component = context.scheduler.getAllComponents().entrySet() + .iterator().next().getValue(); + upgradeComponent(component); + + ComponentInstance instance = component.getAllComponentInstances().iterator() + .next(); + + ComponentInstanceEvent upgradeEvent = new ComponentInstanceEvent( + instance.getContainer().getId(), ComponentInstanceEventType.UPGRADE); + instance.handle(upgradeEvent); + + // NM finished upgrade + instance.handle(new ComponentInstanceEvent(instance.getContainer().getId(), + ComponentInstanceEventType.START)); + Assert.assertEquals("instance not running", + ContainerState.RUNNING_BUT_UNREADY, + component.getComponentSpec().getContainer(instance.getContainer() + .getId().toString()).getState()); + + ContainerStatus containerStatus = mock(ContainerStatus.class); + when(containerStatus.getExitStatus()).thenReturn( + ContainerExitStatus.ABORTED); + ComponentInstanceEvent stopEvent = new ComponentInstanceEvent( + instance.getContainer().getId(), ComponentInstanceEventType.STOP) + .setStatus(containerStatus); + // this is the callback from NM for the upgrade + instance.handle(stopEvent); + Assert.assertEquals("instance did not fail", ContainerState.FAILED_UPGRADE, + component.getComponentSpec().getContainer(instance.getContainer() + .getId().toString()).getState()); + } + 
@Test public void testCancelNothingToUpgrade() throws Exception { ServiceContext context = TestComponent.createTestContext(rule, diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java index 6716dbb02e9..b86159beaca 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java @@ -1729,7 +1729,10 @@ public void transition(ContainerImpl container, + "] for re-initialization !!"); container.wasLaunched = false; container.metrics.endRunningContainer(); - + // Remove the container from the resource-monitor. When container + // is launched again, it is added back to monitoring service. + container.dispatcher.getEventHandler().handle( + new ContainerStopMonitoringEvent(container.containerId)); container.launchContext = container.reInitContext.newLaunchContext; // Re configure the Retry Context