diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/ServiceManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/ServiceManager.java index 3c8fed68a12..a665955d771 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/ServiceManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/ServiceManager.java @@ -287,15 +287,18 @@ private void upgradeNextCompIfAny(boolean cancelUpgrade) { org.apache.hadoop.yarn.service.api.records.Component component = componentsToUpgrade.get(0); - serviceSpec.getComponent(component.getName()).getContainers().forEach( - container -> { - ComponentInstanceEvent upgradeEvent = new ComponentInstanceEvent( - ContainerId.fromString(container.getId()), - !cancelUpgrade ? ComponentInstanceEventType.UPGRADE : - ComponentInstanceEventType.CANCEL_UPGRADE); - LOG.info("Upgrade container {} {}", container.getId(), - cancelUpgrade); - dispatcher.getEventHandler().handle(upgradeEvent); + serviceSpec.getComponent(component.getName()).getContainers().stream() + .filter(container -> + scheduler.getLiveInstances().containsKey( + ContainerId.fromString(container.getId()))) + .forEach(container -> { + ComponentInstanceEvent upgradeEvent = new ComponentInstanceEvent( + ContainerId.fromString(container.getId()), + !cancelUpgrade ? ComponentInstanceEventType.UPGRADE : + ComponentInstanceEventType.CANCEL_UPGRADE); + LOG.info("Upgrade container {} {}", container.getId(), + cancelUpgrade); + dispatcher.getEventHandler().handle(upgradeEvent); }); } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/Component.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/Component.java index cbc489c4e69..7cd4a97298a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/Component.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/Component.java @@ -628,10 +628,22 @@ public void transition(Component component, ComponentEvent event) { component.setComponentState(org.apache.hadoop.yarn.service.api. records.ComponentState.NEEDS_UPGRADE); + // upgrade failed container may have been released already component.getAllComponentInstances().forEach(instance -> { - instance.setContainerState(ContainerState.NEEDS_UPGRADE); + if (component.getScheduler().getLiveInstances().containsKey( + instance.getContainer().getId())) { + instance.setContainerState(ContainerState.NEEDS_UPGRADE); + } else { + status.decContainersThatNeedUpgrade(); + LOG.info("{} is not live. request a new container", + instance.getContainer().getId()); + component.reInsertPendingInstance(instance); + } }); - + if (component.getPendingInstances().size() > 0) { + // re-ask the failed containers + component.requestContainers(component.getPendingInstances().size()); + } if (event.getType().equals(CANCEL_UPGRADE)) { component.upgradeStatus.reset(); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/instance/ComponentInstance.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/instance/ComponentInstance.java index 700408e6633..eabc00182e8 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/instance/ComponentInstance.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/instance/ComponentInstance.java @@ -158,7 +158,7 @@ CANCEL_UPGRADE, new CancelUpgradeTransition()) .addTransition(UPGRADING, EnumSet.of(REINITIALIZED), START, new StartedAfterUpgradeTransition()) - .addTransition(UPGRADING, UPGRADING, STOP, + .addTransition(UPGRADING, INIT, STOP, new StoppedAfterUpgradeTransition()) // FROM CANCEL_UPGRADING @@ -172,7 +172,7 @@ new CancelledAfterReinitTransition()) .addTransition(REINITIALIZED, READY, BECOME_READY, new ContainerBecomeReadyTransition(true)) - .addTransition(REINITIALIZED, REINITIALIZED, STOP, + .addTransition(REINITIALIZED, INIT, STOP, new StoppedAfterUpgradeTransition()) .installTopology();