diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-slider/hadoop-yarn-slider-core/src/main/java/org/apache/slider/providers/ProviderUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-slider/hadoop-yarn-slider-core/src/main/java/org/apache/slider/providers/ProviderUtils.java index d384585..16ecdbb 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-slider/hadoop-yarn-slider-core/src/main/java/org/apache/slider/providers/ProviderUtils.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-slider/hadoop-yarn-slider-core/src/main/java/org/apache/slider/providers/ProviderUtils.java @@ -518,7 +518,8 @@ public void addComponentHostTokens(Map tokens, */ public void updateServiceRecord(StateAccessForProviders amState, YarnRegistryViewForProviders yarnRegistry, - String containerId, String roleName, List ip, String hostname) { + String containerId, String roleName, List ip, String hostname) + throws IOException { try { RoleInstance role = null; if(ip != null && !ip.isEmpty()){ @@ -550,9 +551,6 @@ public void updateServiceRecord(StateAccessForProviders amState, } catch (NoSuchNodeException e) { // ignore - there is nothing to do if we don't find a container log.warn("Owned container {} not found - {}", containerId, e); - } catch (IOException e) { - log.warn("Error updating container {} service record in registry", - containerId, e); } } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-slider/hadoop-yarn-slider-core/src/main/java/org/apache/slider/providers/docker/DockerProviderService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-slider/hadoop-yarn-slider-core/src/main/java/org/apache/slider/providers/docker/DockerProviderService.java index 93a481c..bd90c72 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-slider/hadoop-yarn-slider-core/src/main/java/org/apache/slider/providers/docker/DockerProviderService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-slider/hadoop-yarn-slider-core/src/main/java/org/apache/slider/providers/docker/DockerProviderService.java @@ -141,8 +141,15 @@ public boolean processContainerStatus(ContainerId containerId, return false; } - providerUtils.updateServiceRecord(amState, yarnRegistry, - containerId.toString(), instance.role, status.getIPs(), status.getHost()); + try { + providerUtils.updateServiceRecord(amState, yarnRegistry, + containerId.toString(), instance.role, status.getIPs(), status.getHost()); + } catch (IOException e) { + // could not write service record to ZK, log and retry + log.warn("Error updating container {} service record in registry, " + + "retrying", containerId, e); + return true; + } // TODO publish ip and host org.apache.slider.api.resource.Container container = instance.providerRole.component.getContainer(containerId.toString()); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-slider/hadoop-yarn-slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-slider/hadoop-yarn-slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java index 1f379ea..fd4fb40 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-slider/hadoop-yarn-slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-slider/hadoop-yarn-slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java @@ -113,6 +113,7 @@ import org.apache.slider.providers.ProviderCompleted; import org.apache.slider.providers.ProviderService; import org.apache.slider.providers.SliderProviderFactory; +import org.apache.slider.providers.docker.DockerProviderService; import org.apache.slider.server.appmaster.actions.ActionHalt; import org.apache.slider.server.appmaster.actions.ActionRegisterServiceInstance; import org.apache.slider.server.appmaster.actions.ActionStopSlider; @@ -1948,9 +1949,12 @@ public void onContainerStarted(ContainerId containerId, //trigger an async container status nmClientAsync.getContainerStatusAsync(containerId, cinfo.container.getNodeId()); - // push out a registration - queue(new RegisterComponentInstance(containerId, cinfo, - 0, TimeUnit.MILLISECONDS)); + if (!(providerService instanceof DockerProviderService)) { + // in the docker case, registration will be performed after the IP + // has been obtained. otherwise, push out a registration now + queue(new RegisterComponentInstance(containerId, cinfo, + 0, TimeUnit.MILLISECONDS)); + } } else { //this is a hypothetical path not seen. We react by warning