diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services-api/src/main/resources/definition/YARN-Simplified-V1-API-Layer-For-Services.yaml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services-api/src/main/resources/definition/YARN-Simplified-V1-API-Layer-For-Services.yaml index 45b1bc725b1..4483fc1633a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services-api/src/main/resources/definition/YARN-Simplified-V1-API-Layer-For-Services.yaml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services-api/src/main/resources/definition/YARN-Simplified-V1-API-Layer-For-Services.yaml @@ -430,6 +430,7 @@ definitions: type: string description: E.g. HTTP (YARN will perform a simple REST call at a regular interval and expect a 204 No content). enum: + - DEFAULT - HTTP - PORT properties: diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/ContainerFailureTracker.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/ContainerFailureTracker.java index 4743f283445..5982728ca73 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/ContainerFailureTracker.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/ContainerFailureTracker.java @@ -19,6 +19,7 @@ package org.apache.hadoop.yarn.service; import org.apache.hadoop.yarn.service.component.Component; +import org.apache.hadoop.yarn.service.conf.YarnServiceConf; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -29,6 +30,7 @@ import java.util.Map; import java.util.Set; +import static 
org.apache.hadoop.yarn.service.conf.YarnServiceConf.DEFAULT_NODE_BLACKLIST_THRESHOLD; import static org.apache.hadoop.yarn.service.conf.YarnServiceConf.NODE_BLACKLIST_THRESHOLD; /** @@ -51,8 +53,9 @@ public ContainerFailureTracker(ServiceContext context, Component component) { this.context = context; this.component = component; - maxFailurePerNode = component.getComponentSpec().getConfiguration() - .getPropertyInt(NODE_BLACKLIST_THRESHOLD, 3); + maxFailurePerNode = YarnServiceConf.getInt(NODE_BLACKLIST_THRESHOLD, + DEFAULT_NODE_BLACKLIST_THRESHOLD, component.getComponentSpec() + .getConfiguration(), context.scheduler.getConfig()); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/api/records/ReadinessCheck.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/api/records/ReadinessCheck.java index af7c5427e7c..0665cb53fad 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/api/records/ReadinessCheck.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/api/records/ReadinessCheck.java @@ -60,6 +60,7 @@ @XmlType(name = "type") @XmlEnum public enum TypeEnum { + DEFAULT("DEFAULT"), HTTP("HTTP"), PORT("PORT"); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/client/ServiceClient.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/client/ServiceClient.java index 04ca9437426..7f0b3ea5f16 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/client/ServiceClient.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/client/ServiceClient.java @@ -628,8 +628,8 @@ ApplicationId submitApp(Service app) throws IOException, YarnException { submissionContext.setApplicationTimeouts(appTimeout); } submissionContext.setMaxAppAttempts(YarnServiceConf - .getInt(YarnServiceConf.AM_RESTART_MAX, 20, app.getConfiguration(), - conf)); + .getInt(YarnServiceConf.AM_RESTART_MAX, DEFAULT_AM_RESTART_MAX, app + .getConfiguration(), conf)); setLogAggregationContext(app, conf, submissionContext); @@ -655,7 +655,7 @@ ApplicationId submitApp(Service app) throws IOException, YarnException { conf), 1)); String queue = app.getQueue(); if (StringUtils.isEmpty(queue)) { - queue = conf.get(YARN_QUEUE, "default"); + queue = conf.get(YARN_QUEUE, DEFAULT_YARN_QUEUE); } submissionContext.setQueue(queue); submissionContext.setApplicationName(serviceName); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/Component.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/Component.java index a2127c80316..6a8359e668b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/Component.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/Component.java @@ -45,6 +45,7 @@ import 
org.apache.hadoop.yarn.service.component.instance.ComponentInstance; import org.apache.hadoop.yarn.service.component.instance.ComponentInstanceEvent; import org.apache.hadoop.yarn.service.component.instance.ComponentInstanceId; +import org.apache.hadoop.yarn.service.conf.YarnServiceConf; import org.apache.hadoop.yarn.service.monitor.probe.MonitorUtils; import org.apache.hadoop.yarn.service.monitor.probe.Probe; import org.apache.hadoop.yarn.service.provider.ProviderUtils; @@ -78,7 +79,7 @@ import static org.apache.hadoop.yarn.service.component.ComponentEventType.*; import static org.apache.hadoop.yarn.service.component.ComponentState.*; import static org.apache.hadoop.yarn.service.component.instance.ComponentInstanceEventType.*; -import static org.apache.hadoop.yarn.service.conf.YarnServiceConf.CONTAINER_FAILURE_THRESHOLD; +import static org.apache.hadoop.yarn.service.conf.YarnServiceConf.*; public class Component implements EventHandler { private static final Logger LOG = LoggerFactory.getLogger(Component.class); @@ -175,9 +176,15 @@ public Component( dispatcher = scheduler.getDispatcher(); failureTracker = new ContainerFailureTracker(context, this); - probe = MonitorUtils.getProbe(componentSpec.getReadinessCheck()); - maxContainerFailurePerComp = componentSpec.getConfiguration() - .getPropertyInt(CONTAINER_FAILURE_THRESHOLD, 10); + if (componentSpec.getReadinessCheck() != null || + YarnServiceConf.getBoolean(DEFAULT_READINESS_CHECK_ENABLED, + DEFAULT_READINESS_CHECK_ENABLED_DEFAULT, + componentSpec.getConfiguration(), scheduler.getConfig())) { + probe = MonitorUtils.getProbe(componentSpec.getReadinessCheck()); + } + maxContainerFailurePerComp = YarnServiceConf.getInt( + CONTAINER_FAILURE_THRESHOLD, DEFAULT_CONTAINER_FAILURE_THRESHOLD, + componentSpec.getConfiguration(), scheduler.getConfig()); createNumCompInstances(component.getNumberOfContainers()); } diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/instance/ComponentInstance.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/instance/ComponentInstance.java index 0e3e11bc72e..c57d8882720 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/instance/ComponentInstance.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/instance/ComponentInstance.java @@ -20,7 +20,9 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.registry.client.api.RegistryConstants; import org.apache.hadoop.registry.client.binding.RegistryPathUtils; +import org.apache.hadoop.registry.client.binding.RegistryUtils; import org.apache.hadoop.registry.client.types.ServiceRecord; import org.apache.hadoop.registry.client.types.yarn.PersistencePolicies; import org.apache.hadoop.util.ExitUtil; @@ -520,6 +522,24 @@ private void cancelContainerStatusRetriever() { } } + public String getHostname() { + String domain = getComponent().getScheduler().getConfig() + .get(RegistryConstants.KEY_DNS_DOMAIN); + String hostname; + if (domain == null || domain.isEmpty()) { + hostname = MessageFormat + .format("{0}.{1}.{2}", getCompInstanceName(), + getComponent().getContext().service.getName(), + RegistryUtils.currentUser()); + } else { + hostname = MessageFormat + .format("{0}.{1}.{2}.{3}", getCompInstanceName(), + getComponent().getContext().service.getName(), + RegistryUtils.currentUser(), domain); + } + return hostname; + } + @Override public int compareTo(ComponentInstance to) { long delta = 
containerStartedTime - to.containerStartedTime; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/conf/YarnServiceConf.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/conf/YarnServiceConf.java index b9a759438e0..43a5b9aef75 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/conf/YarnServiceConf.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/conf/YarnServiceConf.java @@ -35,10 +35,12 @@ "yarn.service.container-failure.validity-interval-ms"; public static final String AM_RESTART_MAX = "yarn.service.am-restart.max-attempts"; + public static final int DEFAULT_AM_RESTART_MAX = 20; public static final String AM_RESOURCE_MEM = "yarn.service.am-resource.memory"; public static final long DEFAULT_KEY_AM_RESOURCE_MEM = 1024; public static final String YARN_QUEUE = "yarn.service.queue"; + public static final String DEFAULT_YARN_QUEUE = "default"; public static final String API_SERVER_ADDRESS = "yarn.service.api-server.address"; public static final String DEFAULT_API_SERVER_ADDRESS = "0.0.0.0:"; @@ -63,11 +65,14 @@ */ public static final String CONTAINER_FAILURE_THRESHOLD = "yarn.service.container-failure-per-component.threshold"; + public static final int DEFAULT_CONTAINER_FAILURE_THRESHOLD = 10; + /** * Maximum number of container failures on a node before the node is blacklisted */ public static final String NODE_BLACKLIST_THRESHOLD = "yarn.service.node-blacklist.threshold"; + public static final int DEFAULT_NODE_BLACKLIST_THRESHOLD = 3; /** * The failure count for CONTAINER_FAILURE_THRESHOLD and NODE_BLACKLIST_THRESHOLD @@ -75,6 +80,7 @@ */
public static final String CONTAINER_FAILURE_WINDOW = "yarn.service.failure-count-reset.window"; + public static final long DEFAULT_CONTAINER_FAILURE_WINDOW = 21600; /** * interval between readiness checks. @@ -83,6 +89,13 @@ public static final int DEFAULT_READINESS_CHECK_INTERVAL = 30; // seconds /** + * Default readiness check enabled. + */ + public static final String DEFAULT_READINESS_CHECK_ENABLED = + "yarn.service.default-readiness-check.enabled"; + public static final boolean DEFAULT_READINESS_CHECK_ENABLED_DEFAULT = true; + + /** * JVM opts. */ public static final String JVM_OPTS = "yarn.service.am.java.opts"; @@ -122,6 +135,12 @@ public static int getInt(String name, int defaultValue, return userConf.getPropertyInt(name, systemConf.getInt(name, defaultValue)); } + public static boolean getBoolean(String name, boolean defaultValue, + Configuration userConf, org.apache.hadoop.conf.Configuration systemConf) { + return userConf.getPropertyBool(name, systemConf.getBoolean(name, + defaultValue)); + } + public static String get(String name, String defaultVal, Configuration userConf, org.apache.hadoop.conf.Configuration systemConf) { return userConf.getProperty(name, systemConf.get(name, defaultVal)); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/monitor/ServiceMonitor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/monitor/ServiceMonitor.java index 982448ad713..1da6009e226 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/monitor/ServiceMonitor.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/monitor/ServiceMonitor.java @@ 
-42,9 +42,7 @@ import static org.apache.hadoop.yarn.service.component.instance.ComponentInstanceEventType.BECOME_NOT_READY; import static org.apache.hadoop.yarn.service.component.instance.ComponentInstanceEventType.BECOME_READY; import static org.apache.hadoop.yarn.service.component.instance.ComponentInstanceState.READY; -import static org.apache.hadoop.yarn.service.conf.YarnServiceConf.CONTAINER_FAILURE_WINDOW; -import static org.apache.hadoop.yarn.service.conf.YarnServiceConf.DEFAULT_READINESS_CHECK_INTERVAL; -import static org.apache.hadoop.yarn.service.conf.YarnServiceConf.READINESS_CHECK_INTERVAL; +import static org.apache.hadoop.yarn.service.conf.YarnServiceConf.*; public class ServiceMonitor extends AbstractService { @@ -81,7 +79,7 @@ public void serviceStart() throws Exception { // Default 6 hours. long failureResetInterval = YarnServiceConf - .getLong(CONTAINER_FAILURE_WINDOW, 21600, + .getLong(CONTAINER_FAILURE_WINDOW, DEFAULT_CONTAINER_FAILURE_WINDOW, context.service.getConfiguration(), conf); executorService @@ -114,6 +112,8 @@ public void run() { new ComponentInstanceEvent(entry.getKey(), BECOME_READY)); } } else { + LOG.info("Readiness check failed for {}: {}", instance + .getCompInstanceName(), status); if (instance.getState() == READY) { instance.handle( new ComponentInstanceEvent(entry.getKey(), BECOME_NOT_READY)); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/monitor/probe/DefaultProbe.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/monitor/probe/DefaultProbe.java new file mode 100644 index 00000000000..4077013392e --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/monitor/probe/DefaultProbe.java @@ -0,0 +1,99 
@@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.yarn.service.monitor.probe; + +import org.apache.hadoop.yarn.api.records.ContainerStatus; +import org.apache.hadoop.yarn.service.component.instance.ComponentInstance; +import org.apache.hadoop.yarn.service.utils.ServiceRegistryUtils; +import org.apache.hadoop.yarn.service.utils.ServiceUtils; + +import java.io.IOException; +import java.util.Collections; +import java.util.Map; + +/** + * A probe that checks whether the AM has retrieved an IP for a container. + * Optional parameters enable a subsequent check for whether a DNS lookup can + * be performed for the container's hostname. Configurable properties include: + * + * dns.check.enabled - true if DNS check should be performed (default false) + * dns.address - optional IP:port address of DNS server to use for DNS check + */ +public class DefaultProbe extends Probe { + private final boolean dnsCheckEnabled; + private final String dnsAddress; + + public DefaultProbe(Map<String, String> props) { + this("Default probe: IP presence", props); + } + + protected DefaultProbe(String name, Map<String, String> props) { + this.dnsCheckEnabled = getPropertyBool(props, + DEFAULT_PROBE_DNS_CHECK_ENABLED, + DEFAULT_PROBE_DNS_CHECK_ENABLED_DEFAULT); + this.dnsAddress = props.get(DEFAULT_PROBE_DNS_ADDRESS); + String additionalName = ""; + if (dnsCheckEnabled) { + if (dnsAddress == null) { + additionalName = " with DNS checking"; + } else { + additionalName = " with DNS checking and DNS server address " + + dnsAddress; + } + } + setName(name + additionalName); + } + + public static DefaultProbe create() throws IOException { + return new DefaultProbe(Collections.emptyMap()); + } + + public static DefaultProbe
create(Map<String, String> props) throws + IOException { + return new DefaultProbe(props); + } + + @Override + public ProbeStatus ping(ComponentInstance instance) { + ProbeStatus status = new ProbeStatus(); + + ContainerStatus containerStatus = instance.getContainerStatus(); + if (containerStatus == null || ServiceUtils.isEmpty(containerStatus + .getIPs())) { + status.fail(this, new IOException( + instance.getCompInstanceName() + ": IP is not available yet")); + return status; + } + + String hostname = instance.getHostname(); + if (dnsCheckEnabled && !ServiceRegistryUtils.registryDNSLookupExists( + dnsAddress, hostname)) { + status.fail(this, new IOException( + instance.getCompInstanceName() + ": DNS checking is enabled, but " + + "lookup for " + hostname + " is not available yet")); + return status; + } + + status.succeed(this); + return status; + } + + protected boolean isDnsCheckEnabled() { + return dnsCheckEnabled; + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/monitor/probe/HttpProbe.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/monitor/probe/HttpProbe.java index 1ed13a9c360..492a11b2c67 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/monitor/probe/HttpProbe.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/monitor/probe/HttpProbe.java @@ -17,11 +17,7 @@ package org.apache.hadoop.yarn.service.monitor.probe; -import org.apache.commons.lang.StringUtils; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.yarn.api.records.ContainerStatus; import
org.apache.hadoop.yarn.service.component.instance.ComponentInstance; -import org.apache.hadoop.yarn.service.utils.ServiceUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -30,7 +26,20 @@ import java.net.URL; import java.util.Map; -public class HttpProbe extends Probe { +/** + * A probe that checks whether a successful HTTP response code can be obtained + * from a container. A well-formed URL must be provided. The URL is intended + * to contain a token ${THIS_HOST} that will be replaced by the IP of the + * container. This probe also performs the checks of the {@link DefaultProbe}. + * Additional configurable properties include: + * + * url - required URL for HTTP connection, e.g. http://${THIS_HOST}:8080 + * timeout - connection timeout (default 1000) + * min.success - minimum response code considered successful (default 200) + * max.success - maximum response code considered successful (default 299) + * + */ +public class HttpProbe extends DefaultProbe { protected static final Logger log = LoggerFactory.getLogger(HttpProbe.class); private static final String HOST_TOKEN = "${THIS_HOST}"; @@ -40,9 +49,9 @@ private final int min, max; - public HttpProbe(String url, int timeout, int min, int max, Configuration - conf) { - super("Http probe of " + url + " [" + min + "-" + max + "]", conf); + public HttpProbe(String url, int timeout, int min, int max, + Map props) { + super("Http probe of " + url + " [" + min + "-" + max + "]", props); this.urlString = url; this.timeout = timeout; this.min = min; @@ -59,7 +68,7 @@ public static HttpProbe create(Map props) WEB_PROBE_MIN_SUCCESS_DEFAULT); int maxSuccess = getPropertyInt(props, WEB_PROBE_MAX_SUCCESS, WEB_PROBE_MAX_SUCCESS_DEFAULT); - return new HttpProbe(urlString, timeout, minSuccess, maxSuccess, null); + return new HttpProbe(urlString, timeout, minSuccess, maxSuccess, props); } @@ -73,15 +82,11 @@ private static HttpURLConnection getConnection(URL url, int timeout) throws @Override public ProbeStatus 
ping(ComponentInstance instance) { - ProbeStatus status = new ProbeStatus(); - ContainerStatus containerStatus = instance.getContainerStatus(); - if (containerStatus == null || ServiceUtils.isEmpty(containerStatus.getIPs()) - || StringUtils.isEmpty(containerStatus.getHost())) { - status.fail(this, new IOException("IP is not available yet")); + ProbeStatus status = super.ping(instance); + if (!status.isSuccess()) { return status; } - - String ip = containerStatus.getIPs().get(0); + String ip = instance.getContainerStatus().getIPs().get(0); HttpURLConnection connection = null; try { URL url = new URL(urlString.replace(HOST_TOKEN, ip)); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/monitor/probe/MonitorKeys.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/monitor/probe/MonitorKeys.java index 55b55f68eec..97770d4d2b1 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/monitor/probe/MonitorKeys.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/monitor/probe/MonitorKeys.java @@ -23,6 +23,18 @@ public interface MonitorKeys { /** + * Default probing key : DNS check enabled {@value}. + */ + String DEFAULT_PROBE_DNS_CHECK_ENABLED = "dns.check.enabled"; + /** + * Default probing default : DNS check enabled {@value}. + */ + boolean DEFAULT_PROBE_DNS_CHECK_ENABLED_DEFAULT = false; + /** + * Default probing key : DNS checking address IP:port {@value}. + */ + String DEFAULT_PROBE_DNS_ADDRESS = "dns.address"; + /** * Port probing key : port to attempt to create a TCP connection to {@value}. 
*/ String PORT_PROBE_PORT = "port"; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/monitor/probe/MonitorUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/monitor/probe/MonitorUtils.java index c4f63aee7ae..0b57e6c6bf5 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/monitor/probe/MonitorUtils.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/monitor/probe/MonitorUtils.java @@ -61,20 +61,20 @@ public static String millisToHumanTime(long milliseconds) { } public static Probe getProbe(ReadinessCheck readinessCheck) { - if (readinessCheck == null) { - return null; - } - if (readinessCheck.getType() == null) { - return null; - } try { + if (readinessCheck == null) { + return DefaultProbe.create(); + } + if (readinessCheck.getType() == null) { + return DefaultProbe.create(readinessCheck.getProperties()); + } switch (readinessCheck.getType()) { case HTTP: return HttpProbe.create(readinessCheck.getProperties()); case PORT: return PortProbe.create(readinessCheck.getProperties()); default: - return null; + return DefaultProbe.create(readinessCheck.getProperties()); } } catch (Throwable t) { throw new IllegalArgumentException("Error creating readiness check " + diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/monitor/probe/PortProbe.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/monitor/probe/PortProbe.java index 
85569f86d4a..e62048a0aeb 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/monitor/probe/PortProbe.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/monitor/probe/PortProbe.java @@ -19,7 +19,6 @@ import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.yarn.service.component.instance.ComponentInstance; -import org.apache.hadoop.yarn.service.utils.ServiceUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -29,15 +28,20 @@ import java.util.Map; /** - * Probe for a port being open. + * A probe that checks whether a container has a specified port open. This + * probe also performs the checks of the {@link DefaultProbe}. Additional + * configurable properties include: + * + * port - required port for socket connection + * timeout - connection timeout (default 1000) */ -public class PortProbe extends Probe { +public class PortProbe extends DefaultProbe { protected static final Logger log = LoggerFactory.getLogger(PortProbe.class); private final int port; private final int timeout; - public PortProbe(int port, int timeout) { - super("Port probe of " + port + " for " + timeout + "ms", null); + public PortProbe(int port, int timeout, Map props) { + super("Port probe of " + port + " for " + timeout + "ms", props); this.port = port; this.timeout = timeout; } @@ -54,7 +58,7 @@ public static PortProbe create(Map props) int timeout = getPropertyInt(props, PORT_PROBE_CONNECT_TIMEOUT, PORT_PROBE_CONNECT_TIMEOUT_DEFAULT); - return new PortProbe(port, timeout); + return new PortProbe(port, timeout, props); } /** @@ -65,12 +69,8 @@ public static PortProbe create(Map props) */ @Override public ProbeStatus ping(ComponentInstance instance) { - ProbeStatus status = new ProbeStatus(); - - if (instance.getContainerStatus() == null || ServiceUtils - 
.isEmpty(instance.getContainerStatus().getIPs())) { - status.fail(this, new IOException( - instance.getCompInstanceName() + ": IP is not available yet")); + ProbeStatus status = super.ping(instance); + if (!status.isSuccess()) { return status; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/monitor/probe/Probe.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/monitor/probe/Probe.java index 3237a2bd499..341a0c8f46d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/monitor/probe/Probe.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/monitor/probe/Probe.java @@ -18,7 +18,6 @@ package org.apache.hadoop.yarn.service.monitor.probe; import org.apache.commons.lang.StringUtils; -import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.yarn.service.component.instance.ComponentInstance; import java.io.IOException; @@ -29,18 +28,18 @@ */ public abstract class Probe implements MonitorKeys { - protected final Configuration conf; private String name; + protected Probe() { + } + /** * Create a probe of a specific name * * @param name probe name - * @param conf configuration being stored. 
*/ - public Probe(String name, Configuration conf) { + public Probe(String name) { this.name = name; - this.conf = conf; } @@ -82,6 +81,15 @@ public static int getPropertyInt(Map props, String name, return Integer.parseInt(value); } + public static boolean getPropertyBool(Map props, String name, + boolean defaultValue) { + String value = props.get(name); + if (StringUtils.isEmpty(value)) { + return defaultValue; + } + return Boolean.parseBoolean(value); + } + /** * perform any prelaunch initialization */ diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/provider/docker/DockerProviderService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/provider/docker/DockerProviderService.java index 6ac8de1e6b6..c3e2619245c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/provider/docker/DockerProviderService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/provider/docker/DockerProviderService.java @@ -17,8 +17,6 @@ */ package org.apache.hadoop.yarn.service.provider.docker; -import org.apache.hadoop.registry.client.api.RegistryConstants; -import org.apache.hadoop.registry.client.binding.RegistryUtils; import org.apache.hadoop.yarn.service.component.instance.ComponentInstance; import org.apache.hadoop.yarn.service.provider.AbstractProviderService; import org.apache.hadoop.yarn.service.api.records.Service; @@ -26,7 +24,6 @@ import org.apache.hadoop.yarn.service.containerlaunch.AbstractLauncher; import java.io.IOException; -import java.text.MessageFormat; public class DockerProviderService extends AbstractProviderService implements DockerKeys { 
@@ -38,19 +35,7 @@ public void processArtifact(AbstractLauncher launcher, launcher.setDockerImage(compInstance.getCompSpec().getArtifact().getId()); launcher.setDockerNetwork(compInstance.getCompSpec().getConfiguration() .getProperty(DOCKER_NETWORK)); - String domain = compInstance.getComponent().getScheduler().getConfig() - .get(RegistryConstants.KEY_DNS_DOMAIN); - String hostname; - if (domain == null || domain.isEmpty()) { - hostname = MessageFormat - .format("{0}.{1}.{2}", compInstance.getCompInstanceName(), - service.getName(), RegistryUtils.currentUser()); - } else { - hostname = MessageFormat - .format("{0}.{1}.{2}.{3}", compInstance.getCompInstanceName(), - service.getName(), RegistryUtils.currentUser(), domain); - } - launcher.setDockerHostname(hostname); + launcher.setDockerHostname(compInstance.getHostname()); launcher.setRunPrivilegedContainer( compInstance.getCompSpec().getRunPrivilegedContainer()); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/utils/ServiceRegistryUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/utils/ServiceRegistryUtils.java index dfc30f75e4d..30ba503732d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/utils/ServiceRegistryUtils.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/utils/ServiceRegistryUtils.java @@ -20,9 +20,23 @@ import org.apache.hadoop.registry.client.binding.RegistryUtils; import org.apache.hadoop.yarn.service.conf.YarnServiceConstants; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.naming.Context; +import javax.naming.NameNotFoundException; 
+import javax.naming.NamingException; +import javax.naming.directory.Attributes; +import javax.naming.directory.DirContext; +import javax.naming.directory.InitialDirContext; +import java.net.InetAddress; +import java.net.UnknownHostException; +import java.util.Hashtable; public class ServiceRegistryUtils { + private static final Logger LOG = + LoggerFactory.getLogger(ServiceRegistryUtils.class); public static final String SVC_USERS = "/services/yarn/users"; @@ -53,4 +67,61 @@ public static String mkServiceHomePath(String username, String serviceName) { public static String mkUserHomePath(String username) { return SVC_USERS + "/" + username; } + + /** + * Determine whether a DNS lookup exists for a given name. If a DNS server + * address is provided, the lookup will be performed against this DNS + * server. This option is provided because it may be desirable to perform + * the lookup against Registry DNS directly to avoid caching of negative + * responses that may be performed by other DNS servers, thereby allowing the + * lookup to succeed sooner.
+ * + * @param addr host:port dns address, or null + * @param name name to look up + * @return true if a lookup succeeds for the specified name; false if the + * name is not found or the lookup fails + */ + public static boolean registryDNSLookupExists(String addr, String + name) { + if (addr == null) { + try { + InetAddress.getByName(name); + return true; + } catch (UnknownHostException e) { + return false; + } + } + + String dnsURI = String.format("dns://%s", addr); + Hashtable<String, String> env = new Hashtable<>(); + env.put(Context.INITIAL_CONTEXT_FACTORY, + "com.sun.jndi.dns.DnsContextFactory"); + env.put(Context.PROVIDER_URL, dnsURI); + + DirContext ictx = null; + try { + ictx = new InitialDirContext(env); + Attributes attrs = ictx.getAttributes(name, new String[]{"A"}); + + if (attrs.size() > 0) { + return true; + } + } catch (NameNotFoundException e) { + // this doesn't need to be logged + } catch (NamingException e) { + LOG.error("Got exception when performing DNS lookup", e); + } finally { + // always release the JNDI context; it is not closed automatically + if (ictx != null) { + try { + ictx.close(); + } catch (NamingException e) { + LOG.warn("Got exception when closing DNS context", e); + } + } + } + + return false; + } + } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/monitor/probe/TestDefaultProbe.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/monitor/probe/TestDefaultProbe.java new file mode 100644 index 00000000000..8169e67b7c3 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/monitor/probe/TestDefaultProbe.java @@ -0,0 +1,155 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.yarn.service.monitor.probe; + +import org.apache.hadoop.yarn.api.records.ContainerStatus; +import org.apache.hadoop.yarn.service.api.records.ReadinessCheck; +import org.apache.hadoop.yarn.service.component.instance.ComponentInstance; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.mockito.invocation.InvocationOnMock; +import org.mockito.stubbing.Answer; + +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; + +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +/** + * Parameterized tests for the default probe: IP check plus optional DNS check.
+ */ +@RunWith(Parameterized.class) +public class TestDefaultProbe { + private final DefaultProbe probe; + + public TestDefaultProbe(Probe probe) { + this.probe = (DefaultProbe) probe; + } + + @Parameterized.Parameters + public static Collection<Object[]> data() { + // test run 1: Default probe checks that container has an IP + Probe p1 = MonitorUtils.getProbe(null); + + // test run 2: Default probe with DNS check for component instance hostname + ReadinessCheck rc2 = new ReadinessCheck() + .type(ReadinessCheck.TypeEnum.DEFAULT) + .properties(Collections.singletonMap( + MonitorKeys.DEFAULT_PROBE_DNS_CHECK_ENABLED, "true")); + Probe p2 = MonitorUtils.getProbe(rc2); + + // test run 3: Default probe with DNS check using specific DNS server + Map<String, String> props = new HashMap<>(); + props.put(MonitorKeys.DEFAULT_PROBE_DNS_CHECK_ENABLED, "true"); + props.put(MonitorKeys.DEFAULT_PROBE_DNS_ADDRESS, "8.8.8.8"); + ReadinessCheck rc3 = new ReadinessCheck() + .type(ReadinessCheck.TypeEnum.DEFAULT).properties(props); + Probe p3 = MonitorUtils.getProbe(rc3); + + return Arrays.asList(new Object[][] {{p1}, {p2}, {p3}}); + } + + @Test + public void testDefaultProbe() { + // component instance has a good hostname, so probe will eventually succeed + // whether or not DNS checking is enabled + ComponentInstance componentInstance = + createMockComponentInstance("example.com"); + checkPingResults(probe, componentInstance, false); + + // component instance has a bad hostname, so probe will fail when DNS + // checking is enabled + componentInstance = createMockComponentInstance("bad.dns.test"); + checkPingResults(probe, componentInstance, probe.isDnsCheckEnabled()); + } + + private static void checkPingResults(Probe probe, ComponentInstance + componentInstance, boolean expectDNSCheckFailure) { + // on the first ping, null container status results in failure + ProbeStatus probeStatus = probe.ping(componentInstance); + assertFalse("Expected failure for " + probeStatus.toString(), + probeStatus.isSuccess()); +
assertTrue("Expected IP failure for " + probeStatus.toString(), + probeStatus.toString().contains( + componentInstance.getCompInstanceName() + ": IP is not available yet")); + + // on the second ping, container status is retrieved but there are no + // IPs, resulting in failure + probeStatus = probe.ping(componentInstance); + assertFalse("Expected failure for " + probeStatus.toString(), + probeStatus.isSuccess()); + assertTrue("Expected IP failure for " + probeStatus.toString(), + probeStatus.toString().contains(componentInstance + .getCompInstanceName() + ": IP is not available yet")); + + // on the third ping, IPs are retrieved and success depends on whether or + // not a DNS lookup can be performed for the component instance hostname + probeStatus = probe.ping(componentInstance); + if (expectDNSCheckFailure) { + assertFalse("Expected failure for " + probeStatus.toString(), + probeStatus.isSuccess()); + assertTrue("Expected DNS failure for " + probeStatus.toString(), + probeStatus.toString().contains(componentInstance + .getCompInstanceName() + ": DNS checking is enabled, but lookup" + + " for " + componentInstance.getHostname() + " is not available " + + "yet")); + } else { + assertTrue("Expected success for " + probeStatus.toString(), + probeStatus.isSuccess()); + } + } + + private static ComponentInstance createMockComponentInstance(String + hostname) { + ComponentInstance componentInstance = mock(ComponentInstance.class); + when(componentInstance.getHostname()).thenReturn(hostname); + when(componentInstance.getCompInstanceName()).thenReturn("comp-0"); + when(componentInstance.getContainerStatus()) + .thenAnswer(new Answer<ContainerStatus>() { + private int count = 0; + + @Override + public ContainerStatus answer(InvocationOnMock invocationOnMock) { + count++; + if (count == 1) { + // first call to getContainerStatus returns null + return null; + } else if (count == 2) { + // second call returns a ContainerStatus with no IPs + ContainerStatus containerStatus = +
mock(ContainerStatus.class); + when(containerStatus.getIPs()).thenReturn(null); + return containerStatus; + } else { + // third call returns a ContainerStatus with one IP + ContainerStatus containerStatus = mock(ContainerStatus.class); + when(containerStatus.getIPs()) + .thenReturn(Collections.singletonList("1.2.3.4")); + return containerStatus; + } + } + }); + return componentInstance; + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/yarn-service/Configurations.md b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/yarn-service/Configurations.md index 7ec2ecb879d..8cd7910d7f7 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/yarn-service/Configurations.md +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/yarn-service/Configurations.md @@ -109,28 +109,31 @@ Above config make the service AM to be retried at max 10 times. | Name | Description | | ------------ | ------------- | -|yarn.service.client-am.retry.max-wait-ms | the max retry time in milliseconds for the service client to talk to the service AM. By default, it is set to 0, which means no retry | -|yarn.service.client-am.retry-interval-ms | the retry interval in milliseconds for the service client to talk to the service AM. By default, it is 2000, i.e. 2 seconds | -|yarn.service.container-failure.retry.max | the max number of retries for the container to be auto restarted if it fails. By default, it is set to -1, which means forever. -|yarn.service.container-failure.retry-interval-ms| the retry interval in milliseconds for the container to be restarted. By default, it is 30000, i.e. 30 seconds | -|yarn.service.container-failure.validity-interval-ms | the failure validity interval in milliseconds which when set to a value greater than 0, will not take the failures that happened outside of this interval into failure count.
By default, it is set to -1, which means that all the failures so far will be included in failure count. | -|yarn.service.am-restart.max-attempts| the max number of attempts for the framework AM -|yarn.service.am-resource.memory | the memory size in GB for the framework AM. By default, it is set to 1024 -|yarn.service.queue | the default queue to which the service will be submitted. By default, it is submitted to `default` queue -|yarn.service.base.path | the root location for the service artifacts on hdfs for a user. By default, it is under ${user_home_dir}/.yarn/ -|yarn.service.container-failure-per-component.threshold | the max number of container failures for a given component before the AM exits. -|yarn.service.node-blacklist.threshold | Maximum number of container failures on a node before the node is blacklisted by the AM -|yarn.service.failure-count-reset.window | The interval in seconds when the `yarn.service.container-failure-per-component.threshold` and `yarn.service.node-blacklist.threshold` gets reset. By default, it is 21600, i.e. 6 hours -|yarn.service.readiness-check-interval.seconds | The interval in seconds between readiness checks. By default, it is 30 seconds -|yarn.service.log.include-pattern| The regex expression for including log files whose file name matches it when aggregating the logs after the application completes. -|yarn.service.log.exclude-pattern| The regex expression for excluding log files whose file name matches it when aggregating the logs after the application completes. If the log file name matches both include and exclude pattern, this file will be excluded. -|yarn.service.rolling-log.include-pattern| The regex expression for including log files whose file name matches it when aggregating the logs while app is running. -|yarn.service.rolling-log.exclude-pattern| The regex expression for excluding log files whose file name matches it when aggregating the logs while app is running. 
If the log file name matches both include and exclude pattern, this file will be excluded. -|yarn.service.container-recovery.timeout.ms| The timeout in milliseconds after which the service AM releases all the containers of previous attempt which are not yet recovered by the RM. By default, it is set to 120000, i.e. 2 minutes. +|yarn.service.client-am.retry.max-wait-ms | Max retry time in milliseconds for the service client to talk to the service AM (default 0, which means no retry).| +|yarn.service.client-am.retry-interval-ms | Retry interval in milliseconds for the service client to talk to the service AM (default 2000, i.e. 2 seconds).| +|yarn.service.container-failure.retry.max | Max number of retries for the container to be auto restarted if it fails (default -1, which means forever).| +|yarn.service.container-failure.retry-interval-ms | Retry interval in milliseconds for the container to be restarted (default 30000, i.e. 30 seconds).| +|yarn.service.container-failure.validity-interval-ms | Failure validity interval in milliseconds. When set to a value greater than 0, the container retry policy will not take the failures that happened outside of this interval into the failure count (default -1, which means that all the failures so far will be included in the failure count).| +|yarn.service.am-restart.max-attempts | Max number of times to start the service AM, after which the service will be killed (default 20).| +|yarn.service.am-resource.memory | Memory size in GB for the service AM (default 1024).| +|yarn.service.am.java.opts | Additional JVM options for the service AM.| +|yarn.service.queue | Queue to which the service will be submitted (default submits to the `default` queue).| +|yarn.service.base.path | HDFS parent directory where service artifacts will be stored (default ${user_home_dir}/.yarn/).|
+|yarn.service.framework.path | HDFS parent directory where the service AM dependency tarball can be found.| +|yarn.service.container-failure-per-component.threshold | Max number of container failures for a given component before the AM stops the service (default 10).| +|yarn.service.node-blacklist.threshold | Maximum number of container failures on a node before the node is blacklisted by the AM (default 3).| +|yarn.service.failure-count-reset.window | Interval in seconds after which the container failure counts that will be evaluated for the `yarn.service.container-failure-per-component.threshold` and `yarn.service.node-blacklist.threshold` are reset (default 21600, i.e. 6 hours).| +|yarn.service.readiness-check-interval.seconds | Interval in seconds between readiness checks (default 30 seconds).| +|yarn.service.default-readiness-check.enabled | Whether or not the default readiness check is enabled (default true).| +|yarn.service.log.include-pattern | Regex expression for including log files by name when aggregating the logs after the application completes (default includes all files).| +|yarn.service.log.exclude-pattern | Regex expression for excluding log files by name when aggregating the logs after the application completes. If the log file name matches both include and exclude pattern, this file will be excluded (default does not exclude any files).| +|yarn.service.rolling-log.include-pattern | Regex expression for including log files by name when aggregating the logs while app is running.| +|yarn.service.rolling-log.exclude-pattern | Regex expression for excluding log files by name when aggregating the logs while app is running. If the log file name matches both include and exclude pattern, this file will be excluded.| +|yarn.service.container-recovery.timeout.ms | Timeout in milliseconds after which a newly started service AM releases all the containers of previous AM attempts which are not yet recovered from the RM (default 120000, i.e. 
2 minutes).| ## Constant variables for custom service The service framework provides some constant variables for user to configure their services. These variables are either dynamically generated by the system or are static ones such as service name defined by the user. -User can use these constants in their configurations to be dynamically substituted by the service AM.E.g. +User can use these constants in their configurations to be dynamically substituted by the service AM. E.g. ``` { "type" : "HADOOP_XML", diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/yarn-service/YarnServiceAPI.md b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/yarn-service/YarnServiceAPI.md index 429c8c1a0dd..68bf976cf81 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/yarn-service/YarnServiceAPI.md +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/yarn-service/YarnServiceAPI.md @@ -349,11 +349,13 @@ The type of placement - affinity/anti-affinity/affinity-with-cardinality with co ### ReadinessCheck -A custom command or a pluggable helper container to determine the readiness of a container of a component. Readiness for every service is different. Hence the need for a simple interface, with scope to support advanced usecases. +A check to be performed to determine the readiness of a component instance (a container). +If no readiness check is specified, the default readiness check will be used unless the yarn.service.default-readiness-check.enabled configuration property is set to false at the component or global level. +The artifact field is currently unsupported but may be implemented in the future, enabling a pluggable helper container to support advanced use cases. |Name|Description|Required|Schema|Default| |----|----|----|----|----| -|type|E.g. 
HTTP (YARN will perform a simple REST call at a regular interval and expect a 204 No content).|true|enum (HTTP, PORT)|| +|type|DEFAULT (AM checks whether the container has an IP and optionally performs a DNS lookup for the container hostname), HTTP (AM performs default checks, plus sends a REST call to the container and expects a response code between 200 and 299), or PORT (AM performs default checks, plus attempts to open a socket connection to the container on a specified port).|true|enum (DEFAULT, HTTP, PORT)|| |properties|A blob of key value pairs that will be used to configure the check.|false|object|| |artifact|Artifact of the pluggable readiness check helper container (optional). If specified, this helper container typically hosts the http uri and encapsulates the complex scripts required to perform actual container readiness check. At the end it is expected to respond a 204 No content just like the simplified use case. This pluggable framework benefits service owners who can run services without any packaging modifications. Note, artifacts of type docker only is supported for now. NOT IMPLEMENTED YET|false|Artifact||