diff --git a/hadoop-yarn-project/hadoop-yarn/bin/yarn b/hadoop-yarn-project/hadoop-yarn/bin/yarn index 022287a..41b89e1 100755 --- a/hadoop-yarn-project/hadoop-yarn/bin/yarn +++ b/hadoop-yarn-project/hadoop-yarn/bin/yarn @@ -45,6 +45,7 @@ function hadoop_usage hadoop_add_subcommand "nodemanager" daemon "run a nodemanager on each worker" hadoop_add_subcommand "proxyserver" daemon "run the web app proxy server" hadoop_add_subcommand "queue" client "prints queue information" + hadoop_add_subcommand "registrydns" daemon "run the registry DNS server" hadoop_add_subcommand "resourcemanager" daemon "run the ResourceManager" hadoop_add_subcommand "rmadmin" admin "admin tools" hadoop_add_subcommand "router" daemon "run the Router daemon" @@ -141,6 +142,11 @@ ${HADOOP_COMMON_HOME}/${HADOOP_COMMON_LIB_JARS_DIR}" queue) HADOOP_CLASSNAME=org.apache.hadoop.yarn.client.cli.QueueCLI ;; + registrydns) + HADOOP_SUBCMD_SUPPORTDAEMONIZATION="true" + HADOOP_SECURE_CLASSNAME='org.apache.hadoop.registry.server.dns.PrivilegedRegistryDNSStarter' + HADOOP_CLASSNAME='org.apache.hadoop.registry.server.dns.RegistryDNSServer' + ;; resourcemanager) HADOOP_SUBCMD_SUPPORTDAEMONIZATION="true" HADOOP_CLASSNAME='org.apache.hadoop.yarn.server.resourcemanager.ResourceManager' diff --git a/hadoop-yarn-project/hadoop-yarn/conf/yarn-env.sh b/hadoop-yarn-project/hadoop-yarn/conf/yarn-env.sh index 90a87bf..4bd1d3e 100644 --- a/hadoop-yarn-project/hadoop-yarn/conf/yarn-env.sh +++ b/hadoop-yarn-project/hadoop-yarn/conf/yarn-env.sh @@ -160,3 +160,15 @@ # See ResourceManager for some examples # #export YARN_APISERVER_OPTS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps -Xloggc:${HADOOP_LOG_DIR}/gc-apiserver.log-$(date +'%Y%m%d%H%M')" + +### +# Registry DNS specific parameters +### +# For privileged registry DNS, user to run as after dropping privileges +# This will replace the hadoop.id.str Java property in secure mode. 
+# export YARN_REGISTRYDNS_SECURE_USER=yarn + +# Supplemental options for privileged registry DNS +# By default, Hadoop uses jsvc which needs to know to launch a +# server jvm. +# export YARN_REGISTRYDNS_SECURE_EXTRA_OPTS="-jvm server" diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-registry/pom.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-registry/pom.xml index e083312..4e805cd 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-registry/pom.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-registry/pom.xml @@ -34,6 +34,21 @@ + org.slf4j + slf4j-api + + + + org.apache.hadoop + hadoop-auth + + + + org.apache.hadoop + hadoop-annotations + + + org.apache.hadoop hadoop-yarn-api @@ -70,14 +85,68 @@ + org.apache.zookeeper + zookeeper + + + + org.apache.curator + curator-client + + + org.apache.curator curator-framework org.apache.curator - curator-test - test + curator-recipes + + + + commons-cli + commons-cli + + + + commons-daemon + commons-daemon + + + + commons-io + commons-io + + + + commons-lang + commons-lang + + + + commons-net + commons-net + + + + com.fasterxml.jackson.core + jackson-annotations + + + + com.fasterxml.jackson.core + jackson-core + + + + com.fasterxml.jackson.core + jackson-databind + + + + com.google.guava + guava diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-registry/src/main/java/org/apache/hadoop/registry/server/dns/PrivilegedRegistryDNSStarter.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-registry/src/main/java/org/apache/hadoop/registry/server/dns/PrivilegedRegistryDNSStarter.java new file mode 100644 index 0000000..dd4e1b8 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-registry/src/main/java/org/apache/hadoop/registry/server/dns/PrivilegedRegistryDNSStarter.java @@ -0,0 +1,80 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.registry.server.dns; + +import org.apache.commons.daemon.Daemon; +import org.apache.commons.daemon.DaemonContext; +import org.apache.hadoop.registry.client.api.DNSOperationsFactory; +import org.apache.hadoop.util.GenericOptionsParser; +import org.apache.hadoop.util.StringUtils; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import static org.apache.hadoop.registry.client.api.RegistryConstants.DEFAULT_DNS_PORT; +import static org.apache.hadoop.registry.client.api.RegistryConstants.KEY_DNS_PORT; + +/** + * This class is used to allow the RegistryDNSServer to run on a privileged + * port (e.g. 53). 
+ */ +public class PrivilegedRegistryDNSStarter implements Daemon { + private static final Logger LOG = + LoggerFactory.getLogger(PrivilegedRegistryDNSStarter.class); + + private YarnConfiguration conf; + private RegistryDNS registryDNS; + private RegistryDNSServer registryDNSServer; + + @Override + public void init(DaemonContext context) throws Exception { + String[] args = context.getArguments(); + StringUtils.startupShutdownMessage(RegistryDNSServer.class, args, LOG); + conf = new YarnConfiguration(); + new GenericOptionsParser(conf, args); + + int port = conf.getInt(KEY_DNS_PORT, DEFAULT_DNS_PORT); + if (port < 1 || port > 1023) { + throw new RuntimeException("Must start privileged registry DNS server " + + "with '" + KEY_DNS_PORT + "' configured to a privileged port."); + } + + try { + registryDNS = (RegistryDNS) DNSOperationsFactory.createInstance(conf); + registryDNS.initializeChannels(conf); + } catch (Exception e) { + LOG.error("Error initializing Registry DNS", e); + throw e; + } + } + + @Override + public void start() throws Exception { + registryDNSServer = RegistryDNSServer.launchDNSServer(conf, registryDNS); + } + + @Override + public void stop() throws Exception { + } + + @Override + public void destroy() { + registryDNSServer.stop(); + } + +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-registry/src/main/java/org/apache/hadoop/registry/server/dns/RegistryDNS.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-registry/src/main/java/org/apache/hadoop/registry/server/dns/RegistryDNS.java index 9ffc9db..2bd6d71 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-registry/src/main/java/org/apache/hadoop/registry/server/dns/RegistryDNS.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-registry/src/main/java/org/apache/hadoop/registry/server/dns/RegistryDNS.java @@ -130,6 +130,8 @@ private ConcurrentMap zones = new ConcurrentHashMap<>(); private Name bindHost; + private boolean channelsInitialized = false; + /** * Construct the 
service. * @@ -150,6 +152,24 @@ public Thread newThread(Runnable r) { }); } + public void initializeChannels(Configuration conf) throws Exception { + if (channelsInitialized) { + return; + } + channelsInitialized = true; + int port = conf.getInt(KEY_DNS_PORT, DEFAULT_DNS_PORT); + InetAddress addr = InetAddress.getLocalHost(); + + String bindAddress = conf.get(KEY_DNS_BIND_ADDRESS); + if (bindAddress != null) { + addr = InetAddress.getByName(bindAddress); + } + + LOG.info("Opening TCP and UDP channels on {} port {}", addr, port); + addNIOUDP(addr, port); + addNIOTCP(addr, port); + } + /** * Initializes the registry. * @@ -164,17 +184,9 @@ protected void serviceInit(Configuration conf) throws Exception { try { setDomainName(conf); - int port = initializeZones(conf); - - InetAddress addr = InetAddress.getLocalHost(); - - String bindAddress = conf.get(KEY_DNS_BIND_ADDRESS); - if (bindAddress != null) { - addr = InetAddress.getByName(bindAddress); - } - addNIOUDP(addr, port); - addNIOTCP(addr, port); + initializeZones(conf); + initializeChannels(conf); } catch (IOException e) { LOG.error("Error initializing Registry DNS Server", e); throw e; @@ -189,8 +201,7 @@ protected void serviceInit(Configuration conf) throws Exception { * @return the listener port * @throws IOException */ - int initializeZones(Configuration conf) throws IOException { - int port = conf.getInt(KEY_DNS_PORT, DEFAULT_DNS_PORT); + void initializeZones(Configuration conf) throws IOException { ttl = conf.getTimeDuration(KEY_DNS_TTL, 1L, TimeUnit.SECONDS); RecordCreatorFactory.setTtl(ttl); @@ -202,8 +213,6 @@ int initializeZones(Configuration conf) throws IOException { zones.put(registryZone.getOrigin(), registryZone); initializeReverseLookupZone(conf); - - return port; } /** @@ -1412,7 +1421,7 @@ private void op(String path, ServiceRecord record, RegistryCommand command) } processor.manageDNSRecords(command); } else { - LOG.warn("Yarn Resgistry record {} does not contain {} attribute ", + LOG.warn("Yarn 
Registry record {} does not contain {} attribute ", record.toString(), YarnRegistryAttributes.YARN_PERSISTENCE); } } catch (Exception e) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-registry/src/main/java/org/apache/hadoop/registry/server/dns/RegistryDNSServer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-registry/src/main/java/org/apache/hadoop/registry/server/dns/RegistryDNSServer.java index faa5fe1..a09faa8 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-registry/src/main/java/org/apache/hadoop/registry/server/dns/RegistryDNSServer.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-registry/src/main/java/org/apache/hadoop/registry/server/dns/RegistryDNSServer.java @@ -17,11 +17,6 @@ package org.apache.hadoop.registry.server.dns; import com.google.common.base.Preconditions; -import org.apache.commons.cli.BasicParser; -import org.apache.commons.cli.CommandLine; -import org.apache.commons.cli.CommandLineParser; -import org.apache.commons.cli.Options; -import org.apache.commons.cli.ParseException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.PathNotFoundException; import org.apache.hadoop.registry.client.api.DNSOperationsFactory; @@ -65,9 +60,11 @@ /** * Creates the DNS server. * @param name the server name. + * @param registryDNS the registry DNS instance. */ - public RegistryDNSServer(String name) { + public RegistryDNSServer(String name, final RegistryDNS registryDNS) { super(name); + this.registryDNS = registryDNS; } /** @@ -83,8 +80,9 @@ protected void serviceInit(Configuration conf) throws Exception { registryOperations = new RegistryOperationsService("RegistryDNSOperations"); addService(registryOperations); - // probably need to populate with existing apps? 
- registryDNS = (RegistryDNS) DNSOperationsFactory.createInstance(conf); + if (registryDNS == null) { + registryDNS = (RegistryDNS) DNSOperationsFactory.createInstance(conf); + } addService(registryDNS); super.serviceInit(conf); @@ -231,24 +229,21 @@ private void processServiceRecord(String path, ServiceRecord record, /** * Launch the server. - * @param args command line args. + * @param conf configuration + * @param rdns registry dns instance * @return */ - static RegistryDNSServer launchDNSServer(String[] args) { + static RegistryDNSServer launchDNSServer(Configuration conf, + RegistryDNS rdns) { RegistryDNSServer dnsServer = null; Thread .setDefaultUncaughtExceptionHandler(new YarnUncaughtExceptionHandler()); - StringUtils.startupShutdownMessage(RegistryDNSServer.class, args, - LOG); try { - dnsServer = new RegistryDNSServer("RegistryDNSServer"); + dnsServer = new RegistryDNSServer("RegistryDNSServer", rdns); ShutdownHookManager.get().addShutdownHook( new CompositeService.CompositeServiceShutdownHook(dnsServer), SHUTDOWN_HOOK_PRIORITY); - YarnConfiguration conf = new YarnConfiguration(); - processCommandLine(args, conf); - new GenericOptionsParser(conf, args); dnsServer.init(conf); dnsServer.start(); } catch (Throwable t) { @@ -259,32 +254,14 @@ static RegistryDNSServer launchDNSServer(String[] args) { } /** - * Process input command line arguments. - * @param args the command line argument array. - * @param conf the configuration. 
- */ - private static void processCommandLine(String[] args, - YarnConfiguration conf) { - Options options = new Options(); - options.addOption("p", "port", true, - "the server listening port (override)"); - - CommandLineParser parser = new BasicParser(); - try { - CommandLine cmd = parser.parse(options, args); - if (cmd.hasOption("p")) { - conf.set(RegistryConstants.KEY_DNS_PORT, cmd.getOptionValue("p")); - } - } catch (ParseException e) { - LOG.error("Error parsing the command line options", e); - } - } - - /** * Lanches the server instance. * @param args the command line args. + * @throws IOException */ - public static void main(String[] args) { - launchDNSServer(args); + public static void main(String[] args) throws IOException { + StringUtils.startupShutdownMessage(RegistryDNSServer.class, args, LOG); + YarnConfiguration conf = new YarnConfiguration(); + new GenericOptionsParser(conf, args); + launchDNSServer(conf, null); } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/YarnCommands.md b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/YarnCommands.md index 8968f13..5f0886e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/YarnCommands.md +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/YarnCommands.md @@ -311,9 +311,17 @@ Usage: `yarn timelineserver` Start the TimeLineServer ### apiserver + Usage: `yarn apiserver` + Start the API-server for deploying/managing services on YARN +### registrydns + +Usage: `yarn registrydns` + +Start the RegistryDNS server + Files ----- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/yarn-service/ServiceDiscovery.md b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/yarn-service/ServiceDiscovery.md index 6318a07..55c68f7 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/yarn-service/ServiceDiscovery.md +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/yarn-service/ServiceDiscovery.md @@ -40,7 +40,8 @@ The following core functions are supported by the DNS-Server: 1. Supports creation of DNS records for end-points of the deployed YARN applications 2. Record names remain unchanged during restart of containers and/or applications -3. Supports reverse lookups (name based on IP). Note, this works only for Docker containers. +3. Supports reverse lookups (name based on IP). Note, this works only for +Docker containers because other containers share the IP of the host. 4. Supports security using the standards defined by The Domain Name System Security Extensions (DNSSEC) 5. Highly available @@ -52,26 +53,34 @@ servers). 1. Supports integration with existing DNS assets (e.g. a corporate DNS server) by acting as a DNS server for a Hadoop cluster zone/domain. The server is not intended to act as a -primary DNS server and does not forward requests to other servers. +primary DNS server and does not forward requests to other servers. Rather, a +primary DNS server can be configured to forward a zone to the registry DNS +server. 2. The DNS Server exposes a port that can receive both TCP and UDP requests per -DNS standards. The default port for DNS protocols is in a restricted, administrative port -range (5353), so the port is configurable for deployments in which the service may -not be managed via an administrative account. +DNS standards. The registry DNS server's default port (5353) is not in the +restricted range. However, existing DNS assets may only allow zone forwarding to +non-custom ports. To support this, the registry DNS server can be started in +privileged mode. ## DNS Record Name Structure -The DNS names of generated records are composed from the following elements (labels).
Note that these elements must be compatible with DNS conventions (see “Preferred Name Syntax” in RFC 1035): +The DNS names of generated records are composed from the following elements +(labels). Note that these elements must be compatible with DNS conventions +(see “Preferred Name Syntax” in [RFC 1035](https://www.ietf.org/rfc/rfc1035.txt)): * **domain** - the name of the cluster DNS domain. This name is provided as a configuration property. In addition, it is this name that is configured at a parent DNS -server as the zone name for the defined yDNS zone (the zone for which the parent DNS -server will forward requests to yDNS). E.g. yarncluster.com +server as the zone name for the defined registry DNS zone (the zone for which +the parent DNS server will forward requests to registry DNS). E.g. yarncluster.com * **username** - the name of the application deployer. This name is the simple short-name (for e.g. the primary component of the Kerberos principal) associated with the user launching the application. As the username is one of the elements of DNS names, it is expected -that this also confirms DNS name conventions (RFC 1035 linked above), so special translation is performed for names with special characters like hyphens and spaces. +that this also conforms to DNS name conventions (RFC 1035 linked above), so it +is converted to a valid DNS hostname entry using the punycode convention used +for internationalized DNS. * **application name** - the name of the deployed YARN application. This name is inferred -from the YARN registry path to the application's node. Application name, rather thn application id, was chosen as a way of making it easy for users to refer to human-readable DNS +from the YARN registry path to the application's node. Application name, +rather than application id, was chosen as a way of making it easy for users to refer to human-readable DNS names. This obviously mandates certain uniqueness properties on application names.
* **container id** - the YARN assigned ID to a container (e.g. container_e3741_1454001598828_01_000004) @@ -79,7 +88,7 @@ container_e3741_1454001598828_01_000004) component). A component is a distributed element of an application or service that is launched in a YARN container (e.g. an HBase master). One can imagine multiple components within an application. A component name is not yet a first class concept in -YARN, but is a very useful one that we are introducing here for the sake of yDNS +YARN, but is a very useful one that we are introducing here for the sake of registry DNS entries. Many frameworks like MapReduce, Slider already have component names (though, as mentioned, they are not yet supported in YARN in a first class fashion). * **api** - the api designation for the exposed endpoint @@ -93,7 +102,7 @@ maps to the application user and so on. Wherever it is not easily distinguishabl “container” or suffix such as “api”. For example, an endpoint published as a management endpoint will be referenced with the name *management-api.griduser.yarncluster.com*. * Unique application name (per user) is not currently supported/guaranteed by YARN, but -it is supported by frameworks such as Apache Slider. The yDNS service currently +it is supported by frameworks such as Apache Slider. The registry DNS service currently leverages the last element of the ZK path entry for the application as an application name. These application names have to be unique for a given user. @@ -118,20 +127,27 @@ Similarly, record removal follows a similar sequence requiring similar parsing logic to identify the specific records that should be removed). ### DNS Service initialization -* The DNS service initializes both UDP and TCP listeners on a configured port. As -noted above, the default port of 5353 is in a restricted range that is only accessible to an -account with administrative privileges. +* The DNS service initializes both UDP and TCP listeners on a configured port. 
+If a port in the restricted range is desired (such as the standard DNS port +53), the DNS service can be launched using jsvc as described in the section +on starting the DNS server. * Subsequently, the DNS service listens for inbound DNS requests. Those requests are standard DNS requests from users or other DNS servers (for example, DNS servers that have the YARN DNS service configured as a forwarder). ## Start the DNS Server -By default, the DNS runs on non-privileged port `5353`. -If it is configured to use the standard privileged port `53`, the DNS server needs to be run as root: +By default, the DNS server runs on non-privileged port `5353`. Start the server +with: ``` -sudo su - -c "yarn org.apache.hadoop.registry.server.dns.RegistryDNSServer > /${HADOOP_LOG_FOLDER}/registryDNS.log 2>&1 &" root +yarn --daemon start registrydns ``` +If the DNS server is configured to use the standard privileged port `53`, the +environment variables YARN\_REGISTRYDNS\_SECURE\_USER and +YARN\_REGISTRYDNS\_SECURE\_EXTRA\_OPTS must be uncommented in the yarn-env.sh +file. The DNS server should then be launched as root and jsvc will be used to +reduce the privileges of the daemon after the port has been bound. + ## Configuration The YARN DNS server reads its configuration properties from the yarn-site.xml file. The following are the DNS associated configuration properties: @@ -140,7 +156,7 @@ The YARN DNS server reads its configuration properties from the yarn-site.xml fi | hadoop.registry.dns.enabled | The DNS functionality is enabled for the cluster. Default is false. | | hadoop.registry.dns.domain-name | The domain name for Hadoop cluster associated records. | | hadoop.registry.dns.bind-address | Address associated with the network interface to which the DNS listener should bind. | -| hadoop.registry.dns.bind-port | The port number for the DNS listener. The default port is 5353. 
However, since that port falls in a administrator-only range, typical deployments may need to specify an alternate port. | +| hadoop.registry.dns.bind-port | The port number for the DNS listener. The default port is 5353. | | hadoop.registry.dns.dnssec.enabled | Indicates whether the DNSSEC support is enabled. Default is false. | | hadoop.registry.dns.public-key | The base64 representation of the server’s public key. Leveraged for creating the DNSKEY Record provided for DNSSEC client requests. | | hadoop.registry.dns.private-key-file | The path to the standard DNSSEC private key file. Must only be readable by the DNS launching identity. See [dnssec-keygen](https://ftp.isc.org/isc/bind/cur/9.9/doc/arm/man.dnssec-keygen.html) documentation. |