commit ae025903ccf7d34abfe92b5ad4b5a4ba1dc52d60
Author: Wangda Tan
Date:   Sun Oct 8 18:15:51 2017 -0700

    YARN-7224.002

    Change-Id: Ie7e0fe5bcb07d6a58971107f8c75f72a020a2a84

diff --git a/hadoop-yarn-project/hadoop-yarn/conf/container-executor.cfg b/hadoop-yarn-project/hadoop-yarn/conf/container-executor.cfg
index 023654b7dba..7a84d7648f7 100644
--- a/hadoop-yarn-project/hadoop-yarn/conf/container-executor.cfg
+++ b/hadoop-yarn-project/hadoop-yarn/conf/container-executor.cfg
@@ -14,3 +14,4 @@ feature.tc.enabled=0
 # docker.allowed.ro-mounts=## comma seperated volumes that can be mounted as read-only
 # docker.allowed.rw-mounts=## comma seperate volumes that can be mounted as read-write, add the yarn local and log dirs to this list to run Hadoop jobs
 # docker.privileged-containers.enabled=0
+# docker.allowed.volume-drivers=## comma separated list of allowed volume-drivers
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
index 689b6f20072..0037ab97921 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
@@ -1404,6 +1404,17 @@ public static boolean isAclEnabled(Configuration conf) {
   @Private
   public static final String DEFAULT_NM_GPU_PATH_TO_EXEC = "";
 
+  /**
+   * This setting controls the endpoint of nvidia-docker-plugin.
+   */
+  @Private
+  public static final String NVIDIA_DOCKER_PLUGIN_ENDPOINT =
+      NM_GPU_RESOURCE_PREFIX + "nvidia-docker-plugin-endpoint";
+
+  @Private
+  public static final String DEFAULT_NVIDIA_DOCKER_PLUGIN_ENDPOINT =
+      "http://localhost:3476/v1.0/docker/cli";
+
   /** NM Webapp address.**/
   public static final String NM_WEBAPP_ADDRESS = NM_PREFIX + "webapp.address";
 
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java
index 865d18df009..e8c46a20601 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java
@@ -20,7 +20,6 @@
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Optional;
-import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandlerChain;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.conf.Configuration;
@@ -325,7 +324,7 @@ public void init(Context nmContext) throws IOException {
       if (linuxContainerRuntime == null) {
         LinuxContainerRuntime runtime = new DelegatingLinuxContainerRuntime();
-        runtime.initialize(conf);
+        runtime.initialize(conf, nmContext);
         this.linuxContainerRuntime = runtime;
       }
     } catch (ContainerExecutionException e) {
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/gpu/GpuResourceHandlerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/gpu/GpuResourceHandlerImpl.java
index 7144bb2ae07..d6765d6974f 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/gpu/GpuResourceHandlerImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/gpu/GpuResourceHandlerImpl.java
@@ -35,6 +35,7 @@
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.CGroupsHandler;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandler;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandlerException;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.DockerLinuxContainerRuntime;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu.GpuDiscoverer;
 
 import java.util.ArrayList;
@@ -96,33 +97,51 @@ public GpuResourceHandlerImpl(Context nmContext,
     // Create device cgroups for the container
     cGroupsHandler.createCGroup(CGroupsHandler.CGroupController.DEVICES,
         containerIdStr);
-    try {
-      // Execute c-e to setup GPU isolation before launch the container
-      PrivilegedOperation privilegedOperation = new PrivilegedOperation(
-          PrivilegedOperation.OperationType.GPU, Arrays
-          .asList(CONTAINER_ID_CLI_OPTION, containerIdStr));
-      if (!allocation.getDeniedGPUs().isEmpty()) {
-        privilegedOperation.appendArgs(Arrays.asList(EXCLUDED_GPUS_CLI_OPTION,
-            StringUtils.join(",", allocation.getDeniedGPUs())));
+    if (!DockerLinuxContainerRuntime.isDockerContainerRequested(
+        container.getLaunchContext().getEnvironment())) {
+      // Write to the devices cgroup only for non-docker containers. The
+      // reason is that the docker engine runtime (runc) initializes the
+      // devices cgroup in its pre-hook, see:
+      // https://github.com/opencontainers/runc/blob/master/libcontainer/configs/device_defaults.go
+      //
+      // YARN by default runs the docker container inside a cgroup. If we set
+      // up devices.deny on the parent cgroup of the launched container, we
+      // can see errors like: failed to write c *:* m to devices.allow:
+      // write path-to-parent-cgroup//devices.allow:
+      // operation not permitted.
+      //
+      // To avoid this happening, if docker is requested when the container is
+      // being launched, we will not set up devices.deny for the container.
+      // Instead YARN will pass the --device parameter to the docker engine.
See GpuDockerCommandPlugin + try { + // Execute c-e to setup GPU isolation before launch the container + PrivilegedOperation privilegedOperation = new PrivilegedOperation( + PrivilegedOperation.OperationType.GPU, + Arrays.asList(CONTAINER_ID_CLI_OPTION, containerIdStr)); + if (!allocation.getDeniedGPUs().isEmpty()) { + privilegedOperation.appendArgs(Arrays.asList(EXCLUDED_GPUS_CLI_OPTION, + StringUtils.join(",", allocation.getDeniedGPUs()))); + } + + privilegedOperationExecutor.executePrivilegedOperation( + privilegedOperation, true); + } catch (PrivilegedOperationException e) { + cGroupsHandler.deleteCGroup(CGroupsHandler.CGroupController.DEVICES, + containerIdStr); + LOG.warn("Could not update cgroup for container", e); + throw new ResourceHandlerException(e); } - privilegedOperationExecutor.executePrivilegedOperation( - privilegedOperation, true); - } catch (PrivilegedOperationException e) { - cGroupsHandler.deleteCGroup(CGroupsHandler.CGroupController.DEVICES, - containerIdStr); - LOG.warn("Could not update cgroup for container", e); - throw new ResourceHandlerException(e); - } - - List ret = new ArrayList<>(); - ret.add(new PrivilegedOperation( - PrivilegedOperation.OperationType.ADD_PID_TO_CGROUP, - PrivilegedOperation.CGROUP_ARG_PREFIX - + cGroupsHandler.getPathForCGroupTasks( - CGroupsHandler.CGroupController.DEVICES, containerIdStr))); + List ret = new ArrayList<>(); + ret.add(new PrivilegedOperation( + PrivilegedOperation.OperationType.ADD_PID_TO_CGROUP, + PrivilegedOperation.CGROUP_ARG_PREFIX + cGroupsHandler + .getPathForCGroupTasks(CGroupsHandler.CGroupController.DEVICES, + containerIdStr))); - return ret; + return ret; + } + return null; } @VisibleForTesting diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DefaultLinuxContainerRuntime.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DefaultLinuxContainerRuntime.java index e9c58b83470..b50d56c0e01 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DefaultLinuxContainerRuntime.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DefaultLinuxContainerRuntime.java @@ -25,6 +25,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor; +import org.apache.hadoop.yarn.server.nodemanager.Context; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperation; import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationException; @@ -67,7 +68,7 @@ public DefaultLinuxContainerRuntime(PrivilegedOperationExecutor } @Override - public void initialize(Configuration conf) + public void initialize(Configuration conf, Context nmContext) throws ContainerExecutionException { this.conf = conf; } diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DelegatingLinuxContainerRuntime.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DelegatingLinuxContainerRuntime.java index 517a4e2bad4..dd10617a81c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DelegatingLinuxContainerRuntime.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DelegatingLinuxContainerRuntime.java @@ -25,6 +25,7 @@ import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.server.nodemanager.Context; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationExecutor; import org.apache.hadoop.yarn.server.nodemanager.containermanager.runtime.ContainerExecutionException; @@ -57,7 +58,7 @@ EnumSet.noneOf(LinuxContainerRuntimeConstants.RuntimeType.class); @Override - public void initialize(Configuration conf) + public void initialize(Configuration conf, Context nmContext) throws ContainerExecutionException { String[] configuredRuntimes = conf.getTrimmedStrings( YarnConfiguration.LINUX_CONTAINER_RUNTIME_ALLOWED_RUNTIMES, @@ -77,19 +78,19 @@ public void initialize(Configuration conf) LinuxContainerRuntimeConstants.RuntimeType.JAVASANDBOX)) { javaSandboxLinuxContainerRuntime = new JavaSandboxLinuxContainerRuntime( PrivilegedOperationExecutor.getInstance(conf)); - javaSandboxLinuxContainerRuntime.initialize(conf); + javaSandboxLinuxContainerRuntime.initialize(conf, nmContext); } if (isRuntimeAllowed( LinuxContainerRuntimeConstants.RuntimeType.DOCKER)) { dockerLinuxContainerRuntime = new DockerLinuxContainerRuntime( PrivilegedOperationExecutor.getInstance(conf)); - dockerLinuxContainerRuntime.initialize(conf); + dockerLinuxContainerRuntime.initialize(conf, nmContext); } if (isRuntimeAllowed( LinuxContainerRuntimeConstants.RuntimeType.DEFAULT)) { defaultLinuxContainerRuntime = new DefaultLinuxContainerRuntime( PrivilegedOperationExecutor.getInstance(conf)); - defaultLinuxContainerRuntime.initialize(conf); + defaultLinuxContainerRuntime.initialize(conf, nmContext); } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DockerLinuxContainerRuntime.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DockerLinuxContainerRuntime.java index 20133062c4e..0a9e1f29ce9 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DockerLinuxContainerRuntime.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DockerLinuxContainerRuntime.java @@ -21,6 +21,9 @@ package org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime; import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.yarn.server.nodemanager.Context; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.DockerCommandPlugin; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.ResourcePlugin; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.classification.InterfaceAudience; @@ -172,6 +175,7 @@ "YARN_CONTAINER_RUNTIME_DOCKER_LOCAL_RESOURCE_MOUNTS"; private Configuration conf; + private Context nmContext; private DockerClient dockerClient; private PrivilegedOperationExecutor privilegedOperationExecutor; private Set allowedNetworks = new HashSet<>(); @@ -220,14 +224,14 @@ public DockerLinuxContainerRuntime(PrivilegedOperationExecutor * Create an instance using the given {@link PrivilegedOperationExecutor} * instance for performing operations and the given {@link CGroupsHandler} * instance. This constructor is intended for use in testing. - * - * @param privilegedOperationExecutor the {@link PrivilegedOperationExecutor} + * @param privilegedOperationExecutor the {@link PrivilegedOperationExecutor} * instance * @param cGroupsHandler the {@link CGroupsHandler} instance */ @VisibleForTesting - public DockerLinuxContainerRuntime(PrivilegedOperationExecutor - privilegedOperationExecutor, CGroupsHandler cGroupsHandler) { + public DockerLinuxContainerRuntime( + PrivilegedOperationExecutor privilegedOperationExecutor, + CGroupsHandler cGroupsHandler) { this.privilegedOperationExecutor = privilegedOperationExecutor; if (cGroupsHandler == null) { @@ -239,8 +243,9 @@ public DockerLinuxContainerRuntime(PrivilegedOperationExecutor } @Override - public void initialize(Configuration conf) + public void initialize(Configuration conf, Context nmContext) throws ContainerExecutionException { + this.nmContext = nmContext; this.conf = conf; dockerClient = new DockerClient(conf); allowedNetworks.clear(); @@ -623,6 +628,19 @@ public void launchContainer(ContainerRuntimeContext ctx) runCommand.groupAdd(groups); } + // use plugins to update docker run command. 
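+    // Each ResourcePlugin registered in the ResourcePluginManager may expose
+    // a DockerCommandPlugin; when it does, that plugin gets a chance to
+    // mutate the run command (for example devices, read-only mounts or the
+    // volume driver) before the command is written to the docker command
+    // file below.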
+ if (nmContext != null + && nmContext.getResourcePluginManager().getNameToPlugins() != null) { + for (ResourcePlugin plugin : nmContext.getResourcePluginManager() + .getNameToPlugins().values()) { + DockerCommandPlugin dockerCommandPlugin = + plugin.getDockerCommandPluginInstance(); + if (dockerCommandPlugin != null) { + dockerCommandPlugin.updateDockerRunCommand(runCommand, container); + } + } + } + String commandFile = dockerClient.writeCommandToTempFile(runCommand, containerIdStr); PrivilegedOperation launchOp = buildLaunchOp(ctx, diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/JavaSandboxLinuxContainerRuntime.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/JavaSandboxLinuxContainerRuntime.java index cfafcdef56f..245b38faaf5 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/JavaSandboxLinuxContainerRuntime.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/JavaSandboxLinuxContainerRuntime.java @@ -25,6 +25,7 @@ import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.security.Groups; import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.server.nodemanager.Context; import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationExecutor; import org.apache.hadoop.yarn.server.nodemanager.containermanager.runtime.ContainerExecutionException; import org.apache.hadoop.yarn.server.nodemanager.containermanager.runtime.ContainerRuntimeContext; @@ -143,7 +144,7 @@ public JavaSandboxLinuxContainerRuntime( } @Override - public void initialize(Configuration conf) + public void initialize(Configuration conf, Context nmContext) throws ContainerExecutionException { this.configuration = conf; this.sandboxMode = @@ -151,7 +152,7 @@ public void initialize(Configuration conf) this.configuration.get(YARN_CONTAINER_SANDBOX, YarnConfiguration.DEFAULT_YARN_CONTAINER_SANDBOX)); - super.initialize(conf); + super.initialize(conf, nmContext); } /** diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/LinuxContainerRuntime.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/LinuxContainerRuntime.java index cd7a2f3efee..e28e1cbb5be 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/LinuxContainerRuntime.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/LinuxContainerRuntime.java @@ -23,6 +23,7 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.yarn.server.nodemanager.Context; import 
org.apache.hadoop.yarn.server.nodemanager.containermanager.runtime.ContainerExecutionException;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.runtime.ContainerRuntime;
 
@@ -38,9 +39,10 @@
    * Initialize the runtime.
    *
    * @param conf the {@link Configuration} to use
+   * @param nmContext the NodeManager {@link Context}
    * @throws ContainerExecutionException if an error occurs while initializing
    * the runtime
    */
-  void initialize(Configuration conf) throws ContainerExecutionException;
+  void initialize(Configuration conf, Context nmContext) throws ContainerExecutionException;
 }
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/docker/DockerRunCommand.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/docker/DockerRunCommand.java
index c7bf827f545..8734ba6b8de 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/docker/DockerRunCommand.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/docker/DockerRunCommand.java
@@ -76,6 +76,11 @@ public DockerRunCommand addReadOnlyMountLocation(String sourcePath, String
     return this;
   }
 
+  public DockerRunCommand setVolumeDriver(String volumeDriver) {
+    super.addCommandArguments("volume-driver", volumeDriver);
+    return this;
+  }
+
   public DockerRunCommand setCGroupParent(String parentPath) {
     super.addCommandArguments("cgroup-parent", parentPath);
     return this;
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/DockerCommandPlugin.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/DockerCommandPlugin.java
new file mode 100644
index 00000000000..7662a4cc9cf
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/DockerCommandPlugin.java
@@ -0,0 +1,22 @@
+package org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin;
+
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.docker.DockerRunCommand;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.runtime.ContainerExecutionException;
+
+/**
+ * Interface that allows different resource plugins (e.g. GPU) to update the
+ * docker run command without adding logic to the Docker runtime.
+ */
+public interface DockerCommandPlugin {
+  /**
+   * Update the docker run command.
+   * @param dockerRunCommand the docker run command
+   * @param container the NM container
+   * @throws ContainerExecutionException if any issue occurs
+   */
+  void updateDockerRunCommand(DockerRunCommand dockerRunCommand,
+      Container container) throws ContainerExecutionException;
+
+  // Add support for other docker commands when required.
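+  //
+  // Illustrative sketch only (not part of the original patch): a plugin
+  // returned from ResourcePlugin#getDockerCommandPluginInstance() could
+  // adjust the run command before launch roughly like:
+  //   dockerRunCommand.addDevice("/dev/nvidia0", "/dev/nvidia0");
+  //   dockerRunCommand.addReadOnlyMountLocation("/src/path", "/dst/path", true);
+  //   dockerRunCommand.setVolumeDriver("nvidia-docker");
+  // The device path, mount paths and driver name above are hypothetical.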
+}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/ResourcePlugin.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/ResourcePlugin.java
index 6e134b34efb..99a18edf7dc 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/ResourcePlugin.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/ResourcePlugin.java
@@ -24,6 +24,7 @@
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.CGroupsHandler;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandler;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandlerChain;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.DockerLinuxContainerRuntime;
 
 /**
  * {@link ResourcePlugin} is an interface for node manager to easier support
@@ -80,4 +81,14 @@ ResourceHandler createResourceHandler(Context nmContext,
    * @throws YarnException if any issue occurs
    */
   void cleanup() throws YarnException;
+
+  /**
+   * Get the {@link DockerCommandPlugin} for this plugin. This will be invoked
+   * by {@link DockerLinuxContainerRuntime} when executing docker commands
+   * such as run/stop/pull, etc.
+   *
+   * @return a DockerCommandPlugin instance, or null if the plugin does not
+   *         need to update docker commands.
+   */
+  DockerCommandPlugin getDockerCommandPluginInstance();
 }
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuDockerCommandPlugin.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuDockerCommandPlugin.java
new file mode 100644
index 00000000000..0b2f4063d16
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuDockerCommandPlugin.java
@@ -0,0 +1,249 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +package org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu; + +import org.apache.commons.io.IOUtils; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.yarn.api.records.ResourceInformation; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ResourceMappings; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandlerException; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.docker.DockerRunCommand; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.DockerCommandPlugin; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.runtime.ContainerExecutionException; + +import java.io.Serializable; +import java.io.StringWriter; +import java.net.URL; +import java.net.URLConnection; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +public class GpuDockerCommandPlugin implements DockerCommandPlugin { + final static Log LOG = LogFactory + .getLog(GpuDockerCommandPlugin.class); + + private Configuration conf; + private Map> additionalCommands = null; + + // Known option + private String DEVICE_OPTION = "--device"; + private String VOLUME_DRIVER_OPTION = "--volume-driver"; + private String MOUNT_RO_OPTION = "--volume"; + + public GpuDockerCommandPlugin(Configuration conf) { + this.conf = conf; + } + + // Get value from key=value + // Throw exception if '=' not found + private String getValue(String input) throws IllegalArgumentException { + int index = input.indexOf('='); + if (index < 0) { + throw new IllegalArgumentException( + "Failed to locate '=' from input=" + input); + } + return input.substring(index + 1); + } + + private void addToCommand(String key, String value) { + if (additionalCommands == null) { + additionalCommands = new HashMap<>(); + } + if (!additionalCommands.containsKey(key)) { + additionalCommands.put(key, new HashSet<>()); + } + additionalCommands.get(key).add(value); + } + + private void init() throws ContainerExecutionException { + String endpoint = conf.get(YarnConfiguration.NVIDIA_DOCKER_PLUGIN_ENDPOINT, + YarnConfiguration.DEFAULT_NVIDIA_DOCKER_PLUGIN_ENDPOINT); + if (null == endpoint || endpoint.isEmpty()) { + LOG.info(YarnConfiguration.NVIDIA_DOCKER_PLUGIN_ENDPOINT + + " set to empty, skip init .."); + return; + } else { + String cliOptions; + try { + // Talk to plugin server and get options + URL url = new URL(endpoint); + URLConnection uc = url.openConnection(); + uc.setRequestProperty("X-Requested-With", "Curl"); + + StringWriter writer = new StringWriter(); + IOUtils.copy(uc.getInputStream(), writer, "utf-8"); + cliOptions = writer.toString(); + + LOG.info("Additional docker CLI options from plugin to run GPU " + + "containers:" + cliOptions); + + // Parse cli options + // Examples like: + // --device=/dev/nvidiactl --device=/dev/nvidia-uvm --device=/dev/nvidia0 + // --volume-driver=nvidia-docker + // --volume=nvidia_driver_352.68:/usr/local/nvidia:ro + + for (String str : cliOptions.split(" ")) { + str = str.trim(); + if (str.startsWith(DEVICE_OPTION)) { + addToCommand(DEVICE_OPTION, getValue(str)); + } else if 
(str.startsWith(VOLUME_DRIVER_OPTION)) { + addToCommand(VOLUME_DRIVER_OPTION, getValue(str)); + } else if (str.startsWith(MOUNT_RO_OPTION)) { + String mount = getValue(str); + if (!mount.endsWith(":ro")) { + throw new IllegalArgumentException( + "Should not have mount other than ro, command=" + str); + } + addToCommand(MOUNT_RO_OPTION, + mount.substring(0, mount.lastIndexOf(':'))); + } else { + throw new IllegalArgumentException("Unsupported option:" + str); + } + } + + // Verify options + for (Map.Entry> option : additionalCommands + .entrySet()) { + String key = option.getKey(); + Set values = option.getValue(); + if (key.equals(DEVICE_OPTION)) { + continue; + } else if (key.equals(MOUNT_RO_OPTION)) { + for (String value : values) { + int idx = value.indexOf(':'); + value.substring(0, idx); + value.substring(idx + 1); + continue; + } + } else if (key.equals(VOLUME_DRIVER_OPTION)) { + if (values.size() > 1) { + throw new ContainerExecutionException( + "Only support at most one " + key); + } + } else { + throw new ContainerExecutionException("Unsupported option:" + key); + } + } + } catch (Exception e) { + LOG.warn("Exception of " + this.getClass().getSimpleName() + " init:", + e); + throw new ContainerExecutionException(e); + } + } + } + + private int getGpuIndexFromDeviceName(String device) { + final String NVIDIA = "nvidia"; + int idx = device.lastIndexOf(NVIDIA); + if (idx < 0) { + return -1; + } + // Get last part + String str = device.substring(idx + NVIDIA.length()); + for (int i = 0; i < str.length(); i++) { + if (!Character.isDigit(str.charAt(i))) { + return -1; + } + } + return Integer.valueOf(str); + } + + @Override + public synchronized void updateDockerRunCommand( + DockerRunCommand dockerRunCommand, Container container) + throws ContainerExecutionException { + ResourceMappings resourceMappings = container.getResourceMappings(); + + // Copy of assigned Resources + Set assignedResources = null; + if (resourceMappings != null) { + assignedResources = new HashSet<>(); + for (Serializable s : resourceMappings.getAssignedResources( + ResourceInformation.GPU_URI)) { + assignedResources.add(Integer.parseInt((String) s)); + } + } + + if (assignedResources == null || assignedResources.isEmpty()) { + // When no GPU resource assigned, don't need to update docker command. + return; + } + + // Do lazy initialization of gpu-docker plugin + if (additionalCommands == null) { + init(); + } + + // Write to dockerRunCommand + for (Map.Entry> option : additionalCommands + .entrySet()) { + String key = option.getKey(); + Set values = option.getValue(); + if (key.equals(DEVICE_OPTION)) { + int foundGpuDevices = 0; + for (String value : values) { + Integer gpuIdx = getGpuIndexFromDeviceName(value); + if (gpuIdx >= 0) { + // When specified is a GPU card (device name like /dev/nvidia[n] + // Check if it is on allowed list + if (assignedResources.contains(gpuIdx)) { + foundGpuDevices++; + dockerRunCommand.addDevice(value, value); + } + } else { + // When gpuIdx < 0, it is a controller device (such as + // /dev/nvidiactl). In this case, add device directly. + dockerRunCommand.addDevice(value, value); + } + } + + // Cannot get all assigned Gpu devices from docker plugin output + if (foundGpuDevices < assignedResources.size()) { + // TODO: We can do better for this, instead directly compare device + // name, we should compare device's minor number with specified GPU + // minor number. 
+ throw new ContainerExecutionException( + "Cannot get all assigned Gpu devices from docker plugin output"); + } + } else if (key.equals(MOUNT_RO_OPTION)) { + for (String value : values) { + int idx = value.indexOf(':'); + String source = value.substring(0, idx); + String target = value.substring(idx + 1); + dockerRunCommand.addReadOnlyMountLocation(source, target, true); + } + } else if (key.equals(VOLUME_DRIVER_OPTION)) { + for (String value : values) { + dockerRunCommand.setVolumeDriver(value); + } + } else { + throw new ContainerExecutionException("Unsupported option:" + key); + } + } + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuResourcePlugin.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuResourcePlugin.java index 9576ce7fec2..1fb35db67b7 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuResourcePlugin.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuResourcePlugin.java @@ -24,17 +24,20 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.CGroupsHandler; import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandler; import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.gpu.GpuResourceHandlerImpl; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.DockerCommandPlugin; import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.NodeResourceUpdaterPlugin; import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.ResourcePlugin; public class GpuResourcePlugin implements ResourcePlugin { private ResourceHandler gpuResourceHandler = null; private GpuNodeResourceUpdateHandler resourceDiscoverHandler = null; + private GpuDockerCommandPlugin dockerCommandPlugin = null; @Override public synchronized void initialize(Context context) throws YarnException { resourceDiscoverHandler = new GpuNodeResourceUpdateHandler(); GpuDiscoverer.getInstance().initialize(context.getConf()); + dockerCommandPlugin = new GpuDockerCommandPlugin(context.getConf()); } @Override @@ -58,4 +61,8 @@ public synchronized NodeResourceUpdaterPlugin getNodeResourceHandlerInstance() { public void cleanup() throws YarnException { // Do nothing. 
} + + public DockerCommandPlugin getDockerCommandPluginInstance() { + return dockerCommandPlugin; + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/utils/docker-util.c b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/utils/docker-util.c index 860320d907d..b76e45f22ac 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/utils/docker-util.c +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/utils/docker-util.c @@ -206,6 +206,8 @@ const char *get_docker_error_message(const int error_code) { return "Mount access error"; case INVALID_DOCKER_DEVICE: return "Invalid docker device"; + case INVALID_DOCKER_VOLUME_DRIVER: + return "Invalid docker volume-driver"; default: return "Unknown error"; } @@ -532,6 +534,24 @@ static int set_hostname(const struct configuration *command_config, char *out, c return add_param_to_command(command_config, "hostname", "--hostname=", 1, out, outlen); } +static int set_volume_driver(const struct configuration *command_config, + const struct configuration *conf, + char *out, const size_t outlen) { + int ret = 0; + ret = add_param_to_command_if_allowed(command_config, conf, "volume-driver", + "docker.allowed.volume-drivers", + "--volume-driver=", + 0, 0, out, outlen); + if (ret != 0) { + fprintf(ERRORFILE, + "Could not find requested volume-driver in allowed volume-drivers\n"); + ret = INVALID_DOCKER_VOLUME_DRIVER; + memset(out, 0, outlen); + } + + return ret; +} + static int set_group_add(const struct configuration *command_config, char *out, const size_t outlen) { int i = 0, ret = 0; char **group_add = get_configuration_values_delimiter("group-add", DOCKER_COMMAND_FILE_SECTION, command_config, ","); @@ -929,6 +949,11 @@ int get_docker_run_command(const char *command_file, const struct configuration return ret; } + ret = set_volume_driver(&command_config, conf, out, outlen); + if (ret != 0) { + return ret; + } + ret = add_ro_mounts(&command_config, conf, out, outlen); if (ret != 0) { return ret; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/utils/docker-util.h b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/utils/docker-util.h index 37ec88077c5..3f3b0fe07c6 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/utils/docker-util.h +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/utils/docker-util.h @@ -49,7 +49,8 @@ enum docker_error_codes { INVALID_DOCKER_RW_MOUNT, MOUNT_ACCESS_ERROR, INVALID_DOCKER_DEVICE, - INVALID_DOCKER_STOP_COMMAND + INVALID_DOCKER_STOP_COMMAND, + INVALID_DOCKER_VOLUME_DRIVER }; /** diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/utils/test_docker_util.cc b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/utils/test_docker_util.cc index c627ca84e4f..cb6e42f641f 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/utils/test_docker_util.cc +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/utils/test_docker_util.cc @@ -1119,4 +1119,58 @@ namespace ContainerExecutor { } } + TEST_F(TestDockerUtil, test_set_volume_driver) { + struct configuration container_cfg; + const int buff_len = 1024; + char buff[buff_len]; + int ret = 0; + std::string container_executor_cfg_contents = "[docker]\n docker.allowed.volume-drivers=driver-1,driver-2"; + std::vector > file_cmd_vec; + file_cmd_vec.push_back(std::make_pair( + "[docker-command-execution]\n docker-command=run\n volume-driver=driver-1", "--volume-driver='driver-1' ")); + file_cmd_vec.push_back(std::make_pair( + "[docker-command-execution]\n docker-command=run\n volume-driver=driver-2", "--volume-driver='driver-2' ")); + file_cmd_vec.push_back(std::make_pair( + "[docker-command-execution]\n docker-command=run", "")); + write_container_executor_cfg(container_executor_cfg_contents); + ret = read_config(container_executor_cfg_file.c_str(), &container_cfg); + + std::vector >::const_iterator itr; + if (ret != 0) { + FAIL(); + } + for (itr = file_cmd_vec.begin(); itr != file_cmd_vec.end(); ++itr) { + struct configuration cmd_cfg; + memset(buff, 0, buff_len); + write_command_file(itr->first); + ret = read_config(docker_command_file.c_str(), &cmd_cfg); + if (ret != 0) { + FAIL(); + } + ret = set_volume_driver(&cmd_cfg, &container_cfg, buff, buff_len); + ASSERT_EQ(0, ret); + ASSERT_STREQ(itr->second.c_str(), buff); + } + struct configuration cmd_cfg_1; + write_command_file("[docker-command-execution]\n docker-command=run\n volume-driver=driver-3"); + ret = read_config(docker_command_file.c_str(), &cmd_cfg_1); + if (ret != 0) { + FAIL(); + } + strcpy(buff, "test string"); + ret = set_volume_driver(&cmd_cfg_1, &container_cfg, buff, buff_len); + ASSERT_EQ(INVALID_DOCKER_VOLUME_DRIVER, ret); + ASSERT_EQ(0, strlen(buff)); + + container_executor_cfg_contents = "[docker]\n"; + write_container_executor_cfg(container_executor_cfg_contents); + ret = read_config(container_executor_cfg_file.c_str(), &container_cfg); + if (ret != 0) { + FAIL(); + } + strcpy(buff, "test string"); + ret = set_volume_driver(&cmd_cfg_1, &container_cfg, buff, buff_len); + ASSERT_EQ(INVALID_DOCKER_VOLUME_DRIVER, ret); + ASSERT_EQ(0, strlen(buff)); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutorWithMocks.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutorWithMocks.java index 3dfa625ecd3..e1d9c69444d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutorWithMocks.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutorWithMocks.java @@ -158,7 +158,7 @@ public void setup() throws IOException, ContainerExecutionException { mockPrivilegedExec); dirsHandler = new LocalDirsHandlerService(); dirsHandler.init(conf); - linuxContainerRuntime.initialize(conf); + linuxContainerRuntime.initialize(conf, null); mockExec = new 
LinuxContainerExecutor(linuxContainerRuntime); mockExec.setConf(conf); mockExecMockRuntime = new LinuxContainerExecutor(mockLinuxContainerRuntime); @@ -315,7 +315,7 @@ public void testContainerLaunchError() DefaultLinuxContainerRuntime(PrivilegedOperationExecutor.getInstance( conf)); - linuxContainerRuntime.initialize(conf); + linuxContainerRuntime.initialize(conf, null); exec = new LinuxContainerExecutor(linuxContainerRuntime); mockExec = spy(exec); @@ -545,7 +545,7 @@ public void testNoExitCodeFromPrivilegedOperation() throws Exception { any(File.class), any(Map.class), anyBoolean(), anyBoolean()); LinuxContainerRuntime runtime = new DefaultLinuxContainerRuntime( spyPrivilegedExecutor); - runtime.initialize(conf); + runtime.initialize(conf, null); mockExec = new LinuxContainerExecutor(runtime); mockExec.setConf(conf); LinuxContainerExecutor lce = new LinuxContainerExecutor(runtime) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/TestDelegatingLinuxContainerRuntime.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/TestDelegatingLinuxContainerRuntime.java index 7f4bbc4d375..907b1222ed6 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/TestDelegatingLinuxContainerRuntime.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/TestDelegatingLinuxContainerRuntime.java @@ -50,7 +50,7 @@ public void testIsRuntimeAllowedDefault() throws Exception { YarnConfiguration.DEFAULT_LINUX_CONTAINER_RUNTIME_ALLOWED_RUNTIMES[0]); System.out.println(conf.get( YarnConfiguration.LINUX_CONTAINER_RUNTIME_ALLOWED_RUNTIMES)); - delegatingLinuxContainerRuntime.initialize(conf); + delegatingLinuxContainerRuntime.initialize(conf, null); assertTrue(delegatingLinuxContainerRuntime.isRuntimeAllowed( LinuxContainerRuntimeConstants.RuntimeType.DEFAULT)); assertFalse(delegatingLinuxContainerRuntime.isRuntimeAllowed( @@ -63,7 +63,7 @@ public void testIsRuntimeAllowedDefault() throws Exception { public void testIsRuntimeAllowedDocker() throws Exception { conf.set(YarnConfiguration.LINUX_CONTAINER_RUNTIME_ALLOWED_RUNTIMES, "docker"); - delegatingLinuxContainerRuntime.initialize(conf); + delegatingLinuxContainerRuntime.initialize(conf, null); assertTrue(delegatingLinuxContainerRuntime.isRuntimeAllowed( LinuxContainerRuntimeConstants.RuntimeType.DOCKER)); assertFalse(delegatingLinuxContainerRuntime.isRuntimeAllowed( @@ -76,7 +76,7 @@ public void testIsRuntimeAllowedDocker() throws Exception { public void testIsRuntimeAllowedJavaSandbox() throws Exception { conf.set(YarnConfiguration.LINUX_CONTAINER_RUNTIME_ALLOWED_RUNTIMES, "javasandbox"); - delegatingLinuxContainerRuntime.initialize(conf); + delegatingLinuxContainerRuntime.initialize(conf, null); assertTrue(delegatingLinuxContainerRuntime.isRuntimeAllowed( LinuxContainerRuntimeConstants.RuntimeType.JAVASANDBOX)); assertFalse(delegatingLinuxContainerRuntime.isRuntimeAllowed( @@ -89,7 +89,7 @@ public void testIsRuntimeAllowedJavaSandbox() throws Exception { public void testIsRuntimeAllowedMultiple() throws Exception { 
conf.set(YarnConfiguration.LINUX_CONTAINER_RUNTIME_ALLOWED_RUNTIMES, "docker,javasandbox"); - delegatingLinuxContainerRuntime.initialize(conf); + delegatingLinuxContainerRuntime.initialize(conf, null); assertTrue(delegatingLinuxContainerRuntime.isRuntimeAllowed( LinuxContainerRuntimeConstants.RuntimeType.DOCKER)); assertTrue(delegatingLinuxContainerRuntime.isRuntimeAllowed( @@ -102,7 +102,7 @@ public void testIsRuntimeAllowedMultiple() throws Exception { public void testIsRuntimeAllowedAll() throws Exception { conf.set(YarnConfiguration.LINUX_CONTAINER_RUNTIME_ALLOWED_RUNTIMES, "default,docker,javasandbox"); - delegatingLinuxContainerRuntime.initialize(conf); + delegatingLinuxContainerRuntime.initialize(conf, null); assertTrue(delegatingLinuxContainerRuntime.isRuntimeAllowed( LinuxContainerRuntimeConstants.RuntimeType.DEFAULT)); assertTrue(delegatingLinuxContainerRuntime.isRuntimeAllowed( @@ -116,7 +116,7 @@ public void testJavaSandboxNotAllowedButPermissive() throws Exception { conf.set(YarnConfiguration.LINUX_CONTAINER_RUNTIME_ALLOWED_RUNTIMES, "default,docker"); conf.set(YarnConfiguration.YARN_CONTAINER_SANDBOX, "permissive"); - delegatingLinuxContainerRuntime.initialize(conf); + delegatingLinuxContainerRuntime.initialize(conf, null); ContainerRuntime runtime = delegatingLinuxContainerRuntime.pickContainerRuntime(env); assertTrue(runtime instanceof DefaultLinuxContainerRuntime); @@ -129,7 +129,7 @@ public void testJavaSandboxNotAllowedButPermissiveDockerRequested() conf.set(YarnConfiguration.LINUX_CONTAINER_RUNTIME_ALLOWED_RUNTIMES, "default,docker"); conf.set(YarnConfiguration.YARN_CONTAINER_SANDBOX, "permissive"); - delegatingLinuxContainerRuntime.initialize(conf); + delegatingLinuxContainerRuntime.initialize(conf, null); ContainerRuntime runtime = delegatingLinuxContainerRuntime.pickContainerRuntime(env); assertTrue(runtime instanceof DockerLinuxContainerRuntime); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/TestDockerContainerRuntime.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/TestDockerContainerRuntime.java index fbfee545f5d..f399ee9bb79 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/TestDockerContainerRuntime.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/TestDockerContainerRuntime.java @@ -29,6 +29,7 @@ import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor; +import org.apache.hadoop.yarn.server.nodemanager.Context; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperation; import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationException; @@ -36,6 +37,9 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.CGroupsHandler; import 
org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandlerModule; import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.docker.DockerRunCommand; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.DockerCommandPlugin; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.ResourcePlugin; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.ResourcePluginManager; import org.apache.hadoop.yarn.server.nodemanager.containermanager.runtime.ContainerExecutionException; import org.apache.hadoop.yarn.server.nodemanager.containermanager.runtime.ContainerRuntimeConstants; import org.apache.hadoop.yarn.server.nodemanager.containermanager.runtime.ContainerRuntimeContext; @@ -288,7 +292,7 @@ public void testDockerContainerLaunch() IOException { DockerLinuxContainerRuntime runtime = new DockerLinuxContainerRuntime( mockExecutor, mockCGroupsHandler); - runtime.initialize(conf); + runtime.initialize(conf, null); runtime.launchContainer(builder.build()); PrivilegedOperation op = capturePrivilegedOperationAndVerifyArgs(); @@ -343,7 +347,7 @@ public void testContainerLaunchWithUserRemapping() DockerLinuxContainerRuntime runtime = new DockerLinuxContainerRuntime( mockExecutor, mockCGroupsHandler); - runtime.initialize(conf); + runtime.initialize(conf, null); runtime.launchContainer(builder.build()); PrivilegedOperation op = capturePrivilegedOperationAndVerifyArgs(); @@ -425,7 +429,7 @@ public void testAllowedNetworksConfiguration() throws DockerLinuxContainerRuntime runtime = new DockerLinuxContainerRuntime(mockExecutor, mockCGroupsHandler); - runtime.initialize(conf); + runtime.initialize(conf, null); //invalid default network configuration - sdn2 is included in allowed // networks @@ -441,7 +445,7 @@ public void testAllowedNetworksConfiguration() throws try { runtime = new DockerLinuxContainerRuntime(mockExecutor, mockCGroupsHandler); - runtime.initialize(conf); + runtime.initialize(conf, null); Assert.fail("Invalid default network configuration should did not " + "trigger initialization failure."); } catch (ContainerExecutionException e) { @@ -457,7 +461,7 @@ public void testAllowedNetworksConfiguration() throws validDefaultNetwork); runtime = new DockerLinuxContainerRuntime(mockExecutor, mockCGroupsHandler); - runtime.initialize(conf); + runtime.initialize(conf, null); } @Test @@ -467,7 +471,7 @@ public void testContainerLaunchWithNetworkingDefaults() PrivilegedOperationException { DockerLinuxContainerRuntime runtime = new DockerLinuxContainerRuntime(mockExecutor, mockCGroupsHandler); - runtime.initialize(conf); + runtime.initialize(conf, null); Random randEngine = new Random(); String disallowedNetwork = "sdn" + Integer.toString(randEngine.nextInt()); @@ -557,7 +561,7 @@ public void testContainerLaunchWithCustomNetworks() customNetwork1); //this should cause no failures. 
- runtime.initialize(conf); + runtime.initialize(conf, null); runtime.launchContainer(builder.build()); PrivilegedOperation op = capturePrivilegedOperationAndVerifyArgs(); List args = op.getArguments(); @@ -661,7 +665,7 @@ public void testLaunchPrivilegedContainersInvalidEnvVar() IOException{ DockerLinuxContainerRuntime runtime = new DockerLinuxContainerRuntime( mockExecutor, mockCGroupsHandler); - runtime.initialize(conf); + runtime.initialize(conf, null); env.put(DockerLinuxContainerRuntime .ENV_DOCKER_CONTAINER_RUN_PRIVILEGED_CONTAINER, "invalid-value"); @@ -690,7 +694,7 @@ public void testLaunchPrivilegedContainersWithDisabledSetting() IOException{ DockerLinuxContainerRuntime runtime = new DockerLinuxContainerRuntime( mockExecutor, mockCGroupsHandler); - runtime.initialize(conf); + runtime.initialize(conf, null); env.put(DockerLinuxContainerRuntime .ENV_DOCKER_CONTAINER_RUN_PRIVILEGED_CONTAINER, "true"); @@ -713,7 +717,7 @@ public void testLaunchPrivilegedContainersWithEnabledSettingAndDefaultACL() DockerLinuxContainerRuntime runtime = new DockerLinuxContainerRuntime( mockExecutor, mockCGroupsHandler); - runtime.initialize(conf); + runtime.initialize(conf, null); env.put(DockerLinuxContainerRuntime .ENV_DOCKER_CONTAINER_RUN_PRIVILEGED_CONTAINER, "true"); @@ -743,7 +747,7 @@ public void testLaunchPrivilegedContainersWithEnabledSettingAndDefaultACL() DockerLinuxContainerRuntime runtime = new DockerLinuxContainerRuntime( mockExecutor, mockCGroupsHandler); - runtime.initialize(conf); + runtime.initialize(conf, null); env.put(DockerLinuxContainerRuntime .ENV_DOCKER_CONTAINER_RUN_PRIVILEGED_CONTAINER, "true"); @@ -770,7 +774,7 @@ public void testLaunchPrivilegedContainersWithEnabledSettingAndDefaultACL() DockerLinuxContainerRuntime runtime = new DockerLinuxContainerRuntime( mockExecutor, mockCGroupsHandler); - runtime.initialize(conf); + runtime.initialize(conf, null); env.put(DockerLinuxContainerRuntime .ENV_DOCKER_CONTAINER_RUN_PRIVILEGED_CONTAINER, "true"); @@ -822,7 +826,7 @@ public void testCGroupParent() throws ContainerExecutionException { DockerLinuxContainerRuntime runtime = new DockerLinuxContainerRuntime (mockExecutor, mockCGroupsHandler); - runtime.initialize(conf); + runtime.initialize(conf, null); String resourceOptionsNone = "cgroups=none"; DockerRunCommand command = Mockito.mock(DockerRunCommand.class); @@ -849,7 +853,7 @@ public void testCGroupParent() throws ContainerExecutionException { runtime = new DockerLinuxContainerRuntime (mockExecutor, null); - runtime.initialize(conf); + runtime.initialize(conf, null); runtime.addCGroupParentIfRequired(resourceOptionsNone, containerIdStr, command); @@ -866,7 +870,7 @@ public void testMountSourceOnly() IOException{ DockerLinuxContainerRuntime runtime = new DockerLinuxContainerRuntime( mockExecutor, mockCGroupsHandler); - runtime.initialize(conf); + runtime.initialize(conf, null); env.put( DockerLinuxContainerRuntime.ENV_DOCKER_CONTAINER_LOCAL_RESOURCE_MOUNTS, @@ -886,7 +890,7 @@ public void testMountSourceTarget() IOException{ DockerLinuxContainerRuntime runtime = new DockerLinuxContainerRuntime( mockExecutor, mockCGroupsHandler); - runtime.initialize(conf); + runtime.initialize(conf, null); env.put( DockerLinuxContainerRuntime.ENV_DOCKER_CONTAINER_LOCAL_RESOURCE_MOUNTS, @@ -935,7 +939,7 @@ public void testMountInvalid() IOException{ DockerLinuxContainerRuntime runtime = new DockerLinuxContainerRuntime( mockExecutor, mockCGroupsHandler); - runtime.initialize(conf); + runtime.initialize(conf, null); env.put( 
DockerLinuxContainerRuntime.ENV_DOCKER_CONTAINER_LOCAL_RESOURCE_MOUNTS, @@ -955,7 +959,7 @@ public void testMountMultiple() IOException{ DockerLinuxContainerRuntime runtime = new DockerLinuxContainerRuntime( mockExecutor, mockCGroupsHandler); - runtime.initialize(conf); + runtime.initialize(conf, null); env.put( DockerLinuxContainerRuntime.ENV_DOCKER_CONTAINER_LOCAL_RESOURCE_MOUNTS, @@ -1011,7 +1015,7 @@ public void testContainerLivelinessCheck() .setExecutionAttribute(USER, user) .setExecutionAttribute(PID, signalPid) .setExecutionAttribute(SIGNAL, ContainerExecutor.Signal.NULL); - runtime.initialize(enableMockContainerExecutor(conf)); + runtime.initialize(enableMockContainerExecutor(conf), null); runtime.signalContainer(builder.build()); PrivilegedOperation op = capturePrivilegedOperation(); @@ -1071,7 +1075,7 @@ public void testDockerStopOnQuitSignal() .setExecutionAttribute(USER, user) .setExecutionAttribute(PID, signalPid) .setExecutionAttribute(SIGNAL, signal); - runtime.initialize(enableMockContainerExecutor(conf)); + runtime.initialize(enableMockContainerExecutor(conf), null); runtime.signalContainer(builder.build()); PrivilegedOperation op = capturePrivilegedOperation(); @@ -1148,4 +1152,81 @@ public void testDockerHostnamePattern() throws Exception { } } } + + @Test + public void testDockerCommandPlugin() throws Exception { + DockerLinuxContainerRuntime runtime = + new DockerLinuxContainerRuntime(mockExecutor, mockCGroupsHandler); + + Context nmContext = mock(Context.class); + ResourcePluginManager rpm = mock(ResourcePluginManager.class); + Map pluginsMap = new HashMap<>(); + ResourcePlugin plugin1 = mock(ResourcePlugin.class); + + // Create the docker command plugin logic, which will set volume driver + DockerCommandPlugin dockerCommandPlugin = new DockerCommandPlugin() { + @Override + public void updateDockerRunCommand(DockerRunCommand dockerRunCommand, + Container container) throws ContainerExecutionException { + dockerRunCommand.setVolumeDriver("driver-1"); + dockerRunCommand.addReadOnlyMountLocation("/source/path", + "/destination/path", true); + } + }; + + when(plugin1.getDockerCommandPluginInstance()).thenReturn( + dockerCommandPlugin); + ResourcePlugin plugin2 = mock(ResourcePlugin.class); + pluginsMap.put("plugin1", plugin1); + pluginsMap.put("plugin2", plugin2); + + when(rpm.getNameToPlugins()).thenReturn(pluginsMap); + + when(nmContext.getResourcePluginManager()).thenReturn(rpm); + + runtime.initialize(conf, nmContext); + + runtime.launchContainer(builder.build()); + PrivilegedOperation op = capturePrivilegedOperationAndVerifyArgs(); + List args = op.getArguments(); + String dockerCommandFile = args.get(11); + + List dockerCommands = Files.readAllLines(Paths.get + (dockerCommandFile), Charset.forName("UTF-8")); + + int expected = 15; + int counter = 0; + Assert.assertEquals(expected, dockerCommands.size()); + Assert.assertEquals("[docker-command-execution]", + dockerCommands.get(counter++)); + Assert.assertEquals(" cap-add=SYS_CHROOT,NET_BIND_SERVICE", + dockerCommands.get(counter++)); + Assert.assertEquals(" cap-drop=ALL", dockerCommands.get(counter++)); + Assert.assertEquals(" detach=true", dockerCommands.get(counter++)); + Assert.assertEquals(" docker-command=run", dockerCommands.get(counter++)); + Assert.assertEquals(" hostname=ctr-id", dockerCommands.get(counter++)); + Assert + .assertEquals(" image=busybox:latest", dockerCommands.get(counter++)); + Assert.assertEquals( + " launch-command=bash,/test_container_work_dir/launch_container.sh", + 
+        dockerCommands.get(counter++));
+    Assert.assertEquals(" name=container_id", dockerCommands.get(counter++));
+    Assert.assertEquals(" net=host", dockerCommands.get(counter++));
+    Assert.assertEquals(" ro-mounts=/source/path:/destination/path",
+        dockerCommands.get(counter++));
+    Assert.assertEquals(
+        " rw-mounts=/test_container_local_dir:/test_container_local_dir,"
+            + "/test_filecache_dir:/test_filecache_dir,"
+            + "/test_container_work_dir:/test_container_work_dir,"
+            + "/test_container_log_dir:/test_container_log_dir,"
+            + "/test_user_local_dir:/test_user_local_dir",
+        dockerCommands.get(counter++));
+    Assert.assertEquals(" user=run_as_user", dockerCommands.get(counter++));
+
+    // Verify volume-driver is set to expected value.
+    Assert.assertEquals(" volume-driver=driver-1",
+        dockerCommands.get(counter++));
+    Assert.assertEquals(" workdir=/test_container_work_dir",
+        dockerCommands.get(counter++));
+  }
 }
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/TestJavaSandboxLinuxContainerRuntime.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/TestJavaSandboxLinuxContainerRuntime.java
index bdd435eb2ae..67252ea6515 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/TestJavaSandboxLinuxContainerRuntime.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/TestJavaSandboxLinuxContainerRuntime.java
@@ -55,7 +55,6 @@
 import java.util.regex.Pattern;

 import static org.apache.hadoop.yarn.api.ApplicationConstants.Environment.JAVA_HOME;
-import static org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.JavaSandboxLinuxContainerRuntime.NMContainerPolicyUtils.LOG;
 import static org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.JavaSandboxLinuxContainerRuntime.NMContainerPolicyUtils.MULTI_COMMAND_REGEX;
 import static org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.JavaSandboxLinuxContainerRuntime.NMContainerPolicyUtils.CLEAN_CMD_REGEX;
 import static org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.JavaSandboxLinuxContainerRuntime.NMContainerPolicyUtils.CONTAINS_JAVA_CMD;
@@ -134,7 +133,7 @@ public void setup() throws Exception {

     mockExecutor = mock(PrivilegedOperationExecutor.class);
     runtime = new JavaSandboxLinuxContainerRuntime(mockExecutor);
-    runtime.initialize(conf);
+    runtime.initialize(conf, null);

     resources = new HashMap<>();
     grantDir = new File(baseTestDirectory, "grantDir");
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/docker/TestDockerCommandExecutor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/docker/TestDockerCommandExecutor.java
index 05b44b8a93c..c362787b5c4 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/docker/TestDockerCommandExecutor.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/docker/TestDockerCommandExecutor.java
@@ -85,7 +85,8 @@ public void setUp() throws Exception {
     builder.setExecutionAttribute(CONTAINER_ID_STR, MOCK_CONTAINER_ID);

     runtime.initialize(
-        TestDockerContainerRuntime.enableMockContainerExecutor(configuration));
+        TestDockerContainerRuntime.enableMockContainerExecutor(configuration),
+        null);
   }

   @Test
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/TestGpuDockerCommandPlugin.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/TestGpuDockerCommandPlugin.java
new file mode 100644
index 00000000000..8c170eb8975
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/TestGpuDockerCommandPlugin.java
@@ -0,0 +1,173 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Sets;
+import com.sun.net.httpserver.HttpExchange;
+import com.sun.net.httpserver.HttpHandler;
+import com.sun.net.httpserver.HttpServer;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.yarn.api.records.ResourceInformation;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ResourceMappings;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.docker.DockerRunCommand;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.runtime.ContainerExecutionException;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.io.IOException;
+import java.io.OutputStream;
+import java.net.InetSocketAddress;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+public class TestGpuDockerCommandPlugin {
+  private Map<String, List<String>> copyCommandLine(
+      Map<String, List<String>> map) {
+    Map<String, List<String>> ret = new HashMap<>();
+    for (Map.Entry<String, List<String>> entry : map.entrySet()) {
+      ret.put(entry.getKey(), new ArrayList<>(entry.getValue()));
+    }
+    return ret;
+  }
+
+  private boolean commandlinesEquals(Map<String, List<String>> cli1,
+      Map<String, List<String>> cli2) {
+    if (!Sets.symmetricDifference(cli1.keySet(), cli2.keySet()).isEmpty()) {
+      return false;
+    }
+
+    for (String key : cli1.keySet()) {
+      List<String> value1 = cli1.get(key);
+      List<String> value2 = cli2.get(key);
+      if (!value1.equals(value2)) {
+        return false;
+      }
+    }
+
+    return true;
+  }
+
+  static class MyHandler implements HttpHandler {
+    String response = "This is the response";
+
+    @Override
+    public void handle(HttpExchange t) throws IOException {
+      t.sendResponseHeaders(200, response.length());
+      OutputStream os = t.getResponseBody();
+      os.write(response.getBytes());
+      os.close();
+    }
+  }
+
+  @Test
+  public void testPlugin() throws Exception {
+    Configuration conf = new Configuration();
+
+    DockerRunCommand runCommand = new DockerRunCommand("container_1", "user",
+        "fakeimage");
+
+    Map<String, List<String>> originalCommandline = copyCommandLine(
+        runCommand.getDockerCommandWithArguments());
+
+    GpuDockerCommandPlugin commandPlugin = new GpuDockerCommandPlugin(conf);
+
+    Container nmContainer = mock(Container.class);
+
+    // getResourceMapping is null, so commandline won't be updated
+    commandPlugin.updateDockerRunCommand(runCommand, nmContainer);
+    Assert.assertTrue(commandlinesEquals(originalCommandline,
+        runCommand.getDockerCommandWithArguments()));
+
+    // no GPU resource assigned, so commandline won't be updated
+    ResourceMappings resourceMappings = new ResourceMappings();
+    when(nmContainer.getResourceMappings()).thenReturn(resourceMappings);
+    commandPlugin.updateDockerRunCommand(runCommand, nmContainer);
+    Assert.assertTrue(commandlinesEquals(originalCommandline,
+        runCommand.getDockerCommandWithArguments()));
+
+    // Assign GPU resource, init will be invoked
+    ResourceMappings.AssignedResources assigned =
+        new ResourceMappings.AssignedResources();
+    assigned.updateAssignedResources(ImmutableList.of("0", "1"));
+    resourceMappings.addAssignedResources(ResourceInformation.GPU_URI,
+        assigned);
+
+    // Since there's no HTTP server running, we expect an exception here
+    boolean caughtException = false;
+    try {
+      commandPlugin.updateDockerRunCommand(runCommand, nmContainer);
+    } catch (ContainerExecutionException e) {
+      caughtException = true;
+    }
+    Assert.assertTrue(caughtException);
+
+    // Start HTTP server
+    MyHandler handler = new MyHandler();
+    HttpServer server = HttpServer.create(new InetSocketAddress(60111), 0);
+    server.createContext("/test", handler);
+    server.start();
+
+    String hostName = server.getAddress().getHostName();
+    int port = server.getAddress().getPort();
+    String httpUrl = "http://" + hostName + ":" + port + "/test";
+
+    conf.set(YarnConfiguration.NVIDIA_DOCKER_PLUGIN_ENDPOINT, httpUrl);
+
+    commandPlugin = new GpuDockerCommandPlugin(conf);
+
+    // Invalid response from the endpoint, expect an exception
+    handler.response = "INVALID_RESPONSE";
+    caughtException = false;
+    try {
+      commandPlugin.updateDockerRunCommand(runCommand, nmContainer);
+    } catch (ContainerExecutionException e) {
+      caughtException = true;
+    }
+    Assert.assertTrue(caughtException);
+
+    // Invalid response again, still expect an exception
+    handler.response = "INVALID_RESPONSE";
+    caughtException = false;
+    try {
+      commandPlugin.updateDockerRunCommand(runCommand, nmContainer);
+    } catch (ContainerExecutionException e) {
+      caughtException = true;
+    }
+    Assert.assertTrue(caughtException);
+
+    handler.response = "--device=/dev/nvidiactl --device=/dev/nvidia-uvm "
+        + "--device=/dev/nvidia0 --device=/dev/nvidia1 "
+        + "--volume-driver=nvidia-docker "
+        + "--volume=nvidia_driver_352.68:/usr/local/nvidia:ro";
+    commandPlugin.updateDockerRunCommand(runCommand, nmContainer);
+    Map<String, List<String>> newCommandLine =
+        runCommand.getDockerCommandWithArguments();
+
+    // Command line will be updated
+    Assert.assertFalse(
+        commandlinesEquals(originalCommandline, newCommandLine));
+    Assert.assertTrue(newCommandLine.containsKey("devices"));
+    Assert.assertTrue(newCommandLine.containsKey("volume-driver"));
+    Assert.assertTrue(newCommandLine.containsKey("ro-mounts"));
+  }
+}
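
Note on testPlugin above (not part of the patch): the endpoint configured through
YarnConfiguration.NVIDIA_DOCKER_PLUGIN_ENDPOINT answers with a plain string of docker
CLI options (the final handler.response value in the test), and GpuDockerCommandPlugin
is expected to fold those options into the run command under the keys the test asserts
on ("devices", "volume-driver", "ro-mounts"). The standalone sketch below only
illustrates that grouping, assuming the response is a whitespace-separated list of
--device / --volume-driver / --volume options; the class and method names
(NvidiaCliResponseSketch, group) are invented for this example and are not APIs from
the patch.

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

// Illustrative only: a tiny parser for a response string of the shape used in
// testPlugin above. Not part of the patch.
public final class NvidiaCliResponseSketch {

  // Groups the docker CLI arguments returned by the endpoint into the three
  // buckets the test asserts on: "devices", "volume-driver" and "ro-mounts".
  static Map<String, List<String>> group(String cliResponse) {
    Map<String, List<String>> grouped = new HashMap<>();
    grouped.put("devices", new ArrayList<String>());
    grouped.put("volume-driver", new ArrayList<String>());
    grouped.put("ro-mounts", new ArrayList<String>());

    for (String token : cliResponse.trim().split("\\s+")) {
      if (token.startsWith("--device=")) {
        grouped.get("devices").add(token.substring("--device=".length()));
      } else if (token.startsWith("--volume-driver=")) {
        grouped.get("volume-driver")
            .add(token.substring("--volume-driver=".length()));
      } else if (token.startsWith("--volume=") && token.endsWith(":ro")) {
        // e.g. nvidia_driver_352.68:/usr/local/nvidia:ro
        grouped.get("ro-mounts").add(token.substring("--volume=".length()));
      }
      // Anything else (e.g. "INVALID_RESPONSE") carries no recognizable
      // option; the real plugin is expected to reject such a response.
    }
    return grouped;
  }

  public static void main(String[] args) {
    String response = "--device=/dev/nvidiactl --device=/dev/nvidia-uvm "
        + "--device=/dev/nvidia0 --device=/dev/nvidia1 "
        + "--volume-driver=nvidia-docker "
        + "--volume=nvidia_driver_352.68:/usr/local/nvidia:ro";
    // Prints the grouped options, mirroring the keys checked at the end of
    // testPlugin.
    System.out.println(group(response));
  }
}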