diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java index 81cb8c6c453..9de46b405cb 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java @@ -1250,6 +1250,31 @@ public static boolean isAclEnabled(Configuration conf) { public static final String NM_NETWORK_RESOURCE_OUTBOUND_BANDWIDTH_YARN_MBIT = NM_NETWORK_RESOURCE_PREFIX + "outbound-bandwidth-yarn-mbit"; + /** + * Prefix for disk configurations. Work in progress: This configuration + * parameter may be changed/removed in the future. + */ + @Private + public static final String NM_GPU_RESOURCE_PREFIX = NM_PREFIX + + "resource.gpu."; + /** + * This setting controls if resource handling for GPU operations is enabled. + */ + @Private + public static final String NM_GPU_RESOURCE_ENABLED = + NM_GPU_RESOURCE_PREFIX + "enabled"; + + @Private + public static final String NM_GPU_ALLOWED_DEVICES = + NM_GPU_RESOURCE_PREFIX + "allowed-gpu-devices"; + + /** + * Disk as a resource is disabled by default. + **/ + @Private + public static final boolean DEFAULT_NM_GPU_RESOURCE_ENABLED = false; + + /** NM Webapp address.**/ public static final String NM_WEBAPP_ADDRESS = NM_PREFIX + "webapp.address"; public static final int DEFAULT_NM_WEBAPP_PORT = 8042; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsHandler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsHandler.java index d09a25dd024..dc4cae14cd6 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsHandler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsHandler.java @@ -39,7 +39,8 @@ CPU("cpu"), NET_CLS("net_cls"), BLKIO("blkio"), - MEMORY("memory"); + MEMORY("memory"), + DEVICES("devices"); private final String name; @@ -65,6 +66,8 @@ String getName() { String CGROUP_CPU_QUOTA_US = "cfs_quota_us"; String CGROUP_CPU_SHARES = "shares"; + String CGROUP_PARAM_DEVICE_DENY = "deny"; + /** * Mounts or initializes a cgroup controller. * @param controller - the controller being initialized diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/GpuResourceAllocator.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/GpuResourceAllocator.java new file mode 100644 index 00000000000..3b45ec127ab --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/GpuResourceAllocator.java @@ -0,0 +1,116 @@ +package org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Sets; + +import java.util.Collections; +import java.util.HashSet; +import java.util.Iterator; +import java.util.Map; +import java.util.Set; +import java.util.TreeMap; +import java.util.TreeSet; + +/** + * Allocate GPU resources according to requirements + */ +public class GpuResourceAllocator { + private Set allowedGpuDevices = new TreeSet<>(); + private Map usedDevices = new TreeMap<>(); + + /** + * Contains allowed and denied devices with minor number. + * Denied devices will be useful for cgroups devices module to do blacklisting + */ + static class GpuAllocation { + private Set allowed = Collections.emptySet(); + private Set denied = Collections.emptySet(); + + GpuAllocation(Set allowed, Set denied) { + if (allowed != null) { + this.allowed = ImmutableSet.copyOf(allowed); + } + if (denied != null) { + this.denied = ImmutableSet.copyOf(denied); + } + } + + public Set getAllowed() { + return allowed; + } + + public Set getDenied() { + return denied; + } + } + + /** + * Add GPU to allowed list + * @param minorNumber minor number of the GPU device. + */ + public synchronized void addGpu(int minorNumber) { + allowedGpuDevices.add(minorNumber); + } + + private String getResourceHandlerExceptionMessage(int numRequestedGpuDevices, + String requestorId) { + return "Failed to find enough GPUs, requestor=" + requestorId + + ", #RequestedGPUs=" + numRequestedGpuDevices + ", #availableGpus=" + + getAvailableGpus(); + } + + @VisibleForTesting + public synchronized int getAvailableGpus() { + return allowedGpuDevices.size() - usedDevices.size(); + } + + /** + * Assign GPU to requestor + * @param numRequestedGpuDevices How many GPU to request + * @param requestorId Id of requestor, such as containerId + * @return List of denied Gpus with minor numbers + * @throws ResourceHandlerException When failed to + */ + public synchronized GpuAllocation assignGpus(int numRequestedGpuDevices, + String requestorId) throws ResourceHandlerException { + // Assign Gpus to container if requested some. + if (numRequestedGpuDevices > 0) { + if (numRequestedGpuDevices > getAvailableGpus()) { + throw new ResourceHandlerException( + getResourceHandlerExceptionMessage(numRequestedGpuDevices, + requestorId)); + } + + Set assignedGpus = new HashSet<>(); + + for (int deviceNum : allowedGpuDevices) { + if (!usedDevices.containsKey(deviceNum)) { + usedDevices.put(deviceNum, requestorId); + assignedGpus.add(deviceNum); + if (assignedGpus.size() == numRequestedGpuDevices) { + break; + } + } + } + + return new GpuAllocation(assignedGpus, + Sets.difference(allowedGpuDevices, assignedGpus)); + } + return new GpuAllocation(null, allowedGpuDevices); + } + + /** + * Clean up all Gpus assigned to requestor + * @param requstorId Id of requestor + */ + public synchronized void cleanupAssignGpus(String requstorId) { + Iterator> iter = + usedDevices.entrySet().iterator(); + while (iter.hasNext()) { + if (iter.next().getValue().equals(requstorId)) { + iter.remove(); + } + } + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/GpuResourceHandlerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/GpuResourceHandlerImpl.java new file mode 100644 index 00000000000..28c5edc5f93 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/GpuResourceHandlerImpl.java @@ -0,0 +1,168 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources; + +import com.google.common.annotations.VisibleForTesting; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.util.StringUtils; +import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperation; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +public class GpuResourceHandlerImpl implements ResourceHandler { + final static Log LOG = LogFactory + .getLog(GpuResourceHandlerImpl.class); + + private final String REQUEST_GPU_NUM_ENV_KEY = "REQUESTED_GPU_NUM"; + + // This will be used by container-executor to add necessary clis + public static final String ALLOCATED_GPU_MINOR_NUMS_ENV_KEY = + "YARN_ALLOCATED_GPUS"; + + private GpuResourceAllocator gpuAllocator; + private CGroupsHandler cGroupsHandler; + + GpuResourceHandlerImpl(CGroupsHandler cGroupsHandler) { + this.cGroupsHandler = cGroupsHandler; + gpuAllocator = new GpuResourceAllocator(); + } + + @Override + public List bootstrap(Configuration configuration) + throws ResourceHandlerException { + String allowedDevicesStr = configuration.get( + YarnConfiguration.NM_GPU_ALLOWED_DEVICES); + + if (null != allowedDevicesStr) { + for (String s : allowedDevicesStr.split(",")) { + if (s.trim().length() > 0) { + Integer minorNum = Integer.valueOf(s.trim()); + gpuAllocator.addGpu(minorNum); + } + } + } + LOG.info("Allowed GPU devices with minor numbers:" + allowedDevicesStr); + + // And initialize cgroups + this.cGroupsHandler.initializeCGroupController( + CGroupsHandler.CGroupController.DEVICES); + + return null; + } + + private int getRequestedGpu(Container container) { + // TODO, use YARN-3926 after it merged + ContainerLaunchContext clc = container.getLaunchContext(); + Map envs = clc.getEnvironment(); + if (null != envs.get(REQUEST_GPU_NUM_ENV_KEY)) { + return Integer.valueOf(envs.get(REQUEST_GPU_NUM_ENV_KEY)); + } + return 0; + } + + @Override + public synchronized List preStart(Container container) + throws ResourceHandlerException { + String containerIdStr = container.getContainerId().toString(); + + int requestedGpu = getRequestedGpu(container); + + // Assign Gpus to container if requested some. + GpuResourceAllocator.GpuAllocation allocation = gpuAllocator.assignGpus( + requestedGpu, containerIdStr); + + // Create device cgroups for the container + cGroupsHandler.createCGroup(CGroupsHandler.CGroupController.DEVICES, + containerIdStr); + try { + for (int device : allocation.getDenied()) { + cGroupsHandler.updateCGroupParam( + CGroupsHandler.CGroupController.DEVICES, containerIdStr, + CGroupsHandler.CGROUP_PARAM_DEVICE_DENY, + getDeviceDeniedValue(device)); + } + + // Set ALLOCATED_GPU_MINOR_NUMS_ENV_KEY to environment so later runtime + // can use it. + Map envs = container.getLaunchContext().getEnvironment(); + if (null == allocation.getAllowed() || allocation.getAllowed().isEmpty()) { + envs.put(ALLOCATED_GPU_MINOR_NUMS_ENV_KEY, ""); + } else { + envs.put(ALLOCATED_GPU_MINOR_NUMS_ENV_KEY, + StringUtils.join(",", allocation.getAllowed())); + } + } catch (ResourceHandlerException re) { + cGroupsHandler.deleteCGroup(CGroupsHandler.CGroupController.DEVICES, + containerIdStr); + LOG.warn("Could not update cgroup for container", re); + throw re; + } + + List ret = new ArrayList<>(); + ret.add(new PrivilegedOperation( + PrivilegedOperation.OperationType.ADD_PID_TO_CGROUP, + PrivilegedOperation.CGROUP_ARG_PREFIX + + cGroupsHandler.getPathForCGroupTasks( + CGroupsHandler.CGroupController.DEVICES, containerIdStr))); + + return ret; + } + + @VisibleForTesting + public static String getDeviceDeniedValue(int deviceMinorNumber) { + String val = String.format("c 195:%d rwm", deviceMinorNumber); + LOG.info("Add denied devices to cgroups:" + val); + return val; + } + + @VisibleForTesting + public GpuResourceAllocator getGpuAllocator() { + return gpuAllocator; + } + + @Override + public List reacquireContainer(ContainerId containerId) + throws ResourceHandlerException { + // FIXME, need to make sure allocated containers and cgroups can be + // recovered when NM restarts. + return null; + } + + @Override + public synchronized List postComplete( + ContainerId containerId) throws ResourceHandlerException { + gpuAllocator.cleanupAssignGpus(containerId.toString()); + cGroupsHandler.deleteCGroup(CGroupsHandler.CGroupController.DEVICES, + containerId.toString()); + return null; + } + + @Override + public List teardown() throws ResourceHandlerException { + return null; + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/ResourceHandlerModule.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/ResourceHandlerModule.java index 7fc04bdb41e..c0ea53def07 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/ResourceHandlerModule.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/ResourceHandlerModule.java @@ -87,6 +87,18 @@ public static CGroupsHandler getCGroupsHandler() { return cGroupsHandler; } + private static GpuResourceHandlerImpl getGpuResourceHandler( + Configuration conf) throws ResourceHandlerException { + boolean cgroupsGpuEnabled = conf.getBoolean( + YarnConfiguration.NM_GPU_RESOURCE_ENABLED, + YarnConfiguration.DEFAULT_NM_GPU_RESOURCE_ENABLED); + if (cgroupsGpuEnabled) { + return new GpuResourceHandlerImpl( + getInitializedCGroupsHandler(conf)); + } + return null; + } + private static CGroupsCpuResourceHandlerImpl getCGroupsCpuResourceHandler( Configuration conf) throws ResourceHandlerException { boolean cgroupsCpuEnabled = @@ -205,6 +217,7 @@ private static void initializeConfiguredResourceHandlerChain( addHandlerIfNotNull(handlerList, getDiskResourceHandler(conf)); addHandlerIfNotNull(handlerList, getMemoryResourceHandler(conf)); addHandlerIfNotNull(handlerList, getCGroupsCpuResourceHandler(conf)); + addHandlerIfNotNull(handlerList, getGpuResourceHandler(conf)); resourceHandlerChain = new ResourceHandlerChain(handlerList); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DockerLinuxContainerRuntime.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DockerLinuxContainerRuntime.java index b70a4e19a62..cad0e2c8e11 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DockerLinuxContainerRuntime.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DockerLinuxContainerRuntime.java @@ -38,6 +38,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationException; import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationExecutor; import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.CGroupsHandler; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.GpuResourceHandlerImpl; import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandlerModule; import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.docker.DockerClient; import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.docker.DockerInspectCommand; @@ -454,6 +455,18 @@ public void launchContainer(ContainerRuntimeContext ctx) .setCapabilities(capabilities) .addMountLocation(CGROUPS_ROOT_DIRECTORY, CGROUPS_ROOT_DIRECTORY + ":ro", false); + + // Handle GPU-related configs + String allocatedGpus = environment.get( + GpuResourceHandlerImpl.ALLOCATED_GPU_MINOR_NUMS_ENV_KEY); + if (null != allocatedGpus && !allocatedGpus.isEmpty()) { + // We allocated some GPUs, set YARN_CONTAINER_EXECUTOR_GPU_ENABLED + // to true. + runCommand.addDockerRunEnvars( + "YARN_CONTAINER_EXECUTOR_GPU_ENABLED", "true"); + runCommand.addDockerRunEnvars("PATH", "$PATH:/usr/local/nvidia/bin"); + } + List allDirs = new ArrayList<>(containerLocalDirs); allDirs.addAll(filecacheDirs); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/docker/DockerRunCommand.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/docker/DockerRunCommand.java index f79f4ed08c8..aab44de0a25 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/docker/DockerRunCommand.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/docker/DockerRunCommand.java @@ -31,6 +31,7 @@ private static final String RUN_COMMAND = "run"; private final String image; private List overrrideCommandWithArgs; + private List additionalEnvars = new ArrayList<>(); /** The following are mandatory: */ public DockerRunCommand(String containerId, String user, String image) { @@ -114,6 +115,10 @@ public DockerRunCommand setOverrideCommandWithArgs( return this; } + public void addDockerRunEnvars(String key, String value) { + additionalEnvars.add(key + "=" + value); + } + @Override public String getCommandWithArguments() { List argList = new ArrayList<>(); @@ -125,6 +130,10 @@ public String getCommandWithArguments() { argList.addAll(overrrideCommandWithArgs); } - return StringUtils.join(" ", argList); + String cmd = StringUtils.join(" ", argList); + for (String envar : additionalEnvars) { + cmd = cmd +"\n" + envar; + } + return cmd; } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c index 9be8cf4de4c..0e9af45908f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c @@ -1139,7 +1139,7 @@ int initialize_app(const char *user, const char *app_id, return -1; } -char* parse_docker_command_file(const char* command_file) { +char* parse_docker_command_file(const char* command_file, char** docker_run_envars) { size_t len = 0; char *line = NULL; ssize_t read; @@ -1156,13 +1156,38 @@ char* parse_docker_command_file(const char* command_file) { fflush(ERRORFILE); exit(ERROR_READING_FILE); } + if (docker_run_envars) { + // Read envars to "k1=v1 k2=v2 ..." form. + int envLen = 0; + char* env = NULL; + + while ((read = getline(&env, &len, stream)) > 0) { + envLen += (read + 1); + + *docker_run_envars = realloc(*docker_run_envars, envLen); + if (envLen == read + 1) { + // initialize array for the first use + (*docker_run_envars)[0] = 0; + } + + if (NULL == *docker_run_envars) { + fprintf(ERRORFILE, "failed to allocate memory for docker run envars"); + } + + // Escape last delim + if ('\n' == env[read - 1]) { + env[read - 1] = ' '; + } + strcat(*docker_run_envars, env); + } + } fclose(stream); return line; } int run_docker(const char *command_file) { - char* docker_command = parse_docker_command_file(command_file); + char* docker_command = parse_docker_command_file(command_file, NULL); char* docker_binary = get_value(DOCKER_BINARY_KEY, &executor_cfg); docker_binary = check_docker_binary(docker_binary); @@ -1326,8 +1351,19 @@ int launch_docker_container_as_user(const char * user, const char *app_id, gid_t user_gid = getegid(); uid_t prev_uid = geteuid(); - char *docker_command = parse_docker_command_file(command_file); - char *docker_binary = get_value(DOCKER_BINARY_KEY, &executor_cfg); + // Additional envars for docker run .. + char *additional_envs = NULL; + + char *docker_command = parse_docker_command_file(command_file, &additional_envs); + char *docker_binary; + char *nvidia_docker_binary = NULL; + + if (additional_envs && + (NULL != strstr(additional_envs, NVIDIA_GPU_ENABLED_ENV_VAR))) { + nvidia_docker_binary = get_value(NVIDIA_DOCKER_BINARY_KEY, &executor_cfg); + } + + docker_binary = get_value(DOCKER_BINARY_KEY, &executor_cfg); docker_binary = check_docker_binary(docker_binary); fprintf(LOGFILE, "Creating script paths...\n"); @@ -1367,7 +1403,12 @@ int launch_docker_container_as_user(const char * user, const char *app_id, goto cleanup; } - snprintf(docker_command_with_binary, EXECUTOR_PATH_MAX, "%s %s", docker_binary, docker_command); + snprintf(docker_command_with_binary, EXECUTOR_PATH_MAX, "%s %s %s", + additional_envs, + // Only use nvidia-docker in run if it is required + (nvidia_docker_binary ? nvidia_docker_binary : docker_binary), + docker_command); + fprintf(LOGFILE, "Docker run command, %s", docker_command_with_binary); fprintf(LOGFILE, "Launching docker container...\n"); FILE* start_docker = popen(docker_command_with_binary, "r"); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.h b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.h index aa1c4a19775..2cde200404d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.h +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.h @@ -95,8 +95,10 @@ enum operations { #define BANNED_USERS_KEY "banned.users" #define ALLOWED_SYSTEM_USERS_KEY "allowed.system.users" #define DOCKER_BINARY_KEY "docker.binary" +#define NVIDIA_DOCKER_BINARY_KEY "nvidia_docker.binary" #define DOCKER_SUPPORT_ENABLED_KEY "feature.docker.enabled" #define TC_SUPPORT_ENABLED_KEY "feature.tc.enabled" +#define GPU_ENABLED_ENV_VAR "YARN_CONTAINER_EXECUTOR_GPU_ENABLED=true" #define TMP_DIR "tmp" extern struct passwd *user_detail; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestGpuResourceHandler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestGpuResourceHandler.java new file mode 100644 index 00000000000..babd5a80ffb --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestGpuResourceHandler.java @@ -0,0 +1,159 @@ +package org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; +import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import java.util.HashMap; +import java.util.Map; + +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +public class TestGpuResourceHandler { + private CGroupsHandler mockCGroupsHandler; + private GpuResourceHandlerImpl gpuResourceHandler; + + @Before + public void setup() { + mockCGroupsHandler = mock(CGroupsHandler.class); + gpuResourceHandler = new GpuResourceHandlerImpl( + mockCGroupsHandler); + } + + @Test + public void testBootStrap() throws ResourceHandlerException { + Configuration conf = new YarnConfiguration(); + + gpuResourceHandler.bootstrap(conf); + verify(mockCGroupsHandler, times(1)).initializeCGroupController( + CGroupsHandler.CGroupController.DEVICES); + } + + private static ContainerId getContainerId(int id) { + return ContainerId.newContainerId(ApplicationAttemptId + .newInstance(ApplicationId.newInstance(1234L, 1), 1), id); + } + + private static Container mockContainer(int id, int numGpuRequest) { + Container c = mock(Container.class); + when(c.getContainerId()).thenReturn(getContainerId(id)); + ContainerLaunchContext clc = mock(ContainerLaunchContext.class); + Map envs = new HashMap<>(); + when(clc.getEnvironment()).thenReturn(envs); + envs.put("REQUESTED_GPU_NUM", String.valueOf(numGpuRequest)); + when(c.getLaunchContext()).thenReturn(clc); + return c; + } + + private void verifyDeniedDevices(ContainerId containerId, int[] deniedDevices) + throws ResourceHandlerException { + verify(mockCGroupsHandler, times(1)).createCGroup( + CGroupsHandler.CGroupController.DEVICES, containerId.toString()); + + if (null != deniedDevices && deniedDevices.length > 0) { + for (int i = 0; i < deniedDevices.length; i++) { + verify(mockCGroupsHandler, times(1)).updateCGroupParam( + CGroupsHandler.CGroupController.DEVICES, containerId.toString(), + CGroupsHandler.CGROUP_PARAM_DEVICE_DENY, + GpuResourceHandlerImpl + .getDeviceDeniedValue(deniedDevices[i])); + } + } + } + + @Test + public void testAllocation() throws Exception { + Configuration conf = new YarnConfiguration(); + conf.set(YarnConfiguration.NM_GPU_ALLOWED_DEVICES, "0,1,3,4"); + conf.setBoolean(YarnConfiguration.NM_GPU_RESOURCE_ENABLED, true); + + gpuResourceHandler.bootstrap(conf); + Assert.assertEquals(4, + gpuResourceHandler.getGpuAllocator().getAvailableGpus()); + + /* Start container 1, asks 3 containers */ + gpuResourceHandler.preStart(mockContainer(1, 3)); + + // Only device=4 will be blocked. + verifyDeniedDevices(getContainerId(1), new int[] { 4 }); + + /* Start container 2, asks 2 containers. Excepted to fail */ + boolean failedToAllocate = false; + try { + gpuResourceHandler.preStart(mockContainer(2, 2)); + } catch (ResourceHandlerException e) { + failedToAllocate = true; + } + Assert.assertTrue(failedToAllocate); + + /* Start container 3, ask 1 container, succeeded */ + gpuResourceHandler.preStart(mockContainer(3, 1)); + + // devices = 0/1/3 will be blocked + verifyDeniedDevices(getContainerId(3), new int[] { 0, 1, 3 }); + + /* Start container 4, ask 0 container, succeeded */ + gpuResourceHandler.preStart(mockContainer(4, 0)); + + // All devices will be blocked + verifyDeniedDevices(getContainerId(4), new int[] { 0, 1, 3, 4 }); + + /* Release container-1, expect cgroups deleted */ + gpuResourceHandler.postComplete(getContainerId(1)); + + verify(mockCGroupsHandler, times(1)).createCGroup( + CGroupsHandler.CGroupController.DEVICES, getContainerId(1).toString()); + Assert.assertEquals(3, + gpuResourceHandler.getGpuAllocator().getAvailableGpus()); + + /* Release container-3, expect cgroups deleted */ + gpuResourceHandler.postComplete(getContainerId(3)); + + verify(mockCGroupsHandler, times(1)).createCGroup( + CGroupsHandler.CGroupController.DEVICES, getContainerId(3).toString()); + Assert.assertEquals(4, + gpuResourceHandler.getGpuAllocator().getAvailableGpus()); + } + + @Test + public void testAllocationWithoutAllowedGpus() throws Exception { + Configuration conf = new YarnConfiguration(); + conf.set(YarnConfiguration.NM_GPU_ALLOWED_DEVICES, ""); + conf.setBoolean(YarnConfiguration.NM_GPU_RESOURCE_ENABLED, true); + + gpuResourceHandler.bootstrap(conf); + Assert.assertEquals(0, + gpuResourceHandler.getGpuAllocator().getAvailableGpus()); + + /* Start container 1, asks 0 containers */ + gpuResourceHandler.preStart(mockContainer(1, 0)); + verifyDeniedDevices(getContainerId(1), new int[] { }); + + /* Start container 2, asks 1 containers. Excepted to fail */ + boolean failedToAllocate = false; + try { + gpuResourceHandler.preStart(mockContainer(2, 1)); + } catch (ResourceHandlerException e) { + failedToAllocate = true; + } + Assert.assertTrue(failedToAllocate); + + /* Release container 1, expect cgroups deleted */ + gpuResourceHandler.postComplete(getContainerId(1)); + + verify(mockCGroupsHandler, times(1)).createCGroup( + CGroupsHandler.CGroupController.DEVICES, getContainerId(1).toString()); + Assert.assertEquals(0, + gpuResourceHandler.getGpuAllocator().getAvailableGpus()); + } +}