commit e7b25d0e5a033dd0235f9b8ee72dc74d79022a1e Author: Wangda Tan Date: Tue Jun 27 16:06:09 2017 -0700 added gpu impl jun 27 Change-Id: Ie2356b23c01f41a83e7ef56f083736caf7180bd5 diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java index fa4d2e3d321..7ac7da5856e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java @@ -1251,6 +1251,31 @@ public static boolean isAclEnabled(Configuration conf) { public static final String NM_NETWORK_RESOURCE_OUTBOUND_BANDWIDTH_YARN_MBIT = NM_NETWORK_RESOURCE_PREFIX + "outbound-bandwidth-yarn-mbit"; + /** + * Prefix for GPU configurations. Work in progress: This configuration + * parameter may be changed/removed in the future. + */ + @Private + public static final String NM_GPU_RESOURCE_PREFIX = NM_PREFIX + + "resource.gpu."; + /** + * This setting controls if resource handling for GPU operations is enabled. + */ + @Private + public static final String NM_GPU_RESOURCE_ENABLED = + NM_GPU_RESOURCE_PREFIX + "enabled"; + + @Private + public static final String NM_GPU_ALLOWED_DEVICES = + NM_GPU_RESOURCE_PREFIX + "allowed-gpu-devices"; + + /** + * GPU as a resource is disabled by default. 
+ **/ + @Private + public static final boolean DEFAULT_NM_GPU_RESOURCE_ENABLED = false; + + /** NM Webapp address.**/ public static final String NM_WEBAPP_ADDRESS = NM_PREFIX + "webapp.address"; public static final int DEFAULT_NM_WEBAPP_PORT = 8042; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/pom.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/pom.xml index 1445d2a0d9e..ed2ab31f252 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/pom.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/pom.xml @@ -29,7 +29,7 @@ ${project.parent.parent.basedir} - ../etc/hadoop + /etc/hadoop/conf diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/CMakeLists.txt b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/CMakeLists.txt index f7fe83d6ebc..e6476851e1d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/CMakeLists.txt +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/CMakeLists.txt @@ -89,6 +89,11 @@ add_library(container main/native/container-executor/impl/configuration.c main/native/container-executor/impl/container-executor.c main/native/container-executor/impl/get_executable.c + main/native/container-executor/impl/utils/string-utils.c + main/native/container-executor/impl/utils/command-line-parser.c + main/native/container-executor/impl/modules/cgroups/cgroups-operations.c + main/native/container-executor/impl/modules/common/module-configs.c + main/native/container-executor/impl/modules/gpu/gpu-module.c ) add_executable(container-executor diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/privileged/PrivilegedOperation.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/privileged/PrivilegedOperation.java index 8402a16339d..7e3a53a778b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/privileged/PrivilegedOperation.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/privileged/PrivilegedOperation.java @@ -51,6 +51,7 @@ TC_READ_STATS("--tc-read-stats"), ADD_PID_TO_CGROUP(""), //no CLI switch supported yet. RUN_DOCKER_CMD("--run-docker"), + GPU("gpu"), LIST_AS_USER(""); //no CLI switch supported yet. 
private final String option; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsHandler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsHandler.java index d09a25dd024..6279a67131e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsHandler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsHandler.java @@ -39,6 +39,7 @@ CPU("cpu"), NET_CLS("net_cls"), BLKIO("blkio"), + DEVICES("devices"), MEMORY("memory"); private final String name; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/GpuResourceAllocator.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/GpuResourceAllocator.java new file mode 100644 index 00000000000..5dcbeefd3fb --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/GpuResourceAllocator.java @@ -0,0 +1,116 @@ +package org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Sets; + +import java.util.Collections; +import java.util.HashSet; +import 
java.util.Iterator; +import java.util.Map; +import java.util.Set; +import java.util.TreeMap; +import java.util.TreeSet; + +/** + * Allocate GPU resources according to requirements + */ +public class GpuResourceAllocator { + private Set allowedGpuDevices = new TreeSet<>(); + private Map usedDevices = new TreeMap<>(); + + /** + * Contains allowed and denied devices with minor number. + * Denied devices will be useful for cgroups devices module to do blacklisting + */ + static class GpuAllocation { + private Set allowed = Collections.emptySet(); + private Set denied = Collections.emptySet(); + + GpuAllocation(Set allowed, Set denied) { + if (allowed != null) { + this.allowed = ImmutableSet.copyOf(allowed); + } + if (denied != null) { + this.denied = ImmutableSet.copyOf(denied); + } + } + + public Set getAllowedGPUs() { + return allowed; + } + + public Set getDeniedGPUs() { + return denied; + } + } + + /** + * Add GPU to allowed list + * @param minorNumber minor number of the GPU device. + */ + public synchronized void addGpu(int minorNumber) { + allowedGpuDevices.add(minorNumber); + } + + private String getResourceHandlerExceptionMessage(int numRequestedGpuDevices, + String requestorId) { + return "Failed to find enough GPUs, requestor=" + requestorId + + ", #RequestedGPUs=" + numRequestedGpuDevices + ", #availableGpus=" + + getAvailableGpus(); + } + + @VisibleForTesting + public synchronized int getAvailableGpus() { + return allowedGpuDevices.size() - usedDevices.size(); + } + + /** + * Assign GPU to requestor + * @param numRequestedGpuDevices How many GPU to request + * @param requestorId Id of requestor, such as containerId + * @return List of denied Gpus with minor numbers + * @throws ResourceHandlerException When failed to + */ + public synchronized GpuAllocation assignGpus(int numRequestedGpuDevices, + String requestorId) throws ResourceHandlerException { + // Assign Gpus to container if requested some. 
+ if (numRequestedGpuDevices > 0) { + if (numRequestedGpuDevices > getAvailableGpus()) { + throw new ResourceHandlerException( + getResourceHandlerExceptionMessage(numRequestedGpuDevices, + requestorId)); + } + + Set assignedGpus = new HashSet<>(); + + for (int deviceNum : allowedGpuDevices) { + if (!usedDevices.containsKey(deviceNum)) { + usedDevices.put(deviceNum, requestorId); + assignedGpus.add(deviceNum); + if (assignedGpus.size() == numRequestedGpuDevices) { + break; + } + } + } + + return new GpuAllocation(assignedGpus, + Sets.difference(allowedGpuDevices, assignedGpus)); + } + return new GpuAllocation(null, allowedGpuDevices); + } + + /** + * Clean up all Gpus assigned to requestor + * @param requstorId Id of requestor + */ + public synchronized void cleanupAssignGpus(String requstorId) { + Iterator> iter = + usedDevices.entrySet().iterator(); + while (iter.hasNext()) { + if (iter.next().getValue().equals(requstorId)) { + iter.remove(); + } + } + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/GpuResourceHandlerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/GpuResourceHandlerImpl.java new file mode 100644 index 00000000000..42f3016b11e --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/GpuResourceHandlerImpl.java @@ -0,0 +1,174 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources; + +import com.google.common.annotations.VisibleForTesting; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.util.StringUtils; +import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperation; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationException; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationExecutor; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Map; + +public class GpuResourceHandlerImpl implements ResourceHandler { + final static Log LOG = LogFactory + .getLog(GpuResourceHandlerImpl.class); + + private final String REQUEST_GPU_NUM_ENV_KEY = "REQUESTED_GPU_NUM"; + + // This will be used by container-executor to add necessary clis + public static final String ALLOCATED_GPU_MINOR_NUMS_ENV_KEY = + "YARN_ALLOCATED_GPUS"; + 
public static final String EXCLUDED_GPUS_CLI_OPTION = "--excluded_gpus"; + public static final String CONTAINER_ID_CLI_OPTION = "--container_id"; + + private GpuResourceAllocator gpuAllocator; + private CGroupsHandler cGroupsHandler; + private PrivilegedOperationExecutor privilegedOperationExecutor; + + GpuResourceHandlerImpl(CGroupsHandler cGroupsHandler, + PrivilegedOperationExecutor privilegedOperationExecutor) { + this.cGroupsHandler = cGroupsHandler; + this.privilegedOperationExecutor = privilegedOperationExecutor; + gpuAllocator = new GpuResourceAllocator(); + } + + @Override + public List bootstrap(Configuration configuration) + throws ResourceHandlerException { + String allowedDevicesStr = configuration.get( + YarnConfiguration.NM_GPU_ALLOWED_DEVICES); + + if (null != allowedDevicesStr) { + for (String s : allowedDevicesStr.split(",")) { + if (s.trim().length() > 0) { + Integer minorNum = Integer.valueOf(s.trim()); + gpuAllocator.addGpu(minorNum); + } + } + } + LOG.info("Allowed GPU devices with minor numbers:" + allowedDevicesStr); + + // And initialize cgroups + this.cGroupsHandler.initializeCGroupController( + CGroupsHandler.CGroupController.DEVICES); + + return null; + } + + private int getRequestedGpu(Container container) { + // TODO, use YARN-3926 after it merged + ContainerLaunchContext clc = container.getLaunchContext(); + Map envs = clc.getEnvironment(); + if (null != envs.get(REQUEST_GPU_NUM_ENV_KEY)) { + return Integer.valueOf(envs.get(REQUEST_GPU_NUM_ENV_KEY)); + } + return 0; + } + + @Override + public synchronized List preStart(Container container) + throws ResourceHandlerException { + String containerIdStr = container.getContainerId().toString(); + + int requestedGpu = getRequestedGpu(container); + + // Assign Gpus to container if requested some. 
+ GpuResourceAllocator.GpuAllocation allocation = gpuAllocator.assignGpus( + requestedGpu, containerIdStr); + + // Create device cgroups for the container + cGroupsHandler.createCGroup(CGroupsHandler.CGroupController.DEVICES, + containerIdStr); + try { + // Execute c-e to setup GPU isolation before launch the container + PrivilegedOperation privilegedOperation = new PrivilegedOperation( + PrivilegedOperation.OperationType.GPU, Arrays + .asList(CONTAINER_ID_CLI_OPTION, containerIdStr)); + if (!allocation.getDeniedGPUs().isEmpty()) { + privilegedOperation.appendArgs(Arrays.asList(EXCLUDED_GPUS_CLI_OPTION, + StringUtils.join(",", allocation.getDeniedGPUs()))); + } + + privilegedOperationExecutor.executePrivilegedOperation( + privilegedOperation, true); + } catch (PrivilegedOperationException e) { + cGroupsHandler.deleteCGroup(CGroupsHandler.CGroupController.DEVICES, + containerIdStr); + LOG.warn("Could not update cgroup for container", e); + throw new ResourceHandlerException(e); + } + + // Set ALLOCATED_GPU_MINOR_NUMS_ENV_KEY to environment so later runtime + // can use it. + Map envs = container.getLaunchContext().getEnvironment(); + if (null == allocation.getAllowedGPUs() || allocation.getAllowedGPUs().isEmpty()) { + envs.put(ALLOCATED_GPU_MINOR_NUMS_ENV_KEY, ""); + } else { + envs.put(ALLOCATED_GPU_MINOR_NUMS_ENV_KEY, + StringUtils.join(",", allocation.getAllowedGPUs())); + } + + List ret = new ArrayList<>(); + ret.add(new PrivilegedOperation( + PrivilegedOperation.OperationType.ADD_PID_TO_CGROUP, + PrivilegedOperation.CGROUP_ARG_PREFIX + + cGroupsHandler.getPathForCGroupTasks( + CGroupsHandler.CGroupController.DEVICES, containerIdStr))); + + return ret; + } + + @VisibleForTesting + public GpuResourceAllocator getGpuAllocator() { + return gpuAllocator; + } + + @Override + public List reacquireContainer(ContainerId containerId) + throws ResourceHandlerException { + // FIXME, need to make sure allocated containers and cgroups can be + // recovered when NM restarts. 
+ return null; + } + + @Override + public synchronized List postComplete( + ContainerId containerId) throws ResourceHandlerException { + gpuAllocator.cleanupAssignGpus(containerId.toString()); + cGroupsHandler.deleteCGroup(CGroupsHandler.CGroupController.DEVICES, + containerId.toString()); + return null; + } + + @Override + public List teardown() throws ResourceHandlerException { + return null; + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/ResourceHandlerModule.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/ResourceHandlerModule.java index 7fc04bdb41e..6cc4449738c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/ResourceHandlerModule.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/ResourceHandlerModule.java @@ -87,6 +87,18 @@ public static CGroupsHandler getCGroupsHandler() { return cGroupsHandler; } + private static GpuResourceHandlerImpl getGpuResourceHandler( + Configuration conf) throws ResourceHandlerException { + boolean cgroupsGpuEnabled = conf.getBoolean( + YarnConfiguration.NM_GPU_RESOURCE_ENABLED, + YarnConfiguration.DEFAULT_NM_GPU_RESOURCE_ENABLED); + if (cgroupsGpuEnabled) { + return new GpuResourceHandlerImpl(getInitializedCGroupsHandler(conf), + PrivilegedOperationExecutor.getInstance(conf)); + } + return null; + } + private static CGroupsCpuResourceHandlerImpl getCGroupsCpuResourceHandler( Configuration conf) throws ResourceHandlerException { boolean cgroupsCpuEnabled = @@ -205,6 +217,7 @@ private static void 
initializeConfiguredResourceHandlerChain( addHandlerIfNotNull(handlerList, getDiskResourceHandler(conf)); addHandlerIfNotNull(handlerList, getMemoryResourceHandler(conf)); addHandlerIfNotNull(handlerList, getCGroupsCpuResourceHandler(conf)); + addHandlerIfNotNull(handlerList, getGpuResourceHandler(conf)); resourceHandlerChain = new ResourceHandlerChain(handlerList); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DockerLinuxContainerRuntime.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DockerLinuxContainerRuntime.java index 6b66b7b1358..b6590f822c8 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DockerLinuxContainerRuntime.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DockerLinuxContainerRuntime.java @@ -38,6 +38,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationException; import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationExecutor; import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.CGroupsHandler; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.GpuResourceHandlerImpl; import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandlerModule; import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.docker.DockerClient; import 
org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.docker.DockerInspectCommand; @@ -454,6 +455,18 @@ public void launchContainer(ContainerRuntimeContext ctx) .setCapabilities(capabilities) .addMountLocation(CGROUPS_ROOT_DIRECTORY, CGROUPS_ROOT_DIRECTORY + ":ro", false); + + // Handle GPU-related configs + String allocatedGpus = environment.get( + GpuResourceHandlerImpl.ALLOCATED_GPU_MINOR_NUMS_ENV_KEY); + if (null != allocatedGpus && !allocatedGpus.isEmpty()) { + // We allocated some GPUs, set YARN_CONTAINER_EXECUTOR_GPU_ENABLED + // to true. + runCommand.addDockerRunEnvars( + "YARN_CONTAINER_EXECUTOR_GPU_ENABLED", "true"); + runCommand.addDockerRunEnvars("PATH", "$PATH:/usr/local/nvidia/bin"); + } + List allDirs = new ArrayList<>(containerLocalDirs); allDirs.addAll(filecacheDirs); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/docker/DockerRunCommand.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/docker/DockerRunCommand.java index f79f4ed08c8..aab44de0a25 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/docker/DockerRunCommand.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/docker/DockerRunCommand.java @@ -31,6 +31,7 @@ private static final String RUN_COMMAND = "run"; private final String image; private List overrrideCommandWithArgs; + private List additionalEnvars = new ArrayList<>(); /** The following are mandatory: */ public DockerRunCommand(String containerId, String user, String image) { @@ -114,6 +115,10 @@ public 
DockerRunCommand setOverrideCommandWithArgs( return this; } + public void addDockerRunEnvars(String key, String value) { + additionalEnvars.add(key + "=" + value); + } + @Override public String getCommandWithArguments() { List argList = new ArrayList<>(); @@ -125,6 +130,10 @@ public String getCommandWithArguments() { argList.addAll(overrrideCommandWithArgs); } - return StringUtils.join(" ", argList); + String cmd = StringUtils.join(" ", argList); + for (String envar : additionalEnvars) { + cmd = cmd +"\n" + envar; + } + return cmd; } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c index 9be8cf4de4c..9865f29ec39 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c @@ -79,6 +79,7 @@ static const char* TC_READ_STATS_OPTS [] = { "-s", "-b", NULL}; struct passwd *user_detail = NULL; FILE* LOGFILE = NULL; + FILE* ERRORFILE = NULL; static uid_t nm_uid = -1; @@ -1139,7 +1140,7 @@ int initialize_app(const char *user, const char *app_id, return -1; } -char* parse_docker_command_file(const char* command_file) { +char* parse_docker_command_file(const char* command_file, char** docker_run_envars) { size_t len = 0; char *line = NULL; ssize_t read; @@ -1156,13 +1157,38 @@ char* parse_docker_command_file(const char* command_file) { fflush(ERRORFILE); exit(ERROR_READING_FILE); } + if (docker_run_envars) { + // Read envars to "k1=v1 k2=v2 ..." form. 
+ int envLen = 0; + char* env = NULL; + + while ((read = getline(&env, &len, stream)) > 0) { + envLen += (read + 1); + + *docker_run_envars = realloc(*docker_run_envars, envLen); + if (envLen == read + 1) { + // initialize array for the first use + (*docker_run_envars)[0] = 0; + } + + if (NULL == *docker_run_envars) { + fprintf(ERRORFILE, "failed to allocate memory for docker run envars"); + } + + // Escape last delim + if ('\n' == env[read - 1]) { + env[read - 1] = ' '; + } + strcat(*docker_run_envars, env); + } + } fclose(stream); return line; } int run_docker(const char *command_file) { - char* docker_command = parse_docker_command_file(command_file); + char* docker_command = parse_docker_command_file(command_file, NULL); char* docker_binary = get_value(DOCKER_BINARY_KEY, &executor_cfg); docker_binary = check_docker_binary(docker_binary); @@ -1326,8 +1352,19 @@ int launch_docker_container_as_user(const char * user, const char *app_id, gid_t user_gid = getegid(); uid_t prev_uid = geteuid(); - char *docker_command = parse_docker_command_file(command_file); - char *docker_binary = get_value(DOCKER_BINARY_KEY, &executor_cfg); + // Additional envars for docker run .. 
+ char *additional_envs = NULL; + + char *docker_command = parse_docker_command_file(command_file, &additional_envs); + char *docker_binary; + char *nvidia_docker_binary = NULL; + + if (additional_envs && + (NULL != strstr(additional_envs, GPU_ENABLED_ENV_VAR))) { + nvidia_docker_binary = get_value(NVIDIA_DOCKER_BINARY_KEY, &executor_cfg); + } + + docker_binary = get_value(DOCKER_BINARY_KEY, &executor_cfg); docker_binary = check_docker_binary(docker_binary); fprintf(LOGFILE, "Creating script paths...\n"); @@ -1367,7 +1404,12 @@ int launch_docker_container_as_user(const char * user, const char *app_id, goto cleanup; } - snprintf(docker_command_with_binary, EXECUTOR_PATH_MAX, "%s %s", docker_binary, docker_command); + snprintf(docker_command_with_binary, EXECUTOR_PATH_MAX, "%s %s %s", + additional_envs, + // Only use nvidia-docker in run if it is required + (nvidia_docker_binary ? nvidia_docker_binary : docker_binary), + docker_command); + fprintf(LOGFILE, "Docker run command, %s", docker_command_with_binary); fprintf(LOGFILE, "Launching docker container...\n"); FILE* start_docker = popen(docker_command_with_binary, "r"); @@ -2144,3 +2186,12 @@ int traffic_control_read_state(char *command_file) { int traffic_control_read_stats(char *command_file) { return run_traffic_control(TC_READ_STATS_OPTS, command_file); } + +/** + * FIXME: (wangda) it's better to move executor_cfg out of container-executor.c + * Now initialize of executor_cfg and data structures are stored inside + * container-executor which is not a good design. 
+ */ +struct configuration* get_cfg() { + return &executor_cfg; +} \ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.h b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.h index aa1c4a19775..d90b44ee273 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.h +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.h @@ -82,7 +82,8 @@ enum operations { RUN_AS_USER_DELETE = 9, RUN_AS_USER_LAUNCH_DOCKER_CONTAINER = 10, RUN_DOCKER = 11, - RUN_AS_USER_LIST = 12 + RUN_AS_USER_LIST = 12, + UPDATE_CGROUPS_PARAM = 13, }; #define NM_GROUP_KEY "yarn.nodemanager.linux-container-executor.group" @@ -95,8 +96,10 @@ enum operations { #define BANNED_USERS_KEY "banned.users" #define ALLOWED_SYSTEM_USERS_KEY "allowed.system.users" #define DOCKER_BINARY_KEY "docker.binary" +#define NVIDIA_DOCKER_BINARY_KEY "nvidia_docker.binary" #define DOCKER_SUPPORT_ENABLED_KEY "feature.docker.enabled" #define TC_SUPPORT_ENABLED_KEY "feature.tc.enabled" +#define GPU_ENABLED_ENV_VAR "YARN_CONTAINER_EXECUTOR_GPU_ENABLED=true" #define TMP_DIR "tmp" extern struct passwd *user_detail; @@ -297,3 +300,5 @@ int is_docker_support_enabled(); * Run a docker command passing the command file as an argument */ int run_docker(const char *command_file); + +struct configuration* get_cfg(); \ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/main.c b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/main.c index 
fdc0496986d..0c8883ce6c4 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/main.c +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/main.c @@ -19,6 +19,8 @@ #include "config.h" #include "configuration.h" #include "container-executor.h" +#include "modules/gpu/gpu-module.h" +#include "modules/common/module-configs.h" #include #include @@ -234,6 +236,8 @@ static struct { int container_pid; int signal; const char *docker_command_file; + const char *cgroups_param_path; + const char* cgroups_param_value; } cmd_input; static int validate_run_as_user_commands(int argc, char **argv, int *operation); @@ -252,6 +256,18 @@ static int validate_arguments(int argc, char **argv , int *operation) { return INVALID_ARGUMENT_NUMBER; } + /* + * Check if it is a known module, if yes, redirect to module + */ + if (strcmp("gpu", argv[1]) == 0) { + // First check if the module is enabled + if (!module_enabled("gpu")) { + return -1; + } + + return handle_gpu_request("gpu", argc - 2, &argv[2]); + } + if (strcmp("--checksetup", argv[1]) == 0) { *operation = CHECK_SETUP; return 0; @@ -331,6 +347,7 @@ static int validate_arguments(int argc, char **argv , int *operation) { return FEATURE_DISABLED; } } + /* Now we have to validate 'run as user' operations that don't use a 'long option' - we should fix this at some point. 
The validation/argument parsing here is extensive enough that it done in a separate function */ diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/modules/cgroups/cgroups-operations.c b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/modules/cgroups/cgroups-operations.c new file mode 100644 index 00000000000..6c5dbc1c69d --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/modules/cgroups/cgroups-operations.c @@ -0,0 +1,90 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "configuration.h" +#include "container-executor.h" +#include "utils/command-line-parser.h" +#include "utils/string-utils.h" +#include "modules/common/module-configs.h" +#include "modules/common/constants.h" +#include "modules/cgroups/cgroups-operations.h" + +#include +#include +#include +#include +#include + +#define CGROUPS_ROOT_KEY CONFIGS_MODULES_PREFIX "cgroups.root" +#define CGROUPS_YARN_HIERARCHY_KEY CONFIGS_MODULES_PREFIX "cgroups.yarn-hierarchy" + +int update_cgroups_parameters( + const char* controller_name, + const char* param_name, + const char* group_id, + const char* value) { +#ifndef __linux + fprintf(LOGFILE, "Failed to update cgroups parameters, not supported\n"); + return -1; +#endif + + const char* cgroups_root = get_value(CGROUPS_ROOT_KEY, get_cfg()); + const char* yarn_hierarchy_name = get_value(CGROUPS_YARN_HIERARCHY_KEY, get_cfg()); + + // Make sure it is defined. + if (!cgroups_root || strlen(cgroups_root) == 0) { + fprintf(LOGFILE, "%s is not defined in container-executor.cfg", + CGROUPS_ROOT_KEY); + return -1; + } + + // Make a path. + // CGroups path should not be too long. 
+ char full_path[4 * 1024]; + snprintf(full_path, 4 * 1024, "%s/%s/%s/%s/%s.%s", + cgroups_root, controller_name, yarn_hierarchy_name, + group_id, controller_name, param_name); + + // Make sure file exist + struct stat sb; + if (stat(full_path, &sb) != 0) { + fprintf(LOGFILE, "CGroups: Could not find path to write, %s", full_path); + return -1; + } + + fprintf(LOGFILE, "CGroups: Updating cgroups, path=%s, value=%s", + full_path, value); + + // Write values to file + FILE *f; + f = fopen(full_path, "a"); + if (!f) { + fprintf(LOGFILE, "CGroups: Failed to open cgroups file, %s", full_path); + return -1; + } + if (fprintf(f, "%s", value) < 0) { + fprintf(LOGFILE, "CGroups: Failed to write cgroups file, %s", full_path); + return -1; + } + if (fclose(f) != 0) { + fprintf(LOGFILE, "CGroups: Failed to close cgroups file, %s", full_path); + return -1; + } + + return 0; +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/modules/cgroups/cgroups-operations.h b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/modules/cgroups/cgroups-operations.h new file mode 100644 index 00000000000..4228a60e984 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/modules/cgroups/cgroups-operations.h @@ -0,0 +1,41 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifdef __FreeBSD__ +#define _WITH_GETLINE +#endif + +#ifndef _CGROUPS_OPERATIONS_H_ +#define _CGROUPS_OPERATIONS_H_ + +/** + * Handle gpu requests: + * - controller_name: e.g. devices + * - param_name: e.g. deny + * - group_id: e.g. container_x_y + * - value: e.g. "a *:* rwm" + * + * return 0 if succeeded + */ +int update_cgroups_parameters( + const char* controller_name, + const char* param_name, + const char* group_id, + const char* value); + +#endif \ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/modules/common/constants.h b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/modules/common/constants.h new file mode 100644 index 00000000000..5c8c4e939ee --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/modules/common/constants.h @@ -0,0 +1,29 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* FreeBSD protects the getline() prototype. See getline(3) for more */ +#ifdef __FreeBSD__ +#define _WITH_GETLINE +#endif + +#ifndef _MODULES_COMMON_CONSTANTS_H_ +#define _MODULES_COMMON_CONSTANTS_H_ + +#define CONFIGS_MODULES_PREFIX "yarn.container-executor.modules." + +#endif \ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/modules/common/module-configs.c b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/modules/common/module-configs.c new file mode 100644 index 00000000000..1b30c8a915c --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/modules/common/module-configs.c @@ -0,0 +1,52 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "configuration.h" +#include "container-executor.h" +#include "modules/common/constants.h" + +#include +#include +#include + +int module_enabled(const char* module_name) { + char* module_enabled_config_key; + int buffer_len = strlen(CONFIGS_MODULES_PREFIX) + strlen(module_name) + + strlen(".enabled") + 1; + + module_enabled_config_key = malloc(buffer_len); + + if (!module_enabled_config_key) { + fprintf(LOGFILE, "Failed to allocate memory for module_enabled_config_key"); + return 0; + } + + strcat(strcpy(module_enabled_config_key, CONFIGS_MODULES_PREFIX), module_name); + strcat(module_enabled_config_key, ".enabled"); + + char* enabled_str = get_value(module_enabled_config_key, get_cfg()); + int rc = 0; + if (enabled_str && 0 == strcmp(enabled_str, "true")) { + rc = 1; + } else { + fprintf(LOGFILE, "Module %s is disabled", module_name); + } + + free(enabled_str); + return rc; +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/modules/common/module-configs.h b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/modules/common/module-configs.h new file mode 100644 index 00000000000..65e868738be --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/modules/common/module-configs.h @@ -0,0 +1,33 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license 
agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifdef __FreeBSD__ +#define _WITH_GETLINE +#endif + +#ifndef _MODULES_COMMON_MODULE_CONFIGS_H_ +#define _MODULES_COMMON_MODULE_CONFIGS_H_ + + +/** + * check if module enabled given name of module. + * return 0 if disabled + */ +int module_enabled(const char* module_name); + +#endif \ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/modules/gpu/gpu-module.c b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/modules/gpu/gpu-module.c new file mode 100644 index 00000000000..15dad289b59 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/modules/gpu/gpu-module.c @@ -0,0 +1,155 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "configuration.h" +#include "container-executor.h" +#include "utils/command-line-parser.h" +#include "utils/string-utils.h" +#include "modules/gpu/gpu-module.h" +#include "modules/cgroups/cgroups-operations.h" +#include "modules/common/module-configs.h" +#include "modules/common/constants.h" + +#include +#include +#include + +#define EXCLUDED_GPUS_OPTION "excluded_gpus" +#define CONTAINER_ID_OPTION "container_id" +#define GPU_MAJOR_NUMBER_CONFIG_KEY CONFIGS_MODULES_PREFIX "gpu.major-device-number" +#define GPU_ALLOWED_DEVICES_MINOR_NUMBERS CONFIGS_MODULES_PREFIX "gpu.allowed-device-minor-numbers" + +static int internal_handle_gpu_request(int n_minor_devices_to_block, int minor_devices[], + const char* container_id) { + if (n_minor_devices_to_block <= 0) { + // no device to block, just return; + return 0; + } + + // Get major device number from cfg, if not set, 195 (Nvidia) will be the + // default value + int major_device_number; + char* major_number_str = get_value(GPU_MAJOR_NUMBER_CONFIG_KEY, get_cfg()); + if (!major_number_str || 0 == strlen(major_number_str)) { + // Default major number of Nvidia devices + major_device_number = 195; + } else { + major_device_number = atoi(major_number_str); + } + + // Get allowed minor device numbers from cfg, if not set, means all minor + // devices can be used by YARN + char* allowed_minor_numbers_str = get_value(GPU_ALLOWED_DEVICES_MINOR_NUMBERS, + get_cfg()); + int* allowed_minor_numbers; + int n_allowed_minor_numbers = 0; + if (!allowed_minor_numbers_str || strlen(allowed_minor_numbers_str)) { 
+ allowed_minor_numbers = NULL; + } else { + int rc = get_numbers_split_by_comma(allowed_minor_numbers_str, + &allowed_minor_numbers, + &n_allowed_minor_numbers); + if (0 != rc) { + fprintf(LOGFILE, + "Failed to get allowed minor device numbers from cfg, value=%s\n", + allowed_minor_numbers_str); + return -1; + } + + // Make sure we're trying to black devices allowed in config + for (int i = 0; i < n_minor_devices_to_block; i++) { + int found = 0; + for (int j = 0; j < n_allowed_minor_numbers; j++) { + if (minor_devices[i] == allowed_minor_numbers[j]) { + found = 1; + break; + } + } + + if (!found) { + fprintf(LOGFILE, + "Trying to blacklist device with minor-number=%d which is not on allowed list\n", + minor_devices[i]); + return -1; + } + } + } + + // Use cgroup helpers to blacklist devices + for (int i = 0; i < sizeof(minor_devices) / sizeof(int); i++) { + char param_value[128]; + snprintf(param_value, 128, "c %d:%d rwm", major_device_number, i); + int rc = update_cgroups_parameters("devices", "deny", + container_id, param_value); + + if (0 != rc) { + fprintf(LOGFILE, "CGroups: Failed to update cgroups\n"); + return -1; + } + } + + return 0; +} + +/* + * Format of GPU request commandline: + * + * c-e gpu --excluded_gpus 0,1,3 --container_id container_x_y + */ +int handle_gpu_request(const char* module_name, int argc, char** argv) { + const char* known_parameters[] = {EXCLUDED_GPUS_OPTION, CONTAINER_ID_OPTION}; + const int required_parameters[] = {0, 1}; + const int has_values[] = {1, 1}; + + struct parsed_command_line* opts = parse_commandline_opts(argc, argv, + 2 /* 2 known parameters */, + known_parameters /* name of known parameters */, + required_parameters /* Are they required or optional? */, + has_values /* Is there any value followed by the key? 
*/); + + if (!opts) { + fprintf(LOGFILE, "Failed to parse input command line\n"); + return -1; + } + + int* minor_devices; + char* container_id; + int n_minor_devices_to_block = 0; + + // Check command line parameters + for (int i = 0; i < opts->n_options; i++) { + if (0 == strcmp(EXCLUDED_GPUS_OPTION, opts->keys[i])) { + int rc = get_numbers_split_by_comma(opts->values[i], &minor_devices, &n_minor_devices_to_block); + if (0 != rc) { + fprintf(LOGFILE, + "Failed to get minor devices number from command line, value=%s\n", + opts->values[i]); + return -1; + } + } else if (0 == strcmp(CONTAINER_ID_OPTION, opts->keys[i])) { + if (!validate_container_id(opts->values[i])) { + fprintf(LOGFILE, + "Specified container_id=%s is invalid\n", opts->values[i]); + return -1; + } + container_id = opts->values[i]; + } + } + + return internal_handle_gpu_request(n_minor_devices_to_block, minor_devices, container_id); +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/modules/gpu/gpu-module.h b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/modules/gpu/gpu-module.h new file mode 100644 index 00000000000..90e5f4ec640 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/modules/gpu/gpu-module.h @@ -0,0 +1,31 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifdef __FreeBSD__ +#define _WITH_GETLINE +#endif + +#ifndef _MODULES_GPU_GPU_MUDULE_H_ +#define _MODULES_GPU_GPU_MUDULE_H_ + +/** + * Handle gpu requests + */ +int handle_gpu_request(const char* module_name, int argc, char** argv); + +#endif \ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/utils/command-line-parser.c b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/utils/command-line-parser.c new file mode 100644 index 00000000000..762674db25f --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/utils/command-line-parser.c @@ -0,0 +1,123 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "utils/command-line-parser.h" +#include "container-executor.h" + +#include +#include +#include + +struct parsed_command_line* parse_commandline_opts(int argc, char** argv, int n_known_parameters, + const char** known_parameters, const int required[], const int has_values[]) { + struct parsed_command_line* opts = malloc(sizeof(struct parsed_command_line)); + if (!opts) { + fprintf(LOGFILE, "Failed to malloc parsed_command_options\n"); + return NULL; + } + + opts->keys = malloc(sizeof(char*) * (argc + 1)); + opts->values = malloc(sizeof(char*) * (argc + 1)); + + if (!opts->keys || !opts->values) { + fprintf(LOGFILE, "Failed to malloc keys or values of opts\n"); + return NULL; + } + + // Validate inputs + // Make sure all option-related are not null + if (!(known_parameters && required && has_values)) { + fprintf(LOGFILE, + "Please make sure known_parameters / required / has_values are set\n"); + return NULL; + } + + // Start parse commandline + int input_argv_idx = 0; + opts->n_options = 0; + + while (input_argv_idx < argc) { + // get parameter_name + char* param_name = argv[input_argv_idx]; + + // make sure param_name start with "--" + if (0 != strncmp("--", param_name, 2)) { + fprintf(LOGFILE, "option %s is not started with \"--\"\n", param_name); + return NULL; + } + + // Exclude "--" prefix + param_name += 2; + + int param_idx = -1; + + for (int i = 0; i < n_known_parameters; i++) { + if (0 == strcmp(known_parameters[i], param_name)) { + param_idx = i; + break; + } + } + + if (param_idx < 0) { + fprintf(LOGFILE, "cannot find parameter %s from known parameters\n", param_name); + return NULL; + } + + opts->keys[opts->n_options] = param_name; + + // Check if we need value followed by the param + if (has_values[param_idx]) { + // Parse value + input_argv_idx++; + + if (input_argv_idx >= argc) { + fprintf(LOGFILE, "unexpected end of commandline 
while parsing param=%s\n", + param_name); + return NULL; + } + + opts->values[opts->n_options] = argv[input_argv_idx]; + } + + opts->n_options++; + input_argv_idx++; + } + + // Make sure all required parameters are set + for (int i = 0; i < n_known_parameters; i++) { + if (required[i]) { + const char* required_key = known_parameters[i]; + + int find = 0; + for (int j = 0; j < opts->n_options; j++) { + if (0 == strcmp(opts->keys[j], required_key)) { + find = 1; + break; + } + } + + if (!find) { + fprintf(LOGFILE, "%s is required but not specified in commandline.\n", + required_key); + return NULL; + } + } + } + + return opts; +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/utils/command-line-parser.h b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/utils/command-line-parser.h new file mode 100644 index 00000000000..4a99061b866 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/utils/command-line-parser.h @@ -0,0 +1,46 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifdef __FreeBSD__ +#define _WITH_GETLINE +#endif + +#ifndef _UTILS_COMMAND_LINE_PARSER_H_ +#define _UTILS_COMMAND_LINE_PARSER_H_ + +struct parsed_command_line { + int n_options; + char** keys; + char** values; +}; + +/* + * Return a parsed commandline options. + * As usual: + * - argc / argv + * + * In addition to that, you need to specify: + * - known_parameters, without "--" + * - if these parameters are required (1 is required) + * - is there any values followed by the option (1 means has value) + */ +struct parsed_command_line* parse_commandline_opts(int argc, char** argv, + int n_known_parameters, const char** known_parameters, + const int required[], const int has_values[]); + +#endif diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/utils/string-utils.c b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/utils/string-utils.c new file mode 100644 index 00000000000..8eae157cb76 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/utils/string-utils.c @@ -0,0 +1,118 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include + +/* + * if all chars in the input str are numbers + * return true/false + */ +static int all_numbers(char* input) { + if (strlen(input) == 0) { + return 0; + } + + for (int i = 0; i < strlen(input); i++) { + if (input[i] < '0' || input[i] > '9') { + return 0; + } + } + return 1; +} + +int get_numbers_split_by_comma(char* input, int** numbers, int* ret_n_numbers) { + int n_numbers = 1; + for (int i = 0; i < strlen(input); i++) { + if (input[i] == ',') { + n_numbers++; + } + } + + int* arr = (*numbers); + arr = malloc(sizeof(int) * n_numbers); + if (!arr) { + return -1; + } + + char* input_cpy = malloc(strlen(input)); + strcpy(input_cpy, input); + + char* p = strtok(input_cpy, ","); + int idx = 0; + while (p != NULL) { + int n = atoi(p); + arr[idx] = n; + p = strtok(NULL, ","); + idx++; + } + + free(input_cpy); + *ret_n_numbers = n_numbers; + + return 0; +} + +int validate_container_id(char* input) { + /* + * Two different forms of container_id + * container_e17_1410901177871_0001_01_000005 + * container_1410901177871_0001_01_000005 + */ + char* input_cpy = malloc(strlen(input)); + strcpy(input_cpy, input); + char* p = strtok(input_cpy, "_"); + int idx = 0; + while (p != NULL) { + if (0 == idx) { + if (0 != strcmp("container", p)) { + return 0; + } + } else if (1 == idx) { + // this could be e[n][n], or [n][n]... 
+ if (!all_numbers(p)) { + if (strlen(p) == 0) { + return 0; + } + if (p[0] != 'e') { + return 0; + } + if (!all_numbers(p + 1)) { + return 0; + } + } + } else { + // otherwise, should be all numbers + if (!all_numbers(p)) { + return 0; + } + } + + p = strtok(NULL, "_"); + idx++; + } + free(input_cpy); + + // We should at most 6 elements split by '_' + if (idx > 6) { + return 0; + } + return 1; +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/utils/string-utils.h b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/utils/string-utils.h new file mode 100644 index 00000000000..25f54a2016c --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/utils/string-utils.h @@ -0,0 +1,38 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifdef __FreeBSD__ +#define _WITH_GETLINE +#endif + +#ifndef _UTILS_STRING_UTILS_H_ +#define _UTILS_STRING_UTILS_H_ + +/* + * Get numbers split by comma from a input string + * return 0 if succeeded + */ +int get_numbers_split_by_comma(char* input, int** numbers, int* n_numbers); + +/* + * Get numbers split by comma from a input string + * return false/true + */ +int validate_container_id(char* input); + +#endif diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestGpuResourceHandler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestGpuResourceHandler.java new file mode 100644 index 00000000000..b294b7aae8a --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestGpuResourceHandler.java @@ -0,0 +1,167 @@ +package org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.util.StringUtils; +import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; +import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperation; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationException; +import 
org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationExecutor; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +public class TestGpuResourceHandler { + private CGroupsHandler mockCGroupsHandler; + private PrivilegedOperationExecutor mockPrivilegedExecutor; + private GpuResourceHandlerImpl gpuResourceHandler; + + @Before + public void setup() { + mockCGroupsHandler = mock(CGroupsHandler.class); + mockPrivilegedExecutor = mock(PrivilegedOperationExecutor.class); + + gpuResourceHandler = new GpuResourceHandlerImpl(mockCGroupsHandler, + mockPrivilegedExecutor); + } + + @Test + public void testBootStrap() throws ResourceHandlerException { + Configuration conf = new YarnConfiguration(); + + gpuResourceHandler.bootstrap(conf); + verify(mockCGroupsHandler, times(1)).initializeCGroupController( + CGroupsHandler.CGroupController.DEVICES); + } + + private static ContainerId getContainerId(int id) { + return ContainerId.newContainerId(ApplicationAttemptId + .newInstance(ApplicationId.newInstance(1234L, 1), 1), id); + } + + private static Container mockContainer(int id, int numGpuRequest) { + Container c = mock(Container.class); + when(c.getContainerId()).thenReturn(getContainerId(id)); + ContainerLaunchContext clc = mock(ContainerLaunchContext.class); + Map envs = new HashMap<>(); + when(clc.getEnvironment()).thenReturn(envs); + envs.put("REQUESTED_GPU_NUM", String.valueOf(numGpuRequest)); + when(c.getLaunchContext()).thenReturn(clc); + return c; + } + + private void verifyDeniedDevices(ContainerId containerId, List deniedDevices) + throws ResourceHandlerException, PrivilegedOperationException { + verify(mockCGroupsHandler, 
times(1)).createCGroup( + CGroupsHandler.CGroupController.DEVICES, containerId.toString()); + + if (null != deniedDevices && !deniedDevices.isEmpty()) { + verify(mockPrivilegedExecutor, times(1)).executePrivilegedOperation( + new PrivilegedOperation(PrivilegedOperation.OperationType.GPU, Arrays + .asList(GpuResourceHandlerImpl.EXCLUDED_GPUS_CLI_OPTION, + StringUtils.join(",", deniedDevices), + GpuResourceHandlerImpl.CONTAINER_ID_CLI_OPTION, + containerId.toString())), true); + } + } + + @Test + public void testAllocation() throws Exception { + Configuration conf = new YarnConfiguration(); + conf.set(YarnConfiguration.NM_GPU_ALLOWED_DEVICES, "0,1,3,4"); + conf.setBoolean(YarnConfiguration.NM_GPU_RESOURCE_ENABLED, true); + + gpuResourceHandler.bootstrap(conf); + Assert.assertEquals(4, + gpuResourceHandler.getGpuAllocator().getAvailableGpus()); + + /* Start container 1, asks 3 containers */ + gpuResourceHandler.preStart(mockContainer(1, 3)); + + // Only device=4 will be blocked. + verifyDeniedDevices(getContainerId(1), Arrays.asList(4)); + + /* Start container 2, asks 2 containers. 
Excepted to fail */ + boolean failedToAllocate = false; + try { + gpuResourceHandler.preStart(mockContainer(2, 2)); + } catch (ResourceHandlerException e) { + failedToAllocate = true; + } + Assert.assertTrue(failedToAllocate); + + /* Start container 3, ask 1 container, succeeded */ + gpuResourceHandler.preStart(mockContainer(3, 1)); + + // devices = 0/1/3 will be blocked + verifyDeniedDevices(getContainerId(3), Arrays.asList(0, 1, 3)); + + /* Start container 4, ask 0 container, succeeded */ + gpuResourceHandler.preStart(mockContainer(4, 0)); + + // All devices will be blocked + verifyDeniedDevices(getContainerId(4), Arrays.asList(0, 1, 3, 4)); + + /* Release container-1, expect cgroups deleted */ + gpuResourceHandler.postComplete(getContainerId(1)); + + verify(mockCGroupsHandler, times(1)).createCGroup( + CGroupsHandler.CGroupController.DEVICES, getContainerId(1).toString()); + Assert.assertEquals(3, + gpuResourceHandler.getGpuAllocator().getAvailableGpus()); + + /* Release container-3, expect cgroups deleted */ + gpuResourceHandler.postComplete(getContainerId(3)); + + verify(mockCGroupsHandler, times(1)).createCGroup( + CGroupsHandler.CGroupController.DEVICES, getContainerId(3).toString()); + Assert.assertEquals(4, + gpuResourceHandler.getGpuAllocator().getAvailableGpus()); + } + + @Test + public void testAllocationWithoutAllowedGpus() throws Exception { + Configuration conf = new YarnConfiguration(); + conf.set(YarnConfiguration.NM_GPU_ALLOWED_DEVICES, ""); + conf.setBoolean(YarnConfiguration.NM_GPU_RESOURCE_ENABLED, true); + + gpuResourceHandler.bootstrap(conf); + Assert.assertEquals(0, + gpuResourceHandler.getGpuAllocator().getAvailableGpus()); + + /* Start container 1, asks 0 containers */ + gpuResourceHandler.preStart(mockContainer(1, 0)); + verifyDeniedDevices(getContainerId(1), Arrays.asList()); + + /* Start container 2, asks 1 containers. 
Excepted to fail */ + boolean failedToAllocate = false; + try { + gpuResourceHandler.preStart(mockContainer(2, 1)); + } catch (ResourceHandlerException e) { + failedToAllocate = true; + } + Assert.assertTrue(failedToAllocate); + + /* Release container 1, expect cgroups deleted */ + gpuResourceHandler.postComplete(getContainerId(1)); + + verify(mockCGroupsHandler, times(1)).createCGroup( + CGroupsHandler.CGroupController.DEVICES, getContainerId(1).toString()); + Assert.assertEquals(0, + gpuResourceHandler.getGpuAllocator().getAvailableGpus()); + } +}