diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DockerLinuxContainerRuntime.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DockerLinuxContainerRuntime.java
index 85ddca90dad..3b671e85ab2 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DockerLinuxContainerRuntime.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DockerLinuxContainerRuntime.java
@@ -23,9 +23,6 @@
import com.google.common.annotations.VisibleForTesting;
import org.apache.hadoop.hdfs.protocol.datatransfer.IOStreamPair;
import org.apache.hadoop.security.Credentials;
-import org.apache.hadoop.yarn.api.ApplicationConstants.Environment;
-import org.apache.hadoop.yarn.api.CsiAdaptorProtocol;
-import org.apache.hadoop.yarn.api.impl.pb.client.CsiAdaptorProtocolPBClientImpl;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.server.nodemanager.Context;
@@ -41,7 +38,6 @@
import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.ResourcePlugin;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.volume.csi.ContainerVolumePublisher;
import org.apache.hadoop.yarn.util.DockerClientConfigHandler;
-import org.apache.hadoop.yarn.util.csi.CsiConfigUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.classification.InterfaceAudience;
@@ -50,9 +46,6 @@
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.registry.client.api.RegistryConstants;
import org.apache.hadoop.registry.client.binding.RegistryPathUtils;
-import org.apache.hadoop.security.UserGroupInformation;
-import org.apache.hadoop.security.authorize.AccessControlList;
-import org.apache.hadoop.util.Shell;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor;
@@ -67,7 +60,6 @@
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.docker.DockerInspectCommand;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.docker.DockerRunCommand;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.runtime.ContainerExecutionException;
-import org.apache.hadoop.yarn.server.nodemanager.containermanager.runtime.ContainerRuntime;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.runtime.ContainerRuntimeConstants;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.runtime.ContainerRuntimeContext;
import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerExecContext;
@@ -75,27 +67,18 @@
import java.io.File;
import java.io.IOException;
import java.net.InetAddress;
-import java.net.InetSocketAddress;
import java.net.UnknownHostException;
import java.nio.ByteBuffer;
-import java.nio.file.Files;
-import java.nio.file.Paths;
import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.HashSet;
import java.util.List;
import java.util.Map;
-import java.util.Map.Entry;
-import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import static org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.LinuxContainerRuntimeConstants.*;
/**
- *
This class is a {@link ContainerRuntime} implementation that uses the
+ *
This class is an extension of {@link OCIContainerRuntime} that uses the
* native {@code container-executor} binary via a
* {@link PrivilegedOperationExecutor} instance to launch processes inside
* Docker containers.
@@ -111,18 +94,6 @@
* be used. This environment variable is checked by the
* {@link #isDockerContainerRequested} method, which is called by the
* {@link DelegatingLinuxContainerRuntime}.
- *
- *
- * {@code YARN_CONTAINER_RUNTIME_DOCKER_IMAGE} names which image
- * will be used to launch the Docker container.
- *
- *
- * {@code YARN_CONTAINER_RUNTIME_DOCKER_RUN_OVERRIDE_DISABLE} controls
- * whether the Docker container's default command is overridden. When set
- * to {@code true}, the Docker container's command will be
- * {@code bash }. When unset or set to {@code false}
- * the Docker container's default command is used.
- *
*
* {@code YARN_CONTAINER_RUNTIME_DOCKER_CONTAINER_NETWORK} sets the
* network type to be used by the Docker container. It must be a valid
@@ -130,167 +101,17 @@
* {@code yarn.nodemanager.runtime.linux.docker.allowed-container-networks}
* property.
*
- *
- * {@code YARN_CONTAINER_RUNTIME_DOCKER_PORTS_MAPPING} allows users to
- * specify ports mapping for the bridge network Docker container. The value
- * of the environment variable should be a comma-separated list of ports
- * mapping. It's the same to "-p" option for the Docker run command. If the
- * value is empty, "-P" will be added.
- *
- *
- * {@code YARN_CONTAINER_RUNTIME_DOCKER_CONTAINER_PID_NAMESPACE}
- * controls which PID namespace will be used by the Docker container. By
- * default, each Docker container has its own PID namespace. To share the
- * namespace of the host, the
- * {@code yarn.nodemanager.runtime.linux.docker.host-pid-namespace.allowed}
- * property must be set to {@code true}. If the host PID namespace is
- * allowed and this environment variable is set to {@code host}, the
- * Docker container will share the host's PID namespace. No other value is
- * allowed.
- *
- *
- * {@code YARN_CONTAINER_RUNTIME_DOCKER_CONTAINER_HOSTNAME} sets the
- * hostname to be used by the Docker container. If not specified, a
- * hostname will be derived from the container ID and set as default
- * hostname for networks other than 'host'.
- *
- *
- * {@code YARN_CONTAINER_RUNTIME_DOCKER_RUN_PRIVILEGED_CONTAINER}
- * controls whether the Docker container is a privileged container. In order
- * to use privileged containers, the
- * {@code yarn.nodemanager.runtime.linux.docker.privileged-containers.allowed}
- * property must be set to {@code true}, and the application owner must
- * appear in the value of the
- * {@code yarn.nodemanager.runtime.linux.docker.privileged-containers.acl}
- * property. If this environment variable is set to {@code true}, a
- * privileged Docker container will be used if allowed. No other value is
- * allowed, so the environment variable should be left unset rather than
- * setting it to false.
- *
- *
- * {@code YARN_CONTAINER_RUNTIME_DOCKER_MOUNTS} allows users to specify
- + additional volume mounts for the Docker container. The value of the
- * environment variable should be a comma-separated list of mounts.
- * All such mounts must be given as {@code source:dest[:mode]} and the mode
- * must be "ro" (read-only) or "rw" (read-write) to specify the type of
- * access being requested. If neither is specified, read-write will be
- * assumed. The mode may include a bind propagation option. In that case,
- * the mode should either be of the form [option], rw+[option], or
- * ro+[option]. Valid bind propagation options are shared, rshared, slave,
- * rslave, private, and rprivate. The requested mounts will be validated by
- * container-executor based on the values set in container-executor.cfg for
- * {@code docker.allowed.ro-mounts} and {@code docker.allowed.rw-mounts}.
- *
- *
- * {@code YARN_CONTAINER_RUNTIME_DOCKER_TMPFS_MOUNTS} allows users to
- * specify additional tmpfs mounts for the Docker container. The value of
- * the environment variable should be a comma-separated list of mounts.
- *
- *
- * {@code YARN_CONTAINER_RUNTIME_DOCKER_DELAYED_REMOVAL} allows a user
- * to request delayed deletion of the Docker containers on a per
- * container basis. If true, Docker containers will not be removed until
- * the duration defined by {@code yarn.nodemanager.delete.debug-delay-sec}
- * has elapsed. Administrators can disable this feature through the
- * yarn-site property
- * {@code yarn.nodemanager.runtime.linux.docker.delayed-removal.allowed}.
- * This feature is disabled by default. When this feature is disabled or set
- * to false, the container will be removed as soon as it exits.
- *
- *
- * {@code YARN_CONTAINER_RUNTIME_YARN_SYSFS_ENABLE} allows export yarn
- * service json to docker container. This feature is disabled by default.
- * when this feature is set, app.json will be available in
- * /hadoop/yarn/sysfs/app.json.
- *
*
*/
@InterfaceAudience.Private
@InterfaceStability.Unstable
-public class DockerLinuxContainerRuntime implements LinuxContainerRuntime {
+public class DockerLinuxContainerRuntime extends OCIContainerRuntime {
private static final Logger LOG =
LoggerFactory.getLogger(DockerLinuxContainerRuntime.class);
- // This validates that the image is a proper docker image
- public static final String DOCKER_IMAGE_PATTERN =
- "^(([a-zA-Z0-9.-]+)(:\\d+)?/)?([a-z0-9_./-]+)(:[\\w.-]+)?$";
- private static final Pattern dockerImagePattern =
- Pattern.compile(DOCKER_IMAGE_PATTERN);
- public static final String HOSTNAME_PATTERN =
- "^[a-zA-Z0-9][a-zA-Z0-9_.-]+$";
- private static final Pattern hostnamePattern = Pattern.compile(
- HOSTNAME_PATTERN);
- private static final Pattern USER_MOUNT_PATTERN = Pattern.compile(
- "(?<=^|,)([^:\\x00]+):([^:\\x00]+)" +
- "(:(r[ow]|(r[ow][+])?(r?shared|r?slave|r?private)))?(?:,|$)");
- private static final Pattern TMPFS_MOUNT_PATTERN = Pattern.compile(
- "^/[^:\\x00]+$");
- public static final String PORTS_MAPPING_PATTERN =
- "^:[0-9]+|^[0-9]+:[0-9]+|^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]" +
- "|25[0-5])\\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])" +
- ":[0-9]+:[0-9]+$";
- private static final int HOST_NAME_LENGTH = 64;
- private static final String DEFAULT_PROCFS = "/proc";
-
- @InterfaceAudience.Private
- public static final String ENV_DOCKER_CONTAINER_IMAGE =
- "YARN_CONTAINER_RUNTIME_DOCKER_IMAGE";
@InterfaceAudience.Private
- public static final String ENV_DOCKER_CONTAINER_RUN_OVERRIDE_DISABLE =
- "YARN_CONTAINER_RUNTIME_DOCKER_RUN_OVERRIDE_DISABLE";
- @InterfaceAudience.Private
- public static final String ENV_DOCKER_CONTAINER_NETWORK =
+ static final String ENV_DOCKER_CONTAINER_NETWORK =
"YARN_CONTAINER_RUNTIME_DOCKER_CONTAINER_NETWORK";
- @InterfaceAudience.Private
- public static final String ENV_DOCKER_CONTAINER_PID_NAMESPACE =
- "YARN_CONTAINER_RUNTIME_DOCKER_CONTAINER_PID_NAMESPACE";
- @InterfaceAudience.Private
- public static final String ENV_DOCKER_CONTAINER_HOSTNAME =
- "YARN_CONTAINER_RUNTIME_DOCKER_CONTAINER_HOSTNAME";
- @InterfaceAudience.Private
- public static final String ENV_DOCKER_CONTAINER_RUN_PRIVILEGED_CONTAINER =
- "YARN_CONTAINER_RUNTIME_DOCKER_RUN_PRIVILEGED_CONTAINER";
- @InterfaceAudience.Private
- public static final String ENV_DOCKER_CONTAINER_MOUNTS =
- "YARN_CONTAINER_RUNTIME_DOCKER_MOUNTS";
- @InterfaceAudience.Private
- public static final String ENV_DOCKER_CONTAINER_TMPFS_MOUNTS =
- "YARN_CONTAINER_RUNTIME_DOCKER_TMPFS_MOUNTS";
- @InterfaceAudience.Private
- public static final String ENV_DOCKER_CONTAINER_DELAYED_REMOVAL =
- "YARN_CONTAINER_RUNTIME_DOCKER_DELAYED_REMOVAL";
- @InterfaceAudience.Private
- public static final String ENV_DOCKER_CONTAINER_PORTS_MAPPING =
- "YARN_CONTAINER_RUNTIME_DOCKER_PORTS_MAPPING";
- @InterfaceAudience.Private
- public static final String ENV_DOCKER_CONTAINER_YARN_SYSFS =
- "YARN_CONTAINER_RUNTIME_YARN_SYSFS_ENABLE";
- @InterfaceAudience.Private
- public static final String ENV_DOCKER_CONTAINER_DOCKER_RUNTIME =
- "YARN_CONTAINER_RUNTIME_DOCKER_RUNTIME";
- public static final String YARN_SYSFS_PATH =
- "/hadoop/yarn/sysfs";
- private Configuration conf;
- private Context nmContext;
- private DockerClient dockerClient;
- private Map csiClients = new HashMap<>();
- private PrivilegedOperationExecutor privilegedOperationExecutor;
- private String defaultImageName;
- private Boolean defaultImageUpdate;
- private Set allowedNetworks = new HashSet<>();
- private Set allowedRuntimes = new HashSet<>();
- private String defaultNetwork;
- private String defaultRuntime;
- private CGroupsHandler cGroupsHandler;
- private AccessControlList privilegedContainersAcl;
- private boolean enableUserReMapping;
- private int userRemappingUidThreshold;
- private int userRemappingGidThreshold;
- private Set capabilities;
- private boolean delayedRemovalAllowed;
- private Set defaultROMounts = new HashSet<>();
- private Set defaultRWMounts = new HashSet<>();
- private Set defaultTmpfsMounts = new HashSet<>();
/**
* Return whether the given environment variables indicate that the operation
@@ -338,93 +159,17 @@ public DockerLinuxContainerRuntime(PrivilegedOperationExecutor
public DockerLinuxContainerRuntime(
PrivilegedOperationExecutor privilegedOperationExecutor,
CGroupsHandler cGroupsHandler) {
- this.privilegedOperationExecutor = privilegedOperationExecutor;
-
- if (cGroupsHandler == null) {
- LOG.info("cGroupsHandler is null - cgroups not in use.");
- } else {
- this.cGroupsHandler = cGroupsHandler;
- }
+ super(privilegedOperationExecutor, cGroupsHandler);
}
@Override
public void initialize(Configuration conf, Context nmContext)
throws ContainerExecutionException {
- this.nmContext = nmContext;
- this.conf = conf;
+ super.initialize(conf, nmContext);
dockerClient = new DockerClient();
- allowedNetworks.clear();
- allowedRuntimes.clear();
- defaultROMounts.clear();
- defaultRWMounts.clear();
- defaultTmpfsMounts.clear();
- defaultImageName = conf.getTrimmed(
- YarnConfiguration.NM_DOCKER_IMAGE_NAME, "");
- defaultImageUpdate = conf.getBoolean(
- YarnConfiguration.NM_DOCKER_IMAGE_UPDATE, false);
- allowedNetworks.addAll(Arrays.asList(
- conf.getTrimmedStrings(
- YarnConfiguration.NM_DOCKER_ALLOWED_CONTAINER_NETWORKS,
- YarnConfiguration.DEFAULT_NM_DOCKER_ALLOWED_CONTAINER_NETWORKS)));
- defaultNetwork = conf.getTrimmed(
- YarnConfiguration.NM_DOCKER_DEFAULT_CONTAINER_NETWORK,
- YarnConfiguration.DEFAULT_NM_DOCKER_DEFAULT_CONTAINER_NETWORK);
- allowedRuntimes.addAll(Arrays.asList(
- conf.getTrimmedStrings(
- YarnConfiguration.NM_DOCKER_ALLOWED_CONTAINER_RUNTIMES,
- YarnConfiguration.DEFAULT_NM_DOCKER_ALLOWED_CONTAINER_RUNTIMES)));
-
- if(!allowedNetworks.contains(defaultNetwork)) {
- String message = "Default network: " + defaultNetwork
- + " is not in the set of allowed networks: " + allowedNetworks;
-
- if (LOG.isWarnEnabled()) {
- LOG.warn(message + ". Please check configuration");
- }
-
- throw new ContainerExecutionException(message);
- }
// initialize csi adaptors if necessary
initiateCsiClients(conf);
-
- privilegedContainersAcl = new AccessControlList(conf.getTrimmed(
- YarnConfiguration.NM_DOCKER_PRIVILEGED_CONTAINERS_ACL,
- YarnConfiguration.DEFAULT_NM_DOCKER_PRIVILEGED_CONTAINERS_ACL));
-
- enableUserReMapping = conf.getBoolean(
- YarnConfiguration.NM_DOCKER_ENABLE_USER_REMAPPING,
- YarnConfiguration.DEFAULT_NM_DOCKER_ENABLE_USER_REMAPPING);
-
- userRemappingUidThreshold = conf.getInt(
- YarnConfiguration.NM_DOCKER_USER_REMAPPING_UID_THRESHOLD,
- YarnConfiguration.DEFAULT_NM_DOCKER_USER_REMAPPING_UID_THRESHOLD);
-
- userRemappingGidThreshold = conf.getInt(
- YarnConfiguration.NM_DOCKER_USER_REMAPPING_GID_THRESHOLD,
- YarnConfiguration.DEFAULT_NM_DOCKER_USER_REMAPPING_GID_THRESHOLD);
-
- capabilities = getDockerCapabilitiesFromConf();
-
- delayedRemovalAllowed = conf.getBoolean(
- YarnConfiguration.NM_DOCKER_ALLOW_DELAYED_REMOVAL,
- YarnConfiguration.DEFAULT_NM_DOCKER_ALLOW_DELAYED_REMOVAL);
-
- defaultROMounts.addAll(Arrays.asList(
- conf.getTrimmedStrings(
- YarnConfiguration.NM_DOCKER_DEFAULT_RO_MOUNTS)));
-
- defaultRWMounts.addAll(Arrays.asList(
- conf.getTrimmedStrings(
- YarnConfiguration.NM_DOCKER_DEFAULT_RW_MOUNTS)));
-
- defaultTmpfsMounts.addAll(Arrays.asList(
- conf.getTrimmedStrings(
- YarnConfiguration.NM_DOCKER_DEFAULT_TMPFS_MOUNTS)));
- }
-
- public Map getCsiClients() {
- return csiClients;
}
@Override
@@ -432,28 +177,6 @@ public boolean isRuntimeRequested(Map env) {
return isDockerContainerRequested(conf, env);
}
- private Set getDockerCapabilitiesFromConf() throws
- ContainerExecutionException {
- Set caps = new HashSet<>(Arrays.asList(
- conf.getTrimmedStrings(
- YarnConfiguration.NM_DOCKER_CONTAINER_CAPABILITIES,
- YarnConfiguration.DEFAULT_NM_DOCKER_CONTAINER_CAPABILITIES)));
- if(caps.contains("none") || caps.contains("NONE")) {
- if(caps.size() > 1) {
- String msg = "Mixing capabilities with the none keyword is" +
- " not supported";
- throw new ContainerExecutionException(msg);
- }
- caps = Collections.emptySet();
- }
-
- return caps;
- }
-
- public Set getCapabilities() {
- return capabilities;
- }
-
private String runDockerVolumeCommand(DockerVolumeCommand dockerVolumeCommand,
Container container) throws ContainerExecutionException {
try {
@@ -482,11 +205,6 @@ private String runDockerVolumeCommand(DockerVolumeCommand dockerVolumeCommand,
}
- @Override
- public void prepareContainer(ContainerRuntimeContext ctx)
- throws ContainerExecutionException {
- }
-
private void checkDockerVolumeCreated(
DockerVolumeCommand dockerVolumeCreationCommand, Container container)
throws ContainerExecutionException {
@@ -527,87 +245,6 @@ private void checkDockerVolumeCreated(
throw new ContainerExecutionException(message);
}
- private void validateContainerNetworkType(String network)
- throws ContainerExecutionException {
- if (allowedNetworks.contains(network)) {
- return;
- }
-
- String msg = "Disallowed network: '" + network
- + "' specified. Allowed networks: are " + allowedNetworks
- .toString();
- throw new ContainerExecutionException(msg);
- }
-
- private void validateContainerRuntimeType(String runtime)
- throws ContainerExecutionException {
- if (runtime == null || runtime.isEmpty()
- || allowedRuntimes.contains(runtime)) {
- return;
- }
-
- String msg = "Disallowed runtime: '" + runtime
- + "' specified. Allowed networks: are " + allowedRuntimes
- .toString();
- throw new ContainerExecutionException(msg);
- }
-
- /**
- * Return whether the YARN container is allowed to run using the host's PID
- * namespace for the Docker container. For this to be allowed, the submitting
- * user must request the feature and the feature must be enabled on the
- * cluster.
- *
- * @param container the target YARN container
- * @return whether host pid namespace is requested and allowed
- * @throws ContainerExecutionException if host pid namespace is requested
- * but is not allowed
- */
- private boolean allowHostPidNamespace(Container container)
- throws ContainerExecutionException {
- Map environment = container.getLaunchContext()
- .getEnvironment();
- String pidNamespace = environment.get(ENV_DOCKER_CONTAINER_PID_NAMESPACE);
-
- if (pidNamespace == null) {
- return false;
- }
-
- if (!pidNamespace.equalsIgnoreCase("host")) {
- LOG.warn("NOT requesting PID namespace. Value of " +
- ENV_DOCKER_CONTAINER_PID_NAMESPACE + "is invalid: " + pidNamespace);
- return false;
- }
-
- boolean hostPidNamespaceEnabled = conf.getBoolean(
- YarnConfiguration.NM_DOCKER_ALLOW_HOST_PID_NAMESPACE,
- YarnConfiguration.DEFAULT_NM_DOCKER_ALLOW_HOST_PID_NAMESPACE);
-
- if (!hostPidNamespaceEnabled) {
- String message = "Host pid namespace being requested but this is not "
- + "enabled on this cluster";
- LOG.warn(message);
- throw new ContainerExecutionException(message);
- }
-
- return true;
- }
-
- public static void validateHostname(String hostname) throws
- ContainerExecutionException {
- if (hostname != null && !hostname.isEmpty()) {
- if (!hostnamePattern.matcher(hostname).matches()) {
- throw new ContainerExecutionException("Hostname '" + hostname
- + "' doesn't match docker hostname pattern");
- }
- if (hostname.length() > HOST_NAME_LENGTH) {
- throw new ContainerExecutionException(
- "Hostname can not be greater than " + HOST_NAME_LENGTH
- + " characters: " + hostname);
- }
- }
- }
-
/** Set a DNS friendly hostname.
* Only add hostname if network is not host or if hostname is
* specified via YARN_CONTAINER_RUNTIME_DOCKER_CONTAINER_HOSTNAME
@@ -647,7 +284,7 @@ private void setHostname(DockerRunCommand runCommand,
* @param runCommand the command to set with the CGROUP parent
*/
@VisibleForTesting
- protected void addCGroupParentIfRequired(String resourcesOptions,
+ void addCGroupParentIfRequired(String resourcesOptions,
String containerIdStr, DockerRunCommand runCommand) {
if (cGroupsHandler == null) {
LOG.debug("cGroupsHandler is null. cgroups are not in use. nothing to"
@@ -671,148 +308,6 @@ protected void addCGroupParentIfRequired(String resourcesOptions,
}
}
- /**
- * Return whether the YARN container is allowed to run in a privileged
- * Docker container. For a privileged container to be allowed all of the
- * following three conditions must be satisfied:
- *
- *
- * - Submitting user must request for a privileged container
- * - Privileged containers must be enabled on the cluster
- * - Submitting user must be white-listed to run a privileged
- * container
- *
- *
- * @param container the target YARN container
- * @return whether privileged container execution is allowed
- * @throws ContainerExecutionException if privileged container execution
- * is requested but is not allowed
- */
- private boolean allowPrivilegedContainerExecution(Container container)
- throws ContainerExecutionException {
-
- if(!isContainerRequestedAsPrivileged(container)) {
- return false;
- }
-
- LOG.info("Privileged container requested for : " + container
- .getContainerId().toString());
-
- //Ok, so we have been asked to run a privileged container. Security
- // checks need to be run. Each violation is an error.
-
- //check if privileged containers are enabled.
- boolean privilegedContainersEnabledOnCluster = conf.getBoolean(
- YarnConfiguration.NM_DOCKER_ALLOW_PRIVILEGED_CONTAINERS,
- YarnConfiguration.DEFAULT_NM_DOCKER_ALLOW_PRIVILEGED_CONTAINERS);
-
- if (!privilegedContainersEnabledOnCluster) {
- String message = "Privileged container being requested but privileged "
- + "containers are not enabled on this cluster";
- LOG.warn(message);
- throw new ContainerExecutionException(message);
- }
-
- //check if submitting user is in the whitelist.
- String submittingUser = container.getUser();
- UserGroupInformation submitterUgi = UserGroupInformation
- .createRemoteUser(submittingUser);
-
- if (!privilegedContainersAcl.isUserAllowed(submitterUgi)) {
- String message = "Cannot launch privileged container. Submitting user ("
- + submittingUser + ") fails ACL check.";
- LOG.warn(message);
- throw new ContainerExecutionException(message);
- }
-
- LOG.info("All checks pass. Launching privileged container for : "
- + container.getContainerId().toString());
-
- return true;
- }
-
- /**
- * This function only returns whether a privileged container was requested,
- * not whether the container was or will be launched as privileged.
- * @param container
- * @return
- */
- private boolean isContainerRequestedAsPrivileged(
- Container container) {
- String runPrivilegedContainerEnvVar = container.getLaunchContext()
- .getEnvironment().get(ENV_DOCKER_CONTAINER_RUN_PRIVILEGED_CONTAINER);
- return Boolean.parseBoolean(runPrivilegedContainerEnvVar);
- }
-
- @VisibleForTesting
- private String mountReadOnlyPath(String mount,
- Map> localizedResources)
- throws ContainerExecutionException {
- for (Entry> resource : localizedResources.entrySet()) {
- if (resource.getValue().contains(mount)) {
- java.nio.file.Path path = Paths.get(resource.getKey().toString());
- if (!path.isAbsolute()) {
- throw new ContainerExecutionException("Mount must be absolute: " +
- mount);
- }
- if (Files.isSymbolicLink(path)) {
- throw new ContainerExecutionException("Mount cannot be a symlink: " +
- mount);
- }
- return path.toString();
- }
- }
- throw new ContainerExecutionException("Mount must be a localized " +
- "resource: " + mount);
- }
-
- private String getUserIdInfo(String userName)
- throws ContainerExecutionException {
- String id = "";
- Shell.ShellCommandExecutor shexec = new Shell.ShellCommandExecutor(
- new String[]{"id", "-u", userName});
- try {
- shexec.execute();
- id = shexec.getOutput().replaceAll("[^0-9]", "");
- } catch (Exception e) {
- throw new ContainerExecutionException(e);
- }
- return id;
- }
-
- private String[] getGroupIdInfo(String userName)
- throws ContainerExecutionException {
- String[] id = null;
- Shell.ShellCommandExecutor shexec = new Shell.ShellCommandExecutor(
- new String[]{"id", "-G", userName});
- try {
- shexec.execute();
- id = shexec.getOutput().replace("\n", "").split(" ");
- } catch (Exception e) {
- throw new ContainerExecutionException(e);
- }
- return id;
- }
-
- /**
- * Check if system is default to disable docker override or
- * user requested a Docker container with ENTRY_POINT support.
- *
- * @param environment - Docker container environment variables
- * @return true if Docker launch command override is disabled
- */
- private boolean checkUseEntryPoint(Map environment) {
- boolean overrideDisable = false;
- String overrideDisableKey = Environment.
- YARN_CONTAINER_RUNTIME_DOCKER_RUN_OVERRIDE_DISABLE.
- name();
- String overrideDisableValue = (environment.get(overrideDisableKey) != null)
- ? environment.get(overrideDisableKey) :
- System.getenv(overrideDisableKey);
- overrideDisable = Boolean.parseBoolean(overrideDisableValue);
- return overrideDisable;
- }
-
@Override
public void launchContainer(ContainerRuntimeContext ctx)
throws ContainerExecutionException {
@@ -1141,49 +636,6 @@ public void relaunchContainer(ContainerRuntimeContext ctx)
}
- /**
- * Signal the docker container.
- *
- * Signals are used to check the liveliness of the container as well as to
- * stop/kill the container. The following outlines the docker container
- * signal handling.
- *
- *
- * - If the null signal is sent, run kill -0 on the pid. This is used
- * to check if the container is still alive, which is necessary for
- * reacquiring containers on NM restart.
- * - If SIGTERM, SIGKILL is sent, attempt to stop and remove the docker
- * container.
- * - If the docker container exists and is running, execute docker
- * stop.
- * - If any other signal is sent, signal the container using docker
- * kill.
- *
- *
- * @param ctx the {@link ContainerRuntimeContext}.
- * @throws ContainerExecutionException if the signaling fails.
- */
- @Override
- public void signalContainer(ContainerRuntimeContext ctx)
- throws ContainerExecutionException {
- ContainerExecutor.Signal signal = ctx.getExecutionAttribute(SIGNAL);
- Map env =
- ctx.getContainer().getLaunchContext().getEnvironment();
- try {
- if (ContainerExecutor.Signal.NULL.equals(signal)) {
- executeLivelinessCheck(ctx);
- } else if (ContainerExecutor.Signal.TERM.equals(signal)) {
- ContainerId containerId = ctx.getContainer().getContainerId();
- handleContainerStop(containerId, env);
- } else {
- handleContainerKill(ctx, env, signal);
- }
- } catch (ContainerExecutionException e) {
- throw new ContainerExecutionException("Signal docker container failed",
- e.getExitCode(), e.getOutput(), e.getErrorOutput());
- }
- }
-
/**
* Reap the docker container.
*
@@ -1322,8 +774,7 @@ public IOStreamPair execContainer(ContainerExecContext ctx)
}
@Override
- public String getExposedPorts(Container container)
- throws ContainerExecutionException {
+ public String getExposedPorts(Container container) {
ContainerId containerId = container.getContainerId();
String containerIdStr = containerId.toString();
DockerInspectCommand inspectCommand =
@@ -1391,19 +842,7 @@ private PrivilegedOperation buildLaunchOp(ContainerRuntimeContext ctx,
return launchOp;
}
- public static void validateImageName(String imageName)
- throws ContainerExecutionException {
- if (imageName == null || imageName.isEmpty()) {
- throw new ContainerExecutionException(
- ENV_DOCKER_CONTAINER_IMAGE + " not set!");
- }
- if (!dockerImagePattern.matcher(imageName).matches()) {
- throw new ContainerExecutionException("Image name '" + imageName
- + "' doesn't match docker image name pattern");
- }
- }
-
- public void pullImageFromRemote(String containerIdStr, String imageName)
+ private void pullImageFromRemote(String containerIdStr, String imageName)
throws ContainerExecutionException {
long start = System.currentTimeMillis();
DockerPullCommand dockerPullCommand = new DockerPullCommand(imageName);
@@ -1421,21 +860,6 @@ public void pullImageFromRemote(String containerIdStr, String imageName)
+ " container: {}", pullImageTimeMs, imageName, containerIdStr);
}
- private void executeLivelinessCheck(ContainerRuntimeContext ctx)
- throws ContainerExecutionException {
- String procFs = ctx.getExecutionAttribute(PROCFS);
- if (procFs == null || procFs.isEmpty()) {
- procFs = DEFAULT_PROCFS;
- }
- String pid = ctx.getExecutionAttribute(PID);
- if (!new File(procFs + File.separator + pid).exists()) {
- String msg = "Liveliness check failed for PID: " + pid
- + ". Container may have already completed.";
- throw new ContainerExecutionException(msg,
- PrivilegedOperation.ResultCode.INVALID_CONTAINER_PID.getValue());
- }
- }
-
/**
* Handles a docker container stop by first finding the {@code STOPSIGNAL}
* using docker inspect and then executing
@@ -1452,7 +876,7 @@ private void executeLivelinessCheck(ContainerRuntimeContext ctx)
* @param env env
* @throws ContainerExecutionException
*/
- private void handleContainerStop(ContainerId containerId,
+ void handleContainerStop(ContainerId containerId,
Map env)
throws ContainerExecutionException {
@@ -1506,7 +930,7 @@ private String executeDockerInspect(ContainerId containerId,
return output;
}
- private void handleContainerKill(ContainerRuntimeContext ctx,
+ void handleContainerKill(ContainerRuntimeContext ctx,
Map env,
ContainerExecutor.Signal signal) throws ContainerExecutionException {
Container container = ctx.getContainer();
@@ -1561,7 +985,7 @@ private void handleContainerKill(ContainerRuntimeContext ctx,
}
}
- private void handleContainerRemove(String containerId,
+ void handleContainerRemove(String containerId,
Map env) throws ContainerExecutionException {
String delayedRemoval = env.get(ENV_DOCKER_CONTAINER_DELAYED_REMOVAL);
if (delayedRemovalAllowed && delayedRemoval != null
@@ -1613,34 +1037,4 @@ private void addDockerClientConfigToRunCommand(ContainerRuntimeContext ctx,
}
}
}
-
- /**
- * Initiate CSI clients to talk to the CSI adaptors on this node and
- * cache the clients for easier fetch.
- * @param config configuration
- * @throws ContainerExecutionException
- */
- private void initiateCsiClients(Configuration config)
- throws ContainerExecutionException {
- String[] driverNames = CsiConfigUtils.getCsiDriverNames(config);
- if (driverNames != null && driverNames.length > 0) {
- for (String driverName : driverNames) {
- try {
- // find out the adaptors service address
- InetSocketAddress adaptorServiceAddress =
- CsiConfigUtils.getCsiAdaptorAddressForDriver(driverName, config);
- LOG.info("Initializing a csi-adaptor-client for csi-adaptor {},"
- + " csi-driver {}", adaptorServiceAddress.toString(), driverName);
- CsiAdaptorProtocolPBClientImpl client =
- new CsiAdaptorProtocolPBClientImpl(1L, adaptorServiceAddress,
- config);
- csiClients.put(driverName, client);
- } catch (IOException e1) {
- throw new ContainerExecutionException(e1.getMessage());
- } catch (YarnException e2) {
- throw new ContainerExecutionException(e2.getMessage());
- }
- }
- }
- }
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/OCIContainerRuntime.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/OCIContainerRuntime.java
new file mode 100644
index 00000000000..a8ad7ce539b
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/OCIContainerRuntime.java
@@ -0,0 +1,686 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime;
+
+import com.google.common.annotations.VisibleForTesting;
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hadoop.security.authorize.AccessControlList;
+import org.apache.hadoop.util.Shell;
+import org.apache.hadoop.yarn.api.ApplicationConstants;
+import org.apache.hadoop.yarn.api.CsiAdaptorProtocol;
+import org.apache.hadoop.yarn.api.impl.pb.client.CsiAdaptorProtocolPBClientImpl;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.exceptions.YarnException;
+import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor;
+import org.apache.hadoop.yarn.server.nodemanager.Context;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperation;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationExecutor;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.CGroupsHandler;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandlerModule;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.docker.DockerClient;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.runtime.ContainerExecutionException;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.runtime.ContainerRuntime;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.runtime.ContainerRuntimeContext;
+import org.apache.hadoop.yarn.util.csi.CsiConfigUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.File;
+import java.io.IOException;
+import java.net.InetSocketAddress;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.regex.Pattern;
+
+import static org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.LinuxContainerRuntimeConstants.*;
+
+/**
+ * This class is a {@link ContainerRuntime} implementation that uses the
+ * native {@code container-executor} binary via a
+ * {@link PrivilegedOperationExecutor} instance to launch processes inside
+ * Docker containers.
+ *
+ * The following environment variables are used to configure the OCI Container
+ * engine:
+ *
+ *
+ * -
+ * {@code YARN_CONTAINER_RUNTIME_DOCKER_IMAGE} names which image
+ * will be used to launch the Docker container.
+ *
+ * -
+ * {@code YARN_CONTAINER_RUNTIME_DOCKER_PORTS_MAPPING} allows users to
+ * specify ports mapping for the bridge network Docker container. The value
+ * of the environment variable should be a comma-separated list of ports
+ * mapping. It's the same to "-p" option for the Docker run command. If the
+ * value is empty, "-P" will be added.
+ *
+ * -
+ * {@code YARN_CONTAINER_RUNTIME_DOCKER_CONTAINER_PID_NAMESPACE}
+ * controls which PID namespace will be used by the Docker container. By
+ * default, each Docker container has its own PID namespace. To share the
+ * namespace of the host, the
+ * {@code yarn.nodemanager.runtime.linux.docker.host-pid-namespace.allowed}
+ * property must be set to {@code true}. If the host PID namespace is
+ * allowed and this environment variable is set to {@code host}, the
+ * Docker container will share the host's PID namespace. No other value is
+ * allowed.
+ *
+ * -
+ * {@code YARN_CONTAINER_RUNTIME_DOCKER_CONTAINER_HOSTNAME} sets the
+ * hostname to be used by the Docker container. If not specified, a
+ * hostname will be derived from the container ID and set as default
+ * hostname for networks other than 'host'.
+ *
+ * -
+ * {@code YARN_CONTAINER_RUNTIME_DOCKER_RUN_PRIVILEGED_CONTAINER}
+ * controls whether the Docker container is a privileged container. In order
+ * to use privileged containers, the
+ * {@code yarn.nodemanager.runtime.linux.docker.privileged-containers.allowed}
+ * property must be set to {@code true}, and the application owner must
+ * appear in the value of the
+ * {@code yarn.nodemanager.runtime.linux.docker.privileged-containers.acl}
+ * property. If this environment variable is set to {@code true}, a
+ * privileged Docker container will be used if allowed. No other value is
+ * allowed, so the environment variable should be left unset rather than
+ * setting it to false.
+ *
+ * -
+ * {@code YARN_CONTAINER_RUNTIME_DOCKER_MOUNTS} allows users to specify
+ + additional volume mounts for the Docker container. The value of the
+ * environment variable should be a comma-separated list of mounts.
+ * All such mounts must be given as {@code source:dest[:mode]} and the mode
+ * must be "ro" (read-only) or "rw" (read-write) to specify the type of
+ * access being requested. If neither is specified, read-write will be
+ * assumed. The mode may include a bind propagation option. In that case,
+ * the mode should either be of the form [option], rw+[option], or
+ * ro+[option]. Valid bind propagation options are shared, rshared, slave,
+ * rslave, private, and rprivate. The requested mounts will be validated by
+ * container-executor based on the values set in container-executor.cfg for
+ * {@code docker.allowed.ro-mounts} and {@code docker.allowed.rw-mounts}.
+ *
+ * -
+ * {@code YARN_CONTAINER_RUNTIME_DOCKER_TMPFS_MOUNTS} allows users to
+ * specify additional tmpfs mounts for the Docker container. The value of
+ * the environment variable should be a comma-separated list of mounts.
+ *
+ * -
+ * {@code YARN_CONTAINER_RUNTIME_DOCKER_DELAYED_REMOVAL} allows a user
+ * to request delayed deletion of the Docker containers on a per
+ * container basis. If true, Docker containers will not be removed until
+ * the duration defined by {@code yarn.nodemanager.delete.debug-delay-sec}
+ * has elapsed. Administrators can disable this feature through the
+ * yarn-site property
+ * {@code yarn.nodemanager.runtime.linux.docker.delayed-removal.allowed}.
+ * This feature is disabled by default. When this feature is disabled or set
+ * to false, the container will be removed as soon as it exits.
+ *
+ * -
+ * {@code YARN_CONTAINER_RUNTIME_YARN_SYSFS_ENABLE} allows export yarn
+ * service json to docker container. This feature is disabled by default.
+ * when this feature is set, app.json will be available in
+ * /hadoop/yarn/sysfs/app.json.
+ *
+ *
+ */
+
+public abstract class OCIContainerRuntime implements LinuxContainerRuntime {
+ private static final Logger LOG =
+ LoggerFactory.getLogger(OCIContainerRuntime.class);
+
+ // This validates that the image is a proper docker image
+ private static final String DOCKER_IMAGE_PATTERN =
+ "^(([a-zA-Z0-9.-]+)(:\\d+)?/)?([a-z0-9_./-]+)(:[\\w.-]+)?$";
+ private static final Pattern dockerImagePattern =
+ Pattern.compile(DOCKER_IMAGE_PATTERN);
+ private static final String HOSTNAME_PATTERN =
+ "^[a-zA-Z0-9][a-zA-Z0-9_.-]+$";
+ private static final Pattern hostnamePattern = Pattern.compile(
+ HOSTNAME_PATTERN);
+ static final Pattern USER_MOUNT_PATTERN = Pattern.compile(
+ "(?<=^|,)([^:\\x00]+):([^:\\x00]+)" +
+ "(:(r[ow]|(r[ow][+])?(r?shared|r?slave|r?private)))?(?:,|$)");
+ static final Pattern TMPFS_MOUNT_PATTERN = Pattern.compile(
+ "^/[^:\\x00]+$");
+ static final String PORTS_MAPPING_PATTERN =
+ "^:[0-9]+|^[0-9]+:[0-9]+|^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]" +
+ "|25[0-5])\\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])" +
+ ":[0-9]+:[0-9]+$";
+ private static final int HOST_NAME_LENGTH = 64;
+ private static final String DEFAULT_PROCFS = "/proc";
+
+ @InterfaceAudience.Private
+ static final String ENV_DOCKER_CONTAINER_IMAGE =
+ "YARN_CONTAINER_RUNTIME_DOCKER_IMAGE";
+
+ @InterfaceAudience.Private
+ static final String ENV_DOCKER_CONTAINER_PID_NAMESPACE =
+ "YARN_CONTAINER_RUNTIME_DOCKER_CONTAINER_PID_NAMESPACE";
+ @InterfaceAudience.Private
+ static final String ENV_DOCKER_CONTAINER_HOSTNAME =
+ "YARN_CONTAINER_RUNTIME_DOCKER_CONTAINER_HOSTNAME";
+ @InterfaceAudience.Private
+ static final String ENV_DOCKER_CONTAINER_RUN_PRIVILEGED_CONTAINER =
+ "YARN_CONTAINER_RUNTIME_DOCKER_RUN_PRIVILEGED_CONTAINER";
+ @InterfaceAudience.Private
+ static final String ENV_DOCKER_CONTAINER_MOUNTS =
+ "YARN_CONTAINER_RUNTIME_DOCKER_MOUNTS";
+ @InterfaceAudience.Private
+ static final String ENV_DOCKER_CONTAINER_TMPFS_MOUNTS =
+ "YARN_CONTAINER_RUNTIME_DOCKER_TMPFS_MOUNTS";
+ @InterfaceAudience.Private
+ static final String ENV_DOCKER_CONTAINER_DELAYED_REMOVAL =
+ "YARN_CONTAINER_RUNTIME_DOCKER_DELAYED_REMOVAL";
+ @InterfaceAudience.Private
+ static final String ENV_DOCKER_CONTAINER_PORTS_MAPPING =
+ "YARN_CONTAINER_RUNTIME_DOCKER_PORTS_MAPPING";
+ @InterfaceAudience.Private
+ static final String ENV_DOCKER_CONTAINER_YARN_SYSFS =
+ "YARN_CONTAINER_RUNTIME_YARN_SYSFS_ENABLE";
+ @InterfaceAudience.Private
+ static final String ENV_DOCKER_CONTAINER_DOCKER_RUNTIME =
+ "YARN_CONTAINER_RUNTIME_DOCKER_RUNTIME";
+
+ Configuration conf;
+ Context nmContext;
+ DockerClient dockerClient;
+ private Map csiClients = new HashMap<>();
+ PrivilegedOperationExecutor privilegedOperationExecutor;
+ String defaultImageName;
+ Boolean defaultImageUpdate;
+ private Set allowedNetworks = new HashSet<>();
+ private Set allowedRuntimes = new HashSet<>();
+ String defaultNetwork;
+ CGroupsHandler cGroupsHandler;
+ private AccessControlList privilegedContainersAcl;
+ boolean enableUserReMapping;
+ int userRemappingUidThreshold;
+ int userRemappingGidThreshold;
+ Set capabilities;
+ boolean delayedRemovalAllowed;
+ Set defaultROMounts = new HashSet<>();
+ Set defaultRWMounts = new HashSet<>();
+ Set defaultTmpfsMounts = new HashSet<>();
+
+ public OCIContainerRuntime(PrivilegedOperationExecutor
+ privilegedOperationExecutor) {
+ this(privilegedOperationExecutor, ResourceHandlerModule
+ .getCGroupsHandler());
+ }
+
+ public OCIContainerRuntime(PrivilegedOperationExecutor
+ privilegedOperationExecutor, CGroupsHandler cGroupsHandler) {
+ this.privilegedOperationExecutor = privilegedOperationExecutor;
+
+ if (cGroupsHandler == null) {
+ LOG.info("cGroupsHandler is null - cgroups not in use.");
+ } else {
+ this.cGroupsHandler = cGroupsHandler;
+ }
+ }
+
+ public void initialize(Configuration conf, Context nmContext)
+ throws ContainerExecutionException {
+ this.nmContext = nmContext;
+ this.conf = conf;
+
+ allowedNetworks.clear();
+ allowedRuntimes.clear();
+ defaultROMounts.clear();
+ defaultRWMounts.clear();
+ defaultTmpfsMounts.clear();
+ defaultImageName = conf.getTrimmed(
+ YarnConfiguration.NM_DOCKER_IMAGE_NAME, "");
+ defaultImageUpdate = conf.getBoolean(
+ YarnConfiguration.NM_DOCKER_IMAGE_UPDATE, false);
+ allowedNetworks.addAll(Arrays.asList(
+ conf.getTrimmedStrings(
+ YarnConfiguration.NM_DOCKER_ALLOWED_CONTAINER_NETWORKS,
+ YarnConfiguration.DEFAULT_NM_DOCKER_ALLOWED_CONTAINER_NETWORKS)));
+ defaultNetwork = conf.getTrimmed(
+ YarnConfiguration.NM_DOCKER_DEFAULT_CONTAINER_NETWORK,
+ YarnConfiguration.DEFAULT_NM_DOCKER_DEFAULT_CONTAINER_NETWORK);
+ allowedRuntimes.addAll(Arrays.asList(
+ conf.getTrimmedStrings(
+ YarnConfiguration.NM_DOCKER_ALLOWED_CONTAINER_RUNTIMES,
+ YarnConfiguration.DEFAULT_NM_DOCKER_ALLOWED_CONTAINER_RUNTIMES)));
+
+ if(!allowedNetworks.contains(defaultNetwork)) {
+ String message = "Default network: " + defaultNetwork
+ + " is not in the set of allowed networks: " + allowedNetworks;
+
+ if (LOG.isWarnEnabled()) {
+ LOG.warn(message + ". Please check configuration");
+ }
+
+ throw new ContainerExecutionException(message);
+ }
+
+ privilegedContainersAcl = new AccessControlList(conf.getTrimmed(
+ YarnConfiguration.NM_DOCKER_PRIVILEGED_CONTAINERS_ACL,
+ YarnConfiguration.DEFAULT_NM_DOCKER_PRIVILEGED_CONTAINERS_ACL));
+
+ enableUserReMapping = conf.getBoolean(
+ YarnConfiguration.NM_DOCKER_ENABLE_USER_REMAPPING,
+ YarnConfiguration.DEFAULT_NM_DOCKER_ENABLE_USER_REMAPPING);
+
+ userRemappingUidThreshold = conf.getInt(
+ YarnConfiguration.NM_DOCKER_USER_REMAPPING_UID_THRESHOLD,
+ YarnConfiguration.DEFAULT_NM_DOCKER_USER_REMAPPING_UID_THRESHOLD);
+
+ userRemappingGidThreshold = conf.getInt(
+ YarnConfiguration.NM_DOCKER_USER_REMAPPING_GID_THRESHOLD,
+ YarnConfiguration.DEFAULT_NM_DOCKER_USER_REMAPPING_GID_THRESHOLD);
+
+ capabilities = getDockerCapabilitiesFromConf();
+
+ delayedRemovalAllowed = conf.getBoolean(
+ YarnConfiguration.NM_DOCKER_ALLOW_DELAYED_REMOVAL,
+ YarnConfiguration.DEFAULT_NM_DOCKER_ALLOW_DELAYED_REMOVAL);
+
+ defaultROMounts.addAll(Arrays.asList(
+ conf.getTrimmedStrings(
+ YarnConfiguration.NM_DOCKER_DEFAULT_RO_MOUNTS)));
+
+ defaultRWMounts.addAll(Arrays.asList(
+ conf.getTrimmedStrings(
+ YarnConfiguration.NM_DOCKER_DEFAULT_RW_MOUNTS)));
+
+ defaultTmpfsMounts.addAll(Arrays.asList(
+ conf.getTrimmedStrings(
+ YarnConfiguration.NM_DOCKER_DEFAULT_TMPFS_MOUNTS)));
+ }
+
+ private Set getDockerCapabilitiesFromConf() throws
+ ContainerExecutionException {
+ Set caps = new HashSet<>(Arrays.asList(
+ conf.getTrimmedStrings(
+ YarnConfiguration.NM_DOCKER_CONTAINER_CAPABILITIES,
+ YarnConfiguration.DEFAULT_NM_DOCKER_CONTAINER_CAPABILITIES)));
+ if(caps.contains("none") || caps.contains("NONE")) {
+ if(caps.size() > 1) {
+ String msg = "Mixing capabilities with the none keyword is" +
+ " not supported";
+ throw new ContainerExecutionException(msg);
+ }
+ caps = Collections.emptySet();
+ }
+
+ return caps;
+ }
+
+ public Set getCapabilities() {
+ return capabilities;
+ }
+
+ @VisibleForTesting
+ String mountReadOnlyPath(String mount,
+ Map> localizedResources)
+ throws ContainerExecutionException {
+ for (Map.Entry> resource : localizedResources.entrySet()) {
+ if (resource.getValue().contains(mount)) {
+ java.nio.file.Path path = Paths.get(resource.getKey().toString());
+ if (!path.isAbsolute()) {
+ throw new ContainerExecutionException("Mount must be absolute: " +
+ mount);
+ }
+ if (Files.isSymbolicLink(path)) {
+ throw new ContainerExecutionException("Mount cannot be a symlink: " +
+ mount);
+ }
+ return path.toString();
+ }
+ }
+ throw new ContainerExecutionException("Mount must be a localized " +
+ "resource: " + mount);
+ }
+
+ /**
+ * Signal the docker container.
+ *
+ * Signals are used to check the liveliness of the container as well as to
+ * stop/kill the container. The following outlines the docker container
+ * signal handling.
+ *
+ *
+ * - If the null signal is sent, run kill -0 on the pid. This is used
+ * to check if the container is still alive, which is necessary for
+ * reacquiring containers on NM restart.
+ * - If SIGTERM, SIGKILL is sent, attempt to stop and remove the docker
+ * container.
+ * - If the docker container exists and is running, execute docker
+ * stop.
+ * - If any other signal is sent, signal the container using docker
+ * kill.
+ *
+ *
+ * @param ctx the {@link ContainerRuntimeContext}.
+ * @throws ContainerExecutionException if the signaling fails.
+ */
+ @Override
+ public void signalContainer(ContainerRuntimeContext ctx)
+ throws ContainerExecutionException {
+ ContainerExecutor.Signal signal = ctx.getExecutionAttribute(SIGNAL);
+ Map env =
+ ctx.getContainer().getLaunchContext().getEnvironment();
+ try {
+ if (ContainerExecutor.Signal.NULL.equals(signal)) {
+ executeLivelinessCheck(ctx);
+ } else if (ContainerExecutor.Signal.TERM.equals(signal)) {
+ ContainerId containerId = ctx.getContainer().getContainerId();
+ handleContainerStop(containerId, env);
+ } else {
+ handleContainerKill(ctx, env, signal);
+ }
+ } catch (ContainerExecutionException e) {
+ throw new ContainerExecutionException("Signal docker container failed",
+ e.getExitCode(), e.getOutput(), e.getErrorOutput());
+ }
+ }
+
+ abstract void handleContainerStop(ContainerId containerId,
+ Map env)
+ throws ContainerExecutionException;
+
+
+ abstract void handleContainerKill(ContainerRuntimeContext ctx,
+ Map env,
+ ContainerExecutor.Signal signal) throws ContainerExecutionException;
+
+ @Override
+ public void prepareContainer(ContainerRuntimeContext ctx)
+ throws ContainerExecutionException {
+ }
+
+ abstract void handleContainerRemove(String containerId,
+ Map env) throws ContainerExecutionException;
+
+ String getUserIdInfo(String userName)
+ throws ContainerExecutionException {
+ String id;
+ Shell.ShellCommandExecutor shexec = new Shell.ShellCommandExecutor(
+ new String[]{"id", "-u", userName});
+ try {
+ shexec.execute();
+ id = shexec.getOutput().replaceAll("[^0-9]", "");
+ } catch (Exception e) {
+ throw new ContainerExecutionException(e);
+ }
+ return id;
+ }
+
+ String[] getGroupIdInfo(String userName)
+ throws ContainerExecutionException {
+ String[] id;
+ Shell.ShellCommandExecutor shexec = new Shell.ShellCommandExecutor(
+ new String[]{"id", "-G", userName});
+ try {
+ shexec.execute();
+ id = shexec.getOutput().replace("\n", "").split(" ");
+ } catch (Exception e) {
+ throw new ContainerExecutionException(e);
+ }
+ return id;
+ }
+
+ void validateContainerNetworkType(String network)
+ throws ContainerExecutionException {
+ if (allowedNetworks.contains(network)) {
+ return;
+ }
+
+ String msg = "Disallowed network: '" + network
+ + "' specified. Allowed networks: are " + allowedNetworks
+ .toString();
+ throw new ContainerExecutionException(msg);
+ }
+
+ void validateContainerRuntimeType(String runtime)
+ throws ContainerExecutionException {
+ if (runtime == null || runtime.isEmpty()
+ || allowedRuntimes.contains(runtime)) {
+ return;
+ }
+
+ String msg = "Disallowed runtime: '" + runtime
+ + "' specified. Allowed networks: are " + allowedRuntimes
+ .toString();
+ throw new ContainerExecutionException(msg);
+ }
+
+ /**
+ * Return whether the YARN container is allowed to run using the host's PID
+ * namespace for the Docker container. For this to be allowed, the submitting
+ * user must request the feature and the feature must be enabled on the
+ * cluster.
+ *
+ * @param container the target YARN container
+ * @return whether host pid namespace is requested and allowed
+ * @throws ContainerExecutionException if host pid namespace is requested
+ * but is not allowed
+ */
+ boolean allowHostPidNamespace(Container container)
+ throws ContainerExecutionException {
+ Map environment = container.getLaunchContext()
+ .getEnvironment();
+ String pidNamespace = environment.get(ENV_DOCKER_CONTAINER_PID_NAMESPACE);
+
+ if (pidNamespace == null) {
+ return false;
+ }
+
+ if (!pidNamespace.equalsIgnoreCase("host")) {
+ LOG.warn("NOT requesting PID namespace. Value of " +
+ ENV_DOCKER_CONTAINER_PID_NAMESPACE + "is invalid: " + pidNamespace);
+ return false;
+ }
+
+ boolean hostPidNamespaceEnabled = conf.getBoolean(
+ YarnConfiguration.NM_DOCKER_ALLOW_HOST_PID_NAMESPACE,
+ YarnConfiguration.DEFAULT_NM_DOCKER_ALLOW_HOST_PID_NAMESPACE);
+
+ if (!hostPidNamespaceEnabled) {
+ String message = "Host pid namespace being requested but this is not "
+ + "enabled on this cluster";
+ LOG.warn(message);
+ throw new ContainerExecutionException(message);
+ }
+
+ return true;
+ }
+
+ static void validateHostname(String hostname) throws
+ ContainerExecutionException {
+ if (hostname != null && !hostname.isEmpty()) {
+ if (!hostnamePattern.matcher(hostname).matches()) {
+ throw new ContainerExecutionException("Hostname '" + hostname
+ + "' doesn't match docker hostname pattern");
+ }
+ if (hostname.length() > HOST_NAME_LENGTH) {
+ throw new ContainerExecutionException(
+ "Hostname can not be greater than " + HOST_NAME_LENGTH
+ + " characters: " + hostname);
+ }
+ }
+ }
+
+ /**
+ * Return whether the YARN container is allowed to run in a privileged
+ * Docker container. For a privileged container to be allowed all of the
+ * following three conditions must be satisfied:
+ *
+ *
+ * - Submitting user must request for a privileged container
+ * - Privileged containers must be enabled on the cluster
+ * - Submitting user must be white-listed to run a privileged
+ * container
+ *
+ *
+ * @param container the target YARN container
+ * @return whether privileged container execution is allowed
+ * @throws ContainerExecutionException if privileged container execution
+ * is requested but is not allowed
+ */
+ boolean allowPrivilegedContainerExecution(Container container)
+ throws ContainerExecutionException {
+
+ if(!isContainerRequestedAsPrivileged(container)) {
+ return false;
+ }
+
+ LOG.info("Privileged container requested for : " + container
+ .getContainerId().toString());
+
+ //Ok, so we have been asked to run a privileged container. Security
+ // checks need to be run. Each violation is an error.
+
+ //check if privileged containers are enabled.
+ boolean privilegedContainersEnabledOnCluster = conf.getBoolean(
+ YarnConfiguration.NM_DOCKER_ALLOW_PRIVILEGED_CONTAINERS,
+ YarnConfiguration.DEFAULT_NM_DOCKER_ALLOW_PRIVILEGED_CONTAINERS);
+
+ if (!privilegedContainersEnabledOnCluster) {
+ String message = "Privileged container being requested but privileged "
+ + "containers are not enabled on this cluster";
+ LOG.warn(message);
+ throw new ContainerExecutionException(message);
+ }
+
+ //check if submitting user is in the whitelist.
+ String submittingUser = container.getUser();
+ UserGroupInformation submitterUgi = UserGroupInformation
+ .createRemoteUser(submittingUser);
+
+ if (!privilegedContainersAcl.isUserAllowed(submitterUgi)) {
+ String message = "Cannot launch privileged container. Submitting user ("
+ + submittingUser + ") fails ACL check.";
+ LOG.warn(message);
+ throw new ContainerExecutionException(message);
+ }
+
+ LOG.info("All checks pass. Launching privileged container for : "
+ + container.getContainerId().toString());
+
+ return true;
+ }
+
+ /**
+ * This function only returns whether a privileged container was requested,
+ * not whether the container was or will be launched as privileged.
+ * @param container
+ * @return
+ */
+ boolean isContainerRequestedAsPrivileged(
+ Container container) {
+ String runPrivilegedContainerEnvVar = container.getLaunchContext()
+ .getEnvironment().get(ENV_DOCKER_CONTAINER_RUN_PRIVILEGED_CONTAINER);
+ return Boolean.parseBoolean(runPrivilegedContainerEnvVar);
+ }
+
+ /**
+ * Check if system is default to disable docker override or
+ * user requested a Docker container with ENTRY_POINT support.
+ *
+ * @param environment - Docker container environment variables
+ * @return true if Docker launch command override is disabled
+ */
+ boolean checkUseEntryPoint(Map environment) {
+ boolean overrideDisable = false;
+ String overrideDisableKey = ApplicationConstants.Environment.
+ YARN_CONTAINER_RUNTIME_DOCKER_RUN_OVERRIDE_DISABLE.
+ name();
+ String overrideDisableValue = (environment.get(overrideDisableKey) != null)
+ ? environment.get(overrideDisableKey) :
+ System.getenv(overrideDisableKey);
+ overrideDisable = Boolean.parseBoolean(overrideDisableValue);
+ return overrideDisable;
+ }
+
+ static void validateImageName(String imageName)
+ throws ContainerExecutionException {
+ if (imageName == null || imageName.isEmpty()) {
+ throw new ContainerExecutionException(
+ ENV_DOCKER_CONTAINER_IMAGE + " not set!");
+ }
+ if (!dockerImagePattern.matcher(imageName).matches()) {
+ throw new ContainerExecutionException("Image name '" + imageName
+ + "' doesn't match docker image name pattern");
+ }
+ }
+
+ private void executeLivelinessCheck(ContainerRuntimeContext ctx)
+ throws ContainerExecutionException {
+ String procFs = ctx.getExecutionAttribute(PROCFS);
+ if (procFs == null || procFs.isEmpty()) {
+ procFs = DEFAULT_PROCFS;
+ }
+ String pid = ctx.getExecutionAttribute(PID);
+ if (!new File(procFs + File.separator + pid).exists()) {
+ String msg = "Liveliness check failed for PID: " + pid
+ + ". Container may have already completed.";
+ throw new ContainerExecutionException(msg,
+ PrivilegedOperation.ResultCode.INVALID_CONTAINER_PID.getValue());
+ }
+ }
+
+ public Map getCsiClients() {
+ return csiClients;
+ }
+
+ /**
+ * Initiate CSI clients to talk to the CSI adaptors on this node and
+ * cache the clients for easier fetch.
+ * @param config configuration
+ * @throws ContainerExecutionException
+ */
+ void initiateCsiClients(Configuration config)
+ throws ContainerExecutionException {
+ String[] driverNames = CsiConfigUtils.getCsiDriverNames(config);
+ if (driverNames != null && driverNames.length > 0) {
+ for (String driverName : driverNames) {
+ try {
+ // find out the adaptors service address
+ InetSocketAddress adaptorServiceAddress =
+ CsiConfigUtils.getCsiAdaptorAddressForDriver(driverName, config);
+ LOG.info("Initializing a csi-adaptor-client for csi-adaptor {},"
+ + " csi-driver {}", adaptorServiceAddress.toString(), driverName);
+ CsiAdaptorProtocolPBClientImpl client =
+ new CsiAdaptorProtocolPBClientImpl(1L, adaptorServiceAddress,
+ config);
+ csiClients.put(driverName, client);
+ } catch (IOException | YarnException e1) {
+ throw new ContainerExecutionException(e1.getMessage());
+ }
+ }
+ }
+ }
+}