diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java index e57f988..2a3b5eb 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java @@ -1811,6 +1811,20 @@ public static boolean isAclEnabled(Configuration conf) { public static final String DEFAULT_NM_DOCKER_DEFAULT_CONTAINER_NETWORK = "host"; + /** + * Whether or not users are allowed to request that Docker containers honor + * the debug deletion delay. This is useful for troubleshooting Docker + * container related launch failures. + */ + public static final String NM_DOCKER_ALLOW_DELAYED_REMOVAL = + DOCKER_CONTAINER_RUNTIME_PREFIX + "delayed-removal.allowed"; + + /** + * The default value on whether or not a user can request that Docker + * containers honor the debug deletion delay. + */ + public static final boolean DEFAULT_NM_DOCKER_ALLOW_DELAYED_REMOVAL = false; + /** The mode in which the Java Container Sandbox should run detailed by * the JavaSandboxLinuxContainerRuntime. */ public static final String YARN_CONTAINER_SANDBOX = diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml index 192f62e..a05b5fd 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml @@ -1697,6 +1697,14 @@ + Whether or not users are allowed to request that Docker + containers honor the debug deletion delay. This is useful for + troubleshooting Docker container related launch failures. + yarn.nodemanager.runtime.linux.docker.delayed-removal.allowed + false + + + The mode in which the Java Container Sandbox should run detailed by the JavaSandboxLinuxContainerRuntime. yarn.nodemanager.runtime.linux.sandbox-mode diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerExecutor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerExecutor.java index 7ee3e52..f4279a3 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerExecutor.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerExecutor.java @@ -54,6 +54,7 @@ import org.apache.hadoop.yarn.server.nodemanager.util.NodeManagerHardwareUtils; import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerLivenessContext; import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerReacquisitionContext; +import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerReapContext; import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerSignalContext; import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerStartContext; import org.apache.hadoop.yarn.server.nodemanager.executor.DeletionAsUserContext; @@ -190,6 +191,16 @@ public abstract boolean signalContainer(ContainerSignalContext ctx) throws IOException; /** + * Perform the steps necessary to reap the container. + * + * @param ctx Encapsulates information necessary for reaping containers. + * @return returns true if the operation succeeded. + * @throws IOException if reaping the container fails. + */ + public abstract boolean reapContainer(ContainerReapContext ctx) + throws IOException; + + /** * Delete specified directories as a given user. * * @param ctx Encapsulates information necessary for deletion. diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/Context.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/Context.java index 6774cf0..d7e3b52 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/Context.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/Context.java @@ -128,4 +128,11 @@ ResourcePluginManager getResourcePluginManager(); NodeManagerMetrics getNodeManagerMetrics(); + + /** + * Get the {@code DeletionService} associated with the NM. + * + * @return the NM {@code DeletionService}. + */ + DeletionService getDeletionService(); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DefaultContainerExecutor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DefaultContainerExecutor.java index 5772403..828b8fd 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DefaultContainerExecutor.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DefaultContainerExecutor.java @@ -56,6 +56,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch; import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.ContainerLocalizer; import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerLivenessContext; +import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerReapContext; import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerSignalContext; import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerStartContext; import org.apache.hadoop.yarn.server.nodemanager.executor.DeletionAsUserContext; @@ -565,6 +566,17 @@ public boolean signalContainer(ContainerSignalContext ctx) return true; } + /** + * No-op for reaping containers within the DefaultContainerExecutor. + * + * @param ctx Encapsulates information necessary for reaping containers. + * @return true given no operations are needed. + */ + @Override + public boolean reapContainer(ContainerReapContext ctx) { + return true; + } + @Override public boolean isContainerAlive(ContainerLivenessContext ctx) throws IOException { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java index eaf664f..fc5a04d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java @@ -44,12 +44,15 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.DelegatingLinuxContainerRuntime; import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.DockerLinuxContainerRuntime; import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.LinuxContainerRuntime; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.docker.DockerCommandExecutor; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.docker.DockerRmCommand; import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.ContainerLocalizer; import org.apache.hadoop.yarn.server.nodemanager.containermanager.runtime.ContainerExecutionException; import org.apache.hadoop.yarn.server.nodemanager.containermanager.runtime.ContainerRuntimeContext; import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerLivenessContext; import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerPrepareContext; import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerReacquisitionContext; +import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerReapContext; import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerSignalContext; import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerStartContext; import org.apache.hadoop.yarn.server.nodemanager.executor.DeletionAsUserContext; @@ -731,6 +734,39 @@ public boolean signalContainer(ContainerSignalContext ctx) return true; } + /** + * Performs the tasks necessary to reap the container. + * + * @param ctx Encapsulates information necessary for reaping containers. + * @return true if the reaping was successful. + * @throws IOException if an error occurs while reaping the container. + */ + @Override + public boolean reapContainer(ContainerReapContext ctx) throws IOException { + Container container = ctx.getContainer(); + String user = ctx.getUser(); + String runAsUser = getRunAsUser(user); + ContainerRuntimeContext runtimeContext = new ContainerRuntimeContext + .Builder(container) + .setExecutionAttribute(RUN_AS_USER, runAsUser) + .setExecutionAttribute(USER, user) + .build(); + try { + linuxContainerRuntime.reapContainer(runtimeContext); + } catch (ContainerExecutionException e) { + int retCode = e.getExitCode(); + if (retCode != 0) { + return false; + } + LOG.warn("Error in reaping container " + + container.getContainerId().toString() + " exit = " + retCode, e); + logOutput(e.getOutput()); + throw new IOException("Error in reaping container " + + container.getContainerId().toString() + " exit = " + retCode, e); + } + return true; + } + @Override public void deleteAsUser(DeletionAsUserContext ctx) { String user = ctx.getUser(); @@ -875,4 +911,26 @@ public void mountCgroups(List cgroupKVs, String hierarchy) public ResourceHandler getResourceHandler() { return resourceHandlerChain; } + + /** + * Remove the docker container referenced in the context. + * + * @param containerId the containerId for the container. + */ + public void removeDockerContainer(String containerId) { + try { + PrivilegedOperationExecutor privOpExecutor = + PrivilegedOperationExecutor.getInstance(super.getConf()); + if (DockerCommandExecutor.isRemovable( + DockerCommandExecutor.getContainerStatus(containerId, + super.getConf(), privOpExecutor))) { + LOG.info("Removing Docker container : " + containerId); + DockerRmCommand dockerRmCommand = new DockerRmCommand(containerId); + DockerCommandExecutor.executeDockerCommand(dockerRmCommand, containerId, + null, super.getConf(), privOpExecutor, false); + } + } catch (ContainerExecutionException e) { + LOG.warn("Unable to remove docker container: " + containerId); + } + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java index 179b01e..92ff048 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java @@ -400,6 +400,7 @@ protected void serviceInit(Configuration conf) throws Exception { ((NMContext)context).setContainerExecutor(exec); + ((NMContext)context).setDeletionService(del); nodeLabelsProvider = createNodeLabelsProvider(conf); @@ -610,6 +611,7 @@ protected void reregisterCollectors() { logAggregationReportForApps; private NodeStatusUpdater nodeStatusUpdater; private final boolean isDistSchedulingEnabled; + private DeletionService deletionService; private OpportunisticContainerAllocator containerAllocator; @@ -844,6 +846,24 @@ public void setResourcePluginManager( ResourcePluginManager resourcePluginManager) { this.resourcePluginManager = resourcePluginManager; } + + /** + * Return the NM's {@link DeletionService}. + * + * @return the NM's {@link DeletionService}. + */ + public DeletionService getDeletionService() { + return this.deletionService; + } + + /** + * Set the NM's {@link DeletionService}. + * + * @param deletionService the {@link DeletionService} to add to the Context. + */ + public void setDeletionService(DeletionService deletionService) { + this.deletionService = deletionService; + } } /** diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/api/impl/pb/NMProtoUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/api/impl/pb/NMProtoUtils.java index f9b762a..dd129f5 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/api/impl/pb/NMProtoUtils.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/api/impl/pb/NMProtoUtils.java @@ -22,6 +22,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.deletion.recovery.DeletionTaskRecoveryInfo; import org.apache.hadoop.yarn.server.nodemanager.containermanager.deletion.task.DeletionTask; import org.apache.hadoop.yarn.server.nodemanager.containermanager.deletion.task.DeletionTaskType; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.deletion.task.DockerContainerDeletionTask; import org.apache.hadoop.yarn.server.nodemanager.containermanager.deletion.task.FileDeletionTask; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -51,18 +52,29 @@ public static DeletionTask convertProtoToDeletionTask( int taskId = proto.getId(); if (proto.hasTaskType() && proto.getTaskType() != null) { if (proto.getTaskType().equals(DeletionTaskType.FILE.name())) { - LOG.debug("Converting recovered FileDeletionTask"); + if (LOG.isDebugEnabled()) { + LOG.debug("Converting recovered FileDeletionTask"); + } return convertProtoToFileDeletionTask(proto, deletionService, taskId); + } else if (proto.getTaskType().equals( + DeletionTaskType.DOCKER_CONTAINER.name())) { + if (LOG.isDebugEnabled()) { + LOG.debug("Converting recovered DockerContainerDeletionTask"); + } + return convertProtoToDockerContainerDeletionTask(proto, deletionService, + taskId); } } - LOG.debug("Unable to get task type, trying FileDeletionTask"); + if (LOG.isDebugEnabled()) { + LOG.debug("Unable to get task type, trying FileDeletionTask"); + } return convertProtoToFileDeletionTask(proto, deletionService, taskId); } /** * Convert the Protobuf representation into the {@link FileDeletionTask}. * - * @param proto the Protobuf representation of the {@link FileDeletionTask} + * @param proto the Protobuf representation of the {@link FileDeletionTask}. * @param deletionService the {@link DeletionService}. * @param taskId the ID of the {@link DeletionTask}. * @return the populated {@link FileDeletionTask}. @@ -88,6 +100,25 @@ public static FileDeletionTask convertProtoToFileDeletionTask( } /** + * Convert the Protobuf format into the {@link DockerContainerDeletionTask}. + * + * @param proto Protobuf format of the {@link DockerContainerDeletionTask}. + * @param deletionService the {@link DeletionService}. + * @param taskId the ID of the {@link DeletionTask}. + * @return the populated {@link DockerContainerDeletionTask}. + */ + public static DockerContainerDeletionTask + convertProtoToDockerContainerDeletionTask( + DeletionServiceDeleteTaskProto proto, DeletionService deletionService, + int taskId) { + String user = proto.hasUser() ? proto.getUser() : null; + String containerId = + proto.hasDockerContainerId() ? proto.getDockerContainerId() : null; + return new DockerContainerDeletionTask(taskId, deletionService, user, + containerId); + } + + /** * Convert the Protobuf representation to the {@link DeletionTaskRecoveryInfo} * representation. * diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java index f95c2a0..1255316 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java @@ -60,13 +60,16 @@ import org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus; import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor.ExitCode; import org.apache.hadoop.yarn.server.nodemanager.Context; +import org.apache.hadoop.yarn.server.nodemanager.DeletionService; import org.apache.hadoop.yarn.server.nodemanager.NMAuditLogger; import org.apache.hadoop.yarn.server.nodemanager.NMAuditLogger.AuditConstants; import org.apache.hadoop.yarn.server.nodemanager.containermanager.AuxServicesEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.AuxServicesEventType; import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationContainerFinishedEvent; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.deletion.task.DockerContainerDeletionTask; import org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainersLauncherEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainersLauncherEventType; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.DockerLinuxContainerRuntime; import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.LocalResourceRequest; import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.ResourceSet; import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ContainerLocalizationCleanupEvent; @@ -1512,6 +1515,11 @@ public void transition(ContainerImpl container, ContainerEvent event) { // TODO: Add containerWorkDir to the deletion service. + if (DockerLinuxContainerRuntime.isDockerContainerRequested( + container.getLaunchContext().getEnvironment())) { + removeDockerContainer(container); + } + if (clCleanupRequired) { container.dispatcher.getEventHandler().handle( new ContainersLauncherEvent(container, @@ -1547,6 +1555,11 @@ public void transition(ContainerImpl container, ContainerEvent event) { // TODO: Add containerWorkDir to the deletion service. // TODO: Add containerOuputDir to the deletion service. + if (DockerLinuxContainerRuntime.isDockerContainerRequested( + container.getLaunchContext().getEnvironment())) { + removeDockerContainer(container); + } + if (clCleanupRequired) { container.dispatcher.getEventHandler().handle( new ContainersLauncherEvent(container, @@ -1841,6 +1854,11 @@ public void transition(ContainerImpl container, ContainerEvent event) { container.addDiagnostics(exitEvent.getDiagnosticInfo() + "\n"); } + if (DockerLinuxContainerRuntime.isDockerContainerRequested( + container.getLaunchContext().getEnvironment())) { + removeDockerContainer(container); + } + // The process/process-grp is killed. Decrement reference counts and // cleanup resources container.cleanup(); @@ -2178,4 +2196,12 @@ public boolean isRecovering() { public ResourceMappings getResourceMappings() { return resourceMappings; } + + private static void removeDockerContainer(ContainerImpl container) { + DeletionService deletionService = container.context.getDeletionService(); + DockerContainerDeletionTask deletionTask = + new DockerContainerDeletionTask(deletionService, container.user, + container.getContainerId().toString()); + deletionService.delete(deletionTask); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/deletion/task/DeletionTaskType.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/deletion/task/DeletionTaskType.java index 676c71b..9918503 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/deletion/task/DeletionTaskType.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/deletion/task/DeletionTaskType.java @@ -20,5 +20,5 @@ * Available types of {@link DeletionTask}s. */ public enum DeletionTaskType { - FILE + FILE, DOCKER_CONTAINER } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/deletion/task/DockerContainerDeletionTask.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/deletion/task/DockerContainerDeletionTask.java new file mode 100644 index 0000000..70b918a --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/deletion/task/DockerContainerDeletionTask.java @@ -0,0 +1,92 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.yarn.server.nodemanager.containermanager.deletion.task; + +import org.apache.hadoop.yarn.proto.YarnServerNodemanagerRecoveryProtos.DeletionServiceDeleteTaskProto; +import org.apache.hadoop.yarn.server.nodemanager.DeletionService; +import org.apache.hadoop.yarn.server.nodemanager.LinuxContainerExecutor; + +/** + * {@link DeletionTask} handling the removal of Docker containers. + */ +public class DockerContainerDeletionTask extends DeletionTask + implements Runnable { + private String containerId; + + public DockerContainerDeletionTask(DeletionService deletionService, + String user, String containerId) { + this(INVALID_TASK_ID, deletionService, user, containerId); + } + + public DockerContainerDeletionTask(int taskId, + DeletionService deletionService, String user, String containerId) { + super(taskId, deletionService, user, DeletionTaskType.DOCKER_CONTAINER); + this.containerId = containerId; + } + + /** + * Get the id of the container to delete. + * + * @return the id of the container to delete. + */ + public String getContainerId() { + return containerId; + } + + /** + * Delete the specified Docker container. + */ + @Override + public void run() { + if (LOG.isDebugEnabled()) { + String msg = String.format("Running DeletionTask : %s", toString()); + LOG.debug(msg); + } + LinuxContainerExecutor exec = ((LinuxContainerExecutor) + getDeletionService().getContainerExecutor()); + exec.removeDockerContainer(containerId); + } + + /** + * Convert the DockerContainerDeletionTask to a String representation. + * + * @return String representation of the DockerContainerDeletionTask. + */ + @Override + public String toString() { + StringBuffer sb = new StringBuffer("DockerContainerDeletionTask : "); + sb.append(" id : ").append(this.getTaskId()); + sb.append(" containerId : ").append(this.containerId); + return sb.toString().trim(); + } + + /** + * Convert the DockerContainerDeletionTask to the Protobuf representation for + * storing in the state store and recovery. + * + * @return the protobuf representation of the DockerContainerDeletionTask. + */ + public DeletionServiceDeleteTaskProto convertDeletionTaskToProto() { + DeletionServiceDeleteTaskProto.Builder builder = + getBaseDeletionTaskProtoBuilder(); + builder.setTaskType(DeletionTaskType.DOCKER_CONTAINER.name()); + if (getContainerId() != null) { + builder.setDockerContainerId(getContainerId()); + } + return builder.build(); + } +} \ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java index db90215..e07a824 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java @@ -80,6 +80,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.ContainerLocalizer; import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.ResourceLocalizationService; import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerPrepareContext; +import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerReapContext; import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerSignalContext; import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerStartContext; import org.apache.hadoop.yarn.server.nodemanager.util.ProcessIdFileReader; @@ -766,6 +767,36 @@ public void cleanupContainer() throws IOException { lfs.delete(pidFilePath.suffix(EXIT_CODE_FILE_SUFFIX), false); } } + + final int sleepMsec = 100; + int msecLeft = 2000; + if (pidFilePath != null) { + File file = new File(getExitCodeFile(pidFilePath.toString())); + while (!file.exists() && msecLeft >= 0) { + try { + Thread.sleep(sleepMsec); + } catch (InterruptedException e) { + } + msecLeft -= sleepMsec; + } + if (msecLeft < 0) { + if (LOG.isDebugEnabled()) { + LOG.debug("Timeout while waiting for the exit code file: " + + file.getAbsolutePath()); + } + } + } + + // Reap the container + boolean result = exec.reapContainer( + new ContainerReapContext.Builder() + .setContainer(container) + .setUser(container.getUser()) + .build()); + if (!result) { + throw new IOException("Reap container failed for container " + + containerIdStr); + } } /** diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DockerLinuxContainerRuntime.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DockerLinuxContainerRuntime.java index 20359ea..6799ce2 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DockerLinuxContainerRuntime.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DockerLinuxContainerRuntime.java @@ -22,6 +22,9 @@ import com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.yarn.server.nodemanager.Context; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.docker.DockerCommandExecutor; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.docker.DockerKillCommand; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.docker.DockerRmCommand; import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.docker.DockerVolumeCommand; import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.DockerCommandPlugin; import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.ResourcePlugin; @@ -145,6 +148,17 @@ * container-executor based on the values set in container-executor.cfg for * {@code docker.allowed.ro-mounts} and {@code docker.allowed.rw-mounts}. * + *
  • + * {@code YARN_CONTAINER_RUNTIME_DOCKER_DELAYED_REMOVAL} allows a user + * to request delayed deletion of the Docker containers on a per + * container basis. If true, Docker containers will not be removed until + * the duration defined by {@code yarn.nodemanager.delete.debug-delay-sec} + * has elapsed. Administrators can disable this feature through the + * yarn-site property + * {@code yarn.nodemanager.runtime.linux.docker.delayed-removal.allowed}. + * This feature is disabled by default. When this feature is disabled or set + * to false, the container will be removed as soon as it exits. + *
  • * */ @InterfaceAudience.Private @@ -192,6 +206,9 @@ @InterfaceAudience.Private public static final String ENV_DOCKER_CONTAINER_MOUNTS = "YARN_CONTAINER_RUNTIME_DOCKER_MOUNTS"; + @InterfaceAudience.Private + public static final String ENV_DOCKER_CONTAINER_DELAYED_REMOVAL = + "YARN_CONTAINER_RUNTIME_DOCKER_DELAYED_REMOVAL"; private Configuration conf; private Context nmContext; @@ -206,6 +223,7 @@ private int userRemappingUidThreshold; private int userRemappingGidThreshold; private Set capabilities; + private boolean delayedRemovalAllowed; /** * Return whether the given environment variables indicate that the operation @@ -306,6 +324,10 @@ public void initialize(Configuration conf, Context nmContext) YarnConfiguration.DEFAULT_NM_DOCKER_USER_REMAPPING_GID_THRESHOLD); capabilities = getDockerCapabilitiesFromConf(); + + delayedRemovalAllowed = conf.getBoolean( + YarnConfiguration.NM_DOCKER_ALLOW_DELAYED_REMOVAL, + YarnConfiguration.DEFAULT_NM_DOCKER_ALLOW_DELAYED_REMOVAL); } private Set getDockerCapabilitiesFromConf() throws @@ -833,49 +855,66 @@ public void launchContainer(ContainerRuntimeContext ctx) } } + /** + * Signal the docker container. + * + * Signals are used to check the liveliness of the container as well as to + * stop/kill the container. The following outlines the docker container + * signal handling. + * + *
      + *
    1. If the null signal is sent, run kill -0 on the pid. This is used + * to check if the container is still alive, which is necessary for + * reacquiring containers on NM restart.
    2. + *
    3. If SIGTERM, SIGKILL is sent, attempt to stop and remove the docker + * container.
    4. + *
    5. If the docker container exists and is running, execute docker + * stop.
    6. + *
    7. If any other signal is sent, signal the container using docker + * kill.
    8. + *
    + * + * @param ctx the {@link ContainerRuntimeContext}. + * @throws ContainerExecutionException if the signaling fails. + */ @Override public void signalContainer(ContainerRuntimeContext ctx) throws ContainerExecutionException { ContainerExecutor.Signal signal = ctx.getExecutionAttribute(SIGNAL); - - PrivilegedOperation privOp = null; - // Handle liveliness checks, send null signal to pid - if(ContainerExecutor.Signal.NULL.equals(signal)) { - privOp = new PrivilegedOperation( - PrivilegedOperation.OperationType.SIGNAL_CONTAINER); - privOp.appendArgs(ctx.getExecutionAttribute(RUN_AS_USER), - ctx.getExecutionAttribute(USER), - Integer.toString(PrivilegedOperation.RunAsUserCommand - .SIGNAL_CONTAINER.getValue()), - ctx.getExecutionAttribute(PID), - Integer.toString(ctx.getExecutionAttribute(SIGNAL).getValue())); - - // All other signals handled as docker stop - } else { - String containerId = ctx.getContainer().getContainerId().toString(); - DockerStopCommand stopCommand = new DockerStopCommand(containerId); - String commandFile = dockerClient.writeCommandToTempFile(stopCommand, - containerId); - privOp = new PrivilegedOperation( - PrivilegedOperation.OperationType.RUN_DOCKER_CMD); - privOp.appendArgs(commandFile); - } - - //Some failures here are acceptable. Let the calling executor decide. - privOp.disableFailureLogging(); - + String containerId = ctx.getContainer().getContainerId().toString(); + Map env = + ctx.getContainer().getLaunchContext().getEnvironment(); try { - privilegedOperationExecutor.executePrivilegedOperation(null, - privOp, null, null, false, false); - } catch (PrivilegedOperationException e) { - throw new ContainerExecutionException("Signal container failed", e - .getExitCode(), e.getOutput(), e.getErrorOutput()); + if (ContainerExecutor.Signal.NULL.equals(signal)) { + executeLivelinessCheck(ctx); + } else { + if (ContainerExecutor.Signal.KILL.equals(signal) + || ContainerExecutor.Signal.TERM.equals(signal)) { + handleContainerStop(containerId, env); + } else { + handleContainerKill(containerId, env, signal); + } + } + } catch (ContainerExecutionException e) { + LOG.warn("Signal docker container failed. Exception: ", e); + throw new ContainerExecutionException("Signal docker container failed", + e.getExitCode(), e.getOutput(), e.getErrorOutput()); } } + /** + * Reap the docker container. + * + * @param ctx the {@link ContainerRuntimeContext}. + * @throws ContainerExecutionException if the removal fails. + */ @Override public void reapContainer(ContainerRuntimeContext ctx) throws ContainerExecutionException { + // Clean up the Docker container + handleContainerRemove(ctx.getContainer().getContainerId().toString(), + ctx.getContainer().getLaunchContext().getEnvironment()); + // Cleanup volumes when needed. if (nmContext != null && nmContext.getResourcePluginManager().getNameToPlugins() != null) { @@ -993,4 +1032,83 @@ public static void validateImageName(String imageName) + "' doesn't match docker image name pattern"); } } + + private void executeLivelinessCheck(ContainerRuntimeContext ctx) + throws ContainerExecutionException { + PrivilegedOperation signalOp = new PrivilegedOperation( + PrivilegedOperation.OperationType.SIGNAL_CONTAINER); + signalOp.appendArgs(ctx.getExecutionAttribute(RUN_AS_USER), + ctx.getExecutionAttribute(USER), Integer.toString( + PrivilegedOperation.RunAsUserCommand.SIGNAL_CONTAINER.getValue()), + ctx.getExecutionAttribute(PID), + Integer.toString(ctx.getExecutionAttribute(SIGNAL).getValue())); + signalOp.disableFailureLogging(); + try { + privilegedOperationExecutor.executePrivilegedOperation(null, signalOp, + null, ctx.getContainer().getLaunchContext().getEnvironment(), false, + false); + } catch (PrivilegedOperationException e) { + String msg = "Liveliness check failed for PID: " + + ctx.getExecutionAttribute(PID) + + ". Container may have already completed."; + throw new ContainerExecutionException(msg, e.getExitCode(), e.getOutput(), + e.getErrorOutput()); + } + } + + private void handleContainerStop(String containerId, Map env) + throws ContainerExecutionException { + DockerCommandExecutor.DockerContainerStatus containerStatus = + DockerCommandExecutor.getContainerStatus(containerId, conf, + privilegedOperationExecutor); + if (DockerCommandExecutor.isStoppable(containerStatus)) { + DockerStopCommand dockerStopCommand = new DockerStopCommand(containerId); + DockerCommandExecutor.executeDockerCommand(dockerStopCommand, containerId, + env, conf, privilegedOperationExecutor, false); + } else { + if (LOG.isDebugEnabled()) { + LOG.debug( + "Container status is " + containerStatus.getName() + + ", skipping stop - " + containerId); + } + } + } + + private void handleContainerKill(String containerId, Map env, + ContainerExecutor.Signal signal) throws ContainerExecutionException { + DockerCommandExecutor.DockerContainerStatus containerStatus = + DockerCommandExecutor.getContainerStatus(containerId, conf, + privilegedOperationExecutor); + if (DockerCommandExecutor.isKillable(containerStatus)) { + DockerKillCommand dockerKillCommand = + new DockerKillCommand(containerId).setSignal(signal.name()); + DockerCommandExecutor.executeDockerCommand(dockerKillCommand, containerId, + env, conf, privilegedOperationExecutor, false); + } else { + if (LOG.isDebugEnabled()) { + LOG.debug( + "Container status is " + containerStatus.getName() + + ", skipping kill - " + containerId); + } + } + } + + private void handleContainerRemove(String containerId, + Map env) throws ContainerExecutionException { + String delayedRemoval = env.get(ENV_DOCKER_CONTAINER_DELAYED_REMOVAL); + if (delayedRemovalAllowed && delayedRemoval != null + && delayedRemoval.equalsIgnoreCase("true")) { + LOG.info("Delayed removal requested and allowed, skipping removal - " + + containerId); + } else { + DockerCommandExecutor.DockerContainerStatus containerStatus = + DockerCommandExecutor.getContainerStatus(containerId, conf, + privilegedOperationExecutor); + if (DockerCommandExecutor.isRemovable(containerStatus)) { + DockerRmCommand dockerRmCommand = new DockerRmCommand(containerId); + DockerCommandExecutor.executeDockerCommand(dockerRmCommand, containerId, + env, conf, privilegedOperationExecutor, false); + } + } + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/docker/DockerCommandExecutor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/docker/DockerCommandExecutor.java index 76b53af..fd1812b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/docker/DockerCommandExecutor.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/docker/DockerCommandExecutor.java @@ -183,9 +183,46 @@ private static String executeStatusCommand(String containerId, new DockerInspectCommand(containerId).getContainerStatus(); try { return DockerCommandExecutor.executeDockerCommand(dockerInspectCommand, - containerId, null, conf, privilegedOperationExecutor, false); + containerId, null, conf, privilegedOperationExecutor, true); } catch (ContainerExecutionException e) { throw new ContainerExecutionException(e); } } + + /** + * Is the container in a stoppable state? + * + * @param containerStatus the container's {@link DockerContainerStatus}. + * @return is the container in a stoppable state. + */ + public static boolean isStoppable(DockerContainerStatus containerStatus) { + if (containerStatus.equals(DockerContainerStatus.RUNNING) + || containerStatus.equals(DockerContainerStatus.RESTARTING)) { + return true; + } + return false; + } + + /** + * Is the container in a killable state? + * + * @param containerStatus the container's {@link DockerContainerStatus}. + * @return is the container in a killable state. + */ + public static boolean isKillable(DockerContainerStatus containerStatus) { + return isStoppable(containerStatus); + } + + /** + * Is the container in a removable state? + * + * @param containerStatus the container's {@link DockerContainerStatus}. + * @return is the container in a removable state. + */ + public static boolean isRemovable(DockerContainerStatus containerStatus) { + return !containerStatus.equals(DockerContainerStatus.NONEXISTENT) + && !containerStatus.equals(DockerContainerStatus.UNKNOWN) + && !containerStatus.equals(DockerContainerStatus.REMOVING) + && !containerStatus.equals(DockerContainerStatus.RUNNING); + } } \ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/docker/DockerKillCommand.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/docker/DockerKillCommand.java new file mode 100644 index 0000000..d37370e --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/docker/DockerKillCommand.java @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.docker; + +/** + * Encapsulates the docker kill command and its command line arguments. + */ +public class DockerKillCommand extends DockerCommand { + private static final String KILL_COMMAND = "kill"; + + public DockerKillCommand(String containerName) { + super(KILL_COMMAND); + super.addCommandArguments("name", containerName); + } + + /** + * Set the signal for the {@link DockerKillCommand}. + * + * @param signal the signal to send to the container. + * @return the {@link DockerKillCommand} with the signal set. + */ + public DockerKillCommand setSignal(String signal) { + super.addCommandArguments("signal", signal); + return this; + } +} \ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/executor/ContainerReapContext.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/executor/ContainerReapContext.java new file mode 100644 index 0000000..83aeef8 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/executor/ContainerReapContext.java @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.yarn.server.nodemanager.executor; + +import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; + +/** + * Encapsulate the details needed to reap a container. + */ +public final class ContainerReapContext { + + private final Container container; + private final String user; + + /** + * Builder for the ContainerReapContext. + */ + public static final class Builder { + private Container builderContainer; + private String builderUser; + + public Builder() { + } + + /** + * Set the container within the context. + * + * @param container the {@link Container}. + * @return the Builder with the container set. + */ + public Builder setContainer(Container container) { + this.builderContainer = container; + return this; + } + + /** + * Set the set within the context. + * + * @param user the user. + * @return the Builder with the user set. + */ + public Builder setUser(String user) { + this.builderUser = user; + return this; + } + + /** + * Builds the context with the attributes set. + * + * @return the context. + */ + public ContainerReapContext build() { + return new ContainerReapContext(this); + } + } + + private ContainerReapContext(Builder builder) { + this.container = builder.builderContainer; + this.user = builder.builderUser; + } + + /** + * Get the container set for the context. + * + * @return the {@link Container} set in the context. + */ + public Container getContainer() { + return container; + } + + /** + * Get the user set for the context. + * + * @return the user set in the context. + */ + public String getUser() { + return user; + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c index 3b04f88..11d2076 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c @@ -19,6 +19,7 @@ #include "configuration.h" #include "container-executor.h" #include "utils/docker-util.h" +#include "utils/path-utils.h" #include "util.h" #include "config.h" @@ -71,6 +72,8 @@ static const char* DEFAULT_BANNED_USERS[] = {"yarn", "mapred", "hdfs", "bin", 0} static const int DEFAULT_DOCKER_SUPPORT_ENABLED = 0; static const int DEFAULT_TC_SUPPORT_ENABLED = 0; +static const char* PROC_PATH = "/proc"; + //location of traffic control binary static const char* TC_BIN = "/sbin/tc"; static const char* TC_MODIFY_STATE_OPTS [] = { "-b" , NULL}; @@ -1320,6 +1323,7 @@ int launch_docker_container_as_user(const char * user, const char *app_id, char *docker_logs_command = NULL; char *docker_inspect_command = NULL; char *docker_rm_command = NULL; + char *docker_inspect_exitcode_command = NULL; int container_file_source =-1; int cred_file_source = -1; int BUFFER_SIZE = 4096; @@ -1332,6 +1336,7 @@ int launch_docker_container_as_user(const char * user, const char *app_id, docker_logs_command = (char *) alloc_and_clear_memory(command_size, sizeof(char)); docker_inspect_command = (char *) alloc_and_clear_memory(command_size, sizeof(char)); docker_rm_command = (char *) alloc_and_clear_memory(command_size, sizeof(char)); + docker_inspect_exitcode_command = (char *) alloc_and_clear_memory(command_size, sizeof(char)); gid_t user_gid = getegid(); uid_t prev_uid = geteuid(); @@ -1382,6 +1387,7 @@ int launch_docker_container_as_user(const char * user, const char *app_id, snprintf(docker_command_with_binary, command_size, "%s %s", docker_binary, docker_command); fprintf(LOGFILE, "Launching docker container...\n"); + fprintf(LOGFILE, "Docker run command: %s\n", docker_command_with_binary); FILE* start_docker = popen(docker_command_with_binary, "r"); if (pclose (start_docker) != 0) { @@ -1397,9 +1403,11 @@ int launch_docker_container_as_user(const char * user, const char *app_id, docker_binary, container_id); fprintf(LOGFILE, "Inspecting docker container...\n"); + fprintf(LOGFILE, "Docker inspect command: %s\n", docker_inspect_command); FILE* inspect_docker = popen(docker_inspect_command, "r"); int pid = 0; int res = fscanf (inspect_docker, "%d", &pid); + fprintf(LOGFILE, "pid from docker inspect: %d\n", pid); if (pclose (inspect_docker) != 0 || res <= 0) { fprintf (ERRORFILE, @@ -1437,17 +1445,45 @@ int launch_docker_container_as_user(const char * user, const char *app_id, goto cleanup; } - snprintf(docker_wait_command, command_size, - "%s wait %s", docker_binary, container_id); + fprintf(LOGFILE, "Waiting for docker container to finish.\n"); +#ifdef __linux + size_t command_size = MIN(sysconf(_SC_ARG_MAX), 128*1024); + char* proc_pid_path = alloc_and_clear_memory(command_size, sizeof(char)); + snprintf(proc_pid_path, command_size, "%s/%d", PROC_PATH, pid); + while (dir_exists(proc_pid_path) == 0) { + sleep(1); + } + if (dir_exists(proc_pid_path) == -1) { + fprintf(ERRORFILE, "Error occurred checking %s\n", proc_pid_path); + fflush(ERRORFILE); + } +#else + while (kill(pid,0) == 0) { + sleep(1); + } +#endif - fprintf(LOGFILE, "Waiting for docker container to finish...\n"); - FILE* wait_docker = popen(docker_wait_command, "r"); - res = fscanf (wait_docker, "%d", &exit_code); - if (pclose (wait_docker) != 0 || res <= 0) { - fprintf (ERRORFILE, - "Could not attach to docker; is container dead? %s.\n", docker_wait_command); + sprintf(docker_inspect_exitcode_command, + "%s inspect --format {{.State.ExitCode}} %s", + docker_binary, container_id); + fprintf(LOGFILE, "Obtaining the exit code...\n"); + fprintf(LOGFILE, "Docker inspect command: %s\n", docker_inspect_exitcode_command); + FILE* inspect_exitcode_docker = popen(docker_inspect_exitcode_command, "r"); + if(inspect_exitcode_docker == NULL) { + fprintf(ERRORFILE, "Done with inspect_exitcode, inspect_exitcode_docker is null\n"); fflush(ERRORFILE); + exit_code = -1; + goto cleanup; } + res = fscanf (inspect_exitcode_docker, "%d", &exit_code); + if (pclose (inspect_exitcode_docker) != 0 || res <= 0) { + fprintf (ERRORFILE, + "Could not inspect docker to get exitcode: %s.\n", docker_inspect_exitcode_command); + fflush(ERRORFILE); + exit_code = -1; + goto cleanup; + } + fprintf(LOGFILE, "Exit code from docker inspect: %d\n", exit_code); if(exit_code != 0) { fprintf(ERRORFILE, "Docker container exit code was not zero: %d\n", exit_code); @@ -1480,19 +1516,6 @@ int launch_docker_container_as_user(const char * user, const char *app_id, } } - fprintf(LOGFILE, "Removing docker container post-exit...\n"); - snprintf(docker_rm_command, command_size, - "%s rm %s", docker_binary, container_id); - FILE* rm_docker = popen(docker_rm_command, "w"); - if (pclose (rm_docker) != 0) - { - fprintf (ERRORFILE, - "Could not remove container %s.\n", docker_rm_command); - fflush(ERRORFILE); - exit_code = UNABLE_TO_EXECUTE_CONTAINER_SCRIPT; - goto cleanup; - } - cleanup: if (exit_code_file != NULL && write_exit_code_file_as_nm(exit_code_file, exit_code) < 0) { @@ -1500,6 +1523,7 @@ cleanup: "Could not write exit code to file %s.\n", exit_code_file); fflush(ERRORFILE); } + fprintf(LOGFILE, "Wrote the exit code %d to %s\n", exit_code, exit_code_file); // Drop root privileges if (change_effective_user(prev_uid, user_gid) != 0) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/utils/docker-util.c b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/utils/docker-util.c index a0138d1..b5cb551 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/utils/docker-util.c +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/utils/docker-util.c @@ -253,6 +253,8 @@ int get_docker_command(const char *command_file, const struct configuration *con char *command = get_configuration_value("docker-command", DOCKER_COMMAND_FILE_SECTION, &command_config); if (strcmp(DOCKER_INSPECT_COMMAND, command) == 0) { return get_docker_inspect_command(command_file, conf, out, outlen); + } else if (strcmp(DOCKER_KILL_COMMAND, command) == 0) { + return get_docker_kill_command(command_file, conf, out, outlen); } else if (strcmp(DOCKER_LOAD_COMMAND, command) == 0) { return get_docker_load_command(command_file, conf, out, outlen); } else if (strcmp(DOCKER_PULL_COMMAND, command) == 0) { @@ -661,6 +663,66 @@ int get_docker_stop_command(const char *command_file, const struct configuration return BUFFER_TOO_SMALL; } +int get_docker_kill_command(const char *command_file, const struct configuration *conf, + char *out, const size_t outlen) { + int ret = 0; + size_t len = 0, i = 0; + char *value = NULL; + char *container_name = NULL; + struct configuration command_config = {0, NULL}; + ret = read_and_verify_command_file(command_file, DOCKER_KILL_COMMAND, &command_config); + if (ret != 0) { + return ret; + } + + container_name = get_configuration_value("name", DOCKER_COMMAND_FILE_SECTION, &command_config); + if (container_name == NULL || validate_container_name(container_name) != 0) { + return INVALID_DOCKER_CONTAINER_NAME; + } + + memset(out, 0, outlen); + + ret = add_docker_config_param(&command_config, out, outlen); + if (ret != 0) { + return BUFFER_TOO_SMALL; + } + + ret = add_to_buffer(out, outlen, DOCKER_KILL_COMMAND); + if (ret == 0) { + value = get_configuration_value("signal", DOCKER_COMMAND_FILE_SECTION, &command_config); + if (value != NULL) { + len = strlen(value); + for (i = 0; i < len; ++i) { + if (isupper(value[i]) == 0) { + fprintf(ERRORFILE, "Value for signal contains non-uppercase characters '%s'\n", value); + free(container_name); + memset(out, 0, outlen); + return INVALID_DOCKER_KILL_COMMAND; + } + } + ret = add_to_buffer(out, outlen, " --signal="); + if (ret == 0) { + ret = add_to_buffer(out, outlen, value); + } + if (ret != 0) { + free(container_name); + return BUFFER_TOO_SMALL; + } + } + ret = add_to_buffer(out, outlen, " "); + if (ret == 0) { + ret = add_to_buffer(out, outlen, container_name); + } + free(container_name); + if (ret != 0) { + return BUFFER_TOO_SMALL; + } + return 0; + } + free(container_name); + return BUFFER_TOO_SMALL; +} + static int detach_container(const struct configuration *command_config, char *out, const size_t outlen) { return add_param_to_command(command_config, "detach", "-d ", 0, out, outlen); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/utils/docker-util.h b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/utils/docker-util.h index 9c42abe..f98800c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/utils/docker-util.h +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/utils/docker-util.h @@ -30,6 +30,7 @@ #define DOCKER_RM_COMMAND "rm" #define DOCKER_RUN_COMMAND "run" #define DOCKER_STOP_COMMAND "stop" +#define DOCKER_KILL_COMMAND "kill" #define DOCKER_VOLUME_COMMAND "volume" @@ -51,6 +52,7 @@ enum docker_error_codes { MOUNT_ACCESS_ERROR, INVALID_DOCKER_DEVICE, INVALID_DOCKER_STOP_COMMAND, + INVALID_DOCKER_KILL_COMMAND, INVALID_DOCKER_VOLUME_DRIVER, INVALID_DOCKER_VOLUME_NAME, INVALID_DOCKER_VOLUME_COMMAND @@ -135,6 +137,16 @@ int get_docker_run_command(const char* command_file, const struct configuration* int get_docker_stop_command(const char* command_file, const struct configuration* conf, char *out, const size_t outlen); /** + * Get the Docker kill command line string. The function will verify that the params file is meant for the kill command. + * @param command_file File containing the params for the Docker kill command + * @param conf Configuration struct containing the container-executor.cfg details + * @param out Buffer to fill with the kill command + * @param outlen Size of the output buffer + * @return Return code with 0 indicating success and non-zero codes indicating error + */ +int get_docker_kill_command(const char* command_file, const struct configuration* conf, char *out, const size_t outlen); + +/** * Get the Docker volume command line string. The function will verify that the * params file is meant for the volume command. * @param command_file File containing the params for the Docker volume command diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/utils/path-utils.c b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/utils/path-utils.c index dea656b..d8219bb 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/utils/path-utils.c +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/utils/path-utils.c @@ -18,6 +18,8 @@ #include "util.h" +#include +#include #include #include #include @@ -49,4 +51,16 @@ int verify_path_safety(const char* path) { free(dup); return succeeded; +} + +int dir_exists(const char* path) { + DIR* dir = opendir(path); + if (dir) { + closedir(dir); + return 0; + } else if (ENOENT == errno) { + return 1; + } else { + return -1; + } } \ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/utils/path-utils.h b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/utils/path-utils.h index a42f936..041ec95 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/utils/path-utils.h +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/utils/path-utils.h @@ -32,4 +32,11 @@ */ int verify_path_safety(const char* path); +/* + * Verify that a given directory exists. + * return 0 if the directory exists, 1 if the directory does not exist, and -1 + * for all other errors. + */ +int dir_exists(const char* path); + #endif diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/utils/test-path-utils.cc b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/utils/test-path-utils.cc index a24c0c7..05e17cd 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/utils/test-path-utils.cc +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/utils/test-path-utils.cc @@ -64,4 +64,16 @@ ASSERT_TRUE(flag) << "Should succeeded\n"; } +TEST_F(TestPathUtils, test_dir_exists) { + const char* input = "/non/existent/dir"; + int flag = dir_exists(input); + std::cout << "Testing input=" << input << "\n"; + ASSERT_NE(flag, 0) << "Should failed\n"; + + input = "/"; + flag = dir_exists(input); + std::cout << "Testing input=" << input << "\n"; + ASSERT_EQ(flag, 0) << "Should succeeded\n"; +} + } // namespace ContainerExecutor \ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/utils/test_docker_util.cc b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/utils/test_docker_util.cc index 0c1c4bf..416bf38 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/utils/test_docker_util.cc +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/utils/test_docker_util.cc @@ -312,6 +312,32 @@ namespace ContainerExecutor { run_docker_command_test(file_cmd_vec, bad_file_cmd_vec, get_docker_stop_command); } + TEST_F(TestDockerUtil, test_docker_kill) { + std::vector > file_cmd_vec; + file_cmd_vec.push_back(std::make_pair( + "[docker-command-execution]\n docker-command=kill\n name=container_e1_12312_11111_02_000001", + "kill container_e1_12312_11111_02_000001")); + file_cmd_vec.push_back(std::make_pair( + "[docker-command-execution]\n docker-command=kill\n name=container_e1_12312_11111_02_000001\nsignal=SIGQUIT", + "kill --signal=SIGQUIT container_e1_12312_11111_02_000001")); + + std::vector > bad_file_cmd_vec; + bad_file_cmd_vec.push_back(std::make_pair( + "[docker-command-execution]\n docker-command=run\n name=container_e1_12312_11111_02_000001", + static_cast(INCORRECT_COMMAND))); + bad_file_cmd_vec.push_back(std::make_pair( + "docker-command=kill\n name=ctr-id", static_cast(INCORRECT_COMMAND))); + bad_file_cmd_vec.push_back(std::make_pair( + "[docker-command-execution]\n docker-command=kill\n name=", static_cast(INVALID_DOCKER_CONTAINER_NAME))); + bad_file_cmd_vec.push_back(std::make_pair( + "[docker-command-execution]\n docker-command=kill", static_cast(INVALID_DOCKER_CONTAINER_NAME))); + bad_file_cmd_vec.push_back(std::make_pair( + "[docker-command-execution]\n docker-command=kill\n name=container_e1_12312_11111_02_000001\n signal=foo | bar", + static_cast(INVALID_DOCKER_KILL_COMMAND))); + + run_docker_command_test(file_cmd_vec, bad_file_cmd_vec, get_docker_kill_command); + } + TEST_F(TestDockerUtil, test_detach_container) { std::vector > file_cmd_vec; file_cmd_vec.push_back(std::make_pair( diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/proto/yarn_server_nodemanager_recovery.proto b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/proto/yarn_server_nodemanager_recovery.proto index 7212953..4eee9b4 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/proto/yarn_server_nodemanager_recovery.proto +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/proto/yarn_server_nodemanager_recovery.proto @@ -42,6 +42,7 @@ message DeletionServiceDeleteTaskProto { repeated string basedirs = 5; repeated int32 successorIds = 6; optional string taskType = 7; + optional string dockerContainerId = 8; } message LocalizedResourceProto { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestContainerExecutor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestContainerExecutor.java index 396c8f4..3bcdc87 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestContainerExecutor.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestContainerExecutor.java @@ -25,7 +25,8 @@ import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.conf.YarnConfiguration; - +import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; +import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerReapContext; import org.apache.hadoop.yarn.server.nodemanager.util.NodeManagerHardwareUtils; import org.apache.hadoop.yarn.util.ResourceCalculatorPlugin; import org.junit.Assert; @@ -33,6 +34,7 @@ import static org.apache.hadoop.test.PlatformAssumptions.assumeWindows; import static org.junit.Assert.*; +import static org.mockito.Mockito.mock; @SuppressWarnings("deprecation") public class TestContainerExecutor { @@ -158,4 +160,12 @@ public void testRunCommandWithCpuAndMemoryResources() { expected[6] = String.valueOf(cpuRate); Assert.assertEquals(Arrays.toString(expected), Arrays.toString(command)); } + + @Test + public void testReapContainer() throws Exception { + Container container = mock(Container.class); + ContainerReapContext.Builder builder = new ContainerReapContext.Builder(); + builder.setContainer(container).setUser("foo"); + assertTrue(containerExecutor.reapContainer(builder.build())); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutor.java index dcec4c3..31be858 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutor.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutor.java @@ -26,7 +26,11 @@ import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; + +import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerReapContext; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -649,6 +653,28 @@ public void testPostExecuteAfterReacquisition() throws Exception { TestResourceHandler.postExecContainers.contains(cid)); } + @Test + public void testRemoveDockerContainer() throws Exception { + ApplicationId appId = ApplicationId.newInstance(12345, 67890); + ApplicationAttemptId attemptId = + ApplicationAttemptId.newInstance(appId, 54321); + String cid = ContainerId.newContainerId(attemptId, 9876).toString(); + LinuxContainerExecutor lce = mock(LinuxContainerExecutor.class); + lce.removeDockerContainer(cid); + verify(lce, times(1)).removeDockerContainer(cid); + } + + @Test + public void testReapContainer() throws Exception { + Container container = mock(Container.class); + LinuxContainerExecutor lce = mock(LinuxContainerExecutor.class); + ContainerReapContext.Builder builder = new ContainerReapContext.Builder(); + builder.setContainer(container).setUser("foo"); + ContainerReapContext ctx = builder.build(); + lce.reapContainer(ctx); + verify(lce, times(1)).reapContainer(ctx); + } + private static class TestResourceHandler implements LCEResourcesHandler { static Set postExecContainers = new HashSet(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/amrmproxy/BaseAMRMProxyTest.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/amrmproxy/BaseAMRMProxyTest.java index 18d1dce..9602142 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/amrmproxy/BaseAMRMProxyTest.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/amrmproxy/BaseAMRMProxyTest.java @@ -47,6 +47,7 @@ import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor; import org.apache.hadoop.yarn.server.nodemanager.ContainerStateTransitionListener; import org.apache.hadoop.yarn.server.nodemanager.Context; +import org.apache.hadoop.yarn.server.nodemanager.DeletionService; import org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService; import org.apache.hadoop.yarn.server.nodemanager.NodeManager.NMContext; import org.apache.hadoop.yarn.server.nodemanager.NodeResourceMonitor; @@ -808,5 +809,10 @@ public ResourcePluginManager getResourcePluginManager() { public NodeManagerMetrics getNodeManagerMetrics() { return null; } + + @Override + public DeletionService getDeletionService() { + return null; + } } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/api/impl/pb/TestNMProtoUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/api/impl/pb/TestNMProtoUtils.java index 69e01bc..11a69e9 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/api/impl/pb/TestNMProtoUtils.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/api/impl/pb/TestNMProtoUtils.java @@ -23,6 +23,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.deletion.recovery.DeletionTaskRecoveryInfo; import org.apache.hadoop.yarn.server.nodemanager.containermanager.deletion.task.DeletionTask; import org.apache.hadoop.yarn.server.nodemanager.containermanager.deletion.task.DeletionTaskType; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.deletion.task.DockerContainerDeletionTask; import org.apache.hadoop.yarn.server.nodemanager.containermanager.deletion.task.FileDeletionTask; import org.junit.Test; @@ -77,6 +78,29 @@ public void testConvertProtoToFileDeletionTask() throws Exception { } @Test + public void testConvertProtoToDockerContainerDeletionTask() throws Exception { + DeletionService deletionService = mock(DeletionService.class); + int id = 0; + String user = "user"; + String dockerContainerId = "container_e123_12321231_00001"; + DeletionServiceDeleteTaskProto.Builder protoBuilder = + DeletionServiceDeleteTaskProto.newBuilder(); + protoBuilder + .setId(id) + .setUser(user) + .setDockerContainerId(dockerContainerId); + DeletionServiceDeleteTaskProto proto = protoBuilder.build(); + DeletionTask deletionTask = + NMProtoUtils.convertProtoToDockerContainerDeletionTask(proto, + deletionService, id); + assertEquals(DeletionTaskType.DOCKER_CONTAINER.name(), + deletionTask.getDeletionTaskType().name()); + assertEquals(id, deletionTask.getTaskId()); + assertEquals(dockerContainerId, + ((DockerContainerDeletionTask) deletionTask).getContainerId()); + } + + @Test public void testConvertProtoToDeletionTaskRecoveryInfo() throws Exception { long delTime = System.currentTimeMillis(); List successorTaskIds = Arrays.asList(1); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java index 64e6cf0..5afbd94 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java @@ -25,6 +25,7 @@ import static org.mockito.Matchers.argThat; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.reset; +import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; import static org.mockito.Mockito.atLeastOnce; @@ -73,12 +74,14 @@ import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor.ExitCode; import org.apache.hadoop.yarn.server.nodemanager.ContainerStateTransitionListener; import org.apache.hadoop.yarn.server.nodemanager.Context; +import org.apache.hadoop.yarn.server.nodemanager.DeletionService; import org.apache.hadoop.yarn.server.nodemanager.NodeManager; import org.apache.hadoop.yarn.server.nodemanager.containermanager.AuxServicesEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.AuxServicesEventType; import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application; import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationEventType; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.deletion.task.DockerContainerDeletionMatcher; import org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch; import org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainersLauncher; import org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainersLauncherEvent; @@ -94,7 +97,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitorEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitorEventType; - +import org.apache.hadoop.yarn.server.nodemanager.containermanager.runtime.ContainerRuntimeConstants; import org.apache.hadoop.yarn.server.nodemanager.containermanager.scheduler.ContainerScheduler; import org.apache.hadoop.yarn.server.nodemanager.containermanager.scheduler.ContainerSchedulerEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.scheduler.ContainerSchedulerEventType; @@ -210,6 +213,37 @@ public void testExternalKill() throws Exception { @Test @SuppressWarnings("unchecked") // mocked generic + public void testDockerContainerExternalKill() throws Exception { + WrappedContainer wc = null; + try { + wc = new WrappedContainer(13, 314159265358979L, 4344, "yak"); + wc.setupDockerContainerEnv(); + wc.initContainer(); + wc.localizeResources(); + int running = metrics.getRunningContainers(); + wc.launchContainer(); + assertEquals(running + 1, metrics.getRunningContainers()); + reset(wc.localizerBus); + wc.containerKilledOnRequest(); + assertEquals(ContainerState.EXITED_WITH_FAILURE, + wc.c.getContainerState()); + assertNull(wc.c.getLocalizedResources()); + verifyCleanupCall(wc); + int failed = metrics.getFailedContainers(); + wc.dockerContainerResourcesCleanup(); + assertEquals(ContainerState.DONE, wc.c.getContainerState()); + assertEquals(failed + 1, metrics.getFailedContainers()); + assertEquals(running, metrics.getRunningContainers()); + } + finally { + if (wc != null) { + wc.finished(); + } + } + } + + @Test + @SuppressWarnings("unchecked") // mocked generic public void testContainerPauseAndResume() throws Exception { WrappedContainer wc = null; try { @@ -266,6 +300,30 @@ public void testCleanupOnFailure() throws Exception { } } } + + @Test + @SuppressWarnings("unchecked") // mocked generic + public void testDockerContainerCleanupOnFailure() throws Exception { + WrappedContainer wc = null; + try { + wc = new WrappedContainer(10, 314159265358979L, 4344, "yak"); + wc.setupDockerContainerEnv(); + wc.initContainer(); + wc.localizeResources(); + wc.launchContainer(); + reset(wc.localizerBus); + wc.containerFailed(ExitCode.FORCE_KILLED.getExitCode()); + assertEquals(ContainerState.EXITED_WITH_FAILURE, + wc.c.getContainerState()); + assertNull(wc.c.getLocalizedResources()); + verifyCleanupCall(wc); + wc.dockerContainerResourcesCleanup(); + } finally { + if (wc != null) { + wc.finished(); + } + } + } @Test @SuppressWarnings("unchecked") // mocked generic @@ -322,6 +380,36 @@ public void testCleanupOnSuccess() throws Exception { @Test @SuppressWarnings("unchecked") // mocked generic + public void testDockerContainerCleanupOnSuccess() throws Exception { + WrappedContainer wc = null; + try { + wc = new WrappedContainer(11, 314159265358979L, 4344, "yak"); + wc.setupDockerContainerEnv(); + wc.initContainer(); + wc.localizeResources(); + int running = metrics.getRunningContainers(); + wc.launchContainer(); + assertEquals(running + 1, metrics.getRunningContainers()); + reset(wc.localizerBus); + wc.containerSuccessful(); + assertEquals(ContainerState.EXITED_WITH_SUCCESS, + wc.c.getContainerState()); + assertNull(wc.c.getLocalizedResources()); + verifyCleanupCall(wc); + int completed = metrics.getCompletedContainers(); + wc.dockerContainerResourcesCleanup(); + assertEquals(ContainerState.DONE, wc.c.getContainerState()); + assertEquals(completed + 1, metrics.getCompletedContainers()); + assertEquals(running, metrics.getRunningContainers()); + } finally { + if (wc != null) { + wc.finished(); + } + } + } + + @Test + @SuppressWarnings("unchecked") // mocked generic public void testInitWhileDone() throws Exception { WrappedContainer wc = null; try { @@ -350,6 +438,35 @@ public void testInitWhileDone() throws Exception { } @Test + @SuppressWarnings("unchecked") // mocked generic + public void testDockerContainerInitWhileDone() throws Exception { + WrappedContainer wc = null; + try { + wc = new WrappedContainer(6, 314159265358979L, 4344, "yak"); + wc.setupDockerContainerEnv(); + wc.initContainer(); + wc.localizeResources(); + wc.launchContainer(); + reset(wc.localizerBus); + wc.containerSuccessful(); + wc.dockerContainerResourcesCleanup(); + assertEquals(ContainerState.DONE, wc.c.getContainerState()); + verifyOutofBandHeartBeat(wc); + assertNull(wc.c.getLocalizedResources()); + // Now in DONE, issue INIT + wc.initContainer(); + // Verify still in DONE + assertEquals(ContainerState.DONE, wc.c.getContainerState()); + assertNull(wc.c.getLocalizedResources()); + verifyCleanupCall(wc); + } finally { + if (wc != null) { + wc.finished(); + } + } + } + + @Test @SuppressWarnings("unchecked") // mocked generic public void testLocalizationFailureAtDone() throws Exception { @@ -377,6 +494,36 @@ public void testLocalizationFailureAtDone() throws Exception { } } } + + @Test + @SuppressWarnings("unchecked") + // mocked generic + public void testDockerContainerLocalizationFailureAtDone() throws Exception { + WrappedContainer wc = null; + try { + wc = new WrappedContainer(6, 314159265358979L, 4344, "yak"); + wc.setupDockerContainerEnv(); + wc.initContainer(); + wc.localizeResources(); + wc.launchContainer(); + reset(wc.localizerBus); + wc.containerSuccessful(); + wc.dockerContainerResourcesCleanup(); + assertEquals(ContainerState.DONE, wc.c.getContainerState()); + verifyOutofBandHeartBeat(wc); + assertNull(wc.c.getLocalizedResources()); + // Now in DONE, issue RESOURCE_FAILED as done by LocalizeRunner + wc.resourceFailedContainer(); + // Verify still in DONE + assertEquals(ContainerState.DONE, wc.c.getContainerState()); + assertNull(wc.c.getLocalizedResources()); + verifyCleanupCall(wc); + } finally { + if (wc != null) { + wc.finished(); + } + } + } @Test @SuppressWarnings("unchecked") // mocked generic @@ -517,6 +664,38 @@ public void testKillOnLocalizedWhenContainerNotLaunchedContainerKilled() } @Test + public void testDockerKillOnLocalizedWhenContainerNotLaunchedContainerKilled() + throws Exception { + WrappedContainer wc = null; + try { + wc = new WrappedContainer(17, 314159265358979L, 4344, "yak"); + wc.setupDockerContainerEnv(); + wc.initContainer(); + wc.localizeResources(); + assertEquals(ContainerState.SCHEDULED, wc.c.getContainerState()); + ContainerLaunch launcher = wc.launcher.running.get(wc.c.getContainerId()); + wc.killContainer(); + assertEquals(ContainerState.KILLING, wc.c.getContainerState()); + launcher.call(); + wc.drainDispatcherEvents(); + assertEquals(ContainerState.CONTAINER_CLEANEDUP_AFTER_KILL, + wc.c.getContainerState()); + assertNull(wc.c.getLocalizedResources()); + verifyDockerContainerCleanupCall(wc); + int killed = metrics.getKilledContainers(); + wc.c.handle(new ContainerEvent(wc.c.getContainerId(), + ContainerEventType.CONTAINER_RESOURCES_CLEANEDUP)); + assertEquals(ContainerState.DONE, wc.c.getContainerState()); + assertEquals(killed + 1, metrics.getKilledContainers()); + assertEquals(0, metrics.getRunningContainers()); + } finally { + if (wc != null) { + wc.finished(); + } + } + } + + @Test public void testKillOnLocalizedWhenContainerNotLaunchedContainerSuccess() throws Exception { WrappedContainer wc = null; @@ -573,6 +752,35 @@ public void testKillOnLocalizedWhenContainerNotLaunchedContainerFailure() } @Test + public void testDockerKillOnLocalizedContainerNotLaunchedContainerFailure() + throws Exception { + WrappedContainer wc = null; + try { + wc = new WrappedContainer(17, 314159265358979L, 4344, "yak"); + wc.setupDockerContainerEnv(); + wc.initContainer(); + wc.localizeResources(); + assertEquals(ContainerState.SCHEDULED, wc.c.getContainerState()); + wc.killContainer(); + assertEquals(ContainerState.KILLING, wc.c.getContainerState()); + wc.containerFailed(ExitCode.FORCE_KILLED.getExitCode()); + wc.drainDispatcherEvents(); + assertEquals(ContainerState.EXITED_WITH_FAILURE, + wc.c.getContainerState()); + assertNull(wc.c.getLocalizedResources()); + verifyDockerContainerCleanupCall(wc); + wc.c.handle(new ContainerEvent(wc.c.getContainerId(), + ContainerEventType.CONTAINER_RESOURCES_CLEANEDUP)); + assertEquals(ContainerState.DONE, wc.c.getContainerState()); + assertEquals(0, metrics.getRunningContainers()); + } finally { + if (wc != null) { + wc.finished(); + } + } + } + + @Test public void testKillOnLocalizedWhenContainerLaunched() throws Exception { WrappedContainer wc = null; try { @@ -596,6 +804,33 @@ public void testKillOnLocalizedWhenContainerLaunched() throws Exception { } } } + + @Test + public void testDockerKillOnLocalizedWhenContainerLaunched() + throws Exception { + WrappedContainer wc = null; + try { + wc = new WrappedContainer(17, 314159265358979L, 4344, "yak"); + wc.setupDockerContainerEnv(); + wc.initContainer(); + wc.localizeResources(); + assertEquals(ContainerState.SCHEDULED, wc.c.getContainerState()); + ContainerLaunch launcher = wc.launcher.running.get(wc.c.getContainerId()); + launcher.call(); + wc.drainDispatcherEvents(); + assertEquals(ContainerState.EXITED_WITH_FAILURE, + wc.c.getContainerState()); + wc.killContainer(); + assertEquals(ContainerState.EXITED_WITH_FAILURE, + wc.c.getContainerState()); + assertNull(wc.c.getLocalizedResources()); + verifyDockerContainerCleanupCall(wc); + } finally { + if (wc != null) { + wc.finished(); + } + } + } @Test public void testResourceLocalizedOnLocalizationFailed() throws Exception { @@ -733,6 +968,29 @@ public void testLaunchAfterKillRequest() throws Exception { } } } + + @Test + public void testDockerContainerLaunchAfterKillRequest() throws Exception { + WrappedContainer wc = null; + try { + wc = new WrappedContainer(14, 314159265358979L, 4344, "yak"); + wc.setupDockerContainerEnv(); + wc.initContainer(); + wc.localizeResources(); + wc.killContainer(); + assertEquals(ContainerState.KILLING, wc.c.getContainerState()); + assertNull(wc.c.getLocalizedResources()); + wc.launchContainer(); + assertEquals(ContainerState.KILLING, wc.c.getContainerState()); + assertNull(wc.c.getLocalizedResources()); + wc.containerKilledOnRequest(); + verifyDockerContainerCleanupCall(wc); + } finally { + if (wc != null) { + wc.finished(); + } + } + } @Test public void testContainerRetry() throws Exception{ @@ -843,6 +1101,14 @@ private void verifyOutofBandHeartBeat(WrappedContainer wc) { verify(wc.context.getNodeStatusUpdater()).sendOutofBandHeartBeat(); } + private void verifyDockerContainerCleanupCall(WrappedContainer wc) + throws Exception { + DeletionService delService = wc.context.getDeletionService(); + verify(delService, times(1)).delete(argThat( + new DockerContainerDeletionMatcher(delService, + wc.c.getContainerId().toString()))); + } + private static class ResourcesReleasedMatcher extends ArgumentMatcher { final HashSet resources = @@ -971,6 +1237,7 @@ public boolean matches(Object o) { final Map localResources; final Map serviceData; final Context context = mock(Context.class); + private final DeletionService delService; private final Map initStateToEvent = new HashMap<>(); private final Map eventToFinalState = @@ -1004,6 +1271,7 @@ public boolean matches(Object o) { auxBus = mock(EventHandler.class); appBus = mock(EventHandler.class); LogBus = mock(EventHandler.class); + delService = mock(DeletionService.class); schedBus = new ContainerScheduler(context, dispatcher, metrics, 0) { @Override protected void scheduleContainer(Container container) { @@ -1081,6 +1349,7 @@ protected void scheduleContainer(Container container) { } when(ctxt.getServiceData()).thenReturn(serviceData); when(ctxt.getContainerRetryContext()).thenReturn(containerRetryContext); + when(context.getDeletionService()).thenReturn(delService); ContainerStateTransitionListener listener = new ContainerStateTransitionListener() { @Override @@ -1213,6 +1482,20 @@ public void containerResourcesCleanup() { drainDispatcherEvents(); } + public void dockerContainerResourcesCleanup() { + c.handle(new ContainerEvent(cId, + ContainerEventType.CONTAINER_RESOURCES_CLEANEDUP)); + verify(delService, times(1)).delete(argThat( + new DockerContainerDeletionMatcher(delService, cId.toString()))); + drainDispatcherEvents(); + } + + public void setupDockerContainerEnv() { + Map env = new HashMap<>(); + env.put(ContainerRuntimeConstants.ENV_CONTAINER_TYPE, "docker"); + when(this.ctxt.getEnvironment()).thenReturn(env); + } + public void containerFailed(int exitCode) { String diagnosticMsg = "Container completed with exit code " + exitCode; c.handle(new ContainerExitEvent(cId, diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/deletion/task/DockerContainerDeletionMatcher.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/deletion/task/DockerContainerDeletionMatcher.java new file mode 100644 index 0000000..8fa56fc --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/deletion/task/DockerContainerDeletionMatcher.java @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.yarn.server.nodemanager.containermanager.deletion.task; + +import org.apache.hadoop.yarn.server.nodemanager.DeletionService; +import org.mockito.ArgumentMatcher; + +/** + * ArgumentMatcher to check the arguments of the + * {@link DockerContainerDeletionTask}. + */ +public class DockerContainerDeletionMatcher + extends ArgumentMatcher { + + private final DeletionService delService; + private final String containerId; + + public DockerContainerDeletionMatcher(DeletionService delService, + String containerId) { + this.delService = delService; + this.containerId = containerId; + } + + @Override + public boolean matches(Object o) { + DockerContainerDeletionTask task = (DockerContainerDeletionTask)o; + if (task.getContainerId() == null && containerId == null) { + return true; + } + if (task.getContainerId() != null && containerId != null) { + return task.getContainerId().equals(containerId); + } + return false; + } +} \ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/deletion/task/TestDockerContainerDeletionTask.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/deletion/task/TestDockerContainerDeletionTask.java new file mode 100644 index 0000000..738a1dd --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/deletion/task/TestDockerContainerDeletionTask.java @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.yarn.server.nodemanager.containermanager.deletion.task; + +import org.apache.hadoop.yarn.proto.YarnServerNodemanagerRecoveryProtos; +import org.apache.hadoop.yarn.server.nodemanager.DeletionService; +import org.junit.Before; +import org.junit.Test; + +import static org.junit.Assert.assertEquals; +import static org.mockito.Mockito.mock; + +/** + * Test the attributes of the {@link DockerContainerDeletionTask} class. + */ +public class TestDockerContainerDeletionTask { + + private static final int ID = 0; + private static final String USER = "user"; + private static final String CONTAINER_ID = "container_e123_123456_000001"; + + private DeletionService deletionService; + private DockerContainerDeletionTask deletionTask; + + @Before + public void setUp() throws Exception { + deletionService = mock(DeletionService.class); + deletionTask = new DockerContainerDeletionTask(ID, deletionService, USER, + CONTAINER_ID); + } + + @Test + public void testGetUser() { + assertEquals(USER, deletionTask.getUser()); + } + + @Test + public void testGetContainerId() { + assertEquals(CONTAINER_ID, deletionTask.getContainerId()); + } + + @Test + public void testConvertDeletionTaskToProto() { + YarnServerNodemanagerRecoveryProtos.DeletionServiceDeleteTaskProto proto = + deletionTask.convertDeletionTaskToProto(); + assertEquals(ID, proto.getId()); + assertEquals(USER, proto.getUser()); + assertEquals(CONTAINER_ID, proto.getDockerContainerId()); + assertEquals(DeletionTaskType.DOCKER_CONTAINER.name(), proto.getTaskType()); + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/TestDockerContainerRuntime.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/TestDockerContainerRuntime.java index 4d32427..d3a8bc2 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/TestDockerContainerRuntime.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/TestDockerContainerRuntime.java @@ -38,7 +38,11 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationExecutor; import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.CGroupsHandler; import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandlerModule; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.docker.DockerCommandExecutor; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.docker.DockerKillCommand; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.docker.DockerRmCommand; import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.docker.DockerRunCommand; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.docker.DockerStopCommand; import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.docker.DockerVolumeCommand; import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.DockerCommandPlugin; import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.ResourcePlugin; @@ -89,12 +93,12 @@ import static org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.LinuxContainerRuntimeConstants.SIGNAL; import static org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.LinuxContainerRuntimeConstants.USER; import static org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.LinuxContainerRuntimeConstants.USER_LOCAL_DIRS; -import static org.mockito.Matchers.eq; import static org.mockito.Mockito.any; import static org.mockito.Mockito.anyBoolean; import static org.mockito.Mockito.anyList; import static org.mockito.Mockito.anyMap; import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.never; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; @@ -242,7 +246,7 @@ private PrivilegedOperation capturePrivilegedOperation() // warning annotation on the entire method verify(mockExecutor, times(1)) .executePrivilegedOperation(anyList(), opCaptor.capture(), any( - File.class), eq(null), eq(false), eq(false)); + File.class), anyMap(), anyBoolean(), anyBoolean()); //verification completed. we need to isolate specific invications. // hence, reset mock here @@ -1168,11 +1172,12 @@ public void testContainerLivelinessCheck() } @Test - public void testDockerStopOnTermSignal() + public void testDockerStopOnTermSignalWhenRunning() throws ContainerExecutionException, PrivilegedOperationException, IOException { List dockerCommands = getDockerCommandsForSignal( - ContainerExecutor.Signal.TERM); + ContainerExecutor.Signal.TERM, + DockerCommandExecutor.DockerContainerStatus.RUNNING); Assert.assertEquals(3, dockerCommands.size()); Assert.assertEquals("[docker-command-execution]", dockerCommands.get(0)); Assert.assertEquals(" docker-command=stop", dockerCommands.get(1)); @@ -1180,11 +1185,12 @@ public void testDockerStopOnTermSignal() } @Test - public void testDockerStopOnKillSignal() + public void testDockerStopOnKillSignalWhenRunning() throws ContainerExecutionException, PrivilegedOperationException, IOException { List dockerCommands = getDockerCommandsForSignal( - ContainerExecutor.Signal.KILL); + ContainerExecutor.Signal.KILL, + DockerCommandExecutor.DockerContainerStatus.RUNNING); Assert.assertEquals(3, dockerCommands.size()); Assert.assertEquals("[docker-command-execution]", dockerCommands.get(0)); Assert.assertEquals(" docker-command=stop", dockerCommands.get(1)); @@ -1192,24 +1198,57 @@ public void testDockerStopOnKillSignal() } @Test - public void testDockerStopOnQuitSignal() - throws ContainerExecutionException, PrivilegedOperationException, - IOException { + public void testDockerKillOnQuitSignalWhenRunning() throws Exception { List dockerCommands = getDockerCommandsForSignal( - ContainerExecutor.Signal.QUIT); - Assert.assertEquals(3, dockerCommands.size()); + ContainerExecutor.Signal.QUIT, + DockerCommandExecutor.DockerContainerStatus.RUNNING); + Assert.assertEquals(4, dockerCommands.size()); Assert.assertEquals("[docker-command-execution]", dockerCommands.get(0)); - Assert.assertEquals(" docker-command=stop", dockerCommands.get(1)); + Assert.assertEquals(" docker-command=kill", dockerCommands.get(1)); Assert.assertEquals(" name=container_id", dockerCommands.get(2)); + Assert.assertEquals(" signal=QUIT", dockerCommands.get(3)); + } + + @Test + public void testDockerRmOnWhenExited() throws Exception { + env.put(DockerLinuxContainerRuntime.ENV_DOCKER_CONTAINER_DELAYED_REMOVAL, + "false"); + conf.set(YarnConfiguration.NM_DOCKER_ALLOW_DELAYED_REMOVAL, "true"); + MockRuntime runtime = new MockRuntime(mockExecutor, + DockerCommandExecutor.DockerContainerStatus.EXITED, true); + builder.setExecutionAttribute(RUN_AS_USER, runAsUser) + .setExecutionAttribute(USER, user); + runtime.initialize(enableMockContainerExecutor(conf), null); + runtime.reapContainer(builder.build()); + verify(mockExecutor, times(1)) + .executePrivilegedOperation(anyList(), any(), any( + File.class), anyMap(), anyBoolean(), anyBoolean()); + } + + @Test + public void testNoDockerRmWhenDelayedDeletionEnabled() + throws Exception { + env.put(DockerLinuxContainerRuntime.ENV_DOCKER_CONTAINER_DELAYED_REMOVAL, + "true"); + conf.set(YarnConfiguration.NM_DOCKER_ALLOW_DELAYED_REMOVAL, "true"); + MockRuntime runtime = new MockRuntime(mockExecutor, + DockerCommandExecutor.DockerContainerStatus.EXITED, true); + builder.setExecutionAttribute(RUN_AS_USER, runAsUser) + .setExecutionAttribute(USER, user); + runtime.initialize(enableMockContainerExecutor(conf), null); + runtime.reapContainer(builder.build()); + verify(mockExecutor, never()) + .executePrivilegedOperation(anyList(), any(), any( + File.class), anyMap(), anyBoolean(), anyBoolean()); } private List getDockerCommandsForSignal( - ContainerExecutor.Signal signal) + ContainerExecutor.Signal signal, + DockerCommandExecutor.DockerContainerStatus status) throws ContainerExecutionException, PrivilegedOperationException, IOException { - DockerLinuxContainerRuntime runtime = new DockerLinuxContainerRuntime( - mockExecutor, mockCGroupsHandler); + MockRuntime runtime = new MockRuntime(mockExecutor, status, false); builder.setExecutionAttribute(RUN_AS_USER, runAsUser) .setExecutionAttribute(USER, user) .setExecutionAttribute(PID, signalPid) @@ -1576,4 +1615,69 @@ public void testDockerCapabilities() Assert.assertEquals("CHOWN", it.next()); Assert.assertEquals("DAC_OVERRIDE", it.next()); } + + class MockRuntime extends DockerLinuxContainerRuntime { + + private PrivilegedOperationExecutor privilegedOperationExecutor; + private DockerCommandExecutor.DockerContainerStatus containerStatus; + private boolean delayedRemovalAllowed; + + MockRuntime(PrivilegedOperationExecutor privilegedOperationExecutor, + DockerCommandExecutor.DockerContainerStatus containerStatus, + boolean delayedRemovalAllowed) { + super(privilegedOperationExecutor); + this.privilegedOperationExecutor = privilegedOperationExecutor; + this.containerStatus = containerStatus; + this.delayedRemovalAllowed = delayedRemovalAllowed; + } + + @Override public void signalContainer(ContainerRuntimeContext ctx) + throws ContainerExecutionException { + ContainerExecutor.Signal signal = ctx.getExecutionAttribute(SIGNAL); + String containerName = ctx.getContainer().getContainerId().toString(); + Map environment = + ctx.getContainer().getLaunchContext().getEnvironment(); + try { + if (ContainerExecutor.Signal.KILL.equals(signal) + || ContainerExecutor.Signal.TERM.equals(signal)) { + if (DockerCommandExecutor.isStoppable(containerStatus)) { + DockerStopCommand dockerStopCommand = + new DockerStopCommand(containerName); + DockerCommandExecutor.executeDockerCommand(dockerStopCommand, + containerName, environment, conf, mockExecutor, false); + } + } else { + if (DockerCommandExecutor.isKillable(containerStatus)) { + DockerKillCommand dockerKillCommand = + new DockerKillCommand(containerName); + dockerKillCommand.setSignal(signal.name()); + DockerCommandExecutor.executeDockerCommand(dockerKillCommand, + containerName, environment, conf, mockExecutor, false); + } + } + } catch (ContainerExecutionException e) { + LOG.warn("Signal docker container failed. Exception: ", e); + throw new ContainerExecutionException("Signal docker container failed", + e.getExitCode(), e.getOutput(), e.getErrorOutput()); + } + } + + @Override + public void reapContainer(ContainerRuntimeContext ctx) + throws ContainerExecutionException { + String delayedRemoval = env.get(ENV_DOCKER_CONTAINER_DELAYED_REMOVAL); + if (delayedRemovalAllowed && delayedRemoval != null + && delayedRemoval.equalsIgnoreCase("true")) { + LOG.info("Delayed removal requested and allowed, skipping removal - " + + containerId); + } else { + if (DockerCommandExecutor.isRemovable(containerStatus)) { + DockerRmCommand dockerRmCommand = new DockerRmCommand(containerId); + DockerCommandExecutor + .executeDockerCommand(dockerRmCommand, containerId, env, conf, + privilegedOperationExecutor, false); + } + } + } + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/docker/TestDockerCommandExecutor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/docker/TestDockerCommandExecutor.java index c362787..94da90b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/docker/TestDockerCommandExecutor.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/docker/TestDockerCommandExecutor.java @@ -19,6 +19,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; +import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.MockPrivilegedOperationCaptor; import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperation; @@ -42,6 +43,8 @@ import static org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.LinuxContainerRuntimeConstants.CONTAINER_ID_STR; import static org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.docker.DockerCommandExecutor.DockerContainerStatus; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; import static org.mockito.Matchers.any; import static org.mockito.Matchers.eq; import static org.mockito.Mockito.mock; @@ -93,9 +96,8 @@ public void setUp() throws Exception { public void testExecuteDockerCommand() throws Exception { DockerStopCommand dockerStopCommand = new DockerStopCommand(MOCK_CONTAINER_ID); - DockerCommandExecutor - .executeDockerCommand(dockerStopCommand, cId.toString(), env, - configuration, mockExecutor, false); + DockerCommandExecutor.executeDockerCommand(dockerStopCommand, + cId.toString(), env, configuration, mockExecutor, false); List ops = MockPrivilegedOperationCaptor .capturePrivilegedOperations(mockExecutor, 1, true); assertEquals(1, ops.size()); @@ -106,9 +108,8 @@ public void testExecuteDockerCommand() throws Exception { @Test public void testExecuteDockerRm() throws Exception { DockerRmCommand dockerCommand = new DockerRmCommand(MOCK_CONTAINER_ID); - DockerCommandExecutor - .executeDockerCommand(dockerCommand, MOCK_CONTAINER_ID, env, - configuration, mockExecutor, false); + DockerCommandExecutor.executeDockerCommand(dockerCommand, MOCK_CONTAINER_ID, + env, configuration, mockExecutor, false); List ops = MockPrivilegedOperationCaptor .capturePrivilegedOperations(mockExecutor, 1, true); List dockerCommands = getValidatedDockerCommands(ops); @@ -124,9 +125,8 @@ public void testExecuteDockerRm() throws Exception { @Test public void testExecuteDockerStop() throws Exception { DockerStopCommand dockerCommand = new DockerStopCommand(MOCK_CONTAINER_ID); - DockerCommandExecutor - .executeDockerCommand(dockerCommand, MOCK_CONTAINER_ID, env, - configuration, mockExecutor, false); + DockerCommandExecutor.executeDockerCommand(dockerCommand, MOCK_CONTAINER_ID, + env, configuration, mockExecutor, false); List ops = MockPrivilegedOperationCaptor .capturePrivilegedOperations(mockExecutor, 1, true); List dockerCommands = getValidatedDockerCommands(ops); @@ -143,9 +143,8 @@ public void testExecuteDockerStop() throws Exception { public void testExecuteDockerInspectStatus() throws Exception { DockerInspectCommand dockerCommand = new DockerInspectCommand(MOCK_CONTAINER_ID).getContainerStatus(); - DockerCommandExecutor - .executeDockerCommand(dockerCommand, MOCK_CONTAINER_ID, env, - configuration, mockExecutor, false); + DockerCommandExecutor.executeDockerCommand(dockerCommand, MOCK_CONTAINER_ID, + env, configuration, mockExecutor, false); List ops = MockPrivilegedOperationCaptor .capturePrivilegedOperations(mockExecutor, 1, true); List dockerCommands = getValidatedDockerCommands(ops); @@ -164,9 +163,8 @@ public void testExecuteDockerInspectStatus() throws Exception { public void testExecuteDockerPull() throws Exception { DockerPullCommand dockerCommand = new DockerPullCommand(MOCK_IMAGE_NAME); - DockerCommandExecutor - .executeDockerCommand(dockerCommand, MOCK_CONTAINER_ID, env, - configuration, mockExecutor, false); + DockerCommandExecutor.executeDockerCommand(dockerCommand, MOCK_CONTAINER_ID, + env, configuration, mockExecutor, false); List ops = MockPrivilegedOperationCaptor .capturePrivilegedOperations(mockExecutor, 1, true); List dockerCommands = getValidatedDockerCommands(ops); @@ -183,9 +181,8 @@ public void testExecuteDockerPull() throws Exception { public void testExecuteDockerLoad() throws Exception { DockerLoadCommand dockerCommand = new DockerLoadCommand(MOCK_LOCAL_IMAGE_NAME); - DockerCommandExecutor - .executeDockerCommand(dockerCommand, MOCK_CONTAINER_ID, env, - configuration, mockExecutor, false); + DockerCommandExecutor.executeDockerCommand(dockerCommand, MOCK_CONTAINER_ID, + env, configuration, mockExecutor, false); List ops = MockPrivilegedOperationCaptor .capturePrivilegedOperations(mockExecutor, 1, true); List dockerCommands = getValidatedDockerCommands(ops); @@ -206,11 +203,140 @@ public void testGetContainerStatus() throws Exception { when(mockExecutor.executePrivilegedOperation(eq(null), any(PrivilegedOperation.class), eq(null), any(), eq(true), eq(false))) .thenReturn(status.getName()); - assertEquals(status, DockerCommandExecutor - .getContainerStatus(MOCK_CONTAINER_ID, configuration, mockExecutor)); + assertEquals(status, DockerCommandExecutor.getContainerStatus( + MOCK_CONTAINER_ID, configuration, mockExecutor)); } } + @Test + public void testExecuteDockerKillSIGQUIT() throws Exception { + DockerKillCommand dockerKillCommand = + new DockerKillCommand(MOCK_CONTAINER_ID) + .setSignal(ContainerExecutor.Signal.QUIT.name()); + DockerCommandExecutor.executeDockerCommand(dockerKillCommand, + MOCK_CONTAINER_ID, env, configuration, mockExecutor, false); + List ops = MockPrivilegedOperationCaptor + .capturePrivilegedOperations(mockExecutor, 1, true); + List dockerCommands = getValidatedDockerCommands(ops); + assertEquals(1, ops.size()); + assertEquals(PrivilegedOperation.OperationType.RUN_DOCKER_CMD.name(), + ops.get(0).getOperationType().name()); + assertEquals(4, dockerCommands.size()); + assertEquals("[docker-command-execution]", dockerCommands.get(0)); + assertEquals(" docker-command=kill", dockerCommands.get(1)); + assertEquals(" name=" + MOCK_CONTAINER_ID, dockerCommands.get(2)); + assertEquals(" signal=" + ContainerExecutor.Signal.QUIT.name(), + dockerCommands.get(3)); + } + + @Test + public void testExecuteDockerKillSIGKILL() throws Exception { + DockerKillCommand dockerKillCommand = + new DockerKillCommand(MOCK_CONTAINER_ID) + .setSignal(ContainerExecutor.Signal.KILL.name()); + DockerCommandExecutor.executeDockerCommand(dockerKillCommand, + MOCK_CONTAINER_ID, env, configuration, mockExecutor, false); + List ops = MockPrivilegedOperationCaptor + .capturePrivilegedOperations(mockExecutor, 1, true); + List dockerCommands = getValidatedDockerCommands(ops); + assertEquals(1, ops.size()); + assertEquals(PrivilegedOperation.OperationType.RUN_DOCKER_CMD.name(), + ops.get(0).getOperationType().name()); + assertEquals(4, dockerCommands.size()); + assertEquals("[docker-command-execution]", dockerCommands.get(0)); + assertEquals(" docker-command=kill", dockerCommands.get(1)); + assertEquals(" name=" + MOCK_CONTAINER_ID, dockerCommands.get(2)); + assertEquals(" signal=" + ContainerExecutor.Signal.KILL.name(), + dockerCommands.get(3)); + } + + @Test + public void testExecuteDockerKillSIGTERM() throws Exception { + DockerKillCommand dockerKillCommand = + new DockerKillCommand(MOCK_CONTAINER_ID) + .setSignal(ContainerExecutor.Signal.TERM.name()); + DockerCommandExecutor.executeDockerCommand(dockerKillCommand, + MOCK_CONTAINER_ID, env, configuration, mockExecutor, false); + List ops = MockPrivilegedOperationCaptor + .capturePrivilegedOperations(mockExecutor, 1, true); + List dockerCommands = getValidatedDockerCommands(ops); + assertEquals(1, ops.size()); + assertEquals(PrivilegedOperation.OperationType.RUN_DOCKER_CMD.name(), + ops.get(0).getOperationType().name()); + assertEquals(4, dockerCommands.size()); + assertEquals("[docker-command-execution]", dockerCommands.get(0)); + assertEquals(" docker-command=kill", dockerCommands.get(1)); + assertEquals(" name=" + MOCK_CONTAINER_ID, dockerCommands.get(2)); + assertEquals(" signal=" + ContainerExecutor.Signal.TERM.name(), + dockerCommands.get(3)); + } + + @Test + public void testIsStoppable() { + assertTrue(DockerCommandExecutor.isStoppable( + DockerContainerStatus.RUNNING)); + assertTrue(DockerCommandExecutor.isStoppable( + DockerContainerStatus.RESTARTING)); + assertFalse(DockerCommandExecutor.isStoppable( + DockerContainerStatus.EXITED)); + assertFalse(DockerCommandExecutor.isStoppable( + DockerContainerStatus.CREATED)); + assertFalse(DockerCommandExecutor.isStoppable( + DockerContainerStatus.DEAD)); + assertFalse(DockerCommandExecutor.isStoppable( + DockerContainerStatus.NONEXISTENT)); + assertFalse(DockerCommandExecutor.isStoppable( + DockerContainerStatus.REMOVING)); + assertFalse(DockerCommandExecutor.isStoppable( + DockerContainerStatus.STOPPED)); + assertFalse(DockerCommandExecutor.isStoppable( + DockerContainerStatus.UNKNOWN)); + } + + @Test + public void testIsKIllable() { + assertTrue(DockerCommandExecutor.isKillable( + DockerContainerStatus.RUNNING)); + assertTrue(DockerCommandExecutor.isKillable( + DockerContainerStatus.RESTARTING)); + assertFalse(DockerCommandExecutor.isKillable( + DockerContainerStatus.EXITED)); + assertFalse(DockerCommandExecutor.isKillable( + DockerContainerStatus.CREATED)); + assertFalse(DockerCommandExecutor.isKillable( + DockerContainerStatus.DEAD)); + assertFalse(DockerCommandExecutor.isKillable( + DockerContainerStatus.NONEXISTENT)); + assertFalse(DockerCommandExecutor.isKillable( + DockerContainerStatus.REMOVING)); + assertFalse(DockerCommandExecutor.isKillable( + DockerContainerStatus.STOPPED)); + assertFalse(DockerCommandExecutor.isKillable( + DockerContainerStatus.UNKNOWN)); + } + + @Test + public void testIsRemovable() { + assertTrue(DockerCommandExecutor.isRemovable( + DockerContainerStatus.STOPPED)); + assertTrue(DockerCommandExecutor.isRemovable( + DockerContainerStatus.RESTARTING)); + assertTrue(DockerCommandExecutor.isRemovable( + DockerContainerStatus.EXITED)); + assertTrue(DockerCommandExecutor.isRemovable( + DockerContainerStatus.CREATED)); + assertTrue(DockerCommandExecutor.isRemovable( + DockerContainerStatus.DEAD)); + assertFalse(DockerCommandExecutor.isRemovable( + DockerContainerStatus.NONEXISTENT)); + assertFalse(DockerCommandExecutor.isRemovable( + DockerContainerStatus.REMOVING)); + assertFalse(DockerCommandExecutor.isRemovable( + DockerContainerStatus.UNKNOWN)); + assertFalse(DockerCommandExecutor.isRemovable( + DockerContainerStatus.RUNNING)); + } + private List getValidatedDockerCommands( List ops) throws IOException { try { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/docker/TestDockerKillCommand.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/docker/TestDockerKillCommand.java new file mode 100644 index 0000000..cd3de2a --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/docker/TestDockerKillCommand.java @@ -0,0 +1,61 @@ +/* + * * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * / + */ + +package org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.docker; + +import static org.junit.Assert.assertEquals; + +import org.apache.hadoop.util.StringUtils; +import org.junit.Before; +import org.junit.Test; + +/** + * Tests the docker kill command and its command line arguments. + */ +public class TestDockerKillCommand { + + private DockerKillCommand dockerKillCommand; + + private static final String SIGNAL = "SIGUSR2"; + private static final String CONTAINER_NAME = "foo"; + + @Before + public void setup() { + dockerKillCommand = new DockerKillCommand(CONTAINER_NAME); + } + + @Test + public void testGetCommandOption() { + assertEquals("kill", dockerKillCommand.getCommandOption()); + } + + @Test + public void testSetGracePeriod() { + dockerKillCommand.setSignal(SIGNAL); + assertEquals("kill", StringUtils.join(",", + dockerKillCommand.getDockerCommandWithArguments() + .get("docker-command"))); + assertEquals("foo", StringUtils.join(",", + dockerKillCommand.getDockerCommandWithArguments().get("name"))); + assertEquals("SIGUSR2", StringUtils.join(",", + dockerKillCommand.getDockerCommandWithArguments().get("signal"))); + assertEquals(3, dockerKillCommand.getDockerCommandWithArguments().size()); + } +} \ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainersMonitorResourceChange.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainersMonitorResourceChange.java index 2cca277..d7d826c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainersMonitorResourceChange.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainersMonitorResourceChange.java @@ -42,6 +42,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerImpl; import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitorImpl.ProcessTreeInfo; import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerLivenessContext; +import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerReapContext; import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerSignalContext; import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerStartContext; import org.apache.hadoop.yarn.server.nodemanager.executor.DeletionAsUserContext; @@ -90,6 +91,11 @@ public boolean signalContainer(ContainerSignalContext ctx) return true; } @Override + public boolean reapContainer(ContainerReapContext ctx) + throws IOException { + return true; + } + @Override public void deleteAsUser(DeletionAsUserContext ctx) throws IOException, InterruptedException { } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/executor/TestContainerReapContext.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/executor/TestContainerReapContext.java new file mode 100644 index 0000000..2cfe29a --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/executor/TestContainerReapContext.java @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.yarn.server.nodemanager.executor; + +import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; +import org.junit.Before; +import org.junit.Test; + +import static org.junit.Assert.*; +import static org.mockito.Mockito.mock; + +/** + * Test the attributes of the {@link ContainerReapContext}. + */ +public class TestContainerReapContext { + private final static String USER = "user"; + + private Container container; + private ContainerReapContext context; + + @Before + public void setUp() { + container = mock(Container.class); + context = new ContainerReapContext.Builder() + .setUser(USER) + .setContainer(container) + .build(); + } + + @Test + public void getContainer() { + assertEquals(container, context.getContainer()); + } + + @Test + public void getUser() { + assertEquals(USER, context.getUser()); + } +} \ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/DockerContainers.md b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/DockerContainers.md index 1a50c92..a3e4105 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/DockerContainers.md +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/DockerContainers.md @@ -291,6 +291,7 @@ environment variables in the application's environment: | `YARN_CONTAINER_RUNTIME_DOCKER_RUN_PRIVILEGED_CONTAINER` | Controls whether the Docker container is a privileged container. In order to use privileged containers, the yarn.nodemanager.runtime.linux.docker.privileged-containers.allowed property must be set to true, and the application owner must appear in the value of the yarn.nodemanager.runtime.linux.docker.privileged-containers.acl property. If this environment variable is set to true, a privileged Docker container will be used if allowed. No other value is allowed, so the environment variable should be left unset rather than setting it to false. | | `YARN_CONTAINER_RUNTIME_DOCKER_LOCAL_RESOURCE_MOUNTS` | Adds additional volume mounts to the Docker container. The value of the environment variable should be a comma-separated list of mounts. All such mounts must be given as "source:dest", where the source is an absolute path that is not a symlink and that points to a localized resource. Note that as of YARN-5298, localized directories are automatically mounted into the container as volumes. | | `YARN_CONTAINER_RUNTIME_DOCKER_MOUNTS` | Adds additional volume mounts to the Docker container. The value of the environment variable should be a comma-separated list of mounts. All such mounts must be given as "source:dest:mode" and the mode must be "ro" (read-only) or "rw" (read-write) to specify the type of access being requested. The requested mounts will be validated by container-executor based on the values set in container-executor.cfg for docker.allowed.ro-mounts and docker.allowed.rw-mounts. | +| `YARN_CONTAINER_RUNTIME_DOCKER_DELAYED_REMOVAL` | Allows a user to request delayed deletion of the Docker container on a per container basis. If true, Docker containers will not be removed until the duration defined by yarn.nodemanager.delete.debug-delay-sec has elapsed. Administrators can disable this feature through the yarn-site property yarn.nodemanager.runtime.linux.docker.delayed-removal.allowed. This feature is disabled by default. When this feature is disabled or set to false, the container will be removed as soon as it exits. | The first two are required. The remainder can be set as needed. While controlling the container type through environment variables is somewhat less