diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java
index f75ead2e0dd..8ebddb06175 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java
@@ -764,6 +764,8 @@ public boolean signalContainer(ContainerSignalContext ctx)
   @Override
   public boolean reapContainer(ContainerReapContext ctx) throws IOException {
     Container container = ctx.getContainer();
+    postComplete(container.getContainerId());
+
     String user = ctx.getUser();
     String runAsUser = getRunAsUser(user);
     ContainerRuntimeContext runtimeContext = new ContainerRuntimeContext
@@ -783,8 +785,6 @@ public boolean reapContainer(ContainerReapContext ctx) throws IOException {
       logOutput(e.getOutput());
       throw new IOException("Error in reaping container "
           + container.getContainerId().toString() + " exit = " + retCode, e);
-    } finally {
-      postComplete(container.getContainerId());
     }
     return true;
   }
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/privileged/PrivilegedOperation.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/privileged/PrivilegedOperation.java
index 92a82e8fbcd..76dc52d32f3 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/privileged/PrivilegedOperation.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/privileged/PrivilegedOperation.java
@@ -56,7 +56,8 @@
   LIST_AS_USER(""), // no CLI switch supported yet.
   ADD_NUMA_PARAMS(""), // no CLI switch supported yet.
   REMOVE_DOCKER_CONTAINER("--remove-docker-container"),
-  INSPECT_DOCKER_CONTAINER("--inspect-docker-container");
+  INSPECT_DOCKER_CONTAINER("--inspect-docker-container"),
+  CLEAN_DOCKER_CGROUPS("--clean-docker-cgroups");
 
   private final String option;
 
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DockerLinuxContainerRuntime.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DockerLinuxContainerRuntime.java
index 1872830533f..04758194a63 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DockerLinuxContainerRuntime.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DockerLinuxContainerRuntime.java
@@ -1280,6 +1280,27 @@ private void handleContainerKill(ContainerRuntimeContext ctx,
     }
   }
 
+  private void cleanCGroups(String containerId) {
+    // Nothing to do if cgroups are not being used
+    if (cGroupsHandler == null) {
+      return;
+    }
+    String cGroupPath = cGroupsHandler.getRelativePathForCGroup("");
+    if (cGroupPath == null || cGroupPath.isEmpty()) {
+      return;
+    }
+    cGroupPath = cGroupPath.replaceAll("/$", "");
+    try {
+      PrivilegedOperation cleanCGroupsOp = new PrivilegedOperation(
+          PrivilegedOperation.OperationType.CLEAN_DOCKER_CGROUPS, cGroupPath);
+      cleanCGroupsOp.appendArgs(containerId);
+      privilegedOperationExecutor.executePrivilegedOperation(
+          cleanCGroupsOp, false);
+    } catch (PrivilegedOperationException e) {
+      LOG.warn("Exception in DockerLinuxContainerRuntime.cleanCGroups", e);
+    }
+  }
+
   private void handleContainerRemove(String containerId,
       Map<String, String> env) throws ContainerExecutionException {
     String delayedRemoval = env.get(ENV_DOCKER_CONTAINER_DELAYED_REMOVAL);
@@ -1296,6 +1317,7 @@ private void handleContainerRemove(String containerId,
       DockerCommandExecutor.executeDockerCommand(dockerRmCommand, containerId,
           env, privilegedOperationExecutor, false, nmContext);
     }
+    cleanCGroups(containerId);
   }
 }
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c
index f8b89ee2d67..15d267cfc21 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c
@@ -46,6 +46,7 @@
 #include
 #include
 #include
+#include
 
 #ifndef HAVE_FCHMODAT
 #include "compat/fchmodat.h"
@@ -2477,6 +2478,132 @@ cleanup:
 #endif
 }
 
+#define CONTAINER_ID_PREFIX "container_"
+
+int clean_docker_cgroups_internal(const char *mount_table,
+                                  const char *yarn_hierarchy,
+                                  const char* container_id) {
+#ifndef __linux
+  fprintf(LOGFILE, "Failed to clean cgroups, not supported\n");
+  return -1;
+#else
+  const char * CGROUP_MOUNT = "cgroup";
+  char *mnt_type = NULL;
+  char *mnt_dir = NULL;
+  char *full_path = NULL;
+  char *lineptr = NULL;
+  FILE *fp = NULL;
+  int rc = 0;
+  size_t buf_size = 0;
+
+  if (!mount_table || mount_table[0] == 0) {
+    fprintf(ERRORFILE, "clean_docker_cgroups: Invalid mount table\n");
+    rc = -1;
+    goto cleanup;
+  }
+  if (!yarn_hierarchy || yarn_hierarchy[0] == 0) {
+    fprintf(ERRORFILE, "clean_docker_cgroups: Invalid yarn_hierarchy\n");
+    rc = -1;
+    goto cleanup;
+  }
+  if (!container_id || container_id[0] == 0) {
+    fprintf(ERRORFILE, "clean_docker_cgroups: Invalid container_id\n");
+    rc = -1;
+    goto cleanup;
+  }
+  if (strncmp(container_id, CONTAINER_ID_PREFIX, (sizeof(CONTAINER_ID_PREFIX) - 1)) != 0) {
+    fprintf(ERRORFILE, "clean_docker_cgroups: Invalid container_id: %s\n", container_id);
+    rc = -1;
+    goto cleanup;
+  }
+  full_path = malloc(PATH_MAX);
+  if (!full_path) {
+    fprintf(ERRORFILE, "clean_docker_cgroups: Failed to allocate memory for cgroup paths.\n");
+    rc = -1;
+    goto cleanup;
+  }
+  fp = fopen(mount_table, "r");
+  if (fp == NULL) {
+    fprintf(ERRORFILE, "clean_docker_cgroups: failed to open %s, error %d: %s\n",
+            mount_table, errno, strerror(errno));
+    rc = -1;
+    goto cleanup;
+  }
+
+  // Walk /proc/mounts and find cgroup mounts
+  while (getline(&lineptr, &buf_size, fp) != -1) {
+    // Free these from the last iteration, if set
+    free(mnt_type);
+    free(mnt_dir);
+    int ret = 0;
+    ret = sscanf(lineptr, " %ms %ms %*s %*s %*s %*s", &mnt_type, &mnt_dir);
+    if (ret != 2) {
+      fprintf(ERRORFILE, "clean_docker_cgroups: Failed to parse line: %s\n", lineptr);
+      rc = -1;
+      break;
+    }
+    if ((mnt_type == NULL) || (strcmp(mnt_type, CGROUP_MOUNT) != 0)) {
+      continue;
+    }
+    if ((mnt_dir == NULL) || (mnt_dir[0] == 0)) {
+      fprintf(ERRORFILE, "clean_docker_cgroups: skipping mount entry with invalid mnt_dir\n");
+      continue;
+    }
+
+    full_path[0] = '\0';
+    if (snprintf(full_path, PATH_MAX, "%s/%s/%s",
+                 mnt_dir, yarn_hierarchy, container_id) < 0) {
+      fprintf(ERRORFILE, "clean_docker_cgroups: Failed to print output path.\n");
+      rc = -1;
+      break;
+    }
+
+    // Make sure path is clean
+    if (!verify_path_safety(full_path)) {
+      fprintf(ERRORFILE,
+              "clean_docker_cgroups: skipping invalid path: %s\n", full_path);
+      continue;
+    }
+
+    // If the dir does not exist, there is nothing to delete
+    if (dir_exists(full_path) != 0) {
+      fprintf(ERRORFILE, "clean_docker_cgroups: skipping path (does not exist) - %s\n", full_path);
+      continue;
+    }
+
+    fprintf(LOGFILE, "Cleaning cgroup: path=%s\n", full_path);
+
+    // rmdir returns -1 on failure and sets errno; tolerate EBUSY (cgroup still in use)
+    ret = rmdir(full_path);
+    if ((ret == -1) && (errno != EBUSY)) {
+      fprintf(ERRORFILE, "clean_docker_cgroups: Failed to rmdir cgroup, %s (error=%s)\n",
+              full_path, strerror(errno));
+      rc = -1;
+      continue;
+    }
+  }
+  if (ferror(fp)) {
+    fprintf(ERRORFILE, "clean_docker_cgroups: Error reading %s, error=%d (%s)\n",
+            mount_table, errno, strerror(errno));
+    rc = -1;
+  }
+  free(lineptr);
+
+cleanup:
+  free(mnt_type);
+  free(mnt_dir);
+  free(full_path);
+  if (fp != NULL) {
+    fclose(fp);
+  }
+  return rc;
+#endif
+}
+
+int clean_docker_cgroups(const char *yarn_hierarchy, const char* container_id) {
+  const char *PROC_MOUNT_PATH = "/proc/mounts";
+  return clean_docker_cgroups_internal(PROC_MOUNT_PATH, yarn_hierarchy, container_id);
+}
+
 static int run_traffic_control(const char *opts[], char *command_file) {
   const int max_tc_args = 16;
   const char *args[max_tc_args];
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.h hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.h
index 002f85f34d0..aba5e085259 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.h
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.h
@@ -49,7 +49,8 @@ enum operations {
   RUN_DOCKER = 11,
   RUN_AS_USER_LIST = 12,
   REMOVE_DOCKER_CONTAINER = 13,
-  INSPECT_DOCKER_CONTAINER = 14
+  INSPECT_DOCKER_CONTAINER = 14,
+  CLEAN_DOCKER_CGROUPS = 15
 };
 
 #define NM_GROUP_KEY "yarn.nodemanager.linux-container-executor.group"
@@ -292,3 +293,16 @@ struct configuration* get_cfg();
  * Flatten docker launch command
  */
 char* flatten(char **args);
+
+/**
+ * Clean up cgroups for the container
+ * - yarn_hierarchy: e.g. hadoop-yarn
+ * - container_id: e.g. container_x_y
+ * returns 0 if succeeded
+ */
+int clean_docker_cgroups(const char *yarn_hierarchy, const char* container_id);
+
+/**
+ * For testing
+ */
+int clean_docker_cgroups_internal(const char *mount_table, const char *yarn_hierarchy, const char* container_id);
\ No newline at end of file
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/main.c hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/main.c
index 93691f95893..bec12fd830a 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/main.c
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/main.c
@@ -55,13 +55,15 @@ static void display_usage(FILE *stream) {
     fprintf(stream,
       "            container-executor --run-docker \n"
       "            container-executor --remove-docker-container \n"
-      "            container-executor --inspect-docker-container \n");
+      "            container-executor --inspect-docker-container \n"
+      "            container-executor --clean-docker-cgroups <hierarchy> <container-id>\n");
   } else {
     fprintf(stream,
       "[DISABLED] container-executor --run-docker \n"
       "[DISABLED] container-executor --remove-docker-container \n"
       "[DISABLED] container-executor --inspect-docker-container "
-      " ... \n");
+      " ... \n"
+      "[DISABLED] container-executor --clean-docker-cgroups <hierarchy> <container-id>\n");
   }
 
   fprintf(stream,
@@ -379,6 +381,23 @@ static int validate_arguments(int argc, char **argv , int *operation) {
     }
   }
 
+  if (strcmp("--clean-docker-cgroups", argv[1]) == 0) {
+    if (is_docker_support_enabled()) {
+      if (argc != 4) {
+        display_usage(stdout);
+        return INVALID_ARGUMENT_NUMBER;
+      }
+      optind++;
+      cmd_input.cgroups_hierarchy = argv[optind++];
+      cmd_input.container_id = argv[optind++];
+      *operation = CLEAN_DOCKER_CGROUPS;
+      return 0;
+    } else {
+      display_feature_disabled_message("clean docker cgroups");
+      return FEATURE_DISABLED;
+    }
+  }
+
   /* Now we have to validate 'run as user' operations that don't use a
      'long option' - we should fix this at some point.
      The validation/argument parsing here is extensive enough that it done
      in a separate function */
@@ -599,6 +618,9 @@ int main(int argc, char **argv) {
   case INSPECT_DOCKER_CONTAINER:
     exit_code = exec_docker_command("inspect", argv, argc, optind);
     break;
+  case CLEAN_DOCKER_CGROUPS:
+    exit_code = clean_docker_cgroups(cmd_input.cgroups_hierarchy, cmd_input.container_id);
+    break;
   case RUN_AS_USER_INITIALIZE_CONTAINER:
     exit_code = set_user(cmd_input.run_as_user_name);
     if (exit_code != 0) {
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/test-container-executor.c hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/test-container-executor.c
index 5607823c666..ea39ecb3710 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/test-container-executor.c
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/test-container-executor.c
@@ -23,6 +23,7 @@
 #include "test/test-container-executor-common.h"
 #include
+#include
 #include
 #include
 #include
@@ -32,6 +33,8 @@
 #include
 #include
 #include
+#include
+
 
 static char* username = NULL;
 static char* yarn_username = NULL;
@@ -1224,6 +1227,145 @@ void test_is_empty() {
   }
 }
 
+#define TCE_FAKE_CGROOT TEST_ROOT "/cgroup_root"
+#define TCE_NUM_CG_CONTROLLERS 6
+
+void test_cleaning_docker_cgroups() {
+  const char *controllers[TCE_NUM_CG_CONTROLLERS] = { "blkio", "cpu", "cpuset", "devices", "memory", "systemd" };
+  const char *yarn_hierarchy = "hadoop-yarn";
+  const char *fake_mount_table = TEST_ROOT "/fake_mounts";
+  const char *container_id = "container_123456789_1234";
+  const char *other_container_id = "container_98765432_4321";
+  char cgroup_paths[TCE_NUM_CG_CONTROLLERS][PATH_MAX];
+  char container_paths[TCE_NUM_CG_CONTROLLERS][PATH_MAX];
+  char other_container_paths[TCE_NUM_CG_CONTROLLERS][PATH_MAX];
+
+  printf("\nTesting clean_docker_cgroups\n");
+
+  // Setup fake mount table
+  FILE *file;
+  file = fopen(fake_mount_table, "w");
+  if (file == NULL) {
+    printf("Failed to open %s.\n", fake_mount_table);
+    exit(1);
+  }
+  fprintf(file, "rootfs " TEST_ROOT "/fake_root rootfs rw 0 0\n");
+  fprintf(file, "sysfs " TEST_ROOT "/fake_sys sysfs rw,nosuid,nodev,noexec,relatime 0 0\n");
+  fprintf(file, "proc " TEST_ROOT "/fake_proc proc rw,nosuid,nodev,noexec,relatime 0 0\n");
+  for (int i = 0; i < TCE_NUM_CG_CONTROLLERS; i++) {
+    fprintf(file, "cgroup %s/%s cgroup rw,nosuid,nodev,noexec,relatime,%s 0 0\n",
+            TCE_FAKE_CGROOT, controllers[i], controllers[i]);
+  }
+  fprintf(file, "/dev/vda " TEST_ROOT "/fake_root ext4 rw,relatime,data=ordered 0 0\n");
+  fclose(file);
+
+  // Test with null inputs
+  int ret = clean_docker_cgroups_internal(NULL, yarn_hierarchy, container_id);
+  if (ret != -1) {
+    printf("FAIL: clean_docker_cgroups_internal with NULL mount table should fail\n");
+    exit(1);
+  }
+  ret = clean_docker_cgroups_internal(fake_mount_table, NULL, container_id);
+  if (ret != -1) {
+    printf("FAIL: clean_docker_cgroups_internal with NULL yarn_hierarchy should fail\n");
+    exit(1);
+  }
+  ret = clean_docker_cgroups_internal(fake_mount_table, yarn_hierarchy, NULL);
+  if (ret != -1) {
+    printf("FAIL: clean_docker_cgroups_internal with NULL container_id should fail\n");
+    exit(1);
+  }
+
+  // Test with invalid container_id
+  ret = clean_docker_cgroups_internal(fake_mount_table,
yarn_hierarchy, "not_a_container_123"); + if (ret != -1) { + printf("FAIL: clean_docker_cgroups_internal with invalid container_id should fail\n"); + exit(1); + } + if (mkdir(TCE_FAKE_CGROOT, 0755) != 0) { + printf("FAIL: failed to mkdir " TCE_FAKE_CGROOT "\n"); + exit(1); + } + for (int i = 0; i < TCE_NUM_CG_CONTROLLERS; i++) { + snprintf(cgroup_paths[i], PATH_MAX, TCE_FAKE_CGROOT "/%s/%s", controllers[i], yarn_hierarchy); + if (mkdirs(cgroup_paths[i], 0755) != 0) { + printf("FAIL: failed to mkdir %s\n", cgroup_paths[i]); + exit(1); + } + } + for (int i = 0; i < TCE_NUM_CG_CONTROLLERS; i++) { + DIR *dir = NULL; + dir = opendir(cgroup_paths[i]); + if (dir == NULL) { + printf("FAIL: failed to open dir %s\n", cgroup_paths[i]); + exit(1); + } + closedir(dir); + } + // Test before creating any containers + ret = clean_docker_cgroups_internal(fake_mount_table, yarn_hierarchy, container_id); + if (ret != 0) { + printf("FAIL: failed to clean cgroups: mt=%s, yh=%s, cId=%s\n", + fake_mount_table, yarn_hierarchy, container_id); + } + // make sure hadoop-yarn dirs are still there + for (int i = 0; i < TCE_NUM_CG_CONTROLLERS; i++) { + DIR *dir = NULL; + dir = opendir(cgroup_paths[i]); + if (dir == NULL) { + printf("FAIL: failed to open dir %s\n", cgroup_paths[i]); + exit(1); + } + closedir(dir); + } + // Create container dirs + for (int i = 0; i < TCE_NUM_CG_CONTROLLERS; i++) { + snprintf(container_paths[i], PATH_MAX, TCE_FAKE_CGROOT "/%s/%s/%s", + controllers[i], yarn_hierarchy, container_id); + if (mkdirs(container_paths[i], 0755) != 0) { + printf("FAIL: failed to mkdir %s\n", container_paths[i]); + exit(1); + } + snprintf(other_container_paths[i], PATH_MAX, TCE_FAKE_CGROOT "/%s/%s/%s", + controllers[i], yarn_hierarchy, other_container_id); + if (mkdirs(other_container_paths[i], 0755) != 0) { + printf("FAIL: failed to mkdir %s\n", other_container_paths[i]); + exit(1); + } + } + ret = clean_docker_cgroups_internal(fake_mount_table, yarn_hierarchy, container_id); + // make sure hadoop-yarn dirs are still there + for (int i = 0; i < TCE_NUM_CG_CONTROLLERS; i++) { + DIR *dir = NULL; + dir = opendir(cgroup_paths[i]); + if (dir == NULL) { + printf("FAIL: failed to open dir %s\n", cgroup_paths[i]); + exit(1); + } + closedir(dir); + } + // make sure container dirs deleted + for (int i = 0; i < TCE_NUM_CG_CONTROLLERS; i++) { + DIR *dir = NULL; + dir = opendir(container_paths[i]); + if (dir != NULL) { + printf("FAIL: container cgroup %s not deleted\n", container_paths[i]); + exit(1); + } + closedir(dir); + } + // make sure other container dirs are still there + for (int i = 0; i < TCE_NUM_CG_CONTROLLERS; i++) { + DIR *dir = NULL; + dir = opendir(other_container_paths[i]); + if (dir == NULL) { + printf("FAIL: container cgroup %s should not be deleted\n", other_container_paths[i]); + exit(1); + } + closedir(dir); + } +} + // This test is expected to be executed either by a regular // user or by root. If executed by a regular user it doesn't // test all the functions that would depend on changing the @@ -1328,6 +1470,8 @@ int main(int argc, char **argv) { test_check_user(0); + test_cleaning_docker_cgroups(); + #ifdef __APPLE__ printf("OS X: disabling CrashReporter\n"); /*