diff --cc hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/utils/docker-util.c index 7159374ebd5,35e5f92b6e3..00000000000 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/utils/docker-util.c +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/utils/docker-util.c @@@ -74,48 -73,20 +74,62 @@@ static int add_param_to_command(const s return ret; } +int check_trusted_image(const struct configuration *command_config, const struct configuration *conf) { + int found = 0; + int i = 0; + int ret = 0; + char *image_name = get_configuration_value("image", DOCKER_COMMAND_FILE_SECTION, command_config); + char **privileged_registry = get_configuration_values_delimiter("docker.privileged-containers.registries", CONTAINER_EXECUTOR_CFG_DOCKER_SECTION, conf, ","); + char *registry_ptr = NULL; + if (image_name == NULL) { + ret = INVALID_DOCKER_IMAGE_NAME; + goto free_and_exit; + } + if (privileged_registry != NULL) { + for (i = 0; privileged_registry[i] != NULL; i++) { + int len = strlen(privileged_registry[i]); + if (privileged_registry[i][len - 1] != '/') { + registry_ptr = (char *) alloc_and_clear_memory(len + 2, sizeof(char)); + strncpy(registry_ptr, privileged_registry[i], len); + registry_ptr[len] = '/'; + registry_ptr[len + 1] = '\0'; + } else { + registry_ptr = strdup(privileged_registry[i]); + } + if (strncmp(image_name, registry_ptr, strlen(registry_ptr))==0) { + fprintf(ERRORFILE, "image: %s is trusted in %s registry.\n", image_name, privileged_registry[i]); + found=1; + free(registry_ptr); + break; + } + free(registry_ptr); + } + } + if (found==0) { + fprintf(ERRORFILE, "image: %s is not trusted.\n", image_name); + ret = INVALID_DOCKER_IMAGE_TRUST; + } + free(image_name); + + free_and_exit: + free(privileged_registry); + return ret; +} + + static int is_regex(const char *str) { + size_t len = strlen(str); + return !(len > 2 && str[0] == '^' && str[len-1] == '$'); + } + + static int validate_volume_name(const char *volume_name) { + const char *regex_str = "^[a-zA-Z0-9]([a-zA-Z0-9_.-]*)$"; + return execute_regex_match(regex_str, volume_name); + } + + static int validate_volume_name_with_argument(const char* requested, const char* pattern) { + return validate_volume_name(requested) && execute_regex_match(pattern, requested); + } + static int add_param_to_command_if_allowed(const struct configuration *command_config, const struct configuration *executor_cfg, const char *key, const char *allowed_key, const char *param, @@@ -142,15 -113,8 +156,16 @@@ } if (values != NULL) { + // Disable capabilities, devices if image is not trusted. + if (strcmp(key, "net") != 0) { + if (check_trusted_image(command_config, executor_cfg) != 0) { + fprintf(ERRORFILE, "Disable %s for untrusted image\n", key); + return INVALID_DOCKER_IMAGE_TRUST; + } + } + if (permitted_values != NULL) { + // Values are user requested. for (i = 0; values[i] != NULL; ++i) { memset(tmp_buffer, 0, tmp_buffer_size); permitted = 0; @@@ -1059,21 -977,8 +1088,22 @@@ static int add_mounts(const struct conf } if (values != NULL) { + // Disable mount volumes if image is not trusted. + if (check_trusted_image(command_config, conf) != 0) { + fprintf(ERRORFILE, "Disable mount volume for untrusted image\n"); + // YARN will implicitly bind node manager local directory to + // docker image. This can create file system security holes, + // if docker container has binary to escalate privileges. + // For untrusted image, we drop mounting without reporting + // INVALID_DOCKER_MOUNT messages to allow running untrusted + // image in a sandbox. + ret = 0; + goto free_and_exit; + } + - ret = normalize_mounts(permitted_ro_mounts); - ret |= normalize_mounts(permitted_rw_mounts); + ret = normalize_mounts(permitted_ro_mounts, 1); + ret |= normalize_mounts(permitted_rw_mounts, 1); ++ if (ret != 0) { fprintf(ERRORFILE, "Unable to find permitted docker mounts on disk\n"); ret = MOUNT_ACCESS_ERROR; diff --cc hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/utils/test_docker_util.cc index 7617d2c2fcf,4da45274bc5..00000000000 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/utils/test_docker_util.cc +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/utils/test_docker_util.cc @@@ -454,104 -454,8 +454,104 @@@ namespace ContainerExecutor ASSERT_EQ(0, strlen(buff)); } + TEST_F(TestDockerUtil, test_set_pid_namespace) { + struct configuration container_cfg, cmd_cfg; + const int buff_len = 1024; + char buff[buff_len]; + int ret = 0; + std::string container_executor_cfg_contents[] = {"[docker]\n docker.host-pid-namespace.enabled=1", + "[docker]\n docker.host-pid-namespace.enabled=true", + "[docker]\n docker.host-pid-namespace.enabled=True", + "[docker]\n docker.host-pid-namespace.enabled=0", + "[docker]\n docker.host-pid-namespace.enabled=false", + "[docker]\n"}; + std::vector > file_cmd_vec; + std::vector > bad_file_cmd_vec; + std::vector >::const_iterator itr; + std::vector >::const_iterator itr2; + file_cmd_vec.push_back(std::make_pair( + "[docker-command-execution]\n docker-command=run\n pid=host", "--pid='host' ")); + file_cmd_vec.push_back(std::make_pair( + "[docker-command-execution]\n docker-command=run", "")); + bad_file_cmd_vec.push_back(std::make_pair( + "[docker-command-execution]\n docker-command=run\n pid=other", + static_cast(INVALID_PID_NAMESPACE))); + + for (int i = 1; i < 3; ++i) { + write_container_executor_cfg(container_executor_cfg_contents[0]); + ret = read_config(container_executor_cfg_file.c_str(), &container_cfg); + + if (ret != 0) { + FAIL(); + } + for (itr = file_cmd_vec.begin(); itr != file_cmd_vec.end(); ++itr) { + memset(buff, 0, buff_len); + write_command_file(itr->first); + ret = read_config(docker_command_file.c_str(), &cmd_cfg); + if (ret != 0) { + FAIL(); + } + ret = set_pid_namespace(&cmd_cfg, &container_cfg, buff, buff_len); + ASSERT_EQ(0, ret); + ASSERT_STREQ(itr->second.c_str(), buff); + } + for (itr2 = bad_file_cmd_vec.begin(); itr2 != bad_file_cmd_vec.end(); ++itr2) { + memset(buff, 0, buff_len); + write_command_file(itr2->first); + ret = read_config(docker_command_file.c_str(), &cmd_cfg); + if (ret != 0) { + FAIL(); + } + ret = set_pid_namespace(&cmd_cfg, &container_cfg, buff, buff_len); + ASSERT_EQ(itr2->second, ret); + ASSERT_EQ(0, strlen(buff)); + } + } + + // check default case and when it's turned off + for (int i = 3; i < 6; ++i) { + write_container_executor_cfg(container_executor_cfg_contents[i]); + ret = read_config(container_executor_cfg_file.c_str(), &container_cfg); + if (ret != 0) { + FAIL(); + } + file_cmd_vec.clear(); + file_cmd_vec.push_back(std::make_pair( + "[docker-command-execution]\n docker-command=run", "")); + for (itr = file_cmd_vec.begin(); itr != file_cmd_vec.end(); ++itr) { + memset(buff, 0, buff_len); + write_command_file(itr->first); + ret = read_config(docker_command_file.c_str(), &cmd_cfg); + if (ret != 0) { + FAIL(); + } + ret = set_pid_namespace(&cmd_cfg, &container_cfg, buff, buff_len); + ASSERT_EQ(0, ret); + ASSERT_STREQ(itr->second.c_str(), buff); + } + bad_file_cmd_vec.clear(); + bad_file_cmd_vec.push_back(std::make_pair( + "[docker-command-execution]\n docker-command=run\n pid=other", + static_cast(INVALID_PID_NAMESPACE))); + bad_file_cmd_vec.push_back(std::make_pair( + "[docker-command-execution]\n docker-command=run\n pid=host", + static_cast(PID_HOST_DISABLED))); + for (itr2 = bad_file_cmd_vec.begin(); itr2 != bad_file_cmd_vec.end(); ++itr2) { + memset(buff, 0, buff_len); + write_command_file(itr2->first); + ret = read_config(docker_command_file.c_str(), &cmd_cfg); + if (ret != 0) { + FAIL(); + } + ret = set_pid_namespace(&cmd_cfg, &container_cfg, buff, buff_len); + ASSERT_EQ(itr2->second, ret); + ASSERT_EQ(0, strlen(buff)); + } + } + } + TEST_F(TestDockerUtil, test_check_mount_permitted) { - const char *permitted_mounts[] = {"/etc", "/usr/bin/cut", "/tmp/", NULL}; + const char *permitted_mounts[] = {"/etc", "/usr/bin/cut", "/tmp/", "^/usr/local/.*$", NULL}; std::vector > test_data; test_data.push_back(std::make_pair("/etc", 1)); test_data.push_back(std::make_pair("/etc/", 1)); @@@ -740,22 -619,28 +743,30 @@@ const int buff_len = 1024; char buff[buff_len]; int ret = 0; - std::string container_executor_cfg_contents = "[docker]\n docker.allowed.devices=/dev/test-device,/dev/device2,^/dev/nvidia.*$,^/dev/gpu-uvm.*$"; + std::string container_executor_cfg_contents = "[docker]\n" + " docker.privileged-containers.registries=hadoop\n" - " docker.allowed.devices=/dev/test-device,/dev/device2"; ++ " docker.allowed.devices=/dev/test-device,/dev/device2,^/dev/nvidia.*$,^/dev/gpu-uvm.*$"; std::vector > file_cmd_vec; file_cmd_vec.push_back(std::make_pair( - "[docker-command-execution]\n docker-command=run\n devices=/dev/test-device:/dev/test-device", + "[docker-command-execution]\n docker-command=run\n image=hadoop/image\n devices=/dev/test-device:/dev/test-device", "--device='/dev/test-device:/dev/test-device' ")); file_cmd_vec.push_back(std::make_pair( - "[docker-command-execution]\n docker-command=run\n devices=/dev/device2:/dev/device2", + "[docker-command-execution]\n docker-command=run\n image=hadoop/image\n devices=/dev/device2:/dev/device2", "--device='/dev/device2:/dev/device2' ")); file_cmd_vec.push_back(std::make_pair( - "[docker-command-execution]\n docker-command=run\n " + "[docker-command-execution]\n docker-command=run\n image=hadoop/image\n" - " devices=/dev/test-device:/dev/test-device,/dev/device2:/dev/device2", + "devices=/dev/test-device:/dev/test-device,/dev/device2:/dev/device2", "--device='/dev/test-device:/dev/test-device' --device='/dev/device2:/dev/device2' ")); file_cmd_vec.push_back(std::make_pair( - "[docker-command-execution]\n docker-command=run\n " ++ "[docker-command-execution]\n docker-command=run\n image=hadoop/image\n" + "devices=/dev/nvidiactl:/dev/nvidiactl", + "--device='/dev/nvidiactl:/dev/nvidiactl' ")); + file_cmd_vec.push_back(std::make_pair( - "[docker-command-execution]\n docker-command=run\n " ++ "[docker-command-execution]\n docker-command=run\n image=hadoop/image\n" + "devices=/dev/nvidia1:/dev/nvidia1,/dev/gpu-uvm-tools:/dev/gpu-uvm-tools", + "--device='/dev/nvidia1:/dev/nvidia1' --device='/dev/gpu-uvm-tools:/dev/gpu-uvm-tools' ")); + file_cmd_vec.push_back(std::make_pair( - "[docker-command-execution]\n docker-command=run\n", "")); + "[docker-command-execution]\n docker-command=run\n image=hadoop/image", "")); write_container_executor_cfg(container_executor_cfg_contents); ret = read_config(container_executor_cfg_file.c_str(), &container_cfg); @@@ -804,6 -669,36 +815,36 @@@ ASSERT_EQ(INVALID_DOCKER_DEVICE, ret); ASSERT_EQ(0, strlen(buff)); - write_command_file("[docker-command-execution]\n docker-command=run\n devices=/dev/testnvidia:/dev/testnvidia"); ++ write_command_file("[docker-command-execution]\n docker-command=run\n image=hadoop/image\n devices=/dev/testnvidia:/dev/testnvidia"); + ret = read_config(docker_command_file.c_str(), &cmd_cfg); + if (ret != 0) { + FAIL(); + } + strcpy(buff, "test string"); + ret = set_devices(&cmd_cfg, &container_cfg, buff, buff_len); + ASSERT_EQ(INVALID_DOCKER_DEVICE, ret); + ASSERT_EQ(0, strlen(buff)); + - write_command_file("[docker-command-execution]\n docker-command=run\n devices=/dev/gpu-nvidia-uvm:/dev/gpu-nvidia-uvm"); ++ write_command_file("[docker-command-execution]\n docker-command=run\n image=hadoop/image\n devices=/dev/gpu-nvidia-uvm:/dev/gpu-nvidia-uvm"); + ret = read_config(docker_command_file.c_str(), &cmd_cfg); + if (ret != 0) { + FAIL(); + } + strcpy(buff, "test string"); + ret = set_devices(&cmd_cfg, &container_cfg, buff, buff_len); + ASSERT_EQ(INVALID_DOCKER_DEVICE, ret); + ASSERT_EQ(0, strlen(buff)); + - write_command_file("[docker-command-execution]\n docker-command=run\n devices=/dev/device1"); ++ write_command_file("[docker-command-execution]\n docker-command=run\n image=hadoop/image\n devices=/dev/device1"); + ret = read_config(docker_command_file.c_str(), &cmd_cfg); + if (ret != 0) { + FAIL(); + } + strcpy(buff, "test string"); + ret = set_devices(&cmd_cfg, &container_cfg, buff, buff_len); + ASSERT_EQ(INVALID_DOCKER_DEVICE, ret); + ASSERT_EQ(0, strlen(buff)); + container_executor_cfg_contents = "[docker]\n"; write_container_executor_cfg(container_executor_cfg_contents); ret = read_config(container_executor_cfg_file.c_str(), &container_cfg);