Index: hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java	(revision 1598fd3b7948b3592775e3be3227c4a336122bc9)
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java	(revision 0aa95603aefc9ff5f704b0c22f53b7f09f70bbee)
@@ -30,6 +30,7 @@
 import org.apache.hadoop.yarn.api.ApplicationConstants;
 import org.apache.hadoop.yarn.api.records.ContainerId;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.exceptions.ConfigurationException;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerDiagnosticsUpdateEvent;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperation;
@@ -110,6 +111,58 @@
   private ResourceHandler resourceHandlerChain;
   private LinuxContainerRuntime linuxContainerRuntime;
 
+  /**
+   * The container exit code.
+   */
+  public enum ExitCode {
+    SUCCESS(0),
+    INVALID_ARGUMENT_NUMBER(1),
+    INVALID_COMMAND_PROVIDED(3),
+    INVALID_NM_ROOT_DIRS(5),
+    SETUID_OPER_FAILED(6),
+    UNABLE_TO_EXECUTE_CONTAINER_SCRIPT(7),
+    UNABLE_TO_SIGNAL_CONTAINER(8),
+    INVALID_CONTAINER_PID(9),
+    OUT_OF_MEMORY(18),
+    INITIALIZE_USER_FAILED(20),
+    PATH_TO_DELETE_IS_NULL(21),
+    INVALID_CONTAINER_EXEC_PERMISSIONS(22),
+    INVALID_CONFIG_FILE(24),
+    SETSID_OPER_FAILED(25),
+    WRITE_PIDFILE_FAILED(26),
+    WRITE_CGROUP_FAILED(27),
+    TRAFFIC_CONTROL_EXECUTION_FAILED(28),
+    DOCKER_RUN_FAILED(29),
+    ERROR_OPENING_DOCKER_FILE(30),
+    ERROR_READING_DOCKER_FILE(31),
+    FEATURE_DISABLED(32),
+    COULD_NOT_CREATE_SCRIPT_COPY(33),
+    COULD_NOT_CREATE_CREDENTIALS_FILE(34),
+    COULD_NOT_CREATE_WORK_DIRECTORIES(35),
+    COULD_NOT_CREATE_APP_LOG_DIRECTORIES(36),
+    COULD_NOT_CREATE_TMP_DIRECTORIES(37),
+    ERROR_CREATE_CONTAINER_DIRECTORIES_ARGUMENTS(38);
+
+    private final int code;
+
+    ExitCode(int exitCode) {
+      this.code = exitCode;
+    }
+
+    /**
+     * Get the exit code as an int.
+     * @return the exit code as an int
+     */
+    public int getExitCode() {
+      return code;
+    }
+
+    @Override
+    public String toString() {
+      return String.valueOf(code);
+    }
+  }
+
   /**
    * Default constructor to allow for creation through reflection.
    */
@@ -386,7 +439,8 @@
   }
 
   @Override
-  public int launchContainer(ContainerStartContext ctx) throws IOException {
+  public int launchContainer(ContainerStartContext ctx)
+      throws IOException, ConfigurationException {
     Container container = ctx.getContainer();
     Path nmPrivateContainerScriptPath = ctx.getNmPrivateContainerScriptPath();
     Path nmPrivateTokensPath = ctx.getNmPrivateTokensPath();
@@ -496,7 +550,7 @@
       } else {
         LOG.info(
             "Container was marked as inactive. Returning terminated error");
-        return ExitCode.TERMINATED.getExitCode();
+        return ContainerExecutor.ExitCode.TERMINATED.getExitCode();
       }
     } catch (ContainerExecutionException e) {
       int exitCode = e.getExitCode();
@@ -504,8 +558,8 @@
       // 143 (SIGTERM) and 137 (SIGKILL) exit codes means the container was
       // terminated/killed forcefully. In all other cases, log the
      // output
-      if (exitCode != ExitCode.FORCE_KILLED.getExitCode()
-          && exitCode != ExitCode.TERMINATED.getExitCode()) {
+      if (exitCode != ContainerExecutor.ExitCode.FORCE_KILLED.getExitCode()
+          && exitCode != ContainerExecutor.ExitCode.TERMINATED.getExitCode()) {
         LOG.warn("Exception from container-launch with container ID: "
             + containerId + " and exit code: " + exitCode, e);
@@ -525,6 +579,23 @@
         logOutput(diagnostics);
         container.handle(new ContainerDiagnosticsUpdateEvent(containerId,
             diagnostics));
+        if (exitCode ==
+            ExitCode.INVALID_CONTAINER_EXEC_PERMISSIONS.getExitCode() ||
+            exitCode ==
+                ExitCode.INVALID_CONFIG_FILE.getExitCode() ||
+            exitCode ==
+                ExitCode.COULD_NOT_CREATE_SCRIPT_COPY.getExitCode() ||
+            exitCode ==
+                ExitCode.COULD_NOT_CREATE_CREDENTIALS_FILE.getExitCode() ||
+            exitCode ==
+                ExitCode.COULD_NOT_CREATE_WORK_DIRECTORIES.getExitCode() ||
+            exitCode ==
+                ExitCode.COULD_NOT_CREATE_APP_LOG_DIRECTORIES.getExitCode() ||
+            exitCode ==
+                ExitCode.COULD_NOT_CREATE_TMP_DIRECTORIES.getExitCode()) {
+          throw new ConfigurationException(
+              "Linux Container Executor reached unrecoverable exception", e);
+        }
       } else {
         container.handle(new ContainerDiagnosticsUpdateEvent(containerId,
             "Container killed on request. Exit code is " + exitCode));
Index: hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c	(revision 1598fd3b7948b3592775e3be3227c4a336122bc9)
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c	(revision cb97a1911c0df3528c49aa0ba96e7bc6233d630a)
@@ -649,19 +649,21 @@
     const char *container_id, char* const* local_dir, char* const* log_dir,
     const char *work_dir) {
   // create dirs as 0750
   const mode_t perms = S_IRWXU | S_IRGRP | S_IXGRP;
-  if (app_id == NULL || container_id == NULL || user == NULL || user_detail == NULL || user_detail->pw_name == NULL) {
+  if (user == NULL || app_id == NULL || container_id == NULL ||
+      local_dir == NULL || log_dir == NULL || work_dir == NULL ||
+      user_detail == NULL || user_detail->pw_name == NULL) {
     fprintf(LOGFILE, "Either app_id, container_id or the user passed is null.\n");
-    return -1;
+    return ERROR_CREATE_CONTAINER_DIRECTORIES_ARGUMENTS;
   }
 
-  int result = -1;
+  int result = COULD_NOT_CREATE_WORK_DIRECTORIES;
   char* const* local_dir_ptr;
   for(local_dir_ptr = local_dir; *local_dir_ptr != NULL; ++local_dir_ptr) {
     char *container_dir = get_container_work_directory(*local_dir_ptr, user, app_id, container_id);
     if (container_dir == NULL) {
-      return -1;
+      return OUT_OF_MEMORY;
     }
     if (mkdirs(container_dir, perms) == 0) {
       result = 0;
@@ -674,12 +676,12 @@
     return result;
   }
 
-  result = -1;
+  result = COULD_NOT_CREATE_APP_LOG_DIRECTORIES;
   // also make the directory for the container logs
   char *combined_name = malloc(strlen(app_id) + strlen(container_id) + 2);
   if (combined_name == NULL) {
     fprintf(LOGFILE, "Malloc of combined name failed\n");
-    result = -1;
+    result = OUT_OF_MEMORY;
   } else {
     sprintf(combined_name, "%s/%s", app_id, container_id);
@@ -688,7 +690,7 @@
       char *container_log_dir = get_app_log_directory(*log_dir_ptr, combined_name);
      if (container_log_dir == NULL) {
        free(combined_name);
-        return -1;
+        return OUT_OF_MEMORY;
      } else if (mkdirs(container_log_dir, perms) != 0) {
        free(container_log_dir);
      } else {
@@ -703,12 +705,12 @@
    return result;
  }
 
-  result = -1;
+  result = COULD_NOT_CREATE_TMP_DIRECTORIES;
  // also make the tmp directory
  char *tmp_dir = get_tmp_directory(work_dir);
 
  if (tmp_dir == NULL) {
-    return -1;
+    return OUT_OF_MEMORY;
  }
  if (mkdirs(tmp_dir, perms) == 0) {
    result = 0;
@@ -1149,12 +1151,12 @@
    fprintf(ERRORFILE, "Cannot open file %s - %s",
            command_file, strerror(errno));
    fflush(ERRORFILE);
-    exit(ERROR_OPENING_FILE);
+    exit(ERROR_OPENING_DOCKER_FILE);
  }
  if ((read = getline(&line, &len, stream)) == -1) {
    fprintf(ERRORFILE, "Error reading command_file %s\n", command_file);
    fflush(ERRORFILE);
-    exit(ERROR_READING_FILE);
+    exit(ERROR_READING_DOCKER_FILE);
  }
  fclose(stream);
@@ -1267,25 +1269,27 @@
  // Create container specific directories as user. If there are no resources
  // to localize for this container, app-directories and log-directories are
  // also created automatically as part of this call.
-  if (create_container_directories(user, app_id, container_id, local_dirs,
-    log_dirs, work_dir) != 0) {
+  int directory_create_result = create_container_directories(user, app_id,
+    container_id, local_dirs, log_dirs, work_dir);
+  if (directory_create_result != 0) {
    fprintf(ERRORFILE, "Could not create container dirs");
    fflush(ERRORFILE);
+    exit_code = directory_create_result;
    goto cleanup;
  }
 
-  // 700
+  // Copy script file with permissions 700
  if (copy_file(container_file_source, script_name, script_file_dest,S_IRWXU) != 0) {
    fprintf(ERRORFILE, "Could not create copy file %d %s\n", container_file_source, script_file_dest);
    fflush(ERRORFILE);
-    exit_code = INVALID_COMMAND_PROVIDED;
+    exit_code = COULD_NOT_CREATE_SCRIPT_COPY;
    goto cleanup;
  }
 
-  // 600
+  // Copy credential file with permissions 600
  if (copy_file(cred_file_source, cred_file, cred_file_dest, S_IRUSR | S_IWUSR) != 0) {
-    exit_code = UNABLE_TO_EXECUTE_CONTAINER_SCRIPT;
+    exit_code = COULD_NOT_CREATE_CREDENTIALS_FILE;
    fprintf(ERRORFILE, "Could not copy file");
    fflush(ERRORFILE);
    goto cleanup;
  }
@@ -1870,7 +1874,7 @@
  /* Return an error if the path is null. */
  if (full_path == NULL) {
    fprintf(LOGFILE, "Path is null\n");
-    return UNABLE_TO_BUILD_PATH;
+    return PATH_TO_DELETE_IS_NULL;
  }
  ret = recursive_unlink_children(full_path);
  if (ret == ENOENT) {
Index: hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.h
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.h	(revision 1598fd3b7948b3592775e3be3227c4a336122bc9)
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.h	(revision cb97a1911c0df3528c49aa0ba96e7bc6233d630a)
@@ -37,8 +37,8 @@
 enum errorcodes {
   INVALID_ARGUMENT_NUMBER = 1,
-  INVALID_USER_NAME, //2
-  INVALID_COMMAND_PROVIDED, //3
+  //INVALID_USER_NAME 2
+  INVALID_COMMAND_PROVIDED = 3,
   // SUPER_USER_NOT_ALLOWED_TO_RUN_TASKS (NOT USED) 4
   INVALID_NM_ROOT_DIRS = 5,
   SETUID_OPER_FAILED, //6
@@ -56,18 +56,24 @@
   OUT_OF_MEMORY = 18,
   // INITIALIZE_DISTCACHEFILE_FAILED (NOT USED) 19
   INITIALIZE_USER_FAILED = 20,
-  UNABLE_TO_BUILD_PATH, //21
+  PATH_TO_DELETE_IS_NULL, //21
   INVALID_CONTAINER_EXEC_PERMISSIONS, //22
   // PREPARE_JOB_LOGS_FAILED (NOT USED) 23
-  INVALID_CONFIG_FILE = 24, 
+  INVALID_CONFIG_FILE = 24,
   SETSID_OPER_FAILED = 25,
   WRITE_PIDFILE_FAILED = 26,
   WRITE_CGROUP_FAILED = 27,
   TRAFFIC_CONTROL_EXECUTION_FAILED = 28,
-  DOCKER_RUN_FAILED=29,
-  ERROR_OPENING_FILE = 30,
-  ERROR_READING_FILE = 31,
-  FEATURE_DISABLED = 32
+  DOCKER_RUN_FAILED = 29,
+  ERROR_OPENING_DOCKER_FILE = 30,
+  ERROR_READING_DOCKER_FILE = 31,
+  FEATURE_DISABLED = 32,
+  COULD_NOT_CREATE_SCRIPT_COPY = 33,
+  COULD_NOT_CREATE_CREDENTIALS_FILE = 34,
+  COULD_NOT_CREATE_WORK_DIRECTORIES = 35,
+  COULD_NOT_CREATE_APP_LOG_DIRECTORIES = 36,
+  COULD_NOT_CREATE_TMP_DIRECTORIES = 37,
+  ERROR_CREATE_CONTAINER_DIRECTORIES_ARGUMENTS = 38,
 };
 
 enum operations {
Index: hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutorWithMocks.java
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutorWithMocks.java	(revision 1598fd3b7948b3592775e3be3227c4a336122bc9)
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutorWithMocks.java	(revision 6f7872e99f5be813c74493dd204e14355049659d)
@@ -20,6 +20,7 @@
 import static org.apache.hadoop.test.PlatformAssumptions.assumeNotWindows;
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotEquals;
 import static org.junit.Assert.assertTrue;
 import static org.mockito.Matchers.any;
 import static org.mockito.Mockito.doAnswer;
@@ -49,6 +50,7 @@
 import org.apache.hadoop.yarn.api.records.ContainerId;
 import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.exceptions.ConfigurationException;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerDiagnosticsUpdateEvent;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperation;
@@ -78,6 +80,8 @@
       "./src/test/resources/mock-container-executor";
   private static final String MOCK_EXECUTOR_WITH_ERROR =
       "./src/test/resources/mock-container-executer-with-error";
+  private static final String MOCK_EXECUTOR_WITH_CONFIG_ERROR =
+      "./src/test/resources/mock-container-executer-with-configuration-error";
 
   private String tmpMockExecutor;
   private LinuxContainerExecutor mockExec = null;
@@ -147,7 +151,8 @@
   }
 
   @Test
-  public void testContainerLaunch() throws IOException {
+  public void testContainerLaunch()
+      throws IOException, ConfigurationException {
     String appSubmitter = "nobody";
     String cmd = String.valueOf(
         PrivilegedOperation.RunAsUserCommand.LAUNCH_CONTAINER.getValue());
@@ -198,7 +203,8 @@
   }
 
   @Test (timeout = 5000)
-  public void testContainerLaunchWithPriority() throws IOException {
+  public void testContainerLaunchWithPriority()
+      throws IOException, ConfigurationException {
 
     // set the scheduler priority to make sure still works with nice -n prio
     Configuration conf = new Configuration();
@@ -272,102 +278,121 @@
   public void testContainerLaunchError()
       throws IOException, ContainerExecutionException {
 
-    // reinitialize executer
-    Configuration conf = new Configuration();
-    setupMockExecutor(MOCK_EXECUTOR_WITH_ERROR, conf);
-    conf.set(YarnConfiguration.NM_LOCAL_DIRS, "file:///bin/echo");
-    conf.set(YarnConfiguration.NM_LOG_DIRS, "file:///dev/null");
+    final String[] expectedMessage = {"badcommand", "Exit code: 24"};
+    final String[] executor = {
+        MOCK_EXECUTOR_WITH_ERROR,
+        MOCK_EXECUTOR_WITH_CONFIG_ERROR
+    };
+
+    for (int i = 0; i < expectedMessage.length; ++i) {
+      final int j = i;
+      // reinitialize executor
+      Configuration conf = new Configuration();
+      setupMockExecutor(executor[j], conf);
+      conf.set(YarnConfiguration.NM_LOCAL_DIRS, "file:///bin/echo");
+      conf.set(YarnConfiguration.NM_LOG_DIRS, "file:///dev/null");
 
-    LinuxContainerExecutor exec;
-    LinuxContainerRuntime linuxContainerRuntime = new
-        DefaultLinuxContainerRuntime(PrivilegedOperationExecutor.getInstance
-        (conf));
+      LinuxContainerExecutor exec;
+      LinuxContainerRuntime linuxContainerRuntime = new
+          DefaultLinuxContainerRuntime(PrivilegedOperationExecutor.getInstance(
+          conf));
 
-    linuxContainerRuntime.initialize(conf);
-    exec = new LinuxContainerExecutor(linuxContainerRuntime);
+      linuxContainerRuntime.initialize(conf);
+      exec = new LinuxContainerExecutor(linuxContainerRuntime);
 
-    mockExec = spy(exec);
-    doAnswer(
-        new Answer() {
-          @Override
-          public Object answer(InvocationOnMock invocationOnMock)
-              throws Throwable {
-            String diagnostics = (String) invocationOnMock.getArguments()[0];
-            assertTrue("Invalid Diagnostics message: " + diagnostics,
-                diagnostics.contains("badcommand"));
-            return null;
-          }
-        }
-    ).when(mockExec).logOutput(any(String.class));
-    dirsHandler = new LocalDirsHandlerService();
-    dirsHandler.init(conf);
-    mockExec.setConf(conf);
+      mockExec = spy(exec);
+      doAnswer(
+          new Answer() {
+            @Override
+            public Object answer(InvocationOnMock invocationOnMock)
+                throws Throwable {
+              String diagnostics = (String) invocationOnMock.getArguments()[0];
+              assertTrue("Invalid Diagnostics message: " + diagnostics,
+                  diagnostics.contains(expectedMessage[j]));
+              return null;
+            }
+          }
+      ).when(mockExec).logOutput(any(String.class));
+      dirsHandler = new LocalDirsHandlerService();
+      dirsHandler.init(conf);
+      mockExec.setConf(conf);
 
-    String appSubmitter = "nobody";
-    String cmd = String
-        .valueOf(PrivilegedOperation.RunAsUserCommand.LAUNCH_CONTAINER.getValue());
-    String appId = "APP_ID";
-    String containerId = "CONTAINER_ID";
-    Container container = mock(Container.class);
-    ContainerId cId = mock(ContainerId.class);
-    ContainerLaunchContext context = mock(ContainerLaunchContext.class);
-    HashMap env = new HashMap();
+      String appSubmitter = "nobody";
+      String cmd = String
+          .valueOf(PrivilegedOperation.RunAsUserCommand.LAUNCH_CONTAINER.
+          getValue());
+      String appId = "APP_ID";
+      String containerId = "CONTAINER_ID";
+      Container container = mock(Container.class);
+      ContainerId cId = mock(ContainerId.class);
+      ContainerLaunchContext context = mock(ContainerLaunchContext.class);
+      HashMap env = new HashMap();
 
-    when(container.getContainerId()).thenReturn(cId);
-    when(container.getLaunchContext()).thenReturn(context);
-    doAnswer(
-        new Answer() {
-          @Override
-          public Object answer(InvocationOnMock invocationOnMock)
-              throws Throwable {
-            ContainerDiagnosticsUpdateEvent event =
-                (ContainerDiagnosticsUpdateEvent) invocationOnMock
-                    .getArguments()[0];
-            assertTrue("Invalid Diagnostics message: " +
-                event.getDiagnosticsUpdate(),
-                event.getDiagnosticsUpdate().contains("badcommand"));
-            return null;
-          }
-        }
-    ).when(container).handle(any(ContainerDiagnosticsUpdateEvent.class));
-
-    when(cId.toString()).thenReturn(containerId);
+      when(container.getContainerId()).thenReturn(cId);
+      when(container.getLaunchContext()).thenReturn(context);
+      doAnswer(
+          new Answer() {
+            @Override
+            public Object answer(InvocationOnMock invocationOnMock)
+                throws Throwable {
+              ContainerDiagnosticsUpdateEvent event =
+                  (ContainerDiagnosticsUpdateEvent) invocationOnMock
+                      .getArguments()[0];
+              assertTrue("Invalid Diagnostics message: " +
+                  event.getDiagnosticsUpdate(),
+                  event.getDiagnosticsUpdate().contains(expectedMessage[j]));
+              return null;
+            }
+          }
+      ).when(container).handle(any(ContainerDiagnosticsUpdateEvent.class));
+
+      when(cId.toString()).thenReturn(containerId);
 
-    when(context.getEnvironment()).thenReturn(env);
+      when(context.getEnvironment()).thenReturn(env);
 
-    Path scriptPath = new Path("file:///bin/echo");
-    Path tokensPath = new Path("file:///dev/null");
-    Path workDir = new Path("/tmp");
-    Path pidFile = new Path(workDir, "pid.txt");
+      Path scriptPath = new Path("file:///bin/echo");
+      Path tokensPath = new Path("file:///dev/null");
+      Path workDir = new Path("/tmp");
+      Path pidFile = new Path(workDir, "pid.txt");
 
-    mockExec.activateContainer(cId, pidFile);
+      mockExec.activateContainer(cId, pidFile);
 
-    int ret = mockExec.launchContainer(new ContainerStartContext.Builder()
-        .setContainer(container)
-        .setNmPrivateContainerScriptPath(scriptPath)
-        .setNmPrivateTokensPath(tokensPath)
-        .setUser(appSubmitter)
-        .setAppId(appId)
-        .setContainerWorkDir(workDir)
-        .setLocalDirs(dirsHandler.getLocalDirs())
-        .setLogDirs(dirsHandler.getLogDirs())
-        .setFilecacheDirs(new ArrayList<>())
-        .setUserLocalDirs(new ArrayList<>())
-        .setContainerLocalDirs(new ArrayList<>())
-        .setContainerLogDirs(new ArrayList<>())
-        .build());
+      try {
+        int ret = mockExec.launchContainer(new ContainerStartContext.Builder()
+            .setContainer(container)
+            .setNmPrivateContainerScriptPath(scriptPath)
+            .setNmPrivateTokensPath(tokensPath)
+            .setUser(appSubmitter)
+            .setAppId(appId)
+            .setContainerWorkDir(workDir)
+            .setLocalDirs(dirsHandler.getLocalDirs())
+            .setLogDirs(dirsHandler.getLogDirs())
+            .setFilecacheDirs(new ArrayList<>())
+            .setUserLocalDirs(new ArrayList<>())
+            .setContainerLocalDirs(new ArrayList<>())
+            .setContainerLogDirs(new ArrayList<>())
+            .build());
 
-    Assert.assertNotSame(0, ret);
-    assertEquals(Arrays.asList(YarnConfiguration.DEFAULT_NM_NONSECURE_MODE_LOCAL_USER,
-        appSubmitter, cmd, appId, containerId,
-        workDir.toString(), "/bin/echo", "/dev/null", pidFile.toString(),
-        StringUtils.join(PrivilegedOperation.LINUX_FILE_PATH_SEPARATOR,
-            dirsHandler.getLocalDirs()),
-        StringUtils.join(PrivilegedOperation.LINUX_FILE_PATH_SEPARATOR,
-            dirsHandler.getLogDirs()),
-        "cgroups=none"), readMockParams());
+        Assert.assertNotSame(0, ret);
+        assertEquals(Arrays.asList(YarnConfiguration.
+            DEFAULT_NM_NONSECURE_MODE_LOCAL_USER,
+            appSubmitter, cmd, appId, containerId,
+            workDir.toString(), "/bin/echo", "/dev/null", pidFile.toString(),
+            StringUtils.join(PrivilegedOperation.LINUX_FILE_PATH_SEPARATOR,
+                dirsHandler.getLocalDirs()),
+            StringUtils.join(PrivilegedOperation.LINUX_FILE_PATH_SEPARATOR,
+                dirsHandler.getLogDirs()),
+            "cgroups=none"), readMockParams());
+        assertNotEquals("Expected ConfigurationException",
+            MOCK_EXECUTOR_WITH_CONFIG_ERROR, executor[i]);
+      } catch (ConfigurationException ex) {
+        assertEquals(MOCK_EXECUTOR_WITH_CONFIG_ERROR, executor[i]);
+        Assert.assertEquals("Linux Container Executor reached unrecoverable " +
            "exception", ex.getMessage());
+      }
+    }
   }
 
   @Test
Index: hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/resources/mock-container-executer-with-configuration-error
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/resources/mock-container-executer-with-configuration-error	(revision cb97a1911c0df3528c49aa0ba96e7bc6233d630a)
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/resources/mock-container-executer-with-configuration-error	(revision cb97a1911c0df3528c49aa0ba96e7bc6233d630a)
@@ -0,0 +1,20 @@
+#!/bin/sh
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+for PARAM in "$@"
+do
+  echo $PARAM;
+done > params.txt
+
+exit 24
+
Index: hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/exceptions/ConfigurationException.java
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/exceptions/ConfigurationException.java	(revision 8eb0fc101e35febfd7893b6c0c853bb3974d2456)
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/exceptions/ConfigurationException.java	(revision 8eb0fc101e35febfd7893b6c0c853bb3974d2456)
@@ -0,0 +1,45 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.exceptions;
+
+import org.apache.hadoop.classification.InterfaceAudience.Public;
+import org.apache.hadoop.classification.InterfaceStability.Evolving;
+
+/**
+ * This exception is thrown on unrecoverable configuration errors.
+ * An example is container launch error due to configuration.
+ */
+@Public
+@Evolving
+public class ConfigurationException extends YarnException {
+
+  private static final long serialVersionUID = 0x9801a7a0f8e3L;
+
+  public ConfigurationException(Throwable cause) {
+    super(cause);
+  }
+
+  public ConfigurationException(String message) {
+    super(message);
+  }
+
+  public ConfigurationException(String message, Throwable cause) {
+    super(message, cause);
+  }
+}
Index: hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerExecutor.java
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerExecutor.java	(revision cb97a1911c0df3528c49aa0ba96e7bc6233d630a)
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerExecutor.java	(revision 0aa95603aefc9ff5f704b0c22f53b7f09f70bbee)
@@ -47,6 +47,7 @@
 import org.apache.hadoop.yarn.api.records.ContainerId;
 import org.apache.hadoop.yarn.api.records.Resource;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.exceptions.ConfigurationException;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerDiagnosticsUpdateEvent;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch;
@@ -164,9 +165,10 @@
    * @param ctx Encapsulates information necessary for launching containers.
    * @return the return status of the launch
    * @throws IOException if the container launch fails
+   * @throws ConfigurationException if config error was found
    */
   public abstract int launchContainer(ContainerStartContext ctx) throws
-      IOException;
+      IOException, ConfigurationException;
 
   /**
    * Signal container with the specified signal.
Index: hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DefaultContainerExecutor.java
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DefaultContainerExecutor.java	(revision cb97a1911c0df3528c49aa0ba96e7bc6233d630a)
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DefaultContainerExecutor.java	(revision 6f7872e99f5be813c74493dd204e14355049659d)
@@ -51,6 +51,7 @@
 import org.apache.hadoop.yarn.api.records.ContainerId;
 import org.apache.hadoop.yarn.api.records.Resource;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.exceptions.ConfigurationException;
 import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerDiagnosticsUpdateEvent;
@@ -209,7 +210,8 @@
   }
 
   @Override
-  public int launchContainer(ContainerStartContext ctx) throws IOException {
+  public int launchContainer(ContainerStartContext ctx)
+      throws IOException, ConfigurationException {
     Container container = ctx.getContainer();
     Path nmPrivateContainerScriptPath = ctx.getNmPrivateContainerScriptPath();
     Path nmPrivateTokensPath = ctx.getNmPrivateTokensPath();
@@ -291,8 +293,7 @@
 
       if (isContainerActive(containerId)) {
         shExec.execute();
-      }
-      else {
+      } else {
         LOG.info("Container " + containerIdStr +
             " was marked as inactive. Returning terminated error");
         return ExitCode.TERMINATED.getExitCode();
Index: hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdater.java
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdater.java	(revision cb97a1911c0df3528c49aa0ba96e7bc6233d630a)
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdater.java	(revision 03f4cd8a1391360ea3d7790b1044421eb05d6d2d)
@@ -53,4 +53,10 @@
    * Clear the list of recently completed containers
    */
   public void clearFinishedContainersFromCache();
+
+  /**
+   * Report an unrecoverable exception.
+   * @param ex exception that makes the node unhealthy
+   */
+  void reportException(Exception ex);
 }
Index: hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java	(revision cb97a1911c0df3528c49aa0ba96e7bc6233d630a)
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java	(revision 03f4cd8a1391360ea3d7790b1044421eb05d6d2d)
@@ -990,6 +990,12 @@
     return false;
   }
 
+  @Override
+  public void reportException(Exception ex) {
+    healthChecker.reportException(ex);
+    sendOutofBandHeartBeat();
+  }
+
   private List getLogAggregationReportsForApps(
       ConcurrentLinkedQueue lastestLogAggregationStatus) {
     LogAggregationReport status;
Index: hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java	(revision cb97a1911c0df3528c49aa0ba96e7bc6233d630a)
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java	(revision c3e97212662de4b9242fd179409a8ea93a030bde)
@@ -58,6 +58,7 @@
 import org.apache.hadoop.yarn.api.records.SignalContainerCommand;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.hadoop.yarn.event.Dispatcher;
+import org.apache.hadoop.yarn.exceptions.ConfigurationException;
 import org.apache.hadoop.yarn.exceptions.YarnException;
 import org.apache.hadoop.yarn.ipc.RPCUtil;
 import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor;
@@ -200,23 +201,23 @@
       FileContext lfs = FileContext.getLocalFSFileContext();
 
       Path nmPrivateContainerScriptPath = dirsHandler.getLocalPathForWrite(
-              getContainerPrivateDir(appIdStr, containerIdStr) + Path.SEPARATOR
-                  + CONTAINER_SCRIPT);
+          getContainerPrivateDir(appIdStr, containerIdStr) + Path.SEPARATOR
+              + CONTAINER_SCRIPT);
       Path nmPrivateTokensPath = dirsHandler.getLocalPathForWrite(
-              getContainerPrivateDir(appIdStr, containerIdStr) + Path.SEPARATOR
-                  + String.format(ContainerLocalizer.TOKEN_FILE_NAME_FMT,
-                  containerIdStr));
+          getContainerPrivateDir(appIdStr, containerIdStr) + Path.SEPARATOR
+              + String.format(ContainerLocalizer.TOKEN_FILE_NAME_FMT,
+              containerIdStr));
       Path nmPrivateClasspathJarDir = dirsHandler.getLocalPathForWrite(
-              getContainerPrivateDir(appIdStr, containerIdStr));
+          getContainerPrivateDir(appIdStr, containerIdStr));
       DataOutputStream containerScriptOutStream = null;
       DataOutputStream tokensOutStream = null;
 
       // Select the working directory for the container
       Path containerWorkDir =
           dirsHandler.getLocalPathForWrite(ContainerLocalizer.USERCACHE
-              + Path.SEPARATOR + user + Path.SEPARATOR
-              + ContainerLocalizer.APPCACHE + Path.SEPARATOR + appIdStr
-              + Path.SEPARATOR + containerIdStr,
+          + Path.SEPARATOR + user + Path.SEPARATOR
+          + ContainerLocalizer.APPCACHE + Path.SEPARATOR + appIdStr
+          + Path.SEPARATOR + containerIdStr,
           LocalDirAllocator.SIZE_UNKNOWN, false);
 
       recordContainerWorkDir(containerID, containerWorkDir.toString());
@@ -246,12 +247,12 @@
         appDirs.add(new Path(appsdir, appIdStr));
       }
       containerScriptOutStream =
-          lfs.create(nmPrivateContainerScriptPath,
-              EnumSet.of(CREATE, OVERWRITE));
+        lfs.create(nmPrivateContainerScriptPath,
+            EnumSet.of(CREATE, OVERWRITE));
 
       // Set the token location too.
       environment.put(
-          ApplicationConstants.CONTAINER_TOKEN_FILE_ENV_NAME,
+        ApplicationConstants.CONTAINER_TOKEN_FILE_ENV_NAME,
           new Path(containerWorkDir,
               FINAL_CONTAINER_TOKENS_FILE).toUri().getPath());
       // Sanitize the container's environment
@@ -266,7 +267,7 @@
           .setCommands(launchContext.getCommands()).build());
       // Write out the environment
       exec.writeLaunchEnv(containerScriptOutStream, environment,
-          localResources, launchContext.getCommands(),
+        localResources, launchContext.getCommands(),
           new Path(containerLogDirs.get(0)), user);
       // /////////// End of writing out container-script
@@ -294,6 +295,14 @@
           .setUserLocalDirs(userLocalDirs)
           .setContainerLocalDirs(containerLocalDirs)
           .setContainerLogDirs(containerLogDirs).build());
+    } catch (ConfigurationException e) {
+      LOG.error("Failed to launch container due to configuration error.", e);
+      dispatcher.getEventHandler().handle(new ContainerExitEvent(
+          containerID, ContainerEventType.CONTAINER_EXITED_WITH_FAILURE, ret,
+          e.getMessage()));
+      // Mark the node as unhealthy
+      context.getNodeStatusUpdater().reportException(e);
+      return ret;
     } catch (Throwable e) {
       LOG.warn("Failed to launch container.", e);
       dispatcher.getEventHandler().handle(new ContainerExitEvent(
@@ -396,7 +405,8 @@
   }
 
   @SuppressWarnings("unchecked")
-  protected int launchContainer(ContainerStartContext ctx) throws IOException {
+  protected int launchContainer(ContainerStartContext ctx)
+      throws IOException, ConfigurationException {
     ContainerId containerId = container.getContainerId();
     if (container.isMarkedForKilling()) {
       LOG.info("Container " + containerId + " not launched as it has already "
Index: hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerRelaunch.java
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerRelaunch.java	(revision cb97a1911c0df3528c49aa0ba96e7bc6233d630a)
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerRelaunch.java	(revision c3e97212662de4b9242fd179409a8ea93a030bde)
@@ -25,6 +25,7 @@
 import org.apache.hadoop.yarn.api.records.ContainerExitStatus;
 import org.apache.hadoop.yarn.api.records.ContainerId;
 import org.apache.hadoop.yarn.event.Dispatcher;
+import org.apache.hadoop.yarn.exceptions.ConfigurationException;
 import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor;
 import org.apache.hadoop.yarn.server.nodemanager.Context;
 import org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService;
@@ -115,6 +116,14 @@
           .setContainerLocalDirs(containerLocalDirs)
           .setContainerLogDirs(containerLogDirs)
           .build());
+    } catch (ConfigurationException e) {
+      LOG.error("Failed to launch container due to configuration error.", e);
+      dispatcher.getEventHandler().handle(new ContainerExitEvent(
+          containerId, ContainerEventType.CONTAINER_EXITED_WITH_FAILURE, ret,
+          e.getMessage()));
+      // Mark the node as unhealthy
+      getContext().getNodeStatusUpdater().reportException(e);
+      return ret;
     } catch (Throwable e) {
       LOG.warn("Failed to relaunch container.", e);
       dispatcher.getEventHandler().handle(new ContainerExitEvent(
Index: hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestDefaultContainerExecutor.java
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestDefaultContainerExecutor.java	(revision cb97a1911c0df3528c49aa0ba96e7bc6233d630a)
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestDefaultContainerExecutor.java	(revision 6f7872e99f5be813c74493dd204e14355049659d)
@@ -61,6 +61,7 @@
 import org.apache.hadoop.yarn.api.records.ContainerId;
 import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.exceptions.ConfigurationException;
 import org.apache.hadoop.yarn.exceptions.YarnException;
 import org.apache.hadoop.yarn.server.nodemanager.api.LocalizationProtocol;
 import org.apache.hadoop.yarn.server.nodemanager.api.ResourceLocalizationSpec;
@@ -226,7 +227,7 @@
 
   @Test
   public void testContainerLaunchError()
-      throws IOException, InterruptedException {
+      throws IOException, InterruptedException, ConfigurationException {
 
     if (Shell.WINDOWS) {
       BASE_TMP_PATH =
Index: hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutor.java
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutor.java	(revision cb97a1911c0df3528c49aa0ba96e7bc6233d630a)
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutor.java	(revision 6f7872e99f5be813c74493dd204e14355049659d)
@@ -56,6 +56,7 @@
 import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
 import org.apache.hadoop.yarn.api.records.Resource;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.exceptions.ConfigurationException;
 import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor.Signal;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.ContainerLocalizer;
@@ -305,11 +306,13 @@
     return cId;
   }
 
-  private int runAndBlock(String... cmd) throws IOException {
+  private int runAndBlock(String... cmd)
+      throws IOException, ConfigurationException {
     return runAndBlock(getNextContainerId(), cmd);
   }
 
-  private int runAndBlock(ContainerId cId, String... cmd) throws IOException {
+  private int runAndBlock(ContainerId cId, String... cmd)
+      throws IOException, ConfigurationException {
     String appId = "APP_" + getNextId();
     Container container = mock(Container.class);
     ContainerLaunchContext context = mock(ContainerLaunchContext.class);
@@ -448,7 +451,7 @@
       public void run() {
         try {
           runAndBlock(sleepId, "sleep", "100");
-        } catch (IOException e) {
+        } catch (IOException|ConfigurationException e) {
           LOG.warn("Caught exception while running sleep", e);
         }
       };
Index: hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/TestContainerLaunch.java
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/TestContainerLaunch.java	(revision cb97a1911c0df3528c49aa0ba96e7bc6233d630a)
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/TestContainerLaunch.java	(revision 0aa95603aefc9ff5f704b0c22f53b7f09f70bbee)
@@ -20,11 +20,9 @@
 import static org.apache.hadoop.test.PlatformAssumptions.assumeWindows;
 import static org.apache.hadoop.test.PlatformAssumptions.assumeNotWindows;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertThat;
-import static org.junit.Assert.fail;
-import static org.mockito.Mockito.mock;
-import static org.mockito.Mockito.when;
+import static org.junit.Assert.*;
+import static org.mockito.Matchers.any;
+import static org.mockito.Mockito.*;
 
 import java.io.BufferedReader;
 import java.io.File;
@@ -47,6 +45,7 @@
 import java.util.jar.JarFile;
 import java.util.jar.Manifest;
 
+import com.google.common.collect.Lists;
 import org.apache.commons.codec.binary.Base64;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileUtil;
@@ -82,17 +81,17 @@
 import org.apache.hadoop.yarn.event.Dispatcher;
 import org.apache.hadoop.yarn.event.Event;
 import org.apache.hadoop.yarn.event.EventHandler;
+import org.apache.hadoop.yarn.exceptions.ConfigurationException;
 import org.apache.hadoop.yarn.security.ContainerTokenIdentifier;
 import org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus;
 import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor;
 import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor.ExitCode;
-import org.apache.hadoop.yarn.server.nodemanager.Context;
 import org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor;
 import org.apache.hadoop.yarn.server.nodemanager.NodeManager.NMContext;
+import org.apache.hadoop.yarn.server.nodemanager.NodeStatusUpdater;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.BaseContainerManagerTest;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
-import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerEvent;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerEventType;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerExitEvent;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch.ShellScriptBuilder;
@@ -115,7 +114,7 @@
 public class TestContainerLaunch extends BaseContainerManagerTest {
 
   private static final String INVALID_JAVA_HOME = "/no/jvm/here";
-  private Context distContext =
+  private NMContext distContext =
       new NMContext(new NMContainerTokenSecretManager(conf),
           new NMTokenSecretManagerInNM(), null,
           new ApplicationACLsManager(conf), new NMNullStateStoreService(),
@@ -1480,4 +1479,54 @@
       }
     }
   }
+
+  /**
+   * Test container launch fault.
+   * @throws Exception
+   */
+  @Test
+  public void testContainerLaunchOnConfigurationError() throws Exception {
+    Dispatcher dispatcher = mock(Dispatcher.class);
+    EventHandler handler = mock(EventHandler.class);
+    when(dispatcher.getEventHandler()).thenReturn(handler);
+    Application app = mock(Application.class);
+    ApplicationId appId = mock(ApplicationId.class);
+    when(appId.toString()).thenReturn("1");
+    when(app.getAppId()).thenReturn(appId);
+    Container container = mock(Container.class);
+    ContainerId id = mock(ContainerId.class);
+    when(id.toString()).thenReturn("1");
+    when(container.getContainerId()).thenReturn(id);
+    when(container.getUser()).thenReturn("user");
+    ContainerLaunchContext clc = mock(ContainerLaunchContext.class);
+    when(clc.getCommands()).thenReturn(Lists.newArrayList());
+    when(container.getLaunchContext()).thenReturn(clc);
+    Credentials credentials = mock(Credentials.class);
+    when(container.getCredentials()).thenReturn(credentials);
+
+    // Configuration errors should result in node shutdown...
+    ContainerExecutor returnConfigError = mock(ContainerExecutor.class);
+    when(returnConfigError.launchContainer(any())).
+        thenThrow(new ConfigurationException("Mock configuration error"));
+    ContainerLaunch launchConfigError = new ContainerLaunch(
+        distContext, conf, dispatcher,
+        returnConfigError, app, container, dirsHandler, containerManager);
+    NodeStatusUpdater updater = mock(NodeStatusUpdater.class);
+    distContext.setNodeStatusUpdater(updater);
+    launchConfigError.call();
+    verify(updater, atLeastOnce()).reportException(any());
+
+    // ... any other error should continue.
+    ContainerExecutor returnOtherError = mock(ContainerExecutor.class);
+    when(returnOtherError.launchContainer(any())).
+        thenThrow(new IOException("Mock configuration error"));
+    ContainerLaunch launchOtherError = new ContainerLaunch(
+        distContext, conf, dispatcher,
+        returnOtherError, app, container, dirsHandler, containerManager);
+    NodeStatusUpdater updaterNoCall = mock(NodeStatusUpdater.class);
+    distContext.setNodeStatusUpdater(updaterNoCall);
+    launchOtherError.call();
+    verify(updaterNoCall, never()).reportException(any());
+
+  }
 }
Index: hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainersMonitorResourceChange.java
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainersMonitorResourceChange.java	(revision cb97a1911c0df3528c49aa0ba96e7bc6233d630a)
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainersMonitorResourceChange.java	(revision 6f7872e99f5be813c74493dd204e14355049659d)
@@ -33,6 +33,7 @@
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.hadoop.yarn.event.AsyncDispatcher;
 import org.apache.hadoop.yarn.event.EventHandler;
+import org.apache.hadoop.yarn.exceptions.ConfigurationException;
 import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor;
 import org.apache.hadoop.yarn.server.nodemanager.Context;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
@@ -80,7 +81,7 @@
     }
     @Override
     public int launchContainer(ContainerStartContext ctx) throws
-        IOException {
+        IOException, ConfigurationException {
       return 0;
     }
     @Override
Index: hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/TestContainerSchedulerQueuing.java
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/TestContainerSchedulerQueuing.java	(revision cb97a1911c0df3528c49aa0ba96e7bc6233d630a)
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/TestContainerSchedulerQueuing.java	(revision 6f7872e99f5be813c74493dd204e14355049659d)
@@ -38,6 +38,7 @@
 import org.apache.hadoop.yarn.api.records.ContainerStatus;
 import org.apache.hadoop.yarn.api.records.ExecutionType;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.exceptions.ConfigurationException;
 import org.apache.hadoop.yarn.exceptions.YarnException;
 import org.apache.hadoop.yarn.security.NMTokenIdentifier;
 import org.apache.hadoop.yarn.server.api.records.ContainerQueuingLimit;
@@ -126,7 +127,8 @@
   protected ContainerExecutor createContainerExecutor() {
     DefaultContainerExecutor exec = new DefaultContainerExecutor() {
       @Override
-      public int launchContainer(ContainerStartContext ctx) throws IOException {
+      public int launchContainer(ContainerStartContext ctx)
+          throws IOException, ConfigurationException {
         if (delayContainers) {
           try {
             Thread.sleep(10000);
Index: hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeHealthCheckerService.java
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeHealthCheckerService.java	(revision 6f7872e99f5be813c74493dd204e14355049659d)
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeHealthCheckerService.java	(revision c3e97212662de4b9242fd179409a8ea93a030bde)
@@ -18,10 +18,14 @@
 
 package org.apache.hadoop.yarn.server.nodemanager;
 
+import com.google.common.base.Joiner;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.service.CompositeService;
 import org.apache.hadoop.util.NodeHealthScriptRunner;
 
+import java.util.Arrays;
+import java.util.Collections;
+
 /**
  * The class which provides functionality of checking the health of the node and
  * reporting back to the service for which the health checker has been asked to
@@ -31,6 +35,8 @@
 
   private NodeHealthScriptRunner nodeHealthScriptRunner;
   private LocalDirsHandlerService dirsHandler;
+  private Exception nodeHealthException;
+  private long nodeHealthExceptionReportTime;
 
   static final String SEPARATOR = ";";
 
@@ -39,6 +45,8 @@
     super(NodeHealthCheckerService.class.getName());
     nodeHealthScriptRunner = scriptRunner;
     dirsHandler = dirHandlerService;
+    nodeHealthException = null;
+    nodeHealthExceptionReportTime = 0;
   }
 
   @Override
@@ -50,37 +58,48 @@
     super.serviceInit(conf);
   }
 
+  private String nullIfEmpty(String in) {
+    if (in == null || in.equals("")) {
+      return null;
+    } else {
+      return in;
+    }
+  }
+
   /**
    * @return the reporting string of health of the node
    */
   String getHealthReport() {
-    String scriptReport = (nodeHealthScriptRunner == null) ? ""
-        : nodeHealthScriptRunner.getHealthReport();
-    if (scriptReport.equals("")) {
-      return dirsHandler.getDisksHealthReport(false);
-    } else {
-      return scriptReport.concat(SEPARATOR + dirsHandler.getDisksHealthReport(false));
-    }
+    String scriptReport = nullIfEmpty(nodeHealthScriptRunner == null ? null :
+        nodeHealthScriptRunner.getHealthReport());
+    String diskReport =
+        nullIfEmpty(dirsHandler.getDisksHealthReport(false));
+    String exceptionReport = nullIfEmpty(nodeHealthException == null ? null :
+        nodeHealthException.getMessage());
+
+    return Joiner.on(SEPARATOR).skipNulls()
+        .join(scriptReport, diskReport, exceptionReport);
   }
 
   /**
    * @return true if the node is healthy
    */
   boolean isHealthy() {
-    boolean scriptHealthStatus = (nodeHealthScriptRunner == null) ? true
-        : nodeHealthScriptRunner.isHealthy();
-    return scriptHealthStatus && dirsHandler.areDisksHealthy();
+    boolean scriptHealthy = nodeHealthScriptRunner == null ||
+        nodeHealthScriptRunner.isHealthy();
+    return nodeHealthException == null &&
+        scriptHealthy && dirsHandler.areDisksHealthy();
   }
 
   /**
    * @return when the last time the node health status is reported
    */
   long getLastHealthReportTime() {
-    long diskCheckTime = dirsHandler.getLastDisksCheckTime();
-    long lastReportTime = (nodeHealthScriptRunner == null)
-        ? diskCheckTime : Math.max(nodeHealthScriptRunner.getLastReportedTime(), diskCheckTime);
-    return lastReportTime;
+    return Collections.max(Arrays.asList(
+        dirsHandler.getLastDisksCheckTime(),
+        nodeHealthScriptRunner == null ? 0 :
+            nodeHealthScriptRunner.getLastReportedTime(),
+        nodeHealthExceptionReportTime));
   }
 
   /**
@@ -96,4 +115,13 @@
   NodeHealthScriptRunner getNodeHealthScriptRunner() {
     return nodeHealthScriptRunner;
   }
+
+  /**
+   * Report an exception to mark the node as unhealthy.
+   * @param ex the exception that makes the node unhealthy
+   */
+  void reportException(Exception ex) {
+    nodeHealthException = ex;
+    nodeHealthExceptionReportTime = System.currentTimeMillis();
+  }
 }
Index: hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeHealthService.java
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeHealthService.java	(revision d012be42e39736989790bef8259c68bddb00231a)
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeHealthService.java	(revision 0aa95603aefc9ff5f704b0c22f53b7f09f70bbee)
@@ -23,6 +23,8 @@
 import java.io.IOException;
 import java.io.PrintWriter;
 
+import com.google.common.base.Joiner;
+import com.google.common.base.Strings;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
@@ -165,8 +167,11 @@
         healthStatus.getIsNodeHealthy());
     Assert.assertTrue("Node script time out message not propagated",
         healthStatus.getHealthReport().equals(
-            NodeHealthScriptRunner.NODE_HEALTH_SCRIPT_TIMED_OUT_MSG
-                + NodeHealthCheckerService.SEPARATOR
-                + nodeHealthChecker.getDiskHandler().getDisksHealthReport(false)));
+            Joiner.on(NodeHealthCheckerService.SEPARATOR).skipNulls().join(
+                NodeHealthScriptRunner.NODE_HEALTH_SCRIPT_TIMED_OUT_MSG,
+                Strings.emptyToNull(
+                    nodeHealthChecker.getDiskHandler()
+                        .getDisksHealthReport(false))
+            )));
   }
 }