diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsHandlerImpl.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsHandlerImpl.java index 36bd468..91419ad 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsHandlerImpl.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsHandlerImpl.java @@ -136,26 +136,71 @@ private void initializeControllerPaths() throws ResourceHandlerException { Map ret = new HashMap<>(); for (CGroupController controller : CGroupController.values()) { - String name = controller.getName(); - String controllerPath = findControllerInMtab(name, parsedMtab); + String subsystemName = controller.getName(); + String controllerPath = findControllerInMtab(subsystemName, parsedMtab); if (controllerPath != null) { - File f = new File(controllerPath + "/" + cGroupPrefix); + // Check permissions to cgroup hierarchy and + // create YARN cgroup if it does not exist, yet + File rootHierarchy = new File(controllerPath); + File yarnHierarchy = new File(controllerPath, cGroupPrefix); + + if (!rootHierarchy.exists()) { + StringBuilder error = + getErrorWithDetails( + "Cgroups mount point does not exist or not accessible", + mtab, + subsystemName, + rootHierarchy.getAbsolutePath() + ); + LOG.error(error.toString()); + throw new ResourceHandlerException(error.toString()); + } + if (!yarnHierarchy.exists()) { + LOG.info("Yarn control group does not exist. 
Creating " + + yarnHierarchy.getAbsolutePath()); + try { + if (!yarnHierarchy.mkdir()) { + // Unexpected: we just checked that it was missing + StringBuilder error = + getErrorWithDetails( + "Unexpected: Cannot create yarn cgroup", + mtab, + subsystemName, + yarnHierarchy.getAbsolutePath() + ); + LOG.error(error.toString()); + throw new ResourceHandlerException(error.toString()); + } + } catch (SecurityException e) { + StringBuilder error = + getErrorWithDetails( + "No permissions to create yarn cgroup", + mtab, + subsystemName, + yarnHierarchy.getAbsolutePath() + ); + LOG.error(error.toString(), e); + throw new ResourceHandlerException(error.toString()); + } + } - if (FileUtil.canWrite(f)) { - ret.put(controller, controllerPath); + if (!FileUtil.canWrite(yarnHierarchy)) { + StringBuilder error = + getErrorWithDetails( + "Yarn control group not writable", + mtab, + subsystemName, + yarnHierarchy.getAbsolutePath() + ); + LOG.error(error.toString()); + throw new ResourceHandlerException(error.toString()); } else { - String error = - new StringBuffer("Mount point Based on mtab file: ") - .append(mtab) - .append(". Controller mount point not writable for: ") - .append(name).toString(); - - LOG.error(error); - throw new ResourceHandlerException(error); + ret.put(controller, controllerPath); } } else { - LOG.warn("Controller not mounted but automount disabled: " + name); + LOG.warn("Controller not mounted but automount disabled: " + + subsystemName); } } return ret; @@ -166,6 +211,31 @@ private void initializeControllerPaths() throws ResourceHandlerException { } } + /** + * Creates an actionable error message for mtab parsing. 
+ * @param errorMessage message to use + * @param mtab mtab file path + * @param subsystemName cgroup subsystem + * @param yarnCgroupPath cgroup path that failed + * @return a string builder that can be appended by the caller + */ + static private StringBuilder getErrorWithDetails( + String errorMessage, + String mtab, + String subsystemName, + String yarnCgroupPath) { + return new StringBuilder() + .append(errorMessage) + .append(" Mount point based on mtab file: ") + .append(mtab) + .append(" Subsystem:") + .append(subsystemName) + .append(" User:") + .append(System.getProperty("user.name")) + .append(" Path: ") + .append(yarnCgroupPath); + } + /* We are looking for entries of the form: * none /cgroup/path/mem cgroup rw,memory 0 0 * @@ -214,11 +284,21 @@ private void initializeControllerPaths() throws ResourceHandlerException { return ret; } + /** + * Find the hierarchy of the subsystem. + * The kernel ensures that a subsystem can only be part of a single hierarchy. + * The subsystem can be part of multiple mount points, if they belong to the + * same hierarchy. + * @param controller subsystem like cpu, cpuset, etc... 
+ * @param entries map of paths to mount options + * @return the first mount path that has the requested subsystem + */ private static String findControllerInMtab(String controller, Map> entries) { for (Map.Entry> e : entries.entrySet()) { - if (e.getValue().contains(controller)) + if (e.getValue().contains(controller)) { return e.getKey(); + } } return null; diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestCGroupsHandlerImpl.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestCGroupsHandlerImpl.java index 76d56b4..dca408c 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestCGroupsHandlerImpl.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestCGroupsHandlerImpl.java @@ -270,9 +270,13 @@ public static File createMockMTab(File parentDir) throws IOException { return mockMtab; } - + /** + * Tests whether mtab parsing works as expected with a valid hierarchy set. 
+ * @throws Exception the test will fail + */ @Test public void testMtabParsing() throws Exception { + // Initialize mtab and cgroup dir File parentDir = new File(tmpPath); // create mock cgroup File cpuCgroupMountDir = createMockCgroupMount(parentDir, "cpu", @@ -282,9 +286,73 @@ public void testMtabParsing() throws Exception { "blkio", hierarchy); Assert.assertTrue(blkioCgroupMountDir.exists()); File mockMtabFile = createMockMTab(parentDir); + + // Run mtabs parsing Map controllerPaths = CGroupsHandlerImpl.initializeControllerPathsFromMtab( + mockMtabFile.getAbsolutePath(), hierarchy); + + // Verify + Assert.assertEquals(2, controllerPaths.size()); + Assert.assertTrue(controllerPaths + .containsKey(CGroupsHandler.CGroupController.CPU)); + Assert.assertTrue(controllerPaths + .containsKey(CGroupsHandler.CGroupController.BLKIO)); + String cpuDir = controllerPaths.get(CGroupsHandler.CGroupController.CPU); + String blkioDir = + controllerPaths.get(CGroupsHandler.CGroupController.BLKIO); + Assert.assertEquals(parentDir.getAbsolutePath() + "/cpu", cpuDir); + Assert.assertEquals(parentDir.getAbsolutePath() + "/blkio", blkioDir); + + // Test that a missing yarn hierarchy will be created automatically + Assert.assertTrue(cpuCgroupMountDir.delete()); + try { + Map ret = + CGroupsHandlerImpl.initializeControllerPathsFromMtab( + mockMtabFile.getAbsolutePath(), hierarchy); + File dirCreated = new File(ret.get(CGroupsHandler + .CGroupController.CPU), hierarchy); + Assert.assertTrue(dirCreated.exists() && dirCreated.canWrite()); + } catch (Exception e) { + Assert.fail("An empty path should not have been created"); + } + + // Test that an inaccessible yarn hierarchy results in an exception + Assert.assertTrue(cpuCgroupMountDir.setWritable(false)); + try { + CGroupsHandlerImpl.initializeControllerPathsFromMtab( mockMtabFile.getAbsolutePath(), hierarchy); + Assert.fail("An inaccessible path should result in an exception"); + } catch (Exception e) { + Assert.assertTrue("Unexpected 
exception " + e.getClass().toString(), + e instanceof ResourceHandlerException); + } finally { + Assert.assertTrue(cpuCgroupMountDir.setWritable(true)); + } + } + + /** + * Tests whether mtab parsing works as expected with the specified hierarchy. + * @throws Exception the test will fail + */ + private void testMtabParsingWithPrefix(String myHierarchy) throws Exception { + // Initialize mtab and cgroup dir + File parentDir = new File(tmpPath); + // create mock cgroup + File cpuCgroupMountDir = createMockCgroupMount(parentDir, "cpu", + myHierarchy); + Assert.assertTrue(cpuCgroupMountDir.exists()); + File blkioCgroupMountDir = createMockCgroupMount(parentDir, + "blkio", myHierarchy); + Assert.assertTrue(blkioCgroupMountDir.exists()); + File mockMtabFile = createMockMTab(parentDir); + + // Run mtabs parsing Test that an empty prefix is handled correctly + Map controllerPaths = + CGroupsHandlerImpl.initializeControllerPathsFromMtab( + mockMtabFile.getAbsolutePath(), myHierarchy); + + // Verify Assert.assertEquals(2, controllerPaths.size()); Assert.assertTrue(controllerPaths .containsKey(CGroupsHandler.CGroupController.CPU)); @@ -297,6 +365,16 @@ public void testMtabParsing() throws Exception { Assert.assertEquals(parentDir.getAbsolutePath() + "/blkio", blkioDir); } + /** + * Tests whether mtab parsing works as expected with an empty hierarchy set. 
+ * @throws Exception the test will fail + */ + @Test + public void testMtabParsingNoPrefix() throws Exception { + testMtabParsingWithPrefix(""); + testMtabParsingWithPrefix("/"); + } + @After public void teardown() { FileUtil.fullyDelete(new File(tmpPath)); diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/NodeManagerCgroups.md hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/NodeManagerCgroups.md index 79a428d..58d52c5 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/NodeManagerCgroups.md +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/NodeManagerCgroups.md @@ -31,7 +31,7 @@ The following settings are related to setting up CGroups. These need to be set i |:---- |:---- | | `yarn.nodemanager.container-executor.class` | This should be set to "org.apache.hadoop.yarn.server.nodemanager.LinuxContainerExecutor". CGroups is a Linux kernel feature and is exposed via the LinuxContainerExecutor. | | `yarn.nodemanager.linux-container-executor.resources-handler.class` | This should be set to "org.apache.hadoop.yarn.server.nodemanager.util.CgroupsLCEResourcesHandler". Using the LinuxContainerExecutor doesn't force you to use CGroups. If you wish to use CGroups, the resource-handler-class must be set to CGroupsLCEResourceHandler. | -| `yarn.nodemanager.linux-container-executor.cgroups.hierarchy` | The cgroups hierarchy under which to place YARN proccesses(cannot contain commas). If yarn.nodemanager.linux-container-executor.cgroups.mount is false (that is, if cgroups have been pre-configured), then this cgroups hierarchy must already exist | +| `yarn.nodemanager.linux-container-executor.cgroups.hierarchy` | The cgroups hierarchy under which to place YARN processes (cannot contain commas). If this hierarchy does not yet exist under a mounted controller, the NodeManager attempts to create it; the NodeManager user must have write access to it. | | `yarn.nodemanager.linux-container-executor.cgroups.mount` | Whether the LCE should attempt to mount cgroups if not found - can be true or false.
| | `yarn.nodemanager.linux-container-executor.cgroups.mount-path` | Where the LCE should attempt to mount cgroups if not found. Common locations include /sys/fs/cgroup and /cgroup; the default location can vary depending on the Linux distribution in use. This path must exist before the NodeManager is launched. Only used when the LCE resources handler is set to the CgroupsLCEResourcesHandler, and yarn.nodemanager.linux-container-executor.cgroups.mount is true. A point to note here is that the container-executor binary will try to mount the path specified + "/" + the subsystem. In our case, since we are trying to limit CPU the binary tries to mount the path specified + "/cpu" and that's the path it expects to exist. | | `yarn.nodemanager.linux-container-executor.group` | The Unix group of the NodeManager. It should match the setting in "container-executor.cfg". This configuration is required for validating the secure access of the container-executor binary. |