diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
index 9b7dda5..172566f 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
@@ -743,6 +743,33 @@
    */
   public static final float DEFAULT_NM_MIN_HEALTHY_DISKS_FRACTION
     = 0.25F;
+
+  /**
+   * The maximum percentage of disk space that can be used after which a
+   * disk is marked as offline. Values can range from 0.0 to 100.0.
+   * If the value is greater than or equal to 100, NM will check for full disk.
+   * This applies to nm-local-dirs and nm-log-dirs.
+   */
+  public static final String NM_HEALTHY_DISKS_MAX_SPACE_UTILIZATION_PERC =
+    NM_PREFIX + "disk-health-checker.max-space-utilization-perc";
+  /**
+   * By default, 100% of the disk can be used before it is marked as offline.
+   */
+  public static final float DEFAULT_NM_HEALTHY_DISKS_MAX_SPACE_UTILIZATION_PERC
+    = 100.0F;
+
+  /**
+   * The minimum space that must be available on a local dir
+   * for it to be used.
+   * This applies to nm-local-dirs and nm-log-dirs.
+   */
+  public static final String NM_HEALTHY_DISKS_MIN_FREE_SPACE_MB =
+    NM_PREFIX + "disk-health-checker.min-free-space-mb";
+  /**
+   * By default, all of the disk can be used before it is marked as offline.
+   */
+  public static final long DEFAULT_NM_HEALTHY_DISKS_MIN_FREE_SPACE_MB
+    = 0;
 
   /** Frequency of running node health script.*/
   public static final String NM_HEALTH_CHECK_INTERVAL_MS =
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DirectoryCollection.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DirectoryCollection.java
index 10362d2..4940520 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DirectoryCollection.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DirectoryCollection.java
@@ -22,6 +22,7 @@
 import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.util.Collections;
+import java.util.HashSet;
 import java.util.List;
 import java.util.concurrent.CopyOnWriteArrayList;
 
@@ -43,10 +44,72 @@
   private List<String> localDirs;
   private List<String> failedDirs;
   private int numFailures;
-  
+
+  private float diskUtilizationPercCutoff;
+  private long diskUtilizationSpaceCutoff;
+
+  /**
+   * Create collection for the directories specified. No check for free space.
+   * @param dirs directories to be monitored
+   */
   public DirectoryCollection(String[] dirs) {
+    this(dirs, 100.0F, 0);
+  }
+
+  /**
+   * Create collection for the directories specified.
+   * Users must specify the maximum percentage of disk utilization allowed.
+   * Minimum amount of disk space is not checked.
+   * @param dirs directories to be monitored
+   * @param utilizationPercCutOff percentage of disk that
+   * can be used before the dir is taken out of the good dirs list
+   *
+   */
+  public DirectoryCollection(String[] dirs,
+      float utilizationPercCutOff) {
+    this(dirs, utilizationPercCutOff, 0);
+  }
+
+  /**
+   * Create collection for the directories specified.
+   * Users must specify the minimum amount of free space that must be available
+   * for the dir to be used.
+   * @param dirs directories to be monitored
+   * @param utilizationSpaceCutOff minimum space, in MB, that must be
+   * available on the disk for the dir to be marked as good
+   *
+   */
+  public DirectoryCollection(String[] dirs,
+      long utilizationSpaceCutOff) {
+    this(dirs, 100.0F, utilizationSpaceCutOff);
+  }
+
+  /**
+   * Create collection for the directories specified.
+   * Users must specify the maximum percentage of disk utilization allowed
+   * and the minimum amount of free space that must be available
+   * for the dir to be used. If either check fails the dir is removed
+   * from the good dirs list.
+   * @param dirs directories to be monitored
+   * @param utilizationPercCutOff percentage of disk that
+   * can be used before the dir is taken out of the good dirs list
+   * @param utilizationSpaceCutOff minimum space, in MB, that must be
+   * available on the disk for the dir to be marked as good
+   *
+   */
+  public DirectoryCollection(String[] dirs,
+      float utilizationPercCutOff,
+      long utilizationSpaceCutOff) {
     localDirs = new CopyOnWriteArrayList<String>(dirs);
     failedDirs = new CopyOnWriteArrayList<String>();
+    // Clamp the cutoffs into their valid ranges: [0.0, 100.0] for the
+    // used-space percentage and [0, Long.MAX_VALUE] for the free-space MB.
+    diskUtilizationPercCutoff =
+        utilizationPercCutOff < 0.0F ? 0.0F : utilizationPercCutOff;
+    diskUtilizationPercCutoff =
+        utilizationPercCutOff > 100.0F ? 100.0F : diskUtilizationPercCutoff;
+    diskUtilizationSpaceCutoff =
+        utilizationSpaceCutOff < 0 ? 0 : utilizationSpaceCutOff;
   }
 
   /**
@@ -103,19 +166,58 @@ synchronized boolean createNonExistentDirs(FileContext localFs,
    */
   synchronized boolean checkDirs() {
     int oldNumFailures = numFailures;
+    HashSet<String> checkFailedDirs = new HashSet<String>();
     for (final String dir : localDirs) {
       try {
-        DiskChecker.checkDir(new File(dir));
+        File testDir = new File(dir);
+        DiskChecker.checkDir(testDir);
+        if(checkDiskPercentageLimit(testDir)) {
+          LOG.warn("Directory " + dir
+              + " error, used space above threshold of "
+              + diskUtilizationPercCutoff
+              + "%, removing from the list of valid directories.");
+          checkFailedDirs.add(dir);
+        }
+        else if(checkDiskSpaceLimit(testDir)) {
+          LOG.warn("Directory " + dir
+              + " error, free space below limit of "
+              + diskUtilizationSpaceCutoff
+              + "MB, removing from the list of valid directories.");
+          checkFailedDirs.add(dir);
+        }
       } catch (DiskErrorException de) {
         LOG.warn("Directory " + dir + " error " +
-            de.getMessage() + ", removing from the list of valid directories.");
-        localDirs.remove(dir);
-        failedDirs.add(dir);
-        numFailures++;
+            de.getMessage() +
+            ", removing from the list of valid directories.");
+        checkFailedDirs.add(dir);
       }
     }
+    for(String dir: checkFailedDirs) {
+      localDirs.remove(dir);
+      failedDirs.add(dir);
+      numFailures++;
+    }
     return numFailures > oldNumFailures;
   }
+
+  private boolean checkDiskPercentageLimit(File dir) {
+    float freePerc = 100 *
+        (dir.getUsableSpace()/(float)dir.getTotalSpace());
+    float usedPerc = 100.0F - freePerc;
+    if(usedPerc > diskUtilizationPercCutoff ||
+        usedPerc >= 100.0F) {
+      return true;
+    }
+    return false;
+  }
+
+  private boolean checkDiskSpaceLimit(File dir) {
+    long freeSpace = dir.getUsableSpace()/(1024*1024);
+    if(freeSpace < this.diskUtilizationSpaceCutoff) {
+      return true;
+    }
+    return false;
+  }
 
   private void createDir(FileContext localFs, Path dir, FsPermission perm)
       throws IOException {
@@ -132,4 +234,26 @@ private void createDir(FileContext localFs, Path dir, FsPermission perm)
       }
     }
   }
+
+  public float getDiskUtilizationPercCutoff() {
+    return diskUtilizationPercCutoff;
+  }
+
+  public void setDiskUtilizationPercCutoff(float diskUtilizationPercCutoff) {
+    diskUtilizationPercCutoff =
+        diskUtilizationPercCutoff < 0.0F ? 0.0F : diskUtilizationPercCutoff;
+    diskUtilizationPercCutoff =
+        diskUtilizationPercCutoff > 100.0F ? 100.0F : diskUtilizationPercCutoff;
+    this.diskUtilizationPercCutoff = diskUtilizationPercCutoff;
+  }
+
+  public long getDiskUtilizationSpaceCutoff() {
+    return diskUtilizationSpaceCutoff;
+  }
+
+  public void setDiskUtilizationSpaceCutoff(long diskUtilizationSpaceCutoff) {
+    diskUtilizationSpaceCutoff =
+        diskUtilizationSpaceCutoff < 0 ? 0 : diskUtilizationSpaceCutoff;
+    this.diskUtilizationSpaceCutoff = diskUtilizationSpaceCutoff;
+  }
 }
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LocalDirsHandlerService.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LocalDirsHandlerService.java
index b9b46eb..8a13756 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LocalDirsHandlerService.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LocalDirsHandlerService.java
@@ -89,10 +89,18 @@
   private final class MonitoringTimerTask extends TimerTask {
 
     public MonitoringTimerTask(Configuration conf) throws YarnRuntimeException {
+      float maxUsedSpacePerc = conf.getFloat(
+          YarnConfiguration.NM_HEALTHY_DISKS_MAX_SPACE_UTILIZATION_PERC,
+          YarnConfiguration.DEFAULT_NM_HEALTHY_DISKS_MAX_SPACE_UTILIZATION_PERC);
+      long minFreeSpaceMB = conf.getLong(
+          YarnConfiguration.NM_HEALTHY_DISKS_MIN_FREE_SPACE_MB,
+          YarnConfiguration.DEFAULT_NM_HEALTHY_DISKS_MIN_FREE_SPACE_MB);
       localDirs = new DirectoryCollection(
-          validatePaths(conf.getTrimmedStrings(YarnConfiguration.NM_LOCAL_DIRS)));
+          validatePaths(conf.getTrimmedStrings(YarnConfiguration.NM_LOCAL_DIRS)),
+          maxUsedSpacePerc, minFreeSpaceMB);
       logDirs = new DirectoryCollection(
-          validatePaths(conf.getTrimmedStrings(YarnConfiguration.NM_LOG_DIRS)));
+          validatePaths(conf.getTrimmedStrings(YarnConfiguration.NM_LOG_DIRS)),
+          maxUsedSpacePerc, minFreeSpaceMB);
       localDirsAllocator = new LocalDirAllocator(
           YarnConfiguration.NM_LOCAL_DIRS);
       logDirsAllocator = new LocalDirAllocator(YarnConfiguration.NM_LOG_DIRS);
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestDirectoryCollection.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestDirectoryCollection.java
index 4ab61c9..fa0a386 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestDirectoryCollection.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestDirectoryCollection.java
@@ -30,6 +30,7 @@
 import org.apache.hadoop.fs.FileUtil;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.permission.FsPermission;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.junit.AfterClass;
 import org.junit.Assert;
 import org.junit.BeforeClass;
@@ -55,8 +56,11 @@ public static void teardown() {
   @Test
   public void testConcurrentAccess() throws IOException {
     // Initialize DirectoryCollection with a file instead of a directory
+    Configuration conf = new Configuration();
     String[] dirs = {testFile.getPath()};
-    DirectoryCollection dc = new DirectoryCollection(dirs);
+    DirectoryCollection dc = new DirectoryCollection(dirs,
+      conf.getFloat(YarnConfiguration.NM_HEALTHY_DISKS_MAX_SPACE_UTILIZATION_PERC,
+      YarnConfiguration.DEFAULT_NM_HEALTHY_DISKS_MAX_SPACE_UTILIZATION_PERC));
 
     // Create an iterator before checkDirs is called to reliable test case
     List<String> list = dc.getGoodDirs();
@@ -88,7 +92,9 @@ public void testCreateDirectories() throws IOException {
     localFs.setPermission(pathC, permDirC);
 
     String[] dirs = { dirA, dirB, dirC };
-    DirectoryCollection dc = new DirectoryCollection(dirs);
+    DirectoryCollection dc = new DirectoryCollection(dirs,
+      conf.getFloat(YarnConfiguration.NM_HEALTHY_DISKS_MAX_SPACE_UTILIZATION_PERC,
+      YarnConfiguration.DEFAULT_NM_HEALTHY_DISKS_MAX_SPACE_UTILIZATION_PERC));
     FsPermission defaultPerm = FsPermission.getDefault()
       .applyUMask(new FsPermission((short)FsPermission.DEFAULT_UMASK));
     boolean createResult = dc.createNonExistentDirs(localFs, defaultPerm);
@@ -104,4 +110,84 @@ public void testCreateDirectories() throws IOException {
     Assert.assertEquals("existing local directory permissions modified",
         permDirC, status.getPermission());
   }
+
+  @Test
+  public void testDiskSpaceUtilizationLimit() throws IOException {
+
+    String dirA = new File(testDir, "dirA").getPath();
+    String[] dirs = { dirA };
+    DirectoryCollection dc = new DirectoryCollection(dirs, 0.0F);
+    dc.checkDirs();
+    Assert.assertEquals(0, dc.getGoodDirs().size());
+    Assert.assertEquals(1, dc.getFailedDirs().size());
+
+    dc = new DirectoryCollection(dirs, 100.0F);
+    dc.checkDirs();
+    Assert.assertEquals(1, dc.getGoodDirs().size());
+    Assert.assertEquals(0, dc.getFailedDirs().size());
+
+    dc = new DirectoryCollection(dirs, testDir.getTotalSpace()/(1024*1024));
+    dc.checkDirs();
+    Assert.assertEquals(0, dc.getGoodDirs().size());
+    Assert.assertEquals(1, dc.getFailedDirs().size());
+
+    dc = new DirectoryCollection(dirs, 100.0F, 0);
+    dc.checkDirs();
+    Assert.assertEquals(1, dc.getGoodDirs().size());
+    Assert.assertEquals(0, dc.getFailedDirs().size());
+  }
+
+  @Test
+  public void testCutoffSetters() {
+
+    String[] dirs = { "dir" };
+    DirectoryCollection dc = new DirectoryCollection(dirs, 0.0F, 100);
+    float testValue = 57.5F;
+    float delta = 0.1F;
+    dc.setDiskUtilizationPercCutoff(testValue);
+    Assert.assertEquals(testValue, dc.getDiskUtilizationPercCutoff(), delta);
+    testValue = -57.5F;
+    dc.setDiskUtilizationPercCutoff(testValue);
+    Assert.assertEquals(0.0F, dc.getDiskUtilizationPercCutoff(), delta);
+    testValue = 157.5F;
+    dc.setDiskUtilizationPercCutoff(testValue);
+    Assert.assertEquals(100.0F, dc.getDiskUtilizationPercCutoff(), delta);
+
+    long spaceValue = 57;
+    dc.setDiskUtilizationSpaceCutoff(spaceValue);
+    Assert.assertEquals(spaceValue, dc.getDiskUtilizationSpaceCutoff());
+    spaceValue = -57;
+    dc.setDiskUtilizationSpaceCutoff(spaceValue);
+    Assert.assertEquals(0, dc.getDiskUtilizationSpaceCutoff());
+  }
+
+  @Test
+  public void testConstructors() {
+
+    String[] dirs = { "dir" };
+    float delta = 0.1F;
+    DirectoryCollection dc = new DirectoryCollection(dirs);
+    Assert.assertEquals(100.0F, dc.getDiskUtilizationPercCutoff(), delta);
+    Assert.assertEquals(0, dc.getDiskUtilizationSpaceCutoff());
+
+    dc = new DirectoryCollection(dirs, 57.5F);
+    Assert.assertEquals(57.5F, dc.getDiskUtilizationPercCutoff(), delta);
+    Assert.assertEquals(0, dc.getDiskUtilizationSpaceCutoff());
+
+    dc = new DirectoryCollection(dirs, 57);
+    Assert.assertEquals(100.0F, dc.getDiskUtilizationPercCutoff(), delta);
+    Assert.assertEquals(57, dc.getDiskUtilizationSpaceCutoff());
+
+    dc = new DirectoryCollection(dirs, 57.5F, 67);
+    Assert.assertEquals(57.5F, dc.getDiskUtilizationPercCutoff(), delta);
+    Assert.assertEquals(67, dc.getDiskUtilizationSpaceCutoff());
+
+    dc = new DirectoryCollection(dirs, -57.5F, -67);
+    Assert.assertEquals(0.0F, dc.getDiskUtilizationPercCutoff(), delta);
+    Assert.assertEquals(0, dc.getDiskUtilizationSpaceCutoff());
+
+    dc = new DirectoryCollection(dirs, 157.5F, -67);
+    Assert.assertEquals(100.0F, dc.getDiskUtilizationPercCutoff(), delta);
+    Assert.assertEquals(0, dc.getDiskUtilizationSpaceCutoff());
+  }
 }