diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DirectoryCollection.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DirectoryCollection.java index ae2a4ef1ca4..90c75778ba7 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DirectoryCollection.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DirectoryCollection.java @@ -38,6 +38,7 @@ import org.apache.commons.lang.RandomStringUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.fs.FileAlreadyExistsException; import org.apache.hadoop.fs.FileContext; import org.apache.hadoop.fs.Path; @@ -45,6 +46,7 @@ import org.apache.hadoop.util.DiskValidator; import org.apache.hadoop.util.DiskValidatorFactory; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.util.ReadWriteDiskValidatorMetrics; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.exceptions.YarnRuntimeException; @@ -259,6 +261,21 @@ void deregisterDirsChangeListener( } /** + * @return the directories that have errors - may not have appropriate permissions + * or other disk validation checks might have failed in {@link DiskValidator} + * + */ + @InterfaceStability.Evolving + List getErroredDirs() { + this.readLock.lock(); + try { + return errorDirs; + } finally { + this.readLock.unlock(); + } + } + + /** + * @return total the number of directory failures seen till now */ int getNumFailures() { diff --git 
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LocalDirsHandlerService.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LocalDirsHandlerService.java index f8cb4eee709..de31c0a0daf 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LocalDirsHandlerService.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LocalDirsHandlerService.java @@ -344,21 +344,33 @@ public String getDisksHealthReport(boolean listGoodDirs) { } StringBuilder report = new StringBuilder(); - List failedLocalDirsList = localDirs.getFailedDirs(); - List failedLogDirsList = logDirs.getFailedDirs(); + List erroredLocalDirsList = localDirs.getErroredDirs(); + List erroredLogDirsList = logDirs.getErroredDirs(); + List diskFullLocalDirsList = localDirs.getFullDirs(); + List diskFullLogDirsList = logDirs.getFullDirs(); List goodLocalDirsList = localDirs.getGoodDirs(); List goodLogDirsList = logDirs.getGoodDirs(); - int numLocalDirs = goodLocalDirsList.size() + failedLocalDirsList.size(); - int numLogDirs = goodLogDirsList.size() + failedLogDirsList.size(); + int numLocalDirs = goodLocalDirsList.size() + erroredLocalDirsList.size() + diskFullLocalDirsList.size(); + int numLogDirs = goodLogDirsList.size() + erroredLogDirsList.size() + diskFullLogDirsList.size(); if (!listGoodDirs) { - if (!failedLocalDirsList.isEmpty()) { - report.append(failedLocalDirsList.size() + "/" + numLocalDirs - + " local-dirs are bad: " - + StringUtils.join(",", failedLocalDirsList) + "; "); + if (!erroredLocalDirsList.isEmpty()) { + report.append(erroredLocalDirsList.size() + "/" + numLocalDirs + + " local-dirs have errors: " + + StringUtils.join(",", 
erroredLocalDirsList) + "; "); } - if (!failedLogDirsList.isEmpty()) { - report.append(failedLogDirsList.size() + "/" + numLogDirs - + " log-dirs are bad: " + StringUtils.join(",", failedLogDirsList)); + if (!diskFullLocalDirsList.isEmpty()) { + report.append(diskFullLocalDirsList.size() + "/" + numLocalDirs + + " local-dirs are full: " + + StringUtils.join(",", diskFullLocalDirsList) + "; "); + } + + if (!erroredLogDirsList.isEmpty()) { + report.append(erroredLogDirsList.size() + "/" + numLogDirs + + " log-dirs have errors: " + StringUtils.join(",", erroredLogDirsList) + "; "); + } + if (!diskFullLogDirsList.isEmpty()) { + report.append(diskFullLogDirsList.size() + "/" + numLogDirs + + " log-dirs are full: " + StringUtils.join(",", diskFullLogDirsList)); + } } else { report.append(goodLocalDirsList.size() + "/" + numLocalDirs diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestDirectoryCollection.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestDirectoryCollection.java index e529628b710..6aeb01714ed 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestDirectoryCollection.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestDirectoryCollection.java @@ -128,6 +128,7 @@ public void testDiskSpaceUtilizationLimit() throws IOException { DirectoryCollection dc = new DirectoryCollection(dirs, 0.0F); dc.checkDirs(); Assert.assertEquals(0, dc.getGoodDirs().size()); + Assert.assertEquals(0, dc.getErroredDirs().size()); Assert.assertEquals(1, dc.getFailedDirs().size()); Assert.assertEquals(1, dc.getFullDirs().size()); // no good dirs @@ -139,6 +140,7 @@ public void 
testDiskSpaceUtilizationLimit() throws IOException { testDir.getTotalSpace()); dc.checkDirs(); Assert.assertEquals(1, dc.getGoodDirs().size()); + Assert.assertEquals(0, dc.getErroredDirs().size()); Assert.assertEquals(0, dc.getFailedDirs().size()); Assert.assertEquals(0, dc.getFullDirs().size()); Assert.assertEquals(utilizedSpacePerc, @@ -147,6 +149,7 @@ public void testDiskSpaceUtilizationLimit() throws IOException { dc = new DirectoryCollection(dirs, testDir.getTotalSpace() / (1024 * 1024)); dc.checkDirs(); Assert.assertEquals(0, dc.getGoodDirs().size()); + Assert.assertEquals(0, dc.getErroredDirs().size()); Assert.assertEquals(1, dc.getFailedDirs().size()); Assert.assertEquals(1, dc.getFullDirs().size()); // no good dirs @@ -158,6 +161,7 @@ public void testDiskSpaceUtilizationLimit() throws IOException { testDir.getTotalSpace()); dc.checkDirs(); Assert.assertEquals(1, dc.getGoodDirs().size()); + Assert.assertEquals(0, dc.getErroredDirs().size()); Assert.assertEquals(0, dc.getFailedDirs().size()); Assert.assertEquals(0, dc.getFullDirs().size()); Assert.assertEquals(utilizedSpacePerc, @@ -209,12 +213,14 @@ public void testFailedDisksBecomingGoodAgain() throws Exception { Assert.assertEquals(0, dc.getGoodDirs().size()); Assert.assertEquals(1, dc.getFailedDirs().size()); Assert.assertEquals(1, dc.getFullDirs().size()); + Assert.assertEquals(0, dc.getErroredDirs().size()); dc.setDiskUtilizationPercentageCutoff(100.0F, 100.0F); dc.checkDirs(); Assert.assertEquals(1, dc.getGoodDirs().size()); Assert.assertEquals(0, dc.getFailedDirs().size()); Assert.assertEquals(0, dc.getFullDirs().size()); + Assert.assertEquals(0, dc.getErroredDirs().size()); conf.set(CommonConfigurationKeys.FS_PERMISSIONS_UMASK_KEY, "077"); @@ -232,12 +238,15 @@ public void testFailedDisksBecomingGoodAgain() throws Exception { Assert.assertEquals(0, dc.getGoodDirs().size()); Assert.assertEquals(1, dc.getFailedDirs().size()); Assert.assertEquals(0, dc.getFullDirs().size()); + Assert.assertEquals(1, 
dc.getErroredDirs().size()); + permDirB = new FsPermission((short) 0700); localFs.setPermission(pathB, permDirB); dc.checkDirs(); Assert.assertEquals(1, dc.getGoodDirs().size()); Assert.assertEquals(0, dc.getFailedDirs().size()); Assert.assertEquals(0, dc.getFullDirs().size()); + Assert.assertEquals(0, dc.getErroredDirs().size()); } @Test