diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LocalDirsHandlerService.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LocalDirsHandlerService.java index 7d1aa53..2064c6b 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LocalDirsHandlerService.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LocalDirsHandlerService.java @@ -238,13 +238,29 @@ protected void serviceStop() throws Exception { /** * Function to generate a report on the state of the disks. + * This method signature is kept for backward compatibility. + * @param listGoodDirs flag to determine whether the report should report + * the state of good dirs or failed dirs + * @return the health report of nm-local-dirs and nm-log-dirs + * @deprecated Use {@link #getDisksHealthReport(boolean, boolean)} instead. + */ + @Deprecated + public String getDisksHealthReport(boolean listGoodDirs) { + return getDisksHealthReport(listGoodDirs, false); + } + + /** + * Function to generate a report on the state of the disks. * * @param listGoodDirs * flag to determine whether the report should report the state of * good dirs or failed dirs + * @param reportFullDirs flag to report which dirs are full. Full dirs are + * included in failed dirs. 
* @return the health report of nm-local-dirs and nm-log-dirs */ - public String getDisksHealthReport(boolean listGoodDirs) { + public String getDisksHealthReport(boolean listGoodDirs, + boolean reportFullDirs) { if (!isDiskHealthCheckerEnabled) { return ""; } @@ -254,6 +270,8 @@ public String getDisksHealthReport(boolean listGoodDirs) { List failedLogDirsList = logDirs.getFailedDirs(); List goodLocalDirsList = localDirs.getGoodDirs(); List goodLogDirsList = logDirs.getGoodDirs(); + List fullLocalDirsList = localDirs.getFullDirs(); + List fullLogDirsList = logDirs.getFullDirs(); int numLocalDirs = goodLocalDirsList.size() + failedLocalDirsList.size(); int numLogDirs = goodLogDirsList.size() + failedLogDirsList.size(); if (!listGoodDirs) { @@ -262,9 +280,20 @@ public String getDisksHealthReport(boolean listGoodDirs) { + " local-dirs are bad: " + StringUtils.join(",", failedLocalDirsList) + "; "); } + if (!fullLocalDirsList.isEmpty() && reportFullDirs) { + report.append(fullLocalDirsList.size() + "/" + numLocalDirs + + " local-dirs are full: " + + StringUtils.join(",", fullLocalDirsList) + "; "); + } if (!failedLogDirsList.isEmpty()) { report.append(failedLogDirsList.size() + "/" + numLogDirs - + " log-dirs are bad: " + StringUtils.join(",", failedLogDirsList)); + + " log-dirs are bad: " + + StringUtils.join(",", failedLogDirsList) + "; "); + } + if (!fullLogDirsList.isEmpty() && reportFullDirs) { + report.append(fullLogDirsList.size() + "/" + numLogDirs + + " log-dirs are full: " + + StringUtils.join(",", fullLogDirsList) + "; "); } } else { report.append(goodLocalDirsList.size() + "/" + numLocalDirs @@ -329,17 +358,18 @@ private void updateDirsAfterTest() { logDirs.toArray(new String[logDirs.size()])); if (!areDisksHealthy()) { // Just log. - LOG.error("Most of the disks failed. " + getDisksHealthReport(false)); + LOG.error( + "Most of the disks failed. 
" + getDisksHealthReport(false, true)); } } private void logDiskStatus(boolean newDiskFailure, boolean diskTurnedGood) { if (newDiskFailure) { - String report = getDisksHealthReport(false); + String report = getDisksHealthReport(false, true); LOG.info("Disk(s) failed: " + report); } if (diskTurnedGood) { - String report = getDisksHealthReport(true); + String report = getDisksHealthReport(true, true); LOG.info("Disk(s) turned good: " + report); } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeHealthCheckerService.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeHealthCheckerService.java index 02b318a..9a66b8a 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeHealthCheckerService.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeHealthCheckerService.java @@ -59,9 +59,10 @@ String getHealthReport() { String scriptReport = (nodeHealthScriptRunner == null) ? 
"" : nodeHealthScriptRunner.getHealthReport(); if (scriptReport.equals("")) { - return dirsHandler.getDisksHealthReport(false); + return dirsHandler.getDisksHealthReport(false, true); } else { - return scriptReport.concat(SEPARATOR + dirsHandler.getDisksHealthReport(false)); + return scriptReport.concat( + SEPARATOR + dirsHandler.getDisksHealthReport(false, true)); } } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java index a87238d..dd18036 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java @@ -243,7 +243,7 @@ public Integer call() { if (!dirsHandler.areDisksHealthy()) { ret = ContainerExitStatus.DISKS_FAILED; throw new IOException("Most of the disks failed. 
" - + dirsHandler.getDisksHealthReport(false)); + + dirsHandler.getDisksHealthReport(false, true)); } try { diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/ResourceLocalizationService.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/ResourceLocalizationService.java index dd50ead..2153ee2 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/ResourceLocalizationService.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/ResourceLocalizationService.java @@ -1089,7 +1089,7 @@ public void run() { dirsHandler); } else { throw new IOException("All disks failed. " - + dirsHandler.getDisksHealthReport(false)); + + dirsHandler.getDisksHealthReport(false, true)); } // TODO handle ExitCodeException separately? 
} catch (FSError fe) { diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeHealthService.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeHealthService.java index 2e41dea..0d006cb 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeHealthService.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeHealthService.java @@ -167,6 +167,7 @@ public void testNodeHealthService() throws Exception { healthStatus.getHealthReport().equals( NodeHealthScriptRunner.NODE_HEALTH_SCRIPT_TIMED_OUT_MSG + NodeHealthCheckerService.SEPARATOR - + nodeHealthChecker.getDiskHandler().getDisksHealthReport(false))); + + nodeHealthChecker.getDiskHandler() + .getDisksHealthReport(false, true))); } }