From 7dfac1f746eb0ae08e0689e179b1f4396438af22 Mon Sep 17 00:00:00 2001 From: Adam Antal Date: Thu, 7 Nov 2019 19:22:09 +0100 Subject: [PATCH] YARN-9923. Detect missing Docker binary or not running Docker daemon --- .../dev-support/findbugs-exclude.xml | 2 +- .../hadoop/yarn/conf/YarnConfiguration.java | 18 ++ .../src/main/resources/yarn-default.xml | 18 ++ .../nodemanager/LocalDirsHandlerService.java | 20 ++- .../nodemanager/NodeHealthCheckerService.java | 123 ------------- .../yarn/server/nodemanager/NodeManager.java | 28 +-- .../nodemanager/NodeStatusUpdaterImpl.java | 1 + .../health/DockerHealthCheckerService.java | 140 +++++++++++++++ .../nodemanager/health/HealthReporter.java | 43 +++++ .../health/NodeHealthCheckerService.java | 134 ++++++++++++++ .../health}/NodeHealthScriptRunner.java | 166 ++++-------------- .../health/TimedHealthReporterService.java | 159 +++++++++++++++++ .../nodemanager/MockNodeStatusUpdater.java | 2 + .../nodemanager/NodeManagerTestBase.java | 5 +- .../server/nodemanager/TestEventFlow.java | 5 +- .../nodemanager/TestNodeManagerReboot.java | 1 + .../nodemanager/TestNodeManagerResync.java | 1 + .../nodemanager/TestNodeManagerShutdown.java | 1 + .../nodemanager/TestNodeStatusUpdater.java | 3 +- .../TestNodeStatusUpdaterForAttributes.java | 1 + .../TestNodeStatusUpdaterForLabels.java | 1 + .../BaseContainerManagerTest.java | 6 +- .../TestContainerManagerRecovery.java | 7 +- .../TestResourcePluginManager.java | 2 +- .../TestDockerHealthCheckerService.java | 22 +++ .../health}/TestNodeHealthScriptRunner.java | 3 +- .../{ => health}/TestNodeHealthService.java | 18 +- .../webapp/TestContainerLogsPage.java | 6 +- .../webapp/TestNMContainerWebSocket.java | 13 +- .../nodemanager/webapp/TestNMWebServer.java | 13 +- .../nodemanager/webapp/TestNMWebServices.java | 6 +- .../webapp/TestNMWebServicesApps.java | 4 +- .../webapp/TestNMWebServicesAuxServices.java | 4 +- .../webapp/TestNMWebServicesContainers.java | 6 +- 
.../nodemanager/webapp/TestNMWebTerminal.java | 12 +- .../hadoop/yarn/server/MiniYARNCluster.java | 2 +- .../src/site/markdown/NodeManager.md | 11 ++ 37 files changed, 661 insertions(+), 346 deletions(-) delete mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeHealthCheckerService.java create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/health/DockerHealthCheckerService.java create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/health/HealthReporter.java create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/health/NodeHealthCheckerService.java rename {hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util => hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/health}/NodeHealthScriptRunner.java (63%) create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/health/TimedHealthReporterService.java create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/health/TestDockerHealthCheckerService.java rename {hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util => hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/health}/TestNodeHealthScriptRunner.java (98%) rename 
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/{ => health}/TestNodeHealthService.java (92%) diff --git a/hadoop-yarn-project/hadoop-yarn/dev-support/findbugs-exclude.xml b/hadoop-yarn-project/hadoop-yarn/dev-support/findbugs-exclude.xml index e3149f079c6..444c3e27b21 100644 --- a/hadoop-yarn-project/hadoop-yarn/dev-support/findbugs-exclude.xml +++ b/hadoop-yarn-project/hadoop-yarn/dev-support/findbugs-exclude.xml @@ -662,7 +662,7 @@ - + diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java index d9840ac9999..0185541ea61 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java @@ -1977,6 +1977,24 @@ public static boolean isAclEnabled(Configuration conf) { public static final String NM_HEALTH_CHECK_SCRIPT_OPTS = NM_PREFIX + "health-checker.script.opts"; + public static final String NM_DOCKER_HEALTH_CHECKER_PREFIX = + NM_PREFIX + "docker-health-checker."; + + public static final String NM_DOCKER_HEALTH_CHECKER_ENABLE = + NM_DOCKER_HEALTH_CHECKER_PREFIX + "enable"; + public static final boolean DEFAULT_NM_DOCKER_HEALTH_CHECKER_ENABLE = + false; + + public static final String NM_DOCKER_HEALTH_CHECKER_STARTUP = + NM_DOCKER_HEALTH_CHECKER_PREFIX + "startup"; + public static final boolean DEFAULT_NM_DOCKER_HEALTH_CHECKER_STARTUP = + false; + + public static final String NM_DOCKER_HEALTH_CHECKER_INTERVAL_MS = + NM_DOCKER_HEALTH_CHECKER_PREFIX + "interval-ms"; + public static final long DEFAULT_NM_DOCKER_HEALTH_CHECKER_INTERVAL_MS = + DEFAULT_NM_HEALTH_CHECK_INTERVAL_MS; + /** The JVM options used on forking 
ContainerLocalizer process by container executor. */ public static final String NM_CONTAINER_LOCALIZER_JAVA_OPTS_KEY = diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml index 907f290afad..3434ef8f025 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml @@ -1619,6 +1619,24 @@ + + + yarn.nodemanager.docker-health-checker.enable + false + + + + + yarn.nodemanager.docker-health-checker.startup + false + + + + + yarn.nodemanager.docker-health-checker.interval-ms + 600000 + + Frequency of running disk health checker code. yarn.nodemanager.disk-health-checker.interval-ms diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LocalDirsHandlerService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LocalDirsHandlerService.java index 8d060b01adf..aefdb68744c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LocalDirsHandlerService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LocalDirsHandlerService.java @@ -29,9 +29,11 @@ import java.util.TimerTask; import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.service.AbstractService; import org.apache.hadoop.util.DiskChecker.DiskErrorException; import org.apache.hadoop.util.DiskValidator; import org.apache.hadoop.util.DiskValidatorFactory; +import org.apache.hadoop.yarn.server.nodemanager.health.HealthReporter; import org.slf4j.Logger; 
import org.slf4j.LoggerFactory; @@ -42,7 +44,6 @@ import org.apache.hadoop.fs.LocalDirAllocator; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.permission.FsPermission; -import org.apache.hadoop.service.AbstractService; import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.exceptions.YarnRuntimeException; @@ -54,7 +55,7 @@ * directories of a node. This specifically manages nodemanager-local-dirs and * nodemanager-log-dirs by periodically checking their health. */ -public class LocalDirsHandlerService extends AbstractService { +public class LocalDirsHandlerService extends AbstractService implements HealthReporter { private static final Logger LOG = LoggerFactory.getLogger(LocalDirsHandlerService.class); @@ -426,6 +427,11 @@ public String getDisksHealthReport(boolean listGoodDirs) { } + @Override + public String getHealthReport() { + return getDisksHealthReport(false); + } + /** * The minimum fraction of number of disks needed to be healthy for a node to * be considered healthy in terms of disks is configured using @@ -457,10 +463,20 @@ public boolean areDisksHealthy() { return true; } + @Override + public boolean isHealthy() { + return areDisksHealthy(); + } + public long getLastDisksCheckTime() { return lastDisksCheckTime; } + @Override + public long getLastHealthReportTime() { + return getLastDisksCheckTime(); + } + public boolean isGoodLocalDir(String path) { return isInGoodDirs(getLocalDirs(), path); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeHealthCheckerService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeHealthCheckerService.java deleted file mode 100644 index 7e2fc7e022d..00000000000 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeHealthCheckerService.java +++ /dev/null @@ -1,123 +0,0 @@ -/** -* Licensed to the Apache Software Foundation (ASF) under one -* or more contributor license agreements. See the NOTICE file -* distributed with this work for additional information -* regarding copyright ownership. The ASF licenses this file -* to you under the Apache License, Version 2.0 (the -* "License"); you may not use this file except in compliance -* with the License. You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ - -package org.apache.hadoop.yarn.server.nodemanager; - -import com.google.common.base.Joiner; -import com.google.common.base.Strings; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.service.CompositeService; -import org.apache.hadoop.util.NodeHealthScriptRunner; - -import java.util.Arrays; -import java.util.Collections; - -/** - * The class which provides functionality of checking the health of the node and - * reporting back to the service for which the health checker has been asked to - * report. 
- */ -public class NodeHealthCheckerService extends CompositeService { - - private NodeHealthScriptRunner nodeHealthScriptRunner; - private LocalDirsHandlerService dirsHandler; - private Exception nodeHealthException; - private long nodeHealthExceptionReportTime; - - static final String SEPARATOR = ";"; - - public NodeHealthCheckerService(NodeHealthScriptRunner scriptRunner, - LocalDirsHandlerService dirHandlerService) { - super(NodeHealthCheckerService.class.getName()); - nodeHealthScriptRunner = scriptRunner; - dirsHandler = dirHandlerService; - nodeHealthException = null; - nodeHealthExceptionReportTime = 0; - } - - @Override - protected void serviceInit(Configuration conf) throws Exception { - if (nodeHealthScriptRunner != null) { - addService(nodeHealthScriptRunner); - } - addService(dirsHandler); - super.serviceInit(conf); - } - - /** - * @return the reporting string of health of the node - */ - String getHealthReport() { - String scriptReport = Strings.emptyToNull( - nodeHealthScriptRunner == null ? null : - nodeHealthScriptRunner.getHealthReport()); - String discReport = - Strings.emptyToNull( - dirsHandler.getDisksHealthReport(false)); - String exceptionReport = Strings.emptyToNull( - nodeHealthException == null ? null : - nodeHealthException.getMessage()); - - return Joiner.on(SEPARATOR).skipNulls() - .join(scriptReport, discReport, exceptionReport); - } - - /** - * @return true if the node is healthy - */ - boolean isHealthy() { - boolean scriptHealthy = nodeHealthScriptRunner == null || - nodeHealthScriptRunner.isHealthy(); - return nodeHealthException == null && - scriptHealthy && dirsHandler.areDisksHealthy(); - } - - /** - * @return when the last time the node health status is reported - */ - long getLastHealthReportTime() { - return Collections.max(Arrays.asList( - dirsHandler.getLastDisksCheckTime(), - nodeHealthScriptRunner == null ? 
0 : - nodeHealthScriptRunner.getLastReportedTime(), - nodeHealthExceptionReportTime)); - } - - /** - * @return the disk handler - */ - public LocalDirsHandlerService getDiskHandler() { - return dirsHandler; - } - - /** - * @return the node health script runner - */ - NodeHealthScriptRunner getNodeHealthScriptRunner() { - return nodeHealthScriptRunner; - } - - /** - * Report an exception to mark the node as unhealthy. - * @param ex the exception that makes the node unhealthy - */ - void reportException(Exception ex) { - nodeHealthException = ex; - nodeHealthExceptionReportTime = System.currentTimeMillis(); - } -} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java index 4bbae340a77..5f48b3acbee 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java @@ -33,7 +33,7 @@ import org.apache.hadoop.util.ExitUtil; import org.apache.hadoop.util.GenericOptionsParser; import org.apache.hadoop.util.JvmPauseMonitor; -import org.apache.hadoop.util.NodeHealthScriptRunner; +import org.apache.hadoop.yarn.server.nodemanager.health.NodeHealthCheckerService; import org.apache.hadoop.util.ReflectionUtils; import org.apache.hadoop.util.Shell; import org.apache.hadoop.util.ShutdownHookManager; @@ -347,27 +347,6 @@ private void recoverTokens(NMTokenSecretManagerInNM nmTokenSecretManager, } } - public static NodeHealthScriptRunner getNodeHealthScriptRunner(Configuration conf) { - String nodeHealthScript = - 
conf.get(YarnConfiguration.NM_HEALTH_CHECK_SCRIPT_PATH); - if(!NodeHealthScriptRunner.shouldRun(nodeHealthScript)) { - LOG.info("Node Manager health check script is not available " - + "or doesn't have execute permission, so not " - + "starting the node health script runner."); - return null; - } - long nmCheckintervalTime = conf.getLong( - YarnConfiguration.NM_HEALTH_CHECK_INTERVAL_MS, - YarnConfiguration.DEFAULT_NM_HEALTH_CHECK_INTERVAL_MS); - long scriptTimeout = conf.getLong( - YarnConfiguration.NM_HEALTH_CHECK_SCRIPT_TIMEOUT_MS, - YarnConfiguration.DEFAULT_NM_HEALTH_CHECK_SCRIPT_TIMEOUT_MS); - String[] scriptArgs = conf.getStrings( - YarnConfiguration.NM_HEALTH_CHECK_SCRIPT_OPTS, new String[] {}); - return new NodeHealthScriptRunner(nodeHealthScript, - nmCheckintervalTime, scriptTimeout, scriptArgs); - } - @VisibleForTesting protected ResourcePluginManager createResourcePluginManager() { return new ResourcePluginManager(); @@ -431,12 +410,9 @@ protected void serviceInit(Configuration conf) throws Exception { // NodeManager level dispatcher this.dispatcher = createNMDispatcher(); - nodeHealthChecker = - new NodeHealthCheckerService( - getNodeHealthScriptRunner(conf), dirsHandler); + this.nodeHealthChecker = new NodeHealthCheckerService(dirsHandler); addService(nodeHealthChecker); - ((NMContext)context).setContainerExecutor(exec); ((NMContext)context).setDeletionService(del); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java index 181094ea6c6..5e3693ae9c1 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java @@ -86,6 +86,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitor; import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.ResourcePlugin; import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.ResourcePluginManager; +import org.apache.hadoop.yarn.server.nodemanager.health.NodeHealthCheckerService; import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics; import org.apache.hadoop.yarn.server.nodemanager.nodelabels.NodeAttributesProvider; import org.apache.hadoop.yarn.server.nodemanager.nodelabels.NodeLabelsProvider; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/health/DockerHealthCheckerService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/health/DockerHealthCheckerService.java new file mode 100644 index 00000000000..42179d1c723 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/health/DockerHealthCheckerService.java @@ -0,0 +1,140 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager.health;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
+import java.util.Scanner;
+import java.util.TimerTask;
+import java.util.regex.Pattern;
+
+/**
+ * Health reporter that decides whether the Docker daemon is running by
+ * looking for the daemon's pid file.
+ *
+ * The pid file is searched at the default location, then at the location
+ * configured in the Docker daemon's json file, and finally at a few fallback
+ * locations. In startup mode the check is additionally executed once during
+ * service initialization, and initialization fails if no pid file is found.
+ */
+public class DockerHealthCheckerService extends TimedHealthReporterService {
+
+  private static final Logger LOG =
+      LoggerFactory.getLogger(DockerHealthCheckerService.class);
+
+  private static final String NO_PID_FILE =
+      "Unable to obtain pid file of Docker daemon";
+  private static final String HEALTH_CHECK_DISABLED =
+      "Docker health checker service is disabled.";
+
+  /** Default location of the Docker daemon's pid file. */
+  private static final String DEFAULT_PID_FILE = "/var/run/docker.pid";
+  /** Docker daemon configuration that may name another pid file. */
+  private static final String DOCKER_DAEMON_CONF = "/etc/docker/daemon.json";
+  /** Locations tried when no pid file location is configured. */
+  private static final String[] FALLBACK_PID_FILES =
+      {"/var/docker.pid", "/run/docker.pid"};
+  /** Charset used to read the daemon configuration (a JSON document). */
+  private static final Charset DAEMON_CONF_CHARSET = StandardCharsets.UTF_8;
+  /** Captures the value of the "pidfile" entry of the configuration. */
+  private static final Pattern PID_FILE_ENTRY =
+      Pattern.compile("\"pidfile\"\\s*:\\s*\"([^\"]*)\"");
+
+  private final boolean startupMode;
+
+  private DockerHealthCheckerService(boolean startupMode, long intervalMs) {
+    super(DockerHealthCheckerService.class.getName(), intervalMs);
+
+    this.startupMode = startupMode;
+    setTimerTask(new DockerDaemonMonitorExecutor());
+  }
+
+  /**
+   * Creates the service from the node manager configuration.
+   *
+   * @param conf configuration to read the docker-health-checker keys from
+   * @return the configured service, or null if the checker is not enabled
+   */
+  public static DockerHealthCheckerService newInstance(Configuration conf) {
+    boolean enabled =
+        conf.getBoolean(YarnConfiguration.NM_DOCKER_HEALTH_CHECKER_ENABLE,
+            YarnConfiguration.DEFAULT_NM_DOCKER_HEALTH_CHECKER_ENABLE);
+    if (!enabled) {
+      // The key may be unset; an empty default avoids a NullPointerException.
+      String allowedRuntimes = conf.get(
+          YarnConfiguration.LINUX_CONTAINER_RUNTIME_ALLOWED_RUNTIMES, "");
+      // Only worth an INFO message if the Docker runtime is actually allowed.
+      if (allowedRuntimes.contains("docker")) {
+        LOG.info(HEALTH_CHECK_DISABLED);
+      } else {
+        LOG.debug(HEALTH_CHECK_DISABLED);
+      }
+      return null;
+    }
+    LOG.info("Docker health checker service enabled");
+    boolean startupMode = conf.getBoolean(
+        YarnConfiguration.NM_DOCKER_HEALTH_CHECKER_STARTUP,
+        YarnConfiguration.DEFAULT_NM_DOCKER_HEALTH_CHECKER_STARTUP);
+    long intervalMs = conf.getLong(
+        YarnConfiguration.NM_DOCKER_HEALTH_CHECKER_INTERVAL_MS,
+        YarnConfiguration.DEFAULT_NM_DOCKER_HEALTH_CHECKER_INTERVAL_MS);
+    return new DockerHealthCheckerService(startupMode, intervalMs);
+  }
+
+  /**
+   * Task that looks for the Docker daemon's pid file and records exactly one
+   * health status per run.
+   */
+  private class DockerDaemonMonitorExecutor extends TimerTask {
+    @Override
+    public void run() {
+      long now = System.currentTimeMillis();
+      // A single decision point: a positive result can no longer be
+      // overwritten by a later, unconditional "unhealthy" status.
+      if (isDockerPidFilePresent()) {
+        setHealthStatus(true, "", now);
+      } else {
+        setHealthStatus(false, NO_PID_FILE);
+      }
+    }
+  }
+
+  /**
+   * Searches the known locations for the pid file of the Docker daemon.
+   *
+   * @return true if a pid file was found
+   */
+  private static boolean isDockerPidFilePresent() {
+    // 1. Check whether docker.pid file exists on the default location
+    if (isExistingFile(new File(DEFAULT_PID_FILE))) {
+      return true;
+    }
+
+    // 2. Before start guessing let's check whether the pid file
+    // is configured in the Docker daemon's json file
+    String configuredPidFile =
+        readConfiguredPidFile(new File(DOCKER_DAEMON_CONF));
+    if (configuredPidFile != null) {
+      // give up trying if it is missing: the pid file should be there
+      return isExistingFile(new File(configuredPidFile));
+    }
+
+    // 3. try to guess the OS-specific default location for docker.pid
+    for (String candidate : FALLBACK_PID_FILES) {
+      if (isExistingFile(new File(candidate))) {
+        return true;
+      }
+    }
+
+    // 4. Conclude that the Docker daemon is not running
+    return false;
+  }
+
+  /**
+   * Extracts the pid file location from the Docker daemon configuration.
+   *
+   * @param daemonConf the daemon's json configuration file
+   * @return the configured location, or null if the file is absent,
+   *         unreadable or has no non-empty "pidfile" entry
+   */
+  private static String readConfiguredPidFile(File daemonConf) {
+    if (!daemonConf.isFile()) {
+      return null;
+    }
+    try {
+      String json =
+          FileUtils.readFileToString(daemonConf, DAEMON_CONF_CHARSET);
+      try (Scanner scanner = new Scanner(json)) {
+        // Horizon 0 searches the whole document regardless of line breaks.
+        if (scanner.findWithinHorizon(PID_FILE_ENTRY, 0) == null) {
+          return null;
+        }
+        String pidFile = scanner.match().group(1).trim();
+        return pidFile.isEmpty() ? null : pidFile;
+      }
+    } catch (IOException e) {
+      // Best effort: fall back to guessing, but leave a trace in the log.
+      LOG.warn("Unable to read Docker daemon configuration {}",
+          daemonConf, e);
+      return null;
+    }
+  }
+
+  /**
+   * @param file the path to test
+   * @return true if the path exists and is not a directory
+   */
+  private static boolean isExistingFile(File file) {
+    return file.exists() && !file.isDirectory();
+  }
+
+  /**
+   * Initializes the service; in startup mode the check is run once
+   * synchronously.
+   *
+   * @param conf configuration handed to the parent service
+   * @throws Exception if startup mode is on and no pid file was detected
+   */
+  @Override
+  public void serviceInit(Configuration conf) throws Exception {
+    super.serviceInit(conf);
+    if (startupMode) {
+      new DockerDaemonMonitorExecutor().run();
+      if (!isHealthy()) {
+        throw new Exception(
+            "Haven't detected running Docker daemon during startup!");
+      }
+    }
+  }
+}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/health/HealthReporter.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/health/HealthReporter.java
new file mode 100644
index 00000000000..0d81e3af98d
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/health/HealthReporter.java
@@ -0,0 +1,43 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.server.nodemanager.health; + +/** + * Health source whose results NodeHealthCheckerService aggregates. + */ +public interface HealthReporter { + + /** + * Tells whether this reporter currently considers its subject healthy. + * @return true if healthy, false otherwise + */ + boolean isHealthy(); + + /** + * Gives the textual health report of this reporter. + * @return the report; the aggregator skips null or empty reports + */ + String getHealthReport(); + + /** + * Gives the time at which this reporter last reported its health. + * @return the last report time (presumably epoch millis - TODO confirm) + */ + long getLastHealthReportTime(); +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/health/NodeHealthCheckerService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/health/NodeHealthCheckerService.java new file mode 100644 index 00000000000..6bdbfef78b9 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/health/NodeHealthCheckerService.java @@ -0,0 +1,134 @@ +/** +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. 
You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+package org.apache.hadoop.yarn.server.nodemanager.health;
+
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Joiner;
+import com.google.common.base.Strings;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.service.CompositeService;
+import org.apache.hadoop.service.Service;
+import org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Optional;
+import java.util.stream.Collectors;
+
+/**
+ * The class which provides functionality of checking the health of the node and
+ * reporting back to the service for which the health checker has been asked to
+ * report.
+ *
+ * The node health is the combination of every registered
+ * {@link HealthReporter} and of the exception reported through
+ * {@link #reportException(Exception)}, if any.
+ */
+public class NodeHealthCheckerService extends CompositeService {
+
+  /** Reporters that are consulted, in registration order. */
+  private final List<HealthReporter> reporters;
+  private final LocalDirsHandlerService dirsHandler;
+  private Exception nodeHealthException;
+  private long nodeHealthExceptionReportTime;
+
+  public static final String SEPARATOR = ";";
+
+  public NodeHealthCheckerService(LocalDirsHandlerService dirHandlerService) {
+    super(NodeHealthCheckerService.class.getName());
+
+    this.reporters = new ArrayList<>();
+    this.dirsHandler = dirHandlerService;
+    this.nodeHealthException = null;
+    this.nodeHealthExceptionReportTime = 0;
+  }
+
+  @Override
+  protected void serviceInit(Configuration conf) throws Exception {
+    // The factory methods return null for reporters that should not run;
+    // addHealthReporter ignores null.
+    addHealthReporter(dirsHandler);
+    addHealthReporter(NodeHealthScriptRunner.newInstance(conf));
+    addHealthReporter(DockerHealthCheckerService.newInstance(conf));
+
+    super.serviceInit(conf);
+  }
+
+  /**
+   * Adds a {@link Service} implementing the {@link HealthReporter} interface,
+   * if that service has not been added to this {@link CompositeService} yet.
+   * A null service, or one whose name is already registered, is ignored.
+   *
+   * @param service to add
+   * @throws Exception if not a {@link HealthReporter}
+   *         implementation is provided to this function
+   */
+  @VisibleForTesting
+  void addHealthReporter(Service service) throws Exception {
+    if (service != null && getServices().stream()
+        .noneMatch(x -> x.getName().equals(service.getName()))) {
+      if (!(service instanceof HealthReporter)) {
+        throw new Exception("Attempted to add service to " +
+            "NodeHealthCheckerService that is not implements HealthReporter.");
+      }
+      reporters.add((HealthReporter) service);
+      addService(service);
+    }
+  }
+
+  /**
+   * @return the reporting string of health of the node: the non-empty reports
+   *         of all reporters and of the reported exception joined by
+   *         {@link #SEPARATOR}
+   */
+  public String getHealthReport() {
+    // Collect into an ArrayList explicitly: the list is appended to below and
+    // Collectors.toList() gives no guarantee that its result is mutable.
+    List<String> reports = reporters.stream()
+        .map(reporter -> Strings.emptyToNull(reporter.getHealthReport()))
+        .collect(Collectors.toCollection(ArrayList::new));
+    reports.add(Strings.emptyToNull(
+        nodeHealthException == null ? null :
+            nodeHealthException.getMessage()));
+    return Joiner.on(SEPARATOR).skipNulls().join(reports);
+  }
+
+  /**
+   * @return true if the node is healthy, that is no exception has been
+   *         reported and every reporter is healthy
+   */
+  public boolean isHealthy() {
+    return nodeHealthException == null &&
+        reporters.stream().allMatch(HealthReporter::isHealthy);
+  }
+
+  /**
+   * @return when the last time the node health status is reported: the latest
+   *         of the reporters' times and the exception report time
+   */
+  public long getLastHealthReportTime() {
+    Optional<Long> max = reporters.stream()
+        .map(HealthReporter::getLastHealthReportTime).max(Long::compareTo);
+    return Long.max(
+        max.orElse(nodeHealthExceptionReportTime),
+        nodeHealthExceptionReportTime);
+  }
+
+  /**
+   * @return the disk handler
+   */
+  public LocalDirsHandlerService getDiskHandler() {
+    return dirsHandler;
+  }
+
+  /**
+   * Report an exception to mark the node as unhealthy.
+   * @param ex the exception that makes the node unhealthy
+   */
+  public void reportException(Exception ex) {
+    nodeHealthException = ex;
+    nodeHealthExceptionReportTime = System.currentTimeMillis();
+  }
+}
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/NodeHealthScriptRunner.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/health/NodeHealthScriptRunner.java
similarity index 63%
rename from hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/NodeHealthScriptRunner.java
rename to hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/health/NodeHealthScriptRunner.java
index f2a5b242a8d..32098649a62 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/NodeHealthScriptRunner.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/health/NodeHealthScriptRunner.java
@@ -16,7 +16,7 @@
  * limitations under the License.
*/ -package org.apache.hadoop.util; +package org.apache.hadoop.yarn.server.nodemanager.health; import java.io.File; import java.io.IOException; @@ -27,11 +27,11 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileUtil; -import org.apache.hadoop.service.AbstractService; import org.apache.hadoop.util.Shell.ExitCodeException; import org.apache.hadoop.util.Shell.ShellCommandExecutor; import org.apache.hadoop.util.Shell; import org.apache.hadoop.util.StringUtils; +import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -41,37 +41,45 @@ * using the configured node health script and reporting back to the service * for which the health checker has been asked to report. */ -public class NodeHealthScriptRunner extends AbstractService { +public class NodeHealthScriptRunner extends TimedHealthReporterService { private static final Logger LOG = LoggerFactory.getLogger(NodeHealthScriptRunner.class); /** Absolute path to the health script. 
*/ private String nodeHealthScript; - /** Delay after which node health script to be executed */ - private long intervalTime; /** Time after which the script should be timedout */ private long scriptTimeout; - /** Timer used to schedule node health monitoring script execution */ - private Timer nodeHealthScriptScheduler; - /** ShellCommandExecutor used to execute monitoring script */ ShellCommandExecutor shexec = null; /** Pattern used for searching in the output of the node health script */ - static private final String ERROR_PATTERN = "ERROR"; + private static final String ERROR_PATTERN = "ERROR"; /** Time out error message */ - public static final String NODE_HEALTH_SCRIPT_TIMED_OUT_MSG = "Node health script timed out"; - - private boolean isHealthy; - - private String healthReport; - - private long lastReportedTime; + static final String NODE_HEALTH_SCRIPT_TIMED_OUT_MSG = "Node health script timed out"; + + public static NodeHealthScriptRunner newInstance(Configuration conf) { + String nodeHealthScript = + conf.get(YarnConfiguration.NM_HEALTH_CHECK_SCRIPT_PATH); + if(!shouldRun(nodeHealthScript)) { + LOG.info("Node Manager health check script is not available " + + "or doesn't have execute permission, so not " + + "starting the node health script runner."); + return null; + } + long nmCheckintervalTime = conf.getLong( + YarnConfiguration.NM_HEALTH_CHECK_INTERVAL_MS, + YarnConfiguration.DEFAULT_NM_HEALTH_CHECK_INTERVAL_MS); + long scriptTimeout = conf.getLong( + YarnConfiguration.NM_HEALTH_CHECK_SCRIPT_TIMEOUT_MS, + YarnConfiguration.DEFAULT_NM_HEALTH_CHECK_SCRIPT_TIMEOUT_MS); + String[] scriptArgs = conf.getStrings( + YarnConfiguration.NM_HEALTH_CHECK_SCRIPT_OPTS, new String[] {}); + return new NodeHealthScriptRunner(nodeHealthScript, + nmCheckintervalTime, scriptTimeout, scriptArgs); + } - private TimerTask timer; - private enum HealthCheckerExitStatus { SUCCESS, TIMED_OUT, @@ -90,7 +98,7 @@ String exceptionStackTrace = ""; - public 
NodeHealthMonitorExecutor(String[] args) { + NodeHealthMonitorExecutor(String[] args) { ArrayList execScript = new ArrayList(); execScript.add(nodeHealthScript); if (args != null) { @@ -190,111 +198,24 @@ private boolean hasErrors(String output) { } } - public NodeHealthScriptRunner(String scriptName, long chkInterval, long timeout, + NodeHealthScriptRunner(String scriptName, long chkInterval, long timeout, String[] scriptArgs) { - super(NodeHealthScriptRunner.class.getName()); - this.lastReportedTime = System.currentTimeMillis(); - this.isHealthy = true; - this.healthReport = ""; + super(NodeHealthScriptRunner.class.getName(), chkInterval); + this.nodeHealthScript = scriptName; - this.intervalTime = chkInterval; this.scriptTimeout = timeout; - this.timer = new NodeHealthMonitorExecutor(scriptArgs); - } - - /* - * Method which initializes the values for the script path and interval time. - */ - @Override - protected void serviceInit(Configuration conf) throws Exception { - super.serviceInit(conf); - } - - /** - * Method used to start the Node health monitoring. - * - */ - @Override - protected void serviceStart() throws Exception { - nodeHealthScriptScheduler = new Timer("NodeHealthMonitor-Timer", true); - // Start the timer task immediately and - // then periodically at interval time. - nodeHealthScriptScheduler.scheduleAtFixedRate(timer, 0, intervalTime); - super.serviceStart(); + setTimerTask(new NodeHealthMonitorExecutor(scriptArgs)); } - /** - * Method used to terminate the node health monitoring service. 
- * - */ @Override - protected void serviceStop() { - if (nodeHealthScriptScheduler != null) { - nodeHealthScriptScheduler.cancel(); - } + public void serviceStop() throws Exception { if (shexec != null) { Process p = shexec.getProcess(); if (p != null) { p.destroy(); } } - } - - /** - * Gets the if the node is healthy or not - * - * @return true if node is healthy - */ - public boolean isHealthy() { - return isHealthy; - } - - /** - * Sets if the node is healthy or not considering disks' health also. - * - * @param isHealthy - * if or not node is healthy - */ - private synchronized void setHealthy(boolean isHealthy) { - this.isHealthy = isHealthy; - } - - /** - * Returns output from health script. if node is healthy then an empty string - * is returned. - * - * @return output from health script - */ - public String getHealthReport() { - return healthReport; - } - - /** - * Sets the health report from the node health script. Also set the disks' - * health info obtained from DiskHealthCheckerService. - * - * @param healthReport - */ - private synchronized void setHealthReport(String healthReport) { - this.healthReport = healthReport; - } - - /** - * Returns time stamp when node health script was last run. - * - * @return timestamp when node health script was last run - */ - public long getLastReportedTime() { - return lastReportedTime; - } - - /** - * Sets the last run time of the node health script. 
- * - * @param lastReportedTime - */ - private synchronized void setLastReportedTime(long lastReportedTime) { - this.lastReportedTime = lastReportedTime; + super.serviceStop(); } /** @@ -315,25 +236,4 @@ public static boolean shouldRun(String healthScript) { File f = new File(healthScript); return f.exists() && FileUtil.canExecute(f); } - - private synchronized void setHealthStatus(boolean isHealthy, String output) { - LOG.info("health status being set as " + output); - this.setHealthy(isHealthy); - this.setHealthReport(output); - } - - private synchronized void setHealthStatus(boolean isHealthy, String output, - long time) { - LOG.info("health status being set as " + output); - this.setHealthStatus(isHealthy, output); - this.setLastReportedTime(time); - } - - /** - * Used only by tests to access the timer task directly - * @return the timer task - */ - public TimerTask getTimerTask() { - return timer; - } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/health/TimedHealthReporterService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/health/TimedHealthReporterService.java new file mode 100644 index 00000000000..3b64b90c040 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/health/TimedHealthReporterService.java @@ -0,0 +1,159 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.server.nodemanager.health; + +import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.service.AbstractService; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Timer; +import java.util.TimerTask; + +/** + * Base service for health reporters that run a TimerTask at a fixed rate. + */ +public abstract class TimedHealthReporterService extends AbstractService + implements HealthReporter { + + private static final Logger LOG = + LoggerFactory.getLogger(TimedHealthReporterService.class); + + private boolean isHealthy; + private String healthReport; + private long lastReportedTime; + + private Timer timer; + private TimerTask task; + private long intervalMs; + + TimedHealthReporterService(String name, long intervalMs) { + super(name); + this.isHealthy = true; + this.healthReport = ""; + this.lastReportedTime = System.currentTimeMillis(); + this.intervalMs = intervalMs; + } + + void setTimerTask(TimerTask task) { + this.task = task; + } + + @VisibleForTesting + TimerTask getTimerTask() { + return task; + } + + /** + * Method used to start the health monitoring. + */ + @Override + public void serviceStart() throws Exception { + if (task == null) { + throw new Exception("Health reporting task hasn't been set!"); + } + timer = new Timer("HealthReporterService-Timer", true); + timer.scheduleAtFixedRate(task, 0, intervalMs); + super.serviceStart(); + } + + /** + * Method used to terminate the health monitoring service. 
+ */ + @Override + protected void serviceStop() throws Exception { + if (timer != null) { + timer.cancel(); + } + super.serviceStop(); + } + + /** + * Gets whether the node is healthy or not + * + * @return true if node is healthy + */ + @Override + public boolean isHealthy() { + return isHealthy; + } + + /** + * Sets if the node is healthy or not + * + * @param isHealthy + * whether the node is healthy + */ + protected synchronized void setHealthy(boolean isHealthy) { + this.isHealthy = isHealthy; + } + + /** + * Returns output from health check. If node is healthy then an empty string + * is returned. + * + * @return output from health check + */ + @Override + public String getHealthReport() { + return healthReport; + } + + /** + * Sets the health report from the node health check. The given text is + * stored as-is and is what getHealthReport() returns afterwards. + * + * @param healthReport the report text produced by the health check + */ + protected synchronized void setHealthReport(String healthReport) { + this.healthReport = healthReport; + } + + /** + * Returns time stamp when node health check was last run. + * + * @return timestamp when node health check was last run + */ + @Override + public long getLastHealthReportTime() { + return lastReportedTime; + } + + /** + * Sets the last run time of the node health check. 
+ * + * @param lastReportedTime + */ + synchronized void setLastReportedTime(long lastReportedTime) { + this.lastReportedTime = lastReportedTime; + } + + synchronized void setHealthStatus(boolean isHealthy, String output) { + LOG.info("health status being set as " + output); + this.setHealthy(isHealthy); + this.setHealthReport(output); + } + + synchronized void setHealthStatus(boolean isHealthy, String output, + long time) { + LOG.info("health status being set as " + output); + this.setHealthStatus(isHealthy, output); + this.setLastReportedTime(time); + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/MockNodeStatusUpdater.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/MockNodeStatusUpdater.java index 2e80259d210..81d5fd23823 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/MockNodeStatusUpdater.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/MockNodeStatusUpdater.java @@ -19,6 +19,7 @@ package org.apache.hadoop.yarn.server.nodemanager; import java.io.IOException; + import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -38,6 +39,7 @@ import org.apache.hadoop.yarn.server.api.records.MasterKey; import org.apache.hadoop.yarn.server.api.records.NodeStatus; import org.apache.hadoop.yarn.server.api.records.impl.pb.MasterKeyPBImpl; +import org.apache.hadoop.yarn.server.nodemanager.health.NodeHealthCheckerService; import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics; import org.apache.hadoop.yarn.server.utils.YarnServerBuilderUtils; diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/NodeManagerTestBase.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/NodeManagerTestBase.java index 13b3ee91bdc..172c6f45da4 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/NodeManagerTestBase.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/NodeManagerTestBase.java @@ -37,6 +37,7 @@ import org.apache.hadoop.yarn.server.api.protocolrecords.impl.pb.RegisterNodeManagerResponsePBImpl; import org.apache.hadoop.yarn.server.api.protocolrecords.impl.pb.UnRegisterNodeManagerResponsePBImpl; import org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl; +import org.apache.hadoop.yarn.server.nodemanager.health.NodeHealthCheckerService; import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics; import org.junit.Assert; import org.junit.Before; @@ -117,8 +118,8 @@ public UnRegisterNodeManagerResponse unRegisterNodeManager( protected Context context; public BaseNodeStatusUpdaterForTest(Context context, Dispatcher dispatcher, - NodeHealthCheckerService healthChecker, NodeManagerMetrics metrics, - ResourceTracker resourceTracker) { + NodeHealthCheckerService healthChecker, NodeManagerMetrics metrics, + ResourceTracker resourceTracker) { super(context, dispatcher, healthChecker, metrics); this.context = context; this.resourceTracker = resourceTracker; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestEventFlow.java 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestEventFlow.java index 54e090a29e2..b1fc2f1aa26 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestEventFlow.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestEventFlow.java @@ -45,6 +45,7 @@ import org.apache.hadoop.yarn.server.nodemanager.NodeManager.NMContext; import org.apache.hadoop.yarn.server.nodemanager.containermanager.BaseContainerManagerTest; import org.apache.hadoop.yarn.server.nodemanager.containermanager.TestContainerManager; +import org.apache.hadoop.yarn.server.nodemanager.health.NodeHealthCheckerService; import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics; import org.apache.hadoop.yarn.server.nodemanager.recovery.NMNullStateStoreService; import org.apache.hadoop.yarn.server.nodemanager.security.NMContainerTokenSecretManager; @@ -102,8 +103,8 @@ public int getHttpPort() { DeletionService del = new DeletionService(exec); Dispatcher dispatcher = new AsyncDispatcher(); LocalDirsHandlerService dirsHandler = new LocalDirsHandlerService(); - NodeHealthCheckerService healthChecker = new NodeHealthCheckerService( - NodeManager.getNodeHealthScriptRunner(conf), dirsHandler); + NodeHealthCheckerService healthChecker = + new NodeHealthCheckerService(dirsHandler); healthChecker.init(conf); NodeManagerMetrics metrics = NodeManagerMetrics.create(); NodeStatusUpdater nodeStatusUpdater = diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerReboot.java 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerReboot.java index fbd3646940d..260c3c4144c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerReboot.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerReboot.java @@ -64,6 +64,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.deletion.task.FileDeletionMatcher; import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.ContainerLocalizer; import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.ResourceLocalizationService; +import org.apache.hadoop.yarn.server.nodemanager.health.NodeHealthCheckerService; import org.apache.hadoop.yarn.util.Records; import org.junit.After; import org.junit.Assert; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerResync.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerResync.java index 25cca876ac6..9eae82a9322 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerResync.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerResync.java @@ -86,6 +86,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.TestContainerManager; import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application; import 
org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; +import org.apache.hadoop.yarn.server.nodemanager.health.NodeHealthCheckerService; import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics; import org.apache.hadoop.yarn.server.security.ApplicationACLsManager; import org.apache.hadoop.yarn.server.utils.YarnServerBuilderUtils; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerShutdown.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerShutdown.java index 25dbc1dd2ea..9a0213d87cf 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerShutdown.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerShutdown.java @@ -66,6 +66,7 @@ import org.apache.hadoop.yarn.security.NMTokenIdentifier; import org.apache.hadoop.yarn.server.api.records.MasterKey; import org.apache.hadoop.yarn.server.nodemanager.containermanager.TestContainerManager; +import org.apache.hadoop.yarn.server.nodemanager.health.NodeHealthCheckerService; import org.apache.hadoop.yarn.server.utils.BuilderUtils; import org.apache.hadoop.yarn.util.ConverterUtils; import org.junit.After; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java index 1b21b936543..948540c4485 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java @@ -107,6 +107,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerImpl; import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitor; +import org.apache.hadoop.yarn.server.nodemanager.health.NodeHealthCheckerService; import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics; import org.apache.hadoop.yarn.server.nodemanager.recovery.NMNullStateStoreService; import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService; @@ -318,7 +319,7 @@ public UnRegisterNodeManagerResponse unRegisterNodeManager( private class MyNodeStatusUpdater extends BaseNodeStatusUpdaterForTest { public MyNodeStatusUpdater(Context context, Dispatcher dispatcher, - NodeHealthCheckerService healthChecker, NodeManagerMetrics metrics) { + NodeHealthCheckerService healthChecker, NodeManagerMetrics metrics) { this(context, dispatcher, healthChecker, metrics, false); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdaterForAttributes.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdaterForAttributes.java index 325d60c59be..072f4432c62 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdaterForAttributes.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdaterForAttributes.java @@ -56,6 +56,7 @@ import org.apache.hadoop.yarn.server.api.records.NodeAction; import org.apache.hadoop.yarn.server.api.records.NodeStatus; import org.apache.hadoop.yarn.server.api.records.impl.pb.MasterKeyPBImpl; +import org.apache.hadoop.yarn.server.nodemanager.health.NodeHealthCheckerService; import org.apache.hadoop.yarn.server.nodemanager.nodelabels.NodeAttributesProvider; import org.apache.hadoop.yarn.server.utils.YarnServerBuilderUtils; import org.junit.After; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdaterForLabels.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdaterForLabels.java index a86ca3e8211..e3dce3b5b51 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdaterForLabels.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdaterForLabels.java @@ -50,6 +50,7 @@ import org.apache.hadoop.yarn.server.api.records.NodeAction; import org.apache.hadoop.yarn.server.api.records.NodeStatus; import org.apache.hadoop.yarn.server.api.records.impl.pb.MasterKeyPBImpl; +import org.apache.hadoop.yarn.server.nodemanager.health.NodeHealthCheckerService; import org.apache.hadoop.yarn.server.nodemanager.nodelabels.NodeLabelsProvider; import org.apache.hadoop.yarn.server.utils.YarnServerBuilderUtils; import org.junit.After; diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/BaseContainerManagerTest.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/BaseContainerManagerTest.java index 15c1cac9cb8..5236620afd6 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/BaseContainerManagerTest.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/BaseContainerManagerTest.java @@ -26,6 +26,7 @@ import static org.mockito.Mockito.spy; import static org.mockito.Mockito.doNothing; +import org.apache.hadoop.yarn.server.nodemanager.health.NodeHealthScriptRunner; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -75,7 +76,7 @@ import org.apache.hadoop.yarn.server.nodemanager.DeletionService; import org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService; import org.apache.hadoop.yarn.server.nodemanager.LocalRMInterface; -import org.apache.hadoop.yarn.server.nodemanager.NodeHealthCheckerService; +import org.apache.hadoop.yarn.server.nodemanager.health.NodeHealthCheckerService; import org.apache.hadoop.yarn.server.nodemanager.NodeManager; import org.apache.hadoop.yarn.server.nodemanager.NodeManager.NMContext; import org.apache.hadoop.yarn.server.nodemanager.NodeStatusUpdater; @@ -218,8 +219,7 @@ public void setup() throws IOException { delSrvc.init(conf); dirsHandler = new LocalDirsHandlerService(); - nodeHealthChecker = new NodeHealthCheckerService( - NodeManager.getNodeHealthScriptRunner(conf), dirsHandler); + nodeHealthChecker = new NodeHealthCheckerService(dirsHandler); nodeHealthChecker.init(conf); containerManager = 
createContainerManager(delSrvc); ((NMContext)context).setContainerManager(containerManager); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManagerRecovery.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManagerRecovery.java index e920105abf9..d23b8423617 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManagerRecovery.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManagerRecovery.java @@ -85,8 +85,7 @@ import org.apache.hadoop.yarn.server.nodemanager.Context; import org.apache.hadoop.yarn.server.nodemanager.DeletionService; import org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService; -import org.apache.hadoop.yarn.server.nodemanager.NodeHealthCheckerService; -import org.apache.hadoop.yarn.server.nodemanager.NodeManager; +import org.apache.hadoop.yarn.server.nodemanager.health.NodeHealthCheckerService; import org.apache.hadoop.yarn.server.nodemanager.NodeManager.NMContext; import org.apache.hadoop.yarn.server.nodemanager.NodeStatusUpdater; import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application; @@ -107,6 +106,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitorImpl; import org.apache.hadoop.yarn.server.nodemanager.containermanager.scheduler.ContainerScheduler; +import org.apache.hadoop.yarn.server.nodemanager.health.NodeHealthScriptRunner; import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics; import 
org.apache.hadoop.yarn.server.nodemanager.metrics.TestNodeManagerMetrics; import org.apache.hadoop.yarn.server.nodemanager.recovery.NMMemoryStateStoreService; @@ -157,8 +157,7 @@ public void setup() throws IOException { delSrvc.init(conf); exec = createContainerExecutor(); dirsHandler = new LocalDirsHandlerService(); - nodeHealthChecker = new NodeHealthCheckerService( - NodeManager.getNodeHealthScriptRunner(conf), dirsHandler); + nodeHealthChecker = new NodeHealthCheckerService(dirsHandler); nodeHealthChecker.init(conf); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/TestResourcePluginManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/TestResourcePluginManager.java index 28f917fd842..784a73c44d9 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/TestResourcePluginManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/TestResourcePluginManager.java @@ -30,7 +30,7 @@ import org.apache.hadoop.yarn.server.nodemanager.DeletionService; import org.apache.hadoop.yarn.server.nodemanager.LinuxContainerExecutor; import org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService; -import org.apache.hadoop.yarn.server.nodemanager.NodeHealthCheckerService; +import org.apache.hadoop.yarn.server.nodemanager.health.NodeHealthCheckerService; import org.apache.hadoop.yarn.server.nodemanager.NodeManager; import org.apache.hadoop.yarn.server.nodemanager.NodeManagerTestBase; import org.apache.hadoop.yarn.server.nodemanager.NodeStatusUpdater; diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/health/TestDockerHealthCheckerService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/health/TestDockerHealthCheckerService.java new file mode 100644 index 00000000000..5954eec9f8e --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/health/TestDockerHealthCheckerService.java @@ -0,0 +1,22 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.yarn.server.nodemanager.health; + +public class TestDockerHealthCheckerService { +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestNodeHealthScriptRunner.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/health/TestNodeHealthScriptRunner.java similarity index 98% rename from hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestNodeHealthScriptRunner.java rename to hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/health/TestNodeHealthScriptRunner.java index 2748c0b581a..b93d30a62a9 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestNodeHealthScriptRunner.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/health/TestNodeHealthScriptRunner.java @@ -16,7 +16,7 @@ * limitations under the License. 
*/ -package org.apache.hadoop.util; +package org.apache.hadoop.yarn.server.nodemanager.health; import java.io.File; import java.io.FileOutputStream; @@ -28,6 +28,7 @@ import org.apache.hadoop.fs.FileContext; import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.util.Shell; import org.junit.After; import org.junit.Assert; import org.junit.Before; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeHealthService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/health/TestNodeHealthService.java similarity index 92% rename from hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeHealthService.java rename to hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/health/TestNodeHealthService.java index 8083a563773..011a4e17a12 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeHealthService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/health/TestNodeHealthService.java @@ -16,12 +16,14 @@ * limitations under the License. 
*/ -package org.apache.hadoop.yarn.server.nodemanager; +package org.apache.hadoop.yarn.server.nodemanager.health; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.io.PrintWriter; + +import org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -31,7 +33,6 @@ import org.apache.hadoop.fs.FileContext; import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.util.NodeHealthScriptRunner; import org.apache.hadoop.util.Shell; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.factories.RecordFactory; @@ -42,6 +43,7 @@ import org.junit.Before; import org.junit.Test; +import static org.junit.Assert.fail; import static org.mockito.Mockito.doReturn; import static org.mockito.Mockito.spy; @@ -115,10 +117,14 @@ public void testNodeHealthService() throws Exception { writeNodeHealthScriptFile("", true); LocalDirsHandlerService dirsHandler = new LocalDirsHandlerService(); - NodeHealthScriptRunner nodeHealthScriptRunner = - spy(NodeManager.getNodeHealthScriptRunner(conf)); - NodeHealthCheckerService nodeHealthChecker = new NodeHealthCheckerService( - nodeHealthScriptRunner, dirsHandler); + NodeHealthScriptRunner nodeHealthScriptRunner = NodeHealthScriptRunner.newInstance(conf); + if (nodeHealthScriptRunner == null) { + fail("Should have created NodeHealthScriptRunner instance"); + } + nodeHealthScriptRunner = spy(nodeHealthScriptRunner); + NodeHealthCheckerService nodeHealthChecker = + new NodeHealthCheckerService(dirsHandler); + nodeHealthChecker.addHealthReporter(nodeHealthScriptRunner); nodeHealthChecker.init(conf); doReturn(true).when(nodeHealthScriptRunner).isHealthy(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestContainerLogsPage.java 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestContainerLogsPage.java index ece1af4a260..a6f7cec5d8a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestContainerLogsPage.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestContainerLogsPage.java @@ -43,7 +43,6 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.nativeio.NativeIO; import org.apache.hadoop.security.UserGroupInformation; -import org.apache.hadoop.util.NodeHealthScriptRunner; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ContainerId; @@ -57,7 +56,7 @@ import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; import org.apache.hadoop.yarn.server.nodemanager.Context; import org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService; -import org.apache.hadoop.yarn.server.nodemanager.NodeHealthCheckerService; +import org.apache.hadoop.yarn.server.nodemanager.health.NodeHealthCheckerService; import org.apache.hadoop.yarn.server.nodemanager.NodeManager; import org.apache.hadoop.yarn.server.nodemanager.NodeManager.NMContext; import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application; @@ -80,9 +79,8 @@ public class TestContainerLogsPage { private NodeHealthCheckerService createNodeHealthCheckerService(Configuration conf) { - NodeHealthScriptRunner scriptRunner = NodeManager.getNodeHealthScriptRunner(conf); LocalDirsHandlerService dirsHandler = new LocalDirsHandlerService(); - return new NodeHealthCheckerService(scriptRunner, dirsHandler); + return new NodeHealthCheckerService(dirsHandler); } @Test(timeout=30000) diff 
--git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMContainerWebSocket.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMContainerWebSocket.java index 0d618fde10f..914bda7018b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMContainerWebSocket.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMContainerWebSocket.java @@ -20,11 +20,10 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileUtil; -import org.apache.hadoop.util.NodeHealthScriptRunner; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.server.nodemanager.Context; import org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService; -import org.apache.hadoop.yarn.server.nodemanager.NodeHealthCheckerService; +import org.apache.hadoop.yarn.server.nodemanager.health.NodeHealthCheckerService; import org.apache.hadoop.yarn.server.nodemanager.NodeManager; import org.apache.hadoop.yarn.server.nodemanager.ResourceView; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; @@ -105,8 +104,7 @@ public boolean isPmemCheckEnabled() { }; conf.set(YarnConfiguration.NM_LOCAL_DIRS, TESTROOTDIR.getAbsolutePath()); conf.set(YarnConfiguration.NM_LOG_DIRS, testLogDir.getAbsolutePath()); - NodeHealthCheckerService healthChecker = createNodeHealthCheckerService( - conf); + NodeHealthCheckerService healthChecker = createNodeHealthCheckerService(); healthChecker.init(conf); LocalDirsHandlerService dirsHandler = healthChecker.getDiskHandler(); conf.set(YarnConfiguration.NM_WEBAPP_ADDRESS, webAddr); @@ 
-120,12 +118,9 @@ public boolean isPmemCheckEnabled() { } } - private NodeHealthCheckerService createNodeHealthCheckerService( - Configuration conf) { - NodeHealthScriptRunner scriptRunner = NodeManager.getNodeHealthScriptRunner( - conf); + private NodeHealthCheckerService createNodeHealthCheckerService() { LocalDirsHandlerService dirsHandler = new LocalDirsHandlerService(); - return new NodeHealthCheckerService(scriptRunner, dirsHandler); + return new NodeHealthCheckerService(dirsHandler); } @Test diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServer.java index 0a71a9179bb..232cc127044 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServer.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServer.java @@ -28,7 +28,6 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileUtil; -import org.apache.hadoop.util.NodeHealthScriptRunner; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ContainerId; @@ -42,7 +41,7 @@ import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; import org.apache.hadoop.yarn.server.nodemanager.Context; import org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService; -import org.apache.hadoop.yarn.server.nodemanager.NodeHealthCheckerService; +import org.apache.hadoop.yarn.server.nodemanager.health.NodeHealthCheckerService; import 
org.apache.hadoop.yarn.server.nodemanager.NodeManager; import org.apache.hadoop.yarn.server.nodemanager.ResourceView; import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application; @@ -54,7 +53,6 @@ import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService; import org.apache.hadoop.yarn.server.security.ApplicationACLsManager; import org.apache.hadoop.yarn.server.utils.BuilderUtils; -import org.apache.hadoop.yarn.util.ConverterUtils; import org.junit.After; import org.junit.Assert; import org.junit.Before; @@ -79,10 +77,9 @@ public void tearDown() { FileUtil.fullyDelete(testLogDir); } - private NodeHealthCheckerService createNodeHealthCheckerService(Configuration conf) { - NodeHealthScriptRunner scriptRunner = NodeManager.getNodeHealthScriptRunner(conf); + private NodeHealthCheckerService createNodeHealthCheckerService() { LocalDirsHandlerService dirsHandler = new LocalDirsHandlerService(); - return new NodeHealthCheckerService(scriptRunner, dirsHandler); + return new NodeHealthCheckerService(dirsHandler); } private int startNMWebAppServer(String webAddr) { @@ -113,7 +110,7 @@ public boolean isPmemCheckEnabled() { }; conf.set(YarnConfiguration.NM_LOCAL_DIRS, testRootDir.getAbsolutePath()); conf.set(YarnConfiguration.NM_LOG_DIRS, testLogDir.getAbsolutePath()); - NodeHealthCheckerService healthChecker = createNodeHealthCheckerService(conf); + NodeHealthCheckerService healthChecker = createNodeHealthCheckerService(); healthChecker.init(conf); LocalDirsHandlerService dirsHandler = healthChecker.getDiskHandler(); conf.set(YarnConfiguration.NM_WEBAPP_ADDRESS, webAddr); @@ -176,7 +173,7 @@ public boolean isPmemCheckEnabled() { }; conf.set(YarnConfiguration.NM_LOCAL_DIRS, testRootDir.getAbsolutePath()); conf.set(YarnConfiguration.NM_LOG_DIRS, testLogDir.getAbsolutePath()); - NodeHealthCheckerService healthChecker = createNodeHealthCheckerService(conf); + NodeHealthCheckerService healthChecker = 
createNodeHealthCheckerService(); healthChecker.init(conf); LocalDirsHandlerService dirsHandler = healthChecker.getDiskHandler(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServices.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServices.java index ad17ae81322..f591bad00d8 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServices.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServices.java @@ -47,7 +47,7 @@ import org.apache.hadoop.yarn.logaggregation.TestContainerLogsUtils; import org.apache.hadoop.yarn.server.nodemanager.Context; import org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService; -import org.apache.hadoop.yarn.server.nodemanager.NodeHealthCheckerService; +import org.apache.hadoop.yarn.server.nodemanager.health.NodeHealthCheckerService; import org.apache.hadoop.yarn.server.nodemanager.NodeManager; import org.apache.hadoop.yarn.server.nodemanager.ResourceView; import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationImpl; @@ -141,8 +141,8 @@ protected void configureServlets() { conf.set(YarnConfiguration.YARN_LOG_SERVER_WEBSERVICE_URL, LOGSERVICEWSADDR); dirsHandler = new LocalDirsHandlerService(); - NodeHealthCheckerService healthChecker = new NodeHealthCheckerService( - NodeManager.getNodeHealthScriptRunner(conf), dirsHandler); + NodeHealthCheckerService healthChecker = + new NodeHealthCheckerService(dirsHandler); healthChecker.init(conf); aclsManager = new ApplicationACLsManager(conf); nmContext = new NodeManager.NMContext(null, null, 
dirsHandler, diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServicesApps.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServicesApps.java index 3533d16849d..041036750fe 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServicesApps.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServicesApps.java @@ -47,13 +47,13 @@ import org.apache.hadoop.yarn.event.Dispatcher; import org.apache.hadoop.yarn.server.nodemanager.Context; import org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService; -import org.apache.hadoop.yarn.server.nodemanager.NodeHealthCheckerService; import org.apache.hadoop.yarn.server.nodemanager.NodeManager; import org.apache.hadoop.yarn.server.nodemanager.NodeManager.NMContext; import org.apache.hadoop.yarn.server.nodemanager.ResourceView; import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application; import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationState; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; +import org.apache.hadoop.yarn.server.nodemanager.health.NodeHealthCheckerService; import org.apache.hadoop.yarn.server.nodemanager.webapp.WebServer.NMWebApp; import org.apache.hadoop.yarn.server.nodemanager.webapp.dao.AppsInfo; import org.apache.hadoop.yarn.server.security.ApplicationACLsManager; @@ -105,7 +105,7 @@ protected void configureServlets() { conf.set(YarnConfiguration.NM_LOG_DIRS, testLogDir.getAbsolutePath()); LocalDirsHandlerService dirsHandler = new 
LocalDirsHandlerService(); NodeHealthCheckerService healthChecker = new NodeHealthCheckerService( - NodeManager.getNodeHealthScriptRunner(conf), dirsHandler); + dirsHandler); healthChecker.init(conf); dirsHandler = healthChecker.getDiskHandler(); aclsManager = new ApplicationACLsManager(conf); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServicesAuxServices.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServicesAuxServices.java index 4ee63db8177..cd476c25338 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServicesAuxServices.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServicesAuxServices.java @@ -44,11 +44,11 @@ import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.server.nodemanager.Context; import org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService; -import org.apache.hadoop.yarn.server.nodemanager.NodeHealthCheckerService; import org.apache.hadoop.yarn.server.nodemanager.NodeManager; import org.apache.hadoop.yarn.server.nodemanager.ResourceView; import org.apache.hadoop.yarn.server.nodemanager.containermanager.AuxServices; import org.apache.hadoop.yarn.server.nodemanager.containermanager.records.AuxServiceRecord; +import org.apache.hadoop.yarn.server.nodemanager.health.NodeHealthCheckerService; import org.apache.hadoop.yarn.server.nodemanager.webapp.WebServer.NMWebApp; import org.apache.hadoop.yarn.server.security.ApplicationACLsManager; import org.apache.hadoop.yarn.webapp.GenericExceptionHandler; @@ -125,7 +125,7 @@ public boolean 
isPmemCheckEnabled() { conf.set(YarnConfiguration.NM_LOG_DIRS, testLogDir.getAbsolutePath()); LocalDirsHandlerService dirsHandler = new LocalDirsHandlerService(); NodeHealthCheckerService healthChecker = new NodeHealthCheckerService( - NodeManager.getNodeHealthScriptRunner(conf), dirsHandler); + dirsHandler); healthChecker.init(conf); dirsHandler = healthChecker.getDiskHandler(); ApplicationACLsManager aclsManager = new ApplicationACLsManager(conf); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServicesContainers.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServicesContainers.java index a99ce280381..aacdf4c7138 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServicesContainers.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServicesContainers.java @@ -28,7 +28,6 @@ import java.io.File; import java.io.IOException; import java.io.StringReader; -import java.util.Arrays; import java.util.HashMap; import java.util.List; @@ -48,16 +47,15 @@ import org.apache.hadoop.yarn.event.Dispatcher; import org.apache.hadoop.yarn.server.nodemanager.Context; import org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService; -import org.apache.hadoop.yarn.server.nodemanager.NodeHealthCheckerService; import org.apache.hadoop.yarn.server.nodemanager.NodeManager; import org.apache.hadoop.yarn.server.nodemanager.ResourceView; import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; import 
org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerState; +import org.apache.hadoop.yarn.server.nodemanager.health.NodeHealthCheckerService; import org.apache.hadoop.yarn.server.nodemanager.webapp.WebServer.NMWebApp; import org.apache.hadoop.yarn.server.security.ApplicationACLsManager; import org.apache.hadoop.yarn.server.utils.BuilderUtils; -import org.apache.hadoop.yarn.util.ConverterUtils; import org.apache.hadoop.yarn.webapp.GenericExceptionHandler; import org.apache.hadoop.yarn.webapp.GuiceServletConfig; import org.apache.hadoop.yarn.webapp.JerseyTestBase; @@ -132,7 +130,7 @@ public boolean isPmemCheckEnabled() { conf.set(YarnConfiguration.NM_LOG_DIRS, testLogDir.getAbsolutePath()); LocalDirsHandlerService dirsHandler = new LocalDirsHandlerService(); NodeHealthCheckerService healthChecker = new NodeHealthCheckerService( - NodeManager.getNodeHealthScriptRunner(conf), dirsHandler); + dirsHandler); healthChecker.init(conf); dirsHandler = healthChecker.getDiskHandler(); aclsManager = new ApplicationACLsManager(conf); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebTerminal.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebTerminal.java index ec7d62c803c..d4180e48251 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebTerminal.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebTerminal.java @@ -26,13 +26,12 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.http.JettyUtils; -import org.apache.hadoop.util.NodeHealthScriptRunner; import 
org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.server.nodemanager.Context; import org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService; -import org.apache.hadoop.yarn.server.nodemanager.NodeHealthCheckerService; import org.apache.hadoop.yarn.server.nodemanager.NodeManager; import org.apache.hadoop.yarn.server.nodemanager.ResourceView; +import org.apache.hadoop.yarn.server.nodemanager.health.NodeHealthCheckerService; import org.apache.hadoop.yarn.server.security.ApplicationACLsManager; import org.junit.After; import org.junit.Before; @@ -54,12 +53,9 @@ private WebServer server; private int port; - private NodeHealthCheckerService createNodeHealthCheckerService( - Configuration conf) { - NodeHealthScriptRunner scriptRunner = NodeManager - .getNodeHealthScriptRunner(conf); + private NodeHealthCheckerService createNodeHealthCheckerService() { LocalDirsHandlerService dirsHandler = new LocalDirsHandlerService(); - return new NodeHealthCheckerService(scriptRunner, dirsHandler); + return new NodeHealthCheckerService(dirsHandler); } private int startNMWebAppServer(String webAddr) { @@ -90,7 +86,7 @@ public boolean isPmemCheckEnabled() { }; conf.set(YarnConfiguration.NM_LOCAL_DIRS, TESTROOTDIR.getAbsolutePath()); conf.set(YarnConfiguration.NM_LOG_DIRS, TESTLOGDIR.getAbsolutePath()); - healthChecker = createNodeHealthCheckerService(conf); + healthChecker = createNodeHealthCheckerService(); healthChecker.init(conf); LocalDirsHandlerService dirsHandler = healthChecker.getDiskHandler(); conf.set(YarnConfiguration.NM_WEBAPP_ADDRESS, webAddr); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/MiniYARNCluster.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/MiniYARNCluster.java index 68d97ee32ac..fdcefcbc350 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/MiniYARNCluster.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/MiniYARNCluster.java @@ -70,7 +70,7 @@ import org.apache.hadoop.yarn.server.nodemanager.Context; import org.apache.hadoop.yarn.server.nodemanager.DeletionService; import org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService; -import org.apache.hadoop.yarn.server.nodemanager.NodeHealthCheckerService; +import org.apache.hadoop.yarn.server.nodemanager.health.NodeHealthCheckerService; import org.apache.hadoop.yarn.server.nodemanager.NodeManager; import org.apache.hadoop.yarn.server.nodemanager.NodeStatusUpdater; import org.apache.hadoop.yarn.server.nodemanager.NodeStatusUpdaterImpl; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/NodeManager.md b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/NodeManager.md index e4ed57f5cb4..08525b877c6 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/NodeManager.md +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/NodeManager.md @@ -42,6 +42,17 @@ The following configuration parameters can be used to modify the disk checks: | `yarn.nodemanager.disk-health-checker.max-disk-utilization-per-disk-percentage` | Float between 0-100 | The maximum percentage of disk space that may be utilized before a disk is marked as unhealthy by the disk checker service. This check is run for every disk used by the NodeManager. The default value is 90 i.e. 90% of the disk can be used. | | `yarn.nodemanager.disk-health-checker.min-free-space-per-disk-mb` | Integer | The minimum amount of free space that must be available on the disk for the disk checker service to mark the disk as healthy. This check is run for every disk used by the NodeManager. The default value is 0 i.e. 
the entire disk can be used. | + +### Docker health checker + +If the Docker on YARN feature is enabled, you can enable the Docker health checker service. The Docker health checker checks the availability of the Docker daemon on the host, marking the node as unhealthy if the daemon is not responding. + +| Configuration Name | Allowed Values | Description | +|:---- |:---- |:---- | +| `yarn.nodemanager.docker-health-checker.enable` | true, false | Enable or disable the Docker health checker service. Default is false | +| `yarn.nodemanager.docker-health-checker.startup` | true, false | The NodeManager will fail to come up if the Docker daemon is not responding during startup. Default is false | +| `yarn.nodemanager.docker-health-checker.interval-ms` | Positive integer | The interval, in milliseconds, at which the Docker checker should run; the default value is 2 minutes | + ###External Health Script Users may specify their own health checker script that will be invoked by the health checker service. Users may specify a timeout as well as options to be passed to the script. If the script times out, results in an exception being thrown or outputs a line which begins with the string ERROR, the node is marked as unhealthy. Please note that: -- 2.21.0