diff --git a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/nodemanager/NodeInfo.java b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/nodemanager/NodeInfo.java index 4cf8aef18ff..03d8433e194 100644 --- a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/nodemanager/NodeInfo.java +++ b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/nodemanager/NodeInfo.java @@ -39,6 +39,7 @@ import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.ResourceUtilization; import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse; +import org.apache.hadoop.yarn.server.api.records.NodeHealthDetails; import org.apache.hadoop.yarn.server.api.records.OpportunisticContainersStatus; import org.apache.hadoop.yarn.server.resourcemanager.RMContext; import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager; @@ -70,10 +71,12 @@ public static NodeId newNodeID(String host, int port) { private List toCleanUpContainers; private List toCleanUpApplications; private List runningApplications; + private NodeHealthDetails nodeHealthDetails; public FakeRMNodeImpl(NodeId nodeId, String nodeAddr, String httpAddress, Resource perNode, String rackName, String healthReport, - int cmdPort, String hostName, NodeState state) { + int cmdPort, String hostName, NodeState state, + NodeHealthDetails nodeHealthDetails) { this.nodeId = nodeId; this.nodeAddr = nodeAddr; this.httpAddress = httpAddress; @@ -86,6 +89,7 @@ public FakeRMNodeImpl(NodeId nodeId, String nodeAddr, String httpAddress, toCleanUpApplications = new ArrayList(); toCleanUpContainers = new ArrayList(); runningApplications = new ArrayList(); + this.nodeHealthDetails = nodeHealthDetails; } public NodeId getNodeID() { @@ -120,6 +124,11 @@ public long getLastHealthReportTime() { return 0; } + @Override + public NodeHealthDetails getNodeHealthDetails() { + return this.nodeHealthDetails; + } + public Resource 
getTotalCapability() { return perNode; } @@ -256,7 +265,7 @@ public static RMNode newNodeInfo(String rackName, String hostName, return new FakeRMNodeImpl(nodeId, nodeAddr, httpAddress, resource, rackName, "Me good", - port, hostName, null); + port, hostName, null, null); } public static RMNode newNodeInfo(String rackName, String hostName, diff --git a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/RMNodeWrapper.java b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/RMNodeWrapper.java index 750b708f890..acc10848159 100644 --- a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/RMNodeWrapper.java +++ b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/RMNodeWrapper.java @@ -30,6 +30,7 @@ import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.ResourceUtilization; import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse; +import org.apache.hadoop.yarn.server.api.records.NodeHealthDetails; import org.apache.hadoop.yarn.server.api.records.OpportunisticContainersStatus; import org.apache.hadoop.yarn.server.resourcemanager.RMContext; import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager; @@ -94,6 +95,11 @@ public long getLastHealthReportTime() { return node.getLastHealthReportTime(); } + @Override + public NodeHealthDetails getNodeHealthDetails() { + return node.getNodeHealthDetails(); + } + @Override public Resource getTotalCapability() { return node.getTotalCapability(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java index 2d5a59fb2a0..df7efb423ac 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java @@ -1931,6 +1931,10 @@ public static boolean isAclEnabled(Configuration conf) { public static final int DEFAULT_NM_CONTAINER_METRICS_UNREGISTER_DELAY_MS = 10000; + /** The Service to check the health of the node. */ + public static final String NM_HEALTH_CHECKER_SERVICE = + NM_PREFIX + "health-checker-service.class"; + /** Prefix for all node manager disk health checker configs. */ private static final String NM_DISK_HEALTH_CHECK_PREFIX = "yarn.nodemanager.disk-health-checker."; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto index d7ca2a563ea..40988c4da43 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto @@ -823,6 +823,11 @@ message StringLocalResourceMapProto { optional LocalResourceProto value = 2; } +message StringIntMapProto { + optional string key = 1; + optional int32 value = 2; +} + message StringStringMapProto { optional string key = 1; optional string value = 2; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ProtoUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ProtoUtils.java index 455ca24405f..76ec4295232 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ProtoUtils.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ProtoUtils.java @@ -81,6 +81,7 @@ import org.apache.hadoop.yarn.proto.YarnProtos.QueueStateProto; import org.apache.hadoop.yarn.proto.YarnProtos.ReservationRequestInterpreterProto; import 
org.apache.hadoop.yarn.proto.YarnProtos.ResourceProto; +import org.apache.hadoop.yarn.proto.YarnProtos.StringIntMapProto; import org.apache.hadoop.yarn.proto.YarnProtos.StringStringMapProto; import org.apache.hadoop.yarn.proto.YarnProtos.TimedPlacementConstraintProto; import org.apache.hadoop.yarn.proto.YarnProtos.YarnApplicationAttemptStateProto; @@ -192,7 +193,7 @@ public static NodeStateProto convertToProtoFormat(NodeState e) { public static NodeState convertFromProtoFormat(NodeStateProto e) { return NodeState.valueOf(e.name().replace(NODE_STATE_PREFIX, "")); } - + /* * NodeId */ @@ -281,7 +282,7 @@ public static LocalResourceVisibilityProto convertToProtoFormat(LocalResourceVis public static LocalResourceVisibility convertFromProtoFormat(LocalResourceVisibilityProto e) { return LocalResourceVisibility.valueOf(e.name()); } - + /* * AMCommand */ @@ -587,6 +588,33 @@ public static ResourceTypes convertFromProtoFormat(ResourceTypesProto e) { return ret; } + public static List + convertStringIntMapToProtoList(Map stringIntMap) { + List pList = new ArrayList<>(); + if (stringIntMap != null && !stringIntMap.isEmpty()) { + StringIntMapProto.Builder pBuilder = StringIntMapProto.newBuilder(); + for (Map.Entry entry : stringIntMap.entrySet()) { + pBuilder.setKey(entry.getKey()); + pBuilder.setValue(entry.getValue()); + pList.add(pBuilder.build()); + } + } + return pList; + } + + public static Map convertProtoListToStringIntMap( + List pList) { + Map ret = new HashMap<>(); + if (pList != null) { + for (StringIntMapProto p : pList) { + if (p.hasKey()) { + ret.put(p.getKey(), p.getValue()); + } + } + } + return ret; + } + public static Map convertStringStringMapProtoListToMap( List pList) { Map ret = new HashMap<>(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml index 67da860cf56..b779e0f38f0 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml @@ -2393,6 +2393,16 @@ + + + The Service class to check the health of the node. + + yarn.nodemanager.health-checker-service.class + + org.apache.hadoop.yarn.server.nodemanager.health.NodeHealthCheckerServiceImpl + + + Flag to enable NodeManager disk health checker diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/NodeHealthDetails.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/NodeHealthDetails.java new file mode 100644 index 00000000000..0f0c8a1e0ae --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/NodeHealthDetails.java @@ -0,0 +1,92 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +package org.apache.hadoop.yarn.server.api.records; + +import org.apache.hadoop.classification.InterfaceAudience.Private; +import org.apache.hadoop.yarn.util.Records; + +import java.util.Map; + +/** + * {@code NodeHealthDetails} is a summary of the overall health score + * of the node. + *

<p> + * It includes information such as: + * <ul>
+ * <li> + * In depth analysis of the health of the node. Even if the node is healthy + * it gives out a score based on node resources. + * </li>
+ * <li> + * Holds a map of information about the node resources. + * Example: SSD, HDD, SKU etc. + * </li>
+ * </ul>
+ * + */ +public abstract class NodeHealthDetails { + + @Private + public static NodeHealthDetails newInstance(Integer overallScore, + Map nodeResourceScore) { + NodeHealthDetails nodeHealthDetails = Records.newRecord( + NodeHealthDetails.class); + nodeHealthDetails.setOverallScore(overallScore); + nodeHealthDetails.setNodeResourceScores(nodeResourceScore); + return nodeHealthDetails; + } + + /** + * Set the overall score of the node. This score is derived from node + * resources score. + * @param overallScore the overall health score of the node + */ + @Private + public abstract void setOverallScore(Integer overallScore); + + /** + * Holds a Map of the resources and its scores. + * @param nodeResourceScores map of each node resource to its score + */ + @Private + public abstract void setNodeResourceScores( + Map nodeResourceScores); + + /** + * @return the score of the node. + */ + @Private + public abstract Integer getOverallScore(); + + /** + * @return Scores of each resources in the node. + */ + @Private + public abstract Map getNodeResourceScores(); + + @Override + public String toString() { + StringBuilder healthDetailsString = new StringBuilder( + String.format("Overall Score = %s%n", this.getOverallScore())); + this.getNodeResourceScores().forEach((key, value) -> healthDetailsString + .append(String.format("%s = %s%n", key, value))); + return healthDetailsString.toString(); + } + +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/NodeHealthStatus.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/NodeHealthStatus.java index b21b88071f5..53dad1531f2 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/NodeHealthStatus.java +++
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/NodeHealthStatus.java @@ -90,4 +90,22 @@ public static NodeHealthStatus newInstance(boolean isNodeHealthy, @Private @Unstable public abstract void setLastHealthReportTime(long lastHealthReport); + + /** + * Set the overall score of the Node. + * @param nodeHealthDetails contains the resources score and the total + * overall score of the node + */ + @Private + @Unstable + public abstract void setNodeHealthDetails(NodeHealthDetails + nodeHealthDetails); + + /** + * @return {@link NodeHealthDetails} Gives a detailed overall score + * of the node health. + */ + @Public + @Stable + public abstract NodeHealthDetails getNodeHealthDetails(); } \ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/impl/pb/NodeHealthDetailsPBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/impl/pb/NodeHealthDetailsPBImpl.java new file mode 100644 index 00000000000..c2944d61d12 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/impl/pb/NodeHealthDetailsPBImpl.java @@ -0,0 +1,106 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.server.api.records.impl.pb; + +import org.apache.hadoop.yarn.api.records.impl.pb.ProtoUtils; +import org.apache.hadoop.yarn.proto.YarnServerCommonProtos.NodeHealthDetailsProto; +import org.apache.hadoop.yarn.proto.YarnServerCommonProtos.NodeHealthDetailsProtoOrBuilder; +import org.apache.hadoop.yarn.server.api.records.NodeHealthDetails; + +import java.util.Collections; +import java.util.Map; + +/** + * Protocol buffer backed implementation of {@link NodeHealthDetails}. + */ +public class NodeHealthDetailsPBImpl extends NodeHealthDetails { + private NodeHealthDetailsProto proto = NodeHealthDetailsProto + .getDefaultInstance(); + private NodeHealthDetailsProto.Builder builder = null; + private boolean viaProto = false; + + public NodeHealthDetailsPBImpl() { + builder = NodeHealthDetailsProto.newBuilder(); + } + + public NodeHealthDetailsPBImpl(NodeHealthDetailsProto proto) { + this.proto = proto; + viaProto = true; + } + public NodeHealthDetailsProto getProto() { + mergeLocalToProto(); + proto = viaProto ? proto : builder.build(); + viaProto = true; + return proto; + } + + private void mergeLocalToProto() { + if (viaProto) { + maybeInitBuilder(); + } + proto = builder.build(); + viaProto = true; + } + + private void maybeInitBuilder() { + if (viaProto || builder == null) { + builder = NodeHealthDetailsProto.newBuilder(proto); + } + viaProto = false; + } + + @Override + public void setOverallScore(Integer overallScore) { + maybeInitBuilder(); + // null means "unset": clear the field rather than unboxing null. + if (overallScore == null) { + this.builder.clearOverallScore(); + return; + } + this.builder.setOverallScore(overallScore); + } + + @Override + public Integer getOverallScore() { + NodeHealthDetailsProtoOrBuilder p = + this.viaProto ?
this.proto : this.builder; + if (!p.hasOverallScore()) { + return null; + } + return (p.getOverallScore()); + } + + @Override + public void setNodeResourceScores(Map nodeResourceScores) { + maybeInitBuilder(); + // Replace, rather than append to, any previously stored scores. + this.builder.clearNodeResourceScores(); + this.builder.addAllNodeResourceScores(ProtoUtils + .convertStringIntMapToProtoList(nodeResourceScores)); + } + + @Override + public Map getNodeResourceScores() { + NodeHealthDetailsProtoOrBuilder p = + this.viaProto ? this.proto : this.builder; + return p.getNodeResourceScoresCount() > 0 ? + ProtoUtils.convertProtoListToStringIntMap( + p.getNodeResourceScoresList()) : Collections.emptyMap(); + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/impl/pb/NodeHealthStatusPBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/impl/pb/NodeHealthStatusPBImpl.java index 20697834687..83f9f51d016 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/impl/pb/NodeHealthStatusPBImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/impl/pb/NodeHealthStatusPBImpl.java @@ -18,8 +18,10 @@ package org.apache.hadoop.yarn.server.api.records.impl.pb; +import org.apache.hadoop.yarn.proto.YarnServerCommonProtos.NodeHealthDetailsProto; import org.apache.hadoop.yarn.proto.YarnServerCommonProtos.NodeHealthStatusProto; import org.apache.hadoop.yarn.proto.YarnServerCommonProtos.NodeHealthStatusProtoOrBuilder; +import org.apache.hadoop.yarn.server.api.records.NodeHealthDetails; import org.apache.hadoop.yarn.server.api.records.NodeHealthStatus; import org.apache.hadoop.thirdparty.protobuf.TextFormat; @@ -30,6 +32,7 @@ private boolean viaProto = false; private NodeHealthStatusProto proto = NodeHealthStatusProto
.getDefaultInstance(); + private NodeHealthDetails nodeHealthDetails; public NodeHealthStatusPBImpl() { this.builder = NodeHealthStatusProto.newBuilder(); @@ -70,6 +73,7 @@ public String toString() { private void mergeLocalToProto() { if (this.viaProto) maybeInitBuilder(); + mergeLocalToBuilder(); this.proto = this.builder.build(); this.viaProto = true; @@ -82,6 +86,13 @@ private void maybeInitBuilder() { this.viaProto = false; } + private void mergeLocalToBuilder() { + if(nodeHealthDetails != null) { + builder.setNodeHealthDetails(convertToProtoFormat( + nodeHealthDetails)); + } + } + @Override public boolean getIsNodeHealthy() { NodeHealthStatusProtoOrBuilder p = @@ -128,4 +139,36 @@ public void setLastHealthReportTime(long lastHealthReport) { this.builder.setLastHealthReportTime((lastHealthReport)); } + @Override + public void setNodeHealthDetails( + NodeHealthDetails nodeHealthDetails) { + maybeInitBuilder(); + if(nodeHealthDetails == null) { + this.builder.clearNodeHealthDetails(); + } + this.nodeHealthDetails = nodeHealthDetails; + } + + @Override + public NodeHealthDetails getNodeHealthDetails() { + NodeHealthStatusProtoOrBuilder p = + this.viaProto ? 
this.proto : this.builder; + if(this.nodeHealthDetails != null) { + return this.nodeHealthDetails; + } + if(!p.hasNodeHealthDetails()) { + return null; + } + this.nodeHealthDetails = convertFromProtoFormat(p.getNodeHealthDetails()); + return this.nodeHealthDetails; + } + + private NodeHealthDetailsPBImpl convertFromProtoFormat( + NodeHealthDetailsProto p) { + return new NodeHealthDetailsPBImpl(p); + } + + private NodeHealthDetailsProto convertToProtoFormat(NodeHealthDetails nhd) { + return ((NodeHealthDetailsPBImpl) nhd).getProto(); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/proto/yarn_server_common_protos.proto b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/proto/yarn_server_common_protos.proto index ea8df4fb800..65d5f8b6d2e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/proto/yarn_server_common_protos.proto +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/proto/yarn_server_common_protos.proto @@ -58,10 +58,16 @@ message MasterKeyProto { optional bytes bytes = 2; } +message NodeHealthDetailsProto { + optional int32 overall_score = 1; + repeated StringIntMapProto node_resource_scores = 2; +} + message NodeHealthStatusProto { optional bool is_node_healthy = 1; optional string health_report = 2; optional int64 last_health_report_time = 3; + optional NodeHealthDetailsProto node_health_details = 4; } message VersionProto { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java index f90423cf6b6..11ff996420a 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java @@ -62,6 +62,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerImpl; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerState; import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.ResourcePluginManager; +import org.apache.hadoop.yarn.server.nodemanager.health.NodeHealthCheckerServiceImpl; import org.apache.hadoop.yarn.server.nodemanager.logaggregation.tracker.NMLogAggregationStatusTracker; import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics; import org.apache.hadoop.yarn.server.nodemanager.nodelabels.ConfigurationNodeLabelsProvider; @@ -85,6 +86,7 @@ import org.slf4j.LoggerFactory; import java.io.IOException; +import java.lang.reflect.Constructor; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -245,6 +247,36 @@ protected NodeResourceMonitor createNodeResourceMonitor() { return new NodeResourceMonitorImpl(context); } + /** + * Instantiates the node health checker named by + * {@link YarnConfiguration#NM_HEALTH_CHECKER_SERVICE}, falling back to + * {@link NodeHealthCheckerServiceImpl} when the key is unset. + * @param conf configuration to read the class name from + * @return the health checker, constructed with the local dirs handler + * @throws YarnRuntimeException if the class cannot be loaded, is not a + * {@link NodeHealthCheckerService}, or cannot be constructed + */ + protected NodeHealthCheckerService createNodeHealthCheckerService( + Configuration conf) { + + Class clazz = conf.getClassByNameOrNull(conf.get(YarnConfiguration + .NM_HEALTH_CHECKER_SERVICE, + NodeHealthCheckerServiceImpl.class.getName())); + try { + if (clazz == null + || !NodeHealthCheckerService.class.isAssignableFrom(clazz)) { + throw new
RuntimeException(clazz + " does not extend " + + NodeHealthCheckerService.class); + } + Constructor cons = clazz.getConstructor(LocalDirsHandlerService.class); + return (NodeHealthCheckerService) cons.newInstance(dirsHandler); + } catch (Exception e) { + throw new YarnRuntimeException( + "Could not instantiate NodeHealthChecker Class: " + clazz + " (" + + YarnConfiguration.NM_HEALTH_CHECKER_SERVICE + ")", e); + } + } + + protected ContainerManagerImpl createContainerManager(Context context, ContainerExecutor exec, DeletionService del, NodeStatusUpdater nodeStatusUpdater, ApplicationACLsManager aclsManager, @@ -410,7 +442,7 @@ protected void serviceInit(Configuration conf) throws Exception { // NodeManager level dispatcher this.dispatcher = createNMDispatcher(); - this.nodeHealthChecker = new NodeHealthCheckerService(dirsHandler); + this.nodeHealthChecker = createNodeHealthCheckerService(conf); addService(nodeHealthChecker); ((NMContext)context).setContainerExecutor(exec); @@ -934,6 +966,14 @@ public NodeHealthCheckerService getNodeHealthChecker() { return nodeHealthChecker; } + /** + * @return the node disk handler + */ + @VisibleForTesting + public LocalDirsHandlerService getDiskHandler() { + return dirsHandler; + } + private void initAndStartNodeManager(Configuration conf, boolean hasToReboot) { try { // Failed to start if we're a Unix based system but we don't have bash.
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java index 37da31a322b..4ec05702b22 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java @@ -517,9 +517,12 @@ protected NodeStatus getNodeStatus(int responseId) throws IOException { nodeHealthStatus.setIsNodeHealthy(healthChecker.isHealthy()); nodeHealthStatus.setLastHealthReportTime(healthChecker .getLastHealthReportTime()); - LOG.debug("Node's health-status : {}, {}", + nodeHealthStatus.setNodeHealthDetails(healthChecker + .updateNodeHealthDetails()); + LOG.debug("Node's health-status : {}, {}, {}", nodeHealthStatus.getIsNodeHealthy(), - nodeHealthStatus.getHealthReport()); + nodeHealthStatus.getHealthReport(), + nodeHealthStatus.getNodeHealthDetails()); List containersStatuses = getContainerStatuses(); ResourceUtilization containersUtilization = getContainersUtilization(); ResourceUtilization nodeUtilization = getNodeUtilization(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/health/NodeHealthCheckerService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/health/NodeHealthCheckerService.java index a89fb86362b..b6951c2b061 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/health/NodeHealthCheckerService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/health/NodeHealthCheckerService.java @@ -1,164 +1,63 @@ /** -* Licensed to the Apache Software Foundation (ASF) under one -* or more contributor license agreements. See the NOTICE file -* distributed with this work for additional information -* regarding copyright ownership. The ASF licenses this file -* to you under the Apache License, Version 2.0 (the -* "License"); you may not use this file except in compliance -* with the License. You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ package org.apache.hadoop.yarn.server.nodemanager.health; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Joiner; -import com.google.common.base.Strings; -import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.service.CompositeService; -import org.apache.hadoop.service.Service; -import org.apache.hadoop.yarn.conf.YarnConfiguration; -import org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.util.ArrayList; -import java.util.List; -import java.util.Optional; -import java.util.stream.Collectors; +import org.apache.hadoop.yarn.server.api.records.NodeHealthDetails; /** - * This class provides functionality of checking the health of a node and - * reporting back to the service for which the health checker has been asked to - * report. - * - * It is a {@link CompositeService}: every {@link Service} must be registered - * first in serviceInit, and should also implement the {@link HealthReporter} - * interface - otherwise an exception is thrown. - * - * Calling functions of HealthReporter merge its dependent - * services' reports. - * - * @see HealthReporter - * @see LocalDirsHandlerService - * @see TimedHealthReporterService + * Base class for all NodeHealthCheckerServices. 
*/ -public class NodeHealthCheckerService extends CompositeService +public abstract class NodeHealthCheckerService extends CompositeService implements HealthReporter { - public static final Logger LOG = - LoggerFactory.getLogger(NodeHealthCheckerService.class); - private static final int MAX_SCRIPTS = 4; - - private List reporters; - private LocalDirsHandlerService dirsHandler; - private ExceptionReporter exceptionReporter; - - public static final String SEPARATOR = ";"; - - public NodeHealthCheckerService( - LocalDirsHandlerService dirHandlerService) { - super(NodeHealthCheckerService.class.getName()); - - this.reporters = new ArrayList<>(); - this.dirsHandler = dirHandlerService; - this.exceptionReporter = new ExceptionReporter(); - } - - @Override - protected void serviceInit(Configuration conf) throws Exception { - reporters.add(exceptionReporter); - addHealthReporter(dirsHandler); - String[] configuredScripts = conf.getTrimmedStrings( - YarnConfiguration.NM_HEALTH_CHECK_SCRIPTS, - YarnConfiguration.DEFAULT_NM_HEALTH_CHECK_SCRIPTS); - if (configuredScripts.length > MAX_SCRIPTS) { - throw new IllegalArgumentException("Due to performance reasons " + - "running more than " + MAX_SCRIPTS + "scripts is not allowed."); - } - for (String configuredScript : configuredScripts) { - addHealthReporter(NodeHealthScriptRunner.newInstance( - configuredScript, conf)); - } - super.serviceInit(conf); - } - - /** - * Adds a {@link Service} implementing the {@link HealthReporter} interface, - * if that service has not been added to this {@link CompositeService} yet. 
- * - * @param service to add - * @throws Exception if not a {@link HealthReporter} - * implementation is provided to this function - */ - @VisibleForTesting - void addHealthReporter(Service service) throws Exception { - if (service != null) { - if (getServices().stream() - .noneMatch(x -> x.getName().equals(service.getName()))) { - if (!(service instanceof HealthReporter)) { - throw new Exception("Attempted to add service to " + - "NodeHealthCheckerService that does not implement " + - "HealthReporter."); - } - reporters.add((HealthReporter) service); - addService(service); - } else { - LOG.debug("Omitting duplicate service: {}.", service.getName()); - } - } + public NodeHealthCheckerService(String name) { + super(name); } /** - * Joining the health reports of the dependent services. - * - * @return the report string about the health of the node + * @return the health of the node. true if healthy. */ - @Override - public String getHealthReport() { - ArrayList reports = reporters.stream() - .map(reporter -> Strings.emptyToNull(reporter.getHealthReport())) - .collect(Collectors.toCollection(ArrayList::new)); - return Joiner.on(SEPARATOR).skipNulls().join(reports); - } + public abstract boolean isHealthy(); /** - * @return true if the node is healthy + * @return the exceptions that have occurred when running the scripts. + * Otherwise an empty String is returned. */ - @Override - public boolean isHealthy() { - return reporters.stream().allMatch(HealthReporter::isHealthy); - } + public abstract String getHealthReport(); /** * @return when the last time the node health status is reported */ - @Override - public long getLastHealthReportTime() { - Optional max = reporters.stream() - .map(HealthReporter::getLastHealthReportTime).max(Long::compareTo); - return max.orElse(0L); - } + public abstract long getLastHealthReportTime(); /** - * @return the disk handler + * Gives the detailed overview of the node and also holds the score of the + * node. This method is optional. 
Subclasses can implement it if needed. + * @return {@link NodeHealthDetails} */ - public LocalDirsHandlerService getDiskHandler() { - return dirsHandler; + @Private + public NodeHealthDetails updateNodeHealthDetails() { + return null; } - /** - * Propagating an exception to {@link ExceptionReporter}. - * @param exception the exception to propagate - */ - public void reportException(Exception exception) { - exceptionReporter.reportException(exception); - } + public abstract void reportException(Exception exception); + } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/health/NodeHealthCheckerServiceImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/health/NodeHealthCheckerServiceImpl.java new file mode 100644 index 00000000000..b0c4453bebe --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/health/NodeHealthCheckerServiceImpl.java @@ -0,0 +1,163 @@ +/** +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*/ + +package org.apache.hadoop.yarn.server.nodemanager.health; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Joiner; +import com.google.common.base.Strings; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.service.CompositeService; +import org.apache.hadoop.service.Service; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.ArrayList; +import java.util.List; +import java.util.Optional; +import java.util.stream.Collectors; + +/** + * This class provides functionality of checking the health of a node and + * reporting back to the service for which the health checker has been asked to + * report. + * + * It is a {@link CompositeService}: every {@link Service} must be registered + * first in serviceInit, and should also implement the {@link HealthReporter} + * interface - otherwise an exception is thrown. + * + * Calling functions of HealthReporter merge its dependent + * services' reports. 
+ * + * @see HealthReporter + * @see LocalDirsHandlerService + * @see TimedHealthReporterService + */ +public class NodeHealthCheckerServiceImpl extends NodeHealthCheckerService { + + public static final Logger LOG = + LoggerFactory.getLogger(NodeHealthCheckerServiceImpl.class); + private static final int MAX_SCRIPTS = 4; + + private List reporters; + private LocalDirsHandlerService dirsHandler; + private ExceptionReporter exceptionReporter; + + public static final String SEPARATOR = ";"; + + public NodeHealthCheckerServiceImpl( + LocalDirsHandlerService dirHandlerService) { + super(NodeHealthCheckerServiceImpl.class.getName()); + + this.reporters = new ArrayList<>(); + this.dirsHandler = dirHandlerService; + this.exceptionReporter = new ExceptionReporter(); + } + + @Override + protected void serviceInit(Configuration conf) throws Exception { + reporters.add(exceptionReporter); + addHealthReporter(dirsHandler); + String[] configuredScripts = conf.getTrimmedStrings( + YarnConfiguration.NM_HEALTH_CHECK_SCRIPTS, + YarnConfiguration.DEFAULT_NM_HEALTH_CHECK_SCRIPTS); + if (configuredScripts.length > MAX_SCRIPTS) { + throw new IllegalArgumentException("Due to performance reasons " + + "running more than " + MAX_SCRIPTS + "scripts is not allowed."); + } + for (String configuredScript : configuredScripts) { + addHealthReporter(NodeHealthScriptRunner.newInstance( + configuredScript, conf)); + } + super.serviceInit(conf); + } + + /** + * Adds a {@link Service} implementing the {@link HealthReporter} interface, + * if that service has not been added to this {@link CompositeService} yet. 
+ * + * @param service to add + * @throws Exception if the provided service does not implement + * {@link HealthReporter} + */ + @VisibleForTesting + void addHealthReporter(Service service) throws Exception { + if (service != null) { + if (getServices().stream() + .noneMatch(x -> x.getName().equals(service.getName()))) { + if (!(service instanceof HealthReporter)) { + throw new Exception("Attempted to add service to " + + "NodeHealthCheckerService that does not implement " + + "HealthReporter."); + } + reporters.add((HealthReporter) service); + addService(service); + } else { + LOG.debug("Omitting duplicate service: {}.", service.getName()); + } + } + } + + /** + * Joining the health reports of the dependent services. + * + * @return the report string about the health of the node + */ + @Override + public String getHealthReport() { + ArrayList reports = reporters.stream() + .map(reporter -> Strings.emptyToNull(reporter.getHealthReport())) + .collect(Collectors.toCollection(ArrayList::new)); + return Joiner.on(SEPARATOR).skipNulls().join(reports); + } + + /** + * @return true if the node is healthy + */ + @Override + public boolean isHealthy() { + return reporters.stream().allMatch(HealthReporter::isHealthy); + } + + /** + * @return the last time the node health status was reported + */ + @Override + public long getLastHealthReportTime() { + Optional max = reporters.stream() + .map(HealthReporter::getLastHealthReportTime).max(Long::compareTo); + return max.orElse(0L); + } + + /** + * @return the disk handler + */ + public LocalDirsHandlerService getDiskHandler() { + return dirsHandler; + } + + /** + * Propagating an exception to {@link ExceptionReporter}.
+ * @param exception the exception to propagate + */ + public void reportException(Exception exception) { + exceptionReporter.reportException(exception); + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestEventFlow.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestEventFlow.java index 3f4879b23ea..b0ee4b03c91 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestEventFlow.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestEventFlow.java @@ -48,6 +48,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.BaseContainerManagerTest; import org.apache.hadoop.yarn.server.nodemanager.containermanager.TestContainerManager; import org.apache.hadoop.yarn.server.nodemanager.health.NodeHealthCheckerService; +import org.apache.hadoop.yarn.server.nodemanager.health.NodeHealthCheckerServiceImpl; import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics; import org.apache.hadoop.yarn.server.nodemanager.recovery.NMNullStateStoreService; import org.apache.hadoop.yarn.server.nodemanager.security.NMContainerTokenSecretManager; @@ -106,7 +107,7 @@ public int getHttpPort() { Dispatcher dispatcher = new AsyncDispatcher(); LocalDirsHandlerService dirsHandler = new LocalDirsHandlerService(); NodeHealthCheckerService healthChecker = - new NodeHealthCheckerService(dirsHandler); + new NodeHealthCheckerServiceImpl(dirsHandler); healthChecker.init(conf); NodeManagerMetrics metrics = NodeManagerMetrics.create(); NodeStatusUpdater nodeStatusUpdater = diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/BaseContainerManagerTest.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/BaseContainerManagerTest.java index 9ee3ce6bc8b..4b5191458e2 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/BaseContainerManagerTest.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/BaseContainerManagerTest.java @@ -27,6 +27,7 @@ import static org.mockito.Mockito.doNothing; import org.apache.hadoop.yarn.server.nodemanager.NodeResourceMonitorImpl; +import org.apache.hadoop.yarn.server.nodemanager.health.NodeHealthCheckerServiceImpl; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -154,7 +155,6 @@ public NMLogAggregationStatusTracker getNMLogAggregationStatusTracker() { protected ContainerExecutor exec; protected DeletionService delSrvc; protected String user = "nobody"; - protected NodeHealthCheckerService nodeHealthChecker; protected LocalDirsHandlerService dirsHandler; protected final long DUMMY_RM_IDENTIFIER = 1234; private NodeResourceMonitorImpl nodeResourceMonitor = mock( @@ -172,6 +172,12 @@ public void setNodeStatusUpdater( this.nodeStatusUpdater = nodeStatusUpdater; } + public void setNodeHealthCheckerService(NodeHealthCheckerService nhcs, + Configuration conf) { + this.nodeHealthCheckerService = nhcs; + this.nodeHealthCheckerService.init(conf); + } + protected ContainerExecutor createContainerExecutor() { DefaultContainerExecutor exec = new DefaultContainerExecutor(); exec.setConf(conf); @@ -208,7 +214,7 @@ public void setup() throws IOException { dirsHandler = new 
LocalDirsHandlerService(); dirsHandler.init(conf); - nodeHealthCheckerService = new NodeHealthCheckerService(dirsHandler); + nodeHealthCheckerService = new NodeHealthCheckerServiceImpl(dirsHandler); nodeStatusUpdater = new NodeStatusUpdaterImpl( context, new AsyncDispatcher(), nodeHealthCheckerService, metrics) { @Override diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManagerRecovery.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManagerRecovery.java index 826cc02219b..8d70b17ce29 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManagerRecovery.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManagerRecovery.java @@ -85,7 +85,6 @@ import org.apache.hadoop.yarn.server.nodemanager.Context; import org.apache.hadoop.yarn.server.nodemanager.DeletionService; import org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService; -import org.apache.hadoop.yarn.server.nodemanager.health.NodeHealthCheckerService; import org.apache.hadoop.yarn.server.nodemanager.NodeManager.NMContext; import org.apache.hadoop.yarn.server.nodemanager.NodeStatusUpdater; import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application; @@ -106,6 +105,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitorImpl; import org.apache.hadoop.yarn.server.nodemanager.containermanager.scheduler.ContainerScheduler; +import org.apache.hadoop.yarn.server.nodemanager.health.NodeHealthCheckerServiceImpl; import 
org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics; import org.apache.hadoop.yarn.server.nodemanager.metrics.TestNodeManagerMetrics; import org.apache.hadoop.yarn.server.nodemanager.recovery.NMMemoryStateStoreService; @@ -156,9 +156,8 @@ public void setup() throws IOException { delSrvc.init(conf); exec = createContainerExecutor(); dirsHandler = new LocalDirsHandlerService(); - nodeHealthChecker = new NodeHealthCheckerService(dirsHandler); - nodeHealthChecker.init(conf); - + setNodeHealthCheckerService(new NodeHealthCheckerServiceImpl(dirsHandler), + conf); } @Test diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/health/TestNodeHealthCheckerService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/health/TestNodeHealthCheckerService.java index 725cc7b8821..0579ff0ac01 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/health/TestNodeHealthCheckerService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/health/TestNodeHealthCheckerService.java @@ -139,8 +139,8 @@ public void testNodeHealthService() throws Exception { fail("Should have created NodeHealthScriptRunner instance"); } nodeHealthScriptRunner = spy(nodeHealthScriptRunner); - NodeHealthCheckerService nodeHealthChecker = - new NodeHealthCheckerService(dirsHandler); + NodeHealthCheckerServiceImpl nodeHealthChecker = + new NodeHealthCheckerServiceImpl(dirsHandler); nodeHealthChecker.addHealthReporter(nodeHealthScriptRunner); nodeHealthChecker.init(conf); @@ -190,7 +190,7 @@ public void testNodeHealthService() throws Exception { healthStatus.getIsNodeHealthy()); Assert.assertTrue("Node script time out 
message not propagated", healthStatus.getHealthReport().equals( - Joiner.on(NodeHealthCheckerService.SEPARATOR).skipNulls().join( + Joiner.on(NodeHealthCheckerServiceImpl.SEPARATOR).skipNulls().join( NodeHealthScriptRunner.NODE_HEALTH_SCRIPT_TIMED_OUT_MSG, Strings.emptyToNull( nodeHealthChecker.getDiskHandler() @@ -229,8 +229,8 @@ public long getLastHealthReportTime() { Configuration conf = new Configuration(); LocalDirsHandlerService dirsHandler = new LocalDirsHandlerService(); - NodeHealthCheckerService nodeHealthChecker = - new NodeHealthCheckerService(dirsHandler); + NodeHealthCheckerServiceImpl nodeHealthChecker = + new NodeHealthCheckerServiceImpl(dirsHandler); nodeHealthChecker.addHealthReporter(customHealthReporter); nodeHealthChecker.init(conf); @@ -245,8 +245,8 @@ public long getLastHealthReportTime() { public void testExceptionReported() { Configuration conf = new Configuration(); LocalDirsHandlerService dirsHandler = new LocalDirsHandlerService(); - NodeHealthCheckerService nodeHealthChecker = - new NodeHealthCheckerService(dirsHandler); + NodeHealthCheckerServiceImpl nodeHealthChecker = + new NodeHealthCheckerServiceImpl(dirsHandler); nodeHealthChecker.init(conf); assertThat(nodeHealthChecker.isHealthy()).isTrue(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestContainerLogsPage.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestContainerLogsPage.java index 71716da37e7..5892c133706 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestContainerLogsPage.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestContainerLogsPage.java @@ -63,7 +63,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerImpl; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerState; import org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch; -import org.apache.hadoop.yarn.server.nodemanager.health.NodeHealthCheckerService; +import org.apache.hadoop.yarn.server.nodemanager.health.NodeHealthCheckerServiceImpl; import org.apache.hadoop.yarn.server.nodemanager.recovery.NMNullStateStoreService; import org.apache.hadoop.yarn.server.nodemanager.webapp.ContainerLogsPage.ContainersLogsBlock; import org.apache.hadoop.yarn.server.security.ApplicationACLsManager; @@ -78,9 +78,9 @@ public class TestContainerLogsPage { - private NodeHealthCheckerService createNodeHealthCheckerService() { + private NodeHealthCheckerServiceImpl createNodeHealthCheckerService() { LocalDirsHandlerService dirsHandler = new LocalDirsHandlerService(); - return new NodeHealthCheckerService(dirsHandler); + return new NodeHealthCheckerServiceImpl(dirsHandler); } @Test(timeout=30000) @@ -90,7 +90,8 @@ public void testContainerLogDirs() throws IOException, YarnException { String logdirwithFile = absLogDir.toURI().toString(); Configuration conf = new Configuration(); conf.set(YarnConfiguration.NM_LOG_DIRS, logdirwithFile); - NodeHealthCheckerService healthChecker = createNodeHealthCheckerService(); + NodeHealthCheckerServiceImpl healthChecker = + createNodeHealthCheckerService(); healthChecker.init(conf); LocalDirsHandlerService dirsHandler = healthChecker.getDiskHandler(); NMContext nmContext = new NodeManager.NMContext(null, null, dirsHandler, @@ -213,7 +214,8 @@ public void testContainerLogPageAccess() throws IOException { "kerberos"); UserGroupInformation.setConfiguration(conf); - 
NodeHealthCheckerService healthChecker = createNodeHealthCheckerService(); + NodeHealthCheckerServiceImpl healthChecker = + createNodeHealthCheckerService(); healthChecker.init(conf); LocalDirsHandlerService dirsHandler = healthChecker.getDiskHandler(); // Add an application and the corresponding containers diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMContainerWebSocket.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMContainerWebSocket.java index 1e636650463..82b104b9206 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMContainerWebSocket.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMContainerWebSocket.java @@ -26,7 +26,7 @@ import org.apache.hadoop.yarn.server.nodemanager.NodeManager; import org.apache.hadoop.yarn.server.nodemanager.ResourceView; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; -import org.apache.hadoop.yarn.server.nodemanager.health.NodeHealthCheckerService; +import org.apache.hadoop.yarn.server.nodemanager.health.NodeHealthCheckerServiceImpl; import org.apache.hadoop.yarn.server.security.ApplicationACLsManager; import org.eclipse.jetty.websocket.api.Session; import org.eclipse.jetty.websocket.api.UpgradeRequest; @@ -104,7 +104,8 @@ public boolean isPmemCheckEnabled() { }; conf.set(YarnConfiguration.NM_LOCAL_DIRS, TESTROOTDIR.getAbsolutePath()); conf.set(YarnConfiguration.NM_LOG_DIRS, testLogDir.getAbsolutePath()); - NodeHealthCheckerService healthChecker = createNodeHealthCheckerService(); + NodeHealthCheckerServiceImpl healthChecker = + 
createNodeHealthCheckerService(); healthChecker.init(conf); LocalDirsHandlerService dirsHandler = healthChecker.getDiskHandler(); conf.set(YarnConfiguration.NM_WEBAPP_ADDRESS, webAddr); @@ -118,9 +119,9 @@ public boolean isPmemCheckEnabled() { } } - private NodeHealthCheckerService createNodeHealthCheckerService() { + private NodeHealthCheckerServiceImpl createNodeHealthCheckerService() { LocalDirsHandlerService dirsHandler = new LocalDirsHandlerService(); - return new NodeHealthCheckerService(dirsHandler); + return new NodeHealthCheckerServiceImpl(dirsHandler); } @Test diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServer.java index cbfaa177921..58a1d05510f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServer.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServer.java @@ -47,7 +47,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerImpl; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerState; -import org.apache.hadoop.yarn.server.nodemanager.health.NodeHealthCheckerService; +import org.apache.hadoop.yarn.server.nodemanager.health.NodeHealthCheckerServiceImpl; import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics; import org.apache.hadoop.yarn.server.nodemanager.recovery.NMNullStateStoreService; import 
org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService; @@ -78,9 +78,9 @@ public void tearDown() { FileUtil.fullyDelete(testLogDir); } - private NodeHealthCheckerService createNodeHealthCheckerService() { + private NodeHealthCheckerServiceImpl createNodeHealthCheckerService() { LocalDirsHandlerService dirsHandler = new LocalDirsHandlerService(); - return new NodeHealthCheckerService(dirsHandler); + return new NodeHealthCheckerServiceImpl(dirsHandler); } private int startNMWebAppServer(String webAddr) { @@ -111,7 +111,8 @@ public boolean isPmemCheckEnabled() { }; conf.set(YarnConfiguration.NM_LOCAL_DIRS, testRootDir.getAbsolutePath()); conf.set(YarnConfiguration.NM_LOG_DIRS, testLogDir.getAbsolutePath()); - NodeHealthCheckerService healthChecker = createNodeHealthCheckerService(); + NodeHealthCheckerServiceImpl healthChecker = + createNodeHealthCheckerService(); healthChecker.init(conf); LocalDirsHandlerService dirsHandler = healthChecker.getDiskHandler(); conf.set(YarnConfiguration.NM_WEBAPP_ADDRESS, webAddr); @@ -174,7 +175,8 @@ public boolean isPmemCheckEnabled() { }; conf.set(YarnConfiguration.NM_LOCAL_DIRS, testRootDir.getAbsolutePath()); conf.set(YarnConfiguration.NM_LOG_DIRS, testLogDir.getAbsolutePath()); - NodeHealthCheckerService healthChecker = createNodeHealthCheckerService(); + NodeHealthCheckerServiceImpl healthChecker = + createNodeHealthCheckerService(); healthChecker.init(conf); LocalDirsHandlerService dirsHandler = healthChecker.getDiskHandler(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServices.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServices.java index 70fe3731071..72e241b9a59 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServices.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServices.java @@ -56,7 +56,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.ResourcePluginManager; import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu.AssignedGpuDevice; import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu.GpuDevice; -import org.apache.hadoop.yarn.server.nodemanager.health.NodeHealthCheckerService; +import org.apache.hadoop.yarn.server.nodemanager.health.NodeHealthCheckerServiceImpl; import org.apache.hadoop.yarn.server.nodemanager.webapp.WebServer.NMWebApp; import org.apache.hadoop.yarn.server.nodemanager.webapp.dao.NMResourceInfo; import org.apache.hadoop.yarn.server.nodemanager.webapp.dao.gpu.GpuDeviceInformation; @@ -142,8 +142,8 @@ protected void configureServlets() { conf.set(YarnConfiguration.YARN_LOG_SERVER_WEBSERVICE_URL, LOGSERVICEWSADDR); dirsHandler = new LocalDirsHandlerService(); - NodeHealthCheckerService healthChecker = - new NodeHealthCheckerService(dirsHandler); + NodeHealthCheckerServiceImpl healthChecker = + new NodeHealthCheckerServiceImpl(dirsHandler); healthChecker.init(conf); aclsManager = new ApplicationACLsManager(conf); nmContext = new NodeManager.NMContext(null, null, dirsHandler, diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServicesApps.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServicesApps.java index ab06c0f9f33..73c21687a7c 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServicesApps.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServicesApps.java @@ -53,7 +53,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application; import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationState; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; -import org.apache.hadoop.yarn.server.nodemanager.health.NodeHealthCheckerService; +import org.apache.hadoop.yarn.server.nodemanager.health.NodeHealthCheckerServiceImpl; import org.apache.hadoop.yarn.server.nodemanager.webapp.WebServer.NMWebApp; import org.apache.hadoop.yarn.server.nodemanager.webapp.dao.AppsInfo; import org.apache.hadoop.yarn.server.security.ApplicationACLsManager; @@ -104,8 +104,8 @@ protected void configureServlets() { conf.set(YarnConfiguration.NM_LOCAL_DIRS, testRootDir.getAbsolutePath()); conf.set(YarnConfiguration.NM_LOG_DIRS, testLogDir.getAbsolutePath()); LocalDirsHandlerService dirsHandler = new LocalDirsHandlerService(); - NodeHealthCheckerService healthChecker = - new NodeHealthCheckerService(dirsHandler); + NodeHealthCheckerServiceImpl healthChecker = + new NodeHealthCheckerServiceImpl(dirsHandler); healthChecker.init(conf); dirsHandler = healthChecker.getDiskHandler(); aclsManager = new ApplicationACLsManager(conf); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServicesAuxServices.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServicesAuxServices.java index 
7ec8fcd47d3..edeeb92e5d1 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServicesAuxServices.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServicesAuxServices.java @@ -48,7 +48,7 @@ import org.apache.hadoop.yarn.server.nodemanager.ResourceView; import org.apache.hadoop.yarn.server.nodemanager.containermanager.AuxServices; import org.apache.hadoop.yarn.server.nodemanager.containermanager.records.AuxServiceRecord; -import org.apache.hadoop.yarn.server.nodemanager.health.NodeHealthCheckerService; +import org.apache.hadoop.yarn.server.nodemanager.health.NodeHealthCheckerServiceImpl; import org.apache.hadoop.yarn.server.nodemanager.webapp.WebServer.NMWebApp; import org.apache.hadoop.yarn.server.security.ApplicationACLsManager; import org.apache.hadoop.yarn.webapp.GenericExceptionHandler; @@ -124,8 +124,8 @@ public boolean isPmemCheckEnabled() { conf.set(YarnConfiguration.NM_LOCAL_DIRS, testRootDir.getAbsolutePath()); conf.set(YarnConfiguration.NM_LOG_DIRS, testLogDir.getAbsolutePath()); LocalDirsHandlerService dirsHandler = new LocalDirsHandlerService(); - NodeHealthCheckerService healthChecker = - new NodeHealthCheckerService(dirsHandler); + NodeHealthCheckerServiceImpl healthChecker = + new NodeHealthCheckerServiceImpl(dirsHandler); healthChecker.init(conf); dirsHandler = healthChecker.getDiskHandler(); ApplicationACLsManager aclsManager = new ApplicationACLsManager(conf); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServicesContainers.java 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServicesContainers.java index 175a0b02470..6cf50137081 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServicesContainers.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServicesContainers.java @@ -52,7 +52,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerState; -import org.apache.hadoop.yarn.server.nodemanager.health.NodeHealthCheckerService; +import org.apache.hadoop.yarn.server.nodemanager.health.NodeHealthCheckerServiceImpl; import org.apache.hadoop.yarn.server.nodemanager.webapp.WebServer.NMWebApp; import org.apache.hadoop.yarn.server.security.ApplicationACLsManager; import org.apache.hadoop.yarn.server.utils.BuilderUtils; @@ -129,8 +129,8 @@ public boolean isPmemCheckEnabled() { conf.set(YarnConfiguration.NM_LOCAL_DIRS, testRootDir.getAbsolutePath()); conf.set(YarnConfiguration.NM_LOG_DIRS, testLogDir.getAbsolutePath()); LocalDirsHandlerService dirsHandler = new LocalDirsHandlerService(); - NodeHealthCheckerService healthChecker = - new NodeHealthCheckerService(dirsHandler); + NodeHealthCheckerServiceImpl healthChecker = + new NodeHealthCheckerServiceImpl(dirsHandler); healthChecker.init(conf); dirsHandler = healthChecker.getDiskHandler(); aclsManager = new ApplicationACLsManager(conf); diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebTerminal.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebTerminal.java index d4180e48251..10034d53b14 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebTerminal.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebTerminal.java @@ -31,7 +31,7 @@ import org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService; import org.apache.hadoop.yarn.server.nodemanager.NodeManager; import org.apache.hadoop.yarn.server.nodemanager.ResourceView; -import org.apache.hadoop.yarn.server.nodemanager.health.NodeHealthCheckerService; +import org.apache.hadoop.yarn.server.nodemanager.health.NodeHealthCheckerServiceImpl; import org.apache.hadoop.yarn.server.security.ApplicationACLsManager; import org.junit.After; import org.junit.Before; @@ -49,13 +49,13 @@ TestNMWebServer.class.getSimpleName()); private static final File TESTLOGDIR = new File("target", TestNMWebServer.class.getSimpleName() + "LogDir"); - private NodeHealthCheckerService healthChecker; + private NodeHealthCheckerServiceImpl healthChecker; private WebServer server; private int port; - private NodeHealthCheckerService createNodeHealthCheckerService() { + private NodeHealthCheckerServiceImpl createNodeHealthCheckerService() { LocalDirsHandlerService dirsHandler = new LocalDirsHandlerService(); - return new NodeHealthCheckerService(dirsHandler); + return new NodeHealthCheckerServiceImpl(dirsHandler); } private int startNMWebAppServer(String webAddr) { diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMNMInfo.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMNMInfo.java index 8f1cda94a7c..3b5d3828b21 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMNMInfo.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMNMInfo.java @@ -96,6 +96,9 @@ public String getLiveNodeManagers() { ni.getLastHealthReportTime()); info.put("HealthReport", ni.getHealthReport()); + info.put("HealthDetails", + ni.getNodeHealthDetails() != null ? + ni.getNodeHealthDetails().toString() : ""); info.put("NodeManagerVersion", ni.getNodeManagerVersion()); if(report != null) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNode.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNode.java index d3b515e8241..62ee67a802b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNode.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNode.java @@ -33,6 +33,7 @@ import org.apache.hadoop.yarn.api.records.ResourceUtilization; import org.apache.hadoop.yarn.api.records.NodeAttribute; import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse; +import 
org.apache.hadoop.yarn.server.api.records.NodeHealthDetails; import org.apache.hadoop.yarn.server.api.records.OpportunisticContainersStatus; import org.apache.hadoop.yarn.server.resourcemanager.RMContext; @@ -85,13 +86,19 @@ * @return the latest health report received from this node. */ public String getHealthReport(); - + /** * the time of the latest health report received from this node. * @return the time of the latest health report received from this node. */ public long getLastHealthReportTime(); + /** + * the overall health score received from this node. + * @return the overall health score of the node. + */ + NodeHealthDetails getNodeHealthDetails(); + /** * the node manager version of the node received as part of the * registration with the resource manager diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java index 68f44dc6d54..457c5a436cb 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java @@ -36,6 +36,7 @@ import java.util.concurrent.locks.ReentrantReadWriteLock.WriteLock; import org.apache.commons.collections.keyvalue.DefaultMapEntry; +import org.apache.hadoop.yarn.server.api.records.NodeHealthDetails; import org.apache.hadoop.yarn.server.api.records.NodeStatus; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -132,6 +133,7 @@ private String healthReport; private long lastHealthReportTime; + private NodeHealthDetails nodeHealthDetails; private String 
nodeManagerVersion; private Integer decommissioningTimeout; @@ -512,7 +514,27 @@ public void setLastHealthReportTime(long lastHealthReportTime) { this.writeLock.unlock(); } } - + + public void setNodeHealthDetails(NodeHealthDetails nhd) { + this.writeLock.lock(); + + try { + this.nodeHealthDetails = nhd; + } finally { + this.writeLock.unlock(); + } + } + + public NodeHealthDetails getNodeHealthDetails() { + this.readLock.lock(); + /* Read-only accessor: the shared read lock is sufficient. */ + try { + return this.nodeHealthDetails; + } finally { + this.readLock.unlock(); + } + } + + @Override public long getLastHealthReportTime() { this.readLock.lock(); @@ -844,6 +866,7 @@ private static NodeHealthStatus updateRMNodeFromStatusEvents( rmNode.setHealthReport(remoteNodeHealthStatus.getHealthReport()); rmNode.setLastHealthReportTime(remoteNodeHealthStatus .getLastHealthReportTime()); + rmNode.setNodeHealthDetails(remoteNodeHealthStatus.getNodeHealthDetails()); rmNode.setAggregatedContainersUtilization(statusEvent .getAggregatedContainersUtilization()); rmNode.setNodeUtilization(statusEvent.getNodeUtilization()); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockNodes.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockNodes.java index 600edfc7f4d..69e16586f5e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockNodes.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockNodes.java @@ -38,6 +38,7 @@ import org.apache.hadoop.yarn.nodelabels.CommonNodeLabelsManager; import org.apache.hadoop.yarn.resourcetypes.ResourceTypesTestHelper; import
org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse; +import org.apache.hadoop.yarn.server.api.records.NodeHealthDetails; import org.apache.hadoop.yarn.server.api.records.OpportunisticContainersStatus; import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode; import org.apache.hadoop.yarn.server.resourcemanager.rmnode.UpdatedContainerInfo; @@ -127,6 +128,7 @@ public static Resource newAvailResource(Resource total, Resource used) { private ResourceUtilization nodeUtilization; private Resource physicalResource; private RMContext rmContext; + private NodeHealthDetails nodeHealthDetails; MockRMNodeImpl(NodeId nodeId, String nodeAddr, String httpAddress, Resource perNode, String rackName, String healthReport, @@ -270,6 +272,10 @@ public long getLastHealthReportTime() { return lastHealthReportTime; } + @Override public NodeHealthDetails getNodeHealthDetails() { + return nodeHealthDetails; + } + @Override public Set getNodeLabels() { if (labels != null) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/TestDiskFailures.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/TestDiskFailures.java index 23bb0399930..752605e8362 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/TestDiskFailures.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/TestDiskFailures.java @@ -173,7 +173,7 @@ private void testDirsFailures(boolean localORLogDirs) throws IOException { NodeManager nm = yarnCluster.getNodeManager(0); LOG.info("Configured nm-" + dirType + "-dirs=" + nm.getConfig().get(dirsProperty)); - dirsHandler = nm.getNodeHealthChecker().getDiskHandler(); + dirsHandler = nm.getDiskHandler(); List list = localORLogDirs ? 
dirsHandler.getLocalDirs() : dirsHandler.getLogDirs(); String[] dirs = list.toArray(new String[list.size()]);