diff --git a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/nodemanager/NodeInfo.java b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/nodemanager/NodeInfo.java index 32567db666e..067340c3e6a 100644 --- a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/nodemanager/NodeInfo.java +++ b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/nodemanager/NodeInfo.java @@ -39,6 +39,7 @@ import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.ResourceUtilization; import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse; +import org.apache.hadoop.yarn.server.api.records.NodeHealthDetails; import org.apache.hadoop.yarn.server.api.records.OpportunisticContainersStatus; import org.apache.hadoop.yarn.server.resourcemanager.RMContext; import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager; @@ -70,10 +71,12 @@ public static NodeId newNodeID(String host, int port) { private List toCleanUpContainers; private List toCleanUpApplications; private List runningApplications; + private NodeHealthDetails nodeHealthDetails; public FakeRMNodeImpl(NodeId nodeId, String nodeAddr, String httpAddress, Resource perNode, String rackName, String healthReport, - int cmdPort, String hostName, NodeState state) { + int cmdPort, String hostName, NodeState state, + NodeHealthDetails nodeHealthDetails) { this.nodeId = nodeId; this.nodeAddr = nodeAddr; this.httpAddress = httpAddress; @@ -86,6 +89,7 @@ public FakeRMNodeImpl(NodeId nodeId, String nodeAddr, String httpAddress, toCleanUpApplications = new ArrayList(); toCleanUpContainers = new ArrayList(); runningApplications = new ArrayList(); + this.nodeHealthDetails = nodeHealthDetails; } public NodeId getNodeID() { @@ -120,6 +124,11 @@ public long getLastHealthReportTime() { return 0; } + @Override + public NodeHealthDetails getNodeHealthDetails() { + return this.nodeHealthDetails; + } + public Resource 
getTotalCapability() { return perNode; } @@ -263,7 +272,7 @@ public static RMNode newNodeInfo(String rackName, String hostName, return new FakeRMNodeImpl(nodeId, nodeAddr, httpAddress, resource, rackName, "Me good", - port, hostName, null); + port, hostName, null, null); } public static RMNode newNodeInfo(String rackName, String hostName, diff --git a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/RMNodeWrapper.java b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/RMNodeWrapper.java index b5ae4f5b3c0..91db28f3ad4 100644 --- a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/RMNodeWrapper.java +++ b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/RMNodeWrapper.java @@ -30,6 +30,7 @@ import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.ResourceUtilization; import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse; +import org.apache.hadoop.yarn.server.api.records.NodeHealthDetails; import org.apache.hadoop.yarn.server.api.records.OpportunisticContainersStatus; import org.apache.hadoop.yarn.server.resourcemanager.RMContext; import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager; @@ -94,6 +95,11 @@ public long getLastHealthReportTime() { return node.getLastHealthReportTime(); } + @Override + public NodeHealthDetails getNodeHealthDetails() { + return node.getNodeHealthDetails(); + } + @Override public Resource getTotalCapability() { return node.getTotalCapability(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java index cf0c5e97d8d..bba4bc4f679 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java @@ -1977,6 +1977,10 @@ public static boolean isAclEnabled(Configuration conf) { public static final int DEFAULT_NM_CONTAINER_METRICS_UNREGISTER_DELAY_MS = 10000; + /** The Service to check the health of the node. */ + public static final String NM_HEALTH_CHECKER_SERVICE = + NM_PREFIX + "health-checker-service.class"; + /** Prefix for all node manager disk health checker configs. */ private static final String NM_DISK_HEALTH_CHECK_PREFIX = "yarn.nodemanager.disk-health-checker."; @@ -2092,6 +2096,14 @@ public static boolean isAclEnabled(Configuration conf) { public static final String NM_HEALTH_CHECK_SCRIPT_INTERVAL_MS_TEMPLATE = NM_PREFIX + "health-checker.%s.interval-ms"; + /** The health checker score file. */ + public static final boolean DEFAULT_NM_HEALTH_CHECK_SCORE_ENABLED = + false; + public static final String NM_HEALTH_CHECK_SCORE_ENABLED = + NM_PREFIX + "health-checker.score-enabled"; + public static final String NM_HEALTH_CHECK_SCORE_FILE = + NM_PREFIX + "health-checker.score-file"; + /** The JVM options used on forking ContainerLocalizer process by container executor. 
*/ public static final String NM_CONTAINER_LOCALIZER_JAVA_OPTS_KEY = diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto index d7ca2a563ea..40988c4da43 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto @@ -823,6 +823,11 @@ message StringLocalResourceMapProto { optional LocalResourceProto value = 2; } +message StringIntMapProto { + optional string key = 1; + optional int32 value = 2; +} + message StringStringMapProto { optional string key = 1; optional string value = 2; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ProtoUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ProtoUtils.java index cdeb417243e..1d2b9e08cf4 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ProtoUtils.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ProtoUtils.java @@ -81,6 +81,7 @@ import org.apache.hadoop.yarn.proto.YarnProtos.QueueStateProto; import org.apache.hadoop.yarn.proto.YarnProtos.ReservationRequestInterpreterProto; import org.apache.hadoop.yarn.proto.YarnProtos.ResourceProto; +import org.apache.hadoop.yarn.proto.YarnProtos.StringIntMapProto; import org.apache.hadoop.yarn.proto.YarnProtos.StringStringMapProto; import org.apache.hadoop.yarn.proto.YarnProtos.TimedPlacementConstraintProto; import org.apache.hadoop.yarn.proto.YarnProtos.YarnApplicationAttemptStateProto; @@ -587,6 +588,34 @@ public static ResourceTypes convertFromProtoFormat(ResourceTypesProto e) { return ret; } + + public static List + convertStringIntMapToProtoList(Map stringIntMap) { 
+ List pList = new ArrayList<>(); + if (stringIntMap != null && !stringIntMap.isEmpty()) { + StringIntMapProto.Builder pBuilder = StringIntMapProto.newBuilder(); + for (Map.Entry entry : stringIntMap.entrySet()) { + pBuilder.setKey(entry.getKey()); + pBuilder.setValue(entry.getValue()); + pList.add(pBuilder.build()); + } + } + return pList; + } + + public static Map convertProtoListToStringIntMap( + List pList) { + Map ret = new HashMap<>(); + if (pList != null) { + for (StringIntMapProto p : pList) { + if (p.hasKey()) { + ret.put(p.getKey(), p.getValue()); + } + } + } + return ret; + } + public static Map convertStringStringMapProtoListToMap( List pList) { Map ret = new HashMap<>(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml index 4349d56731f..054c390ff93 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml @@ -2477,6 +2477,14 @@ + + + The Service class to check the health of the node. 
+ + yarn.nodemanager.health-checker-service.class + org.apache.hadoop.yarn.server.nodemanager.health.NodeHealthCheckerServiceImpl + + Flag to enable NodeManager disk health checker diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/NodeHealthDetails.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/NodeHealthDetails.java new file mode 100644 index 00000000000..89c8fb095ba --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/NodeHealthDetails.java @@ -0,0 +1,101 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.server.api.records; + +import org.apache.hadoop.classification.InterfaceAudience.Private; +import org.apache.hadoop.yarn.util.Records; + +import java.util.Map; +import java.util.StringJoiner; + +/** + * {@code NodeHealthDetails} is a summary of the overall health score + * of the node. + *

+ * It includes information such as: + *

    + *
  • + * In depth analysis of the health of the node. Even if the node is healthy + * it gives out a score based on node resources. + *
  • + *
  • + * Holds a map of information about the node resources. + * Example: SSD, HDD etc. + *
  • + *
+ * + */ +public abstract class NodeHealthDetails { + + @Private + public static NodeHealthDetails newInstance(Integer overallScore, + Map nodeResourceScore) { + NodeHealthDetails nodeHealthDetails = Records.newRecord( + NodeHealthDetails.class); + nodeHealthDetails.setOverallScore(overallScore); + nodeHealthDetails.setNodeResourceScores(nodeResourceScore); + return nodeHealthDetails; + } + + @Private + public static NodeHealthDetails newInstance(Integer overallScore) { + NodeHealthDetails nodeHealthDetails = Records.newRecord( + NodeHealthDetails.class); + nodeHealthDetails.setOverallScore(overallScore); + return nodeHealthDetails; + } + + /** + * Set the overall score of the node. This score is derived from node + * resources score. + * @param overallScore + */ + @Private + public abstract void setOverallScore(Integer overallScore); + + /** + * Holds a Map of the resources and its scores. + * @param nodeResourceScores + */ + @Private + public abstract void setNodeResourceScores( + Map nodeResourceScores); + + /** + * @return the score of the node. + */ + @Private + public abstract Integer getOverallScore(); + + /** + * @return Scores of each resources in the node. 
+ */ + @Private + public abstract Map getNodeResourceScores(); + + @Override + public String toString() { + StringJoiner healthDetailsString = new StringJoiner(",", "[", "]"); + healthDetailsString.add("Overall Score = " + this.getOverallScore()); + this.getNodeResourceScores().forEach((key, value) -> + healthDetailsString.add(key + " = " + value)); + return healthDetailsString.toString(); + } + +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/NodeHealthStatus.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/NodeHealthStatus.java index b21b88071f5..75095de227d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/NodeHealthStatus.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/NodeHealthStatus.java @@ -90,4 +90,22 @@ public static NodeHealthStatus newInstance(boolean isNodeHealthy, @Private @Unstable public abstract void setLastHealthReportTime(long lastHealthReport); + + /** + * Set the overall score of the Node. + * @param nodeHealthDetails contains the resources score and the total + * overall score of the node + */ + @Private + @Unstable + public abstract void setNodeHealthDetails(NodeHealthDetails + nodeHealthDetails); + + /** + * @return {@link NodeHealthDetails} Gives a detailed overall score + * of the node health resources. 
+ */ + @Public + @Stable + public abstract NodeHealthDetails getNodeHealthDetails(); } \ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/impl/pb/NodeHealthDetailsPBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/impl/pb/NodeHealthDetailsPBImpl.java new file mode 100644 index 00000000000..c2944d61d12 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/impl/pb/NodeHealthDetailsPBImpl.java @@ -0,0 +1,96 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.yarn.server.api.records.impl.pb; + +import org.apache.hadoop.yarn.api.records.impl.pb.ProtoUtils; +import org.apache.hadoop.yarn.proto.YarnServerCommonProtos.NodeHealthDetailsProto; +import org.apache.hadoop.yarn.proto.YarnServerCommonProtos.NodeHealthDetailsProtoOrBuilder; +import org.apache.hadoop.yarn.server.api.records.NodeHealthDetails; + +import java.util.Collections; +import java.util.Map; + +public class NodeHealthDetailsPBImpl extends NodeHealthDetails { + private NodeHealthDetailsProto proto = NodeHealthDetailsProto + .getDefaultInstance(); + private NodeHealthDetailsProto.Builder builder = null; + private boolean viaProto = false; + + public NodeHealthDetailsPBImpl() { + builder = NodeHealthDetailsProto.newBuilder(); + } + + public NodeHealthDetailsPBImpl(NodeHealthDetailsProto proto) { + this.proto = proto; + viaProto = true; + } + public NodeHealthDetailsProto getProto() { + mergeLocalToProto(); + proto = viaProto ? proto : builder.build(); + viaProto = true; + return proto; + } + + private void mergeLocalToProto() { + if (viaProto) { + maybeInitBuilder(); + } + proto = builder.build(); + viaProto = true; + } + + private void maybeInitBuilder() { + if (viaProto || builder == null) { + builder = NodeHealthDetailsProto.newBuilder(proto); + } + viaProto = false; + } + + @Override + public void setOverallScore(Integer overallScore) { + maybeInitBuilder(); + this.builder.setOverallScore(overallScore); + } + + @Override + public Integer getOverallScore() { + NodeHealthDetailsProtoOrBuilder p = + this.viaProto ? 
this.proto : this.builder; + if (!p.hasOverallScore()) { + return null; + } + return (p.getOverallScore()); + } + + @Override + public void setNodeResourceScores(Map nodeResourceScores) { + maybeInitBuilder(); + this.builder.addAllNodeResourceScores(ProtoUtils + .convertStringIntMapToProtoList(nodeResourceScores)); + } + + @Override + public Map getNodeResourceScores() { + NodeHealthDetailsProtoOrBuilder p = + this.viaProto ? this.proto : this.builder; + return p.getNodeResourceScoresCount() > 0 ? + ProtoUtils.convertProtoListToStringIntMap( + p.getNodeResourceScoresList()) : Collections.emptyMap(); + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/impl/pb/NodeHealthStatusPBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/impl/pb/NodeHealthStatusPBImpl.java index 20697834687..83f9f51d016 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/impl/pb/NodeHealthStatusPBImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/impl/pb/NodeHealthStatusPBImpl.java @@ -18,8 +18,10 @@ package org.apache.hadoop.yarn.server.api.records.impl.pb; +import org.apache.hadoop.yarn.proto.YarnServerCommonProtos.NodeHealthDetailsProto; import org.apache.hadoop.yarn.proto.YarnServerCommonProtos.NodeHealthStatusProto; import org.apache.hadoop.yarn.proto.YarnServerCommonProtos.NodeHealthStatusProtoOrBuilder; +import org.apache.hadoop.yarn.server.api.records.NodeHealthDetails; import org.apache.hadoop.yarn.server.api.records.NodeHealthStatus; import org.apache.hadoop.thirdparty.protobuf.TextFormat; @@ -30,6 +32,7 @@ private boolean viaProto = false; private NodeHealthStatusProto proto = NodeHealthStatusProto 
.getDefaultInstance(); + private NodeHealthDetails nodeHealthDetails; public NodeHealthStatusPBImpl() { this.builder = NodeHealthStatusProto.newBuilder(); @@ -70,6 +73,7 @@ public String toString() { private void mergeLocalToProto() { if (this.viaProto) maybeInitBuilder(); + mergeLocalToBuilder(); this.proto = this.builder.build(); this.viaProto = true; @@ -82,6 +86,13 @@ private void maybeInitBuilder() { this.viaProto = false; } + private void mergeLocalToBuilder() { + if(nodeHealthDetails != null) { + builder.setNodeHealthDetails(convertToProtoFormat( + nodeHealthDetails)); + } + } + @Override public boolean getIsNodeHealthy() { NodeHealthStatusProtoOrBuilder p = @@ -128,4 +139,36 @@ public void setLastHealthReportTime(long lastHealthReport) { this.builder.setLastHealthReportTime((lastHealthReport)); } + @Override + public void setNodeHealthDetails( + NodeHealthDetails nodeHealthDetails) { + maybeInitBuilder(); + if(nodeHealthDetails == null) { + this.builder.clearNodeHealthDetails(); + } + this.nodeHealthDetails = nodeHealthDetails; + } + + @Override + public NodeHealthDetails getNodeHealthDetails() { + NodeHealthStatusProtoOrBuilder p = + this.viaProto ? 
this.proto : this.builder; + if(this.nodeHealthDetails != null) { + return this.nodeHealthDetails; + } + if(!p.hasNodeHealthDetails()) { + return null; + } + this.nodeHealthDetails = convertFromProtoFormat(p.getNodeHealthDetails()); + return this.nodeHealthDetails; + } + + private NodeHealthDetailsPBImpl convertFromProtoFormat( + NodeHealthDetailsProto p) { + return new NodeHealthDetailsPBImpl(p); + } + + private NodeHealthDetailsProto convertToProtoFormat(NodeHealthDetails nhd) { + return ((NodeHealthDetailsPBImpl) nhd).getProto(); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/proto/yarn_server_common_protos.proto b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/proto/yarn_server_common_protos.proto index ea8df4fb800..65d5f8b6d2e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/proto/yarn_server_common_protos.proto +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/proto/yarn_server_common_protos.proto @@ -58,10 +58,16 @@ message MasterKeyProto { optional bytes bytes = 2; } +message NodeHealthDetailsProto { + optional int32 overall_score = 1; + repeated StringIntMapProto node_resource_scores = 2; +} + message NodeHealthStatusProto { optional bool is_node_healthy = 1; optional string health_report = 2; optional int64 last_health_report_time = 3; + optional NodeHealthDetailsProto node_health_details = 4; } message VersionProto { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java index 42944d632b8..d953f482a58 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java @@ -29,6 +29,7 @@ import org.apache.hadoop.security.Credentials; import org.apache.hadoop.security.SecurityUtil; import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.service.AbstractService; import org.apache.hadoop.service.CompositeService; import org.apache.hadoop.util.ExitUtil; import org.apache.hadoop.util.GenericOptionsParser; @@ -62,6 +63,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerImpl; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerState; import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.ResourcePluginManager; +import org.apache.hadoop.yarn.server.nodemanager.health.NodeHealthCheckerServiceImpl; import org.apache.hadoop.yarn.server.nodemanager.logaggregation.tracker.NMLogAggregationStatusTracker; import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics; import org.apache.hadoop.yarn.server.nodemanager.nodelabels.ConfigurationNodeLabelsProvider; @@ -85,6 +87,7 @@ import org.slf4j.LoggerFactory; import java.io.IOException; +import java.lang.reflect.Constructor; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -245,6 +248,28 @@ protected NodeResourceMonitor createNodeResourceMonitor() { return new NodeResourceMonitorImpl(context); } + protected NodeHealthCheckerService createNodeHealthCheckerService( + Configuration conf) { + + Class clazz = conf.getClassByNameOrNull(conf.get(YarnConfiguration + .NM_HEALTH_CHECKER_SERVICE, + NodeHealthCheckerServiceImpl.class.getName())); + try { + if (clazz == null || !AbstractService.class.isAssignableFrom(clazz)) { + throw new 
RuntimeException(clazz + " does not implement " + + AbstractService.class); + } + Constructor cons = clazz.getConstructor( + LocalDirsHandlerService.class); + return (NodeHealthCheckerService) cons.newInstance(dirsHandler); + } catch (Exception e) { + throw new YarnRuntimeException( + "Could not instantiate NodeHealthChecker Class: " + YarnConfiguration + .NM_HEALTH_CHECKER_SERVICE, e); + } + } + + protected ContainerManagerImpl createContainerManager(Context context, ContainerExecutor exec, DeletionService del, NodeStatusUpdater nodeStatusUpdater, ApplicationACLsManager aclsManager, @@ -410,7 +435,7 @@ protected void serviceInit(Configuration conf) throws Exception { // NodeManager level dispatcher this.dispatcher = createNMDispatcher(); - this.nodeHealthChecker = new NodeHealthCheckerService(dirsHandler); + this.nodeHealthChecker = createNodeHealthCheckerService(conf); addService(nodeHealthChecker); ((NMContext)context).setContainerExecutor(exec); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java index a98d31c2a2b..985bb410f6a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java @@ -517,9 +517,12 @@ protected NodeStatus getNodeStatus(int responseId) throws IOException { nodeHealthStatus.setIsNodeHealthy(healthChecker.isHealthy()); nodeHealthStatus.setLastHealthReportTime(healthChecker .getLastHealthReportTime()); - LOG.debug("Node's health-status : {}, {}", + + 
healthChecker.updateNodeHealthDetails(nodeHealthStatus); + LOG.debug("Node's health-status : {}, {}, {}", nodeHealthStatus.getIsNodeHealthy(), - nodeHealthStatus.getHealthReport()); + nodeHealthStatus.getHealthReport(), + nodeHealthStatus.getNodeHealthDetails()); List containersStatuses = getContainerStatuses(); ResourceUtilization containersUtilization = getContainersUtilization(); ResourceUtilization nodeUtilization = getNodeUtilization(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/health/NodeHealthCheckerService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/health/NodeHealthCheckerService.java index bbf61de1e37..8b4b1724568 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/health/NodeHealthCheckerService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/health/NodeHealthCheckerService.java @@ -18,102 +18,28 @@ package org.apache.hadoop.yarn.server.nodemanager.health; -import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; -import org.apache.hadoop.thirdparty.com.google.common.base.Joiner; -import org.apache.hadoop.thirdparty.com.google.common.base.Strings; -import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.service.CompositeService; -import org.apache.hadoop.service.Service; -import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.server.api.records.NodeHealthStatus; import org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.util.ArrayList; -import java.util.List; -import java.util.Optional; -import 
java.util.stream.Collectors; - /** - * This class provides functionality of checking the health of a node and - * reporting back to the service for which the health checker has been asked to - * report. - * - * It is a {@link CompositeService}: every {@link Service} must be registered - * first in serviceInit, and should also implement the {@link HealthReporter} - * interface - otherwise an exception is thrown. - * - * Calling functions of HealthReporter merge its dependent - * services' reports. - * + * This class is the base class for all NodeHealthCheckerServices. * @see HealthReporter * @see LocalDirsHandlerService * @see TimedHealthReporterService + * @see NodeHealthCheckerServiceImpl */ -public class NodeHealthCheckerService extends CompositeService +public abstract class NodeHealthCheckerService extends CompositeService implements HealthReporter { public static final Logger LOG = LoggerFactory.getLogger(NodeHealthCheckerService.class); - private static final int MAX_SCRIPTS = 4; - - private List reporters; - private LocalDirsHandlerService dirsHandler; - private ExceptionReporter exceptionReporter; - - public static final String SEPARATOR = ";"; - public NodeHealthCheckerService( - LocalDirsHandlerService dirHandlerService) { - super(NodeHealthCheckerService.class.getName()); - this.reporters = new ArrayList<>(); - this.dirsHandler = dirHandlerService; - this.exceptionReporter = new ExceptionReporter(); - } - - @Override - protected void serviceInit(Configuration conf) throws Exception { - reporters.add(exceptionReporter); - addHealthReporter(dirsHandler); - String[] configuredScripts = conf.getTrimmedStrings( - YarnConfiguration.NM_HEALTH_CHECK_SCRIPTS, - YarnConfiguration.DEFAULT_NM_HEALTH_CHECK_SCRIPTS); - if (configuredScripts.length > MAX_SCRIPTS) { - throw new IllegalArgumentException("Due to performance reasons " + - "running more than " + MAX_SCRIPTS + "scripts is not allowed."); - } - for (String configuredScript : configuredScripts) { - 
addHealthReporter(NodeHealthScriptRunner.newInstance( - configuredScript, conf)); - } - super.serviceInit(conf); - } - - /** - * Adds a {@link Service} implementing the {@link HealthReporter} interface, - * if that service has not been added to this {@link CompositeService} yet. - * - * @param service to add - * @throws Exception if not a {@link HealthReporter} - * implementation is provided to this function - */ - @VisibleForTesting - void addHealthReporter(Service service) throws Exception { - if (service != null) { - if (getServices().stream() - .noneMatch(x -> x.getName().equals(service.getName()))) { - if (!(service instanceof HealthReporter)) { - throw new Exception("Attempted to add service to " + - "NodeHealthCheckerService that does not implement " + - "HealthReporter."); - } - reporters.add((HealthReporter) service); - addService(service); - } else { - LOG.debug("Omitting duplicate service: {}.", service.getName()); - } - } + public NodeHealthCheckerService(String name) { + super(name); } /** @@ -121,44 +47,34 @@ void addHealthReporter(Service service) throws Exception { * * @return the report string about the health of the node */ - @Override - public String getHealthReport() { - ArrayList reports = reporters.stream() - .map(reporter -> Strings.emptyToNull(reporter.getHealthReport())) - .collect(Collectors.toCollection(ArrayList::new)); - return Joiner.on(SEPARATOR).skipNulls().join(reports); - } + public abstract String getHealthReport(); /** * @return true if the node is healthy */ - @Override - public boolean isHealthy() { - return reporters.stream().allMatch(HealthReporter::isHealthy); - } + public abstract boolean isHealthy(); + + /** + * @return the disk handler + */ + public abstract LocalDirsHandlerService getDiskHandler(); /** * @return when the last time the node health status is reported */ - @Override - public long getLastHealthReportTime() { - Optional max = reporters.stream() - 
.map(HealthReporter::getLastHealthReportTime).max(Long::compareTo); - return max.orElse(0L); - } + public abstract long getLastHealthReportTime(); + /** - * @return the disk handler + * Reads the score of each resource and gives out a overall score of + * the node. Node Health Details needs to be enabled otherwise the score + * is defaulted to 0. */ - public LocalDirsHandlerService getDiskHandler() { - return dirsHandler; - } + public abstract void updateNodeHealthDetails(NodeHealthStatus status); /** * Propagating an exception to {@link ExceptionReporter}. * @param exception the exception to propagate */ - public void reportException(Exception exception) { - exceptionReporter.reportException(exception); - } + public abstract void reportException(Exception exception); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/health/NodeHealthCheckerServiceImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/health/NodeHealthCheckerServiceImpl.java new file mode 100644 index 00000000000..635c6c90b6a --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/health/NodeHealthCheckerServiceImpl.java @@ -0,0 +1,283 @@ +/** +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. 
You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ + +package org.apache.hadoop.yarn.server.nodemanager.health; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.service.CompositeService; +import org.apache.hadoop.service.Service; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Joiner; +import org.apache.hadoop.thirdparty.com.google.common.base.Strings; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.server.api.records.NodeHealthDetails; +import org.apache.hadoop.yarn.server.api.records.NodeHealthStatus; +import org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.w3c.dom.Document; +import org.w3c.dom.Element; +import org.w3c.dom.Node; +import org.w3c.dom.NodeList; +import org.xml.sax.SAXException; + +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.parsers.ParserConfigurationException; +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.nio.file.Files; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.stream.Collectors; + +/** + * This class provides functionality of checking the health of a node and + * reporting back to the service for which the health checker has been asked to + * report. 
+ * + * It is a {@link CompositeService}: every {@link Service} must be registered + * first in serviceInit, and should also implement the {@link HealthReporter} + * interface - otherwise an exception is thrown. + * + * Calling functions of HealthReporter merge its dependent + * services' reports. + * + * @see HealthReporter + * @see LocalDirsHandlerService + * @see TimedHealthReporterService + */ +public class NodeHealthCheckerServiceImpl extends NodeHealthCheckerService { + + public static final Logger LOG = + LoggerFactory.getLogger(NodeHealthCheckerServiceImpl.class); + private static final int MAX_SCRIPTS = 4; + + private List reporters; + private LocalDirsHandlerService dirsHandler; + private ExceptionReporter exceptionReporter; + + private String nodeHealthScoreFilename; + + public static final String SEPARATOR = ";"; + + private boolean nodeHealthScoreEnabled; + private volatile long lastModified = -1; + private NodeHealthDetails nodeHealthDetails; + + public NodeHealthCheckerServiceImpl( + LocalDirsHandlerService dirHandlerService) { + super(NodeHealthCheckerServiceImpl.class.getName()); + + this.reporters = new ArrayList<>(); + this.dirsHandler = dirHandlerService; + this.exceptionReporter = new ExceptionReporter(); + this.nodeHealthDetails = NodeHealthDetails.newInstance(0); + } + + @Override + protected void serviceInit(Configuration conf) throws Exception { + reporters.add(exceptionReporter); + addHealthReporter(dirsHandler); + String[] configuredScripts = conf.getTrimmedStrings( + YarnConfiguration.NM_HEALTH_CHECK_SCRIPTS, + YarnConfiguration.DEFAULT_NM_HEALTH_CHECK_SCRIPTS); + if (configuredScripts.length > MAX_SCRIPTS) { + throw new IllegalArgumentException("Due to performance reasons " + + "running more than " + MAX_SCRIPTS + "scripts is not allowed."); + } + for (String configuredScript : configuredScripts) { + addHealthReporter(NodeHealthScriptRunner.newInstance( + configuredScript, conf)); + } + + nodeHealthScoreEnabled = conf.getBoolean( + 
YarnConfiguration.NM_HEALTH_CHECK_SCORE_ENABLED, + YarnConfiguration.DEFAULT_NM_HEALTH_CHECK_SCORE_ENABLED); + + if (nodeHealthScoreEnabled) { + nodeHealthScoreFilename = conf.get( + YarnConfiguration.NM_HEALTH_CHECK_SCORE_FILE); + } + + super.serviceInit(conf); + } + + /** + * Adds a {@link Service} implementing the {@link HealthReporter} interface, + * if that service has not been added to this {@link CompositeService} yet. + * + * @param service to add + * @throws Exception if not a {@link HealthReporter} + * implementation is provided to this function + */ + @VisibleForTesting + void addHealthReporter(Service service) throws Exception { + if (service != null) { + if (getServices().stream() + .noneMatch(x -> x.getName().equals(service.getName()))) { + if (!(service instanceof HealthReporter)) { + throw new Exception("Attempted to add service to " + + "NodeHealthCheckerService that does not implement " + + "HealthReporter."); + } + reporters.add((HealthReporter) service); + addService(service); + } else { + LOG.debug("Omitting duplicate service: {}.", service.getName()); + } + } + } + + /** + * Joining the health reports of the dependent services. 
+ * + * @return the report string about the health of the node + */ + @Override + public String getHealthReport() { + ArrayList reports = reporters.stream() + .map(reporter -> Strings.emptyToNull(reporter.getHealthReport())) + .collect(Collectors.toCollection(ArrayList::new)); + return Joiner.on(SEPARATOR).skipNulls().join(reports); + } + + /** + * @return true if the node is healthy + */ + @Override + public boolean isHealthy() { + return reporters.stream().allMatch(HealthReporter::isHealthy); + } + + /** + * @return when the last time the node health status is reported + */ + @Override + public long getLastHealthReportTime() { + Optional max = reporters.stream() + .map(HealthReporter::getLastHealthReportTime).max(Long::compareTo); + return max.orElse(0L); + } + + /** + * Reads the score of each resource and gives out a summation of overall + * score of the node from a xml file. The file is read only if the last + * modified value changes. Node Health Details needs to be enabled. + * Otherwise, the score is defaulted to 0. 
+ * @return {@link NodeHealthDetails} + */ + @Override + public void updateNodeHealthDetails(NodeHealthStatus status) { + + if (nodeHealthScoreEnabled) { + try { + File resourceScoreFile = new File(nodeHealthScoreFilename); + if (!resourceScoreFile.exists() || !resourceScoreFile.isFile()) { + LOG.warn("resourceScoreFile [{}] does not exist or is not a file !!", + resourceScoreFile); + } else if (resourceScoreFile.lastModified() <= lastModified) { + LOG.debug("resourceScoreFile [{}] has not been modified " + + "from last check", resourceScoreFile); + } else { + HashMap resourcesScore = new HashMap<>(); + InputStream fis = Files.newInputStream(resourceScoreFile.toPath()); + + readXmlFileToMapWithFileInputStream(nodeHealthScoreFilename, fis, + resourcesScore); + + Integer overallScore = + resourcesScore.values().stream().reduce(0, Integer::sum); + nodeHealthDetails.setOverallScore(overallScore); + nodeHealthDetails.setNodeResourceScores(resourcesScore); + lastModified = resourceScoreFile.lastModified(); + } + } catch (Exception e) { + reportException(e); + LOG.error("Error reading Node Health Checker score file", e); + } + } + + status.setNodeHealthDetails(nodeHealthDetails); + } + + /** + * Reads on fileInputStream with xml data and converts it into a map. + * The structure, for example, is followed + * resource135 + * resource235 + * resource330 + * + * @param filename the name of the xml file + * @param fileInputStream InputStream of the file + * @param map the resource map, to store the result. 
+ * @throws IOException + */ + public static void readXmlFileToMapWithFileInputStream(String filename, + InputStream fileInputStream, Map map) + throws IOException { + Document dom; + DocumentBuilderFactory builder = DocumentBuilderFactory.newInstance(); + try { + DocumentBuilder db = builder.newDocumentBuilder(); + dom = db.parse(fileInputStream); + Element doc = dom.getDocumentElement(); + NodeList nodes = doc.getElementsByTagName("resource"); + for (int i = 0; i < nodes.getLength(); i++) { + Node node = nodes.item(i); + if (node.getNodeType() == Node.ELEMENT_NODE) { + Element e= (Element) node; + // Get the list of resource and their scores. + String resourceName = readFirstTagValue(e, "name"); + String score = readFirstTagValue(e, "score"); + Integer value = (score == null) ? null : Integer.parseInt(score); + map.put(resourceName, value); + } + } + } catch (IOException| SAXException | ParserConfigurationException e) { + LOG.error("error parsing " + filename, e); + throw new RuntimeException(e); + } finally { + fileInputStream.close(); + } + } + + static String readFirstTagValue(Element e, String tag) { + NodeList nodes = e.getElementsByTagName(tag); + return (nodes.getLength() == 0)? null : nodes.item(0).getTextContent(); + } + + /** + * @return the disk handler + */ + public LocalDirsHandlerService getDiskHandler() { + return dirsHandler; + } + + /** + * Propagating an exception to {@link ExceptionReporter}. 
+ * @param exception the exception to propagate + */ + public void reportException(Exception exception) { + exceptionReporter.reportException(exception); + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/NodePage.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/NodePage.java index ae9b92d6e3d..8e711e7affe 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/NodePage.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/NodePage.java @@ -83,6 +83,7 @@ protected void render(Block html) { info.getLastNodeUpdateTime())) .__("NodeHealthReport", info.getHealthReport()) + .__("Node Health Details", info.getNodeHealthDetails()) .__("NodeManager started on", new Date( info.getNMStartupTime())) .__("NodeManager Version:", info.getNMBuildVersion() + diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/NodeInfo.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/NodeInfo.java index 33e2de65a98..44597286157 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/NodeInfo.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/NodeInfo.java @@ -45,6 +45,7 @@ protected long lastNodeUpdateTime; protected String resourceTypes; protected boolean 
nodeHealthy; + protected String nodeHealthDetails; protected String nodeManagerVersion; protected String nodeManagerBuildVersion; protected String nodeManagerVersionBuiltOn; @@ -77,7 +78,9 @@ public NodeInfo(final Context context, final ResourceView resourceView) { .getLastHealthReportTime(); this.healthReport = context.getNodeHealthStatus().getHealthReport(); - + this.nodeHealthDetails = context.getNodeHealthStatus() + .getNodeHealthDetails() == null ? "" : context.getNodeHealthStatus() + .getNodeHealthDetails().toString(); this.nodeManagerVersion = YarnVersionInfo.getVersion(); this.nodeManagerBuildVersion = YarnVersionInfo.getBuildVersion(); this.nodeManagerVersionBuiltOn = YarnVersionInfo.getDate(); @@ -131,6 +134,10 @@ public String getHealthReport() { return this.healthReport; } + public String getNodeHealthDetails() { + return this.nodeHealthDetails; + } + public long getTotalVmemAllocated() { return this.totalVmemAllocatedContainersMB; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestEventFlow.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestEventFlow.java index 3f4879b23ea..b0ee4b03c91 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestEventFlow.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestEventFlow.java @@ -48,6 +48,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.BaseContainerManagerTest; import org.apache.hadoop.yarn.server.nodemanager.containermanager.TestContainerManager; import org.apache.hadoop.yarn.server.nodemanager.health.NodeHealthCheckerService; +import 
org.apache.hadoop.yarn.server.nodemanager.health.NodeHealthCheckerServiceImpl; import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics; import org.apache.hadoop.yarn.server.nodemanager.recovery.NMNullStateStoreService; import org.apache.hadoop.yarn.server.nodemanager.security.NMContainerTokenSecretManager; @@ -106,7 +107,7 @@ public int getHttpPort() { Dispatcher dispatcher = new AsyncDispatcher(); LocalDirsHandlerService dirsHandler = new LocalDirsHandlerService(); NodeHealthCheckerService healthChecker = - new NodeHealthCheckerService(dirsHandler); + new NodeHealthCheckerServiceImpl(dirsHandler); healthChecker.init(conf); NodeManagerMetrics metrics = NodeManagerMetrics.create(); NodeStatusUpdater nodeStatusUpdater = diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/BaseContainerManagerTest.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/BaseContainerManagerTest.java index 9ee3ce6bc8b..584fc5fc9be 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/BaseContainerManagerTest.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/BaseContainerManagerTest.java @@ -27,6 +27,7 @@ import static org.mockito.Mockito.doNothing; import org.apache.hadoop.yarn.server.nodemanager.NodeResourceMonitorImpl; +import org.apache.hadoop.yarn.server.nodemanager.health.NodeHealthCheckerServiceImpl; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -154,7 +155,6 @@ public NMLogAggregationStatusTracker getNMLogAggregationStatusTracker() { protected ContainerExecutor exec; protected DeletionService delSrvc; 
protected String user = "nobody"; - protected NodeHealthCheckerService nodeHealthChecker; protected LocalDirsHandlerService dirsHandler; protected final long DUMMY_RM_IDENTIFIER = 1234; private NodeResourceMonitorImpl nodeResourceMonitor = mock( @@ -172,6 +172,12 @@ public void setNodeStatusUpdater( this.nodeStatusUpdater = nodeStatusUpdater; } + public void setNodeHealthCheckerService(NodeHealthCheckerService nhcs, + Configuration yarnConfiguration) { + this.nodeHealthCheckerService = nhcs; + this.nodeHealthCheckerService.init(yarnConfiguration); + } + protected ContainerExecutor createContainerExecutor() { DefaultContainerExecutor exec = new DefaultContainerExecutor(); exec.setConf(conf); @@ -208,7 +214,7 @@ public void setup() throws IOException { dirsHandler = new LocalDirsHandlerService(); dirsHandler.init(conf); - nodeHealthCheckerService = new NodeHealthCheckerService(dirsHandler); + nodeHealthCheckerService = new NodeHealthCheckerServiceImpl(dirsHandler); nodeStatusUpdater = new NodeStatusUpdaterImpl( context, new AsyncDispatcher(), nodeHealthCheckerService, metrics) { @Override diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManagerRecovery.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManagerRecovery.java index 826cc02219b..8d70b17ce29 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManagerRecovery.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManagerRecovery.java @@ -85,7 +85,6 @@ import 
org.apache.hadoop.yarn.server.nodemanager.Context; import org.apache.hadoop.yarn.server.nodemanager.DeletionService; import org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService; -import org.apache.hadoop.yarn.server.nodemanager.health.NodeHealthCheckerService; import org.apache.hadoop.yarn.server.nodemanager.NodeManager.NMContext; import org.apache.hadoop.yarn.server.nodemanager.NodeStatusUpdater; import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application; @@ -106,6 +105,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitorImpl; import org.apache.hadoop.yarn.server.nodemanager.containermanager.scheduler.ContainerScheduler; +import org.apache.hadoop.yarn.server.nodemanager.health.NodeHealthCheckerServiceImpl; import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics; import org.apache.hadoop.yarn.server.nodemanager.metrics.TestNodeManagerMetrics; import org.apache.hadoop.yarn.server.nodemanager.recovery.NMMemoryStateStoreService; @@ -156,9 +156,8 @@ public void setup() throws IOException { delSrvc.init(conf); exec = createContainerExecutor(); dirsHandler = new LocalDirsHandlerService(); - nodeHealthChecker = new NodeHealthCheckerService(dirsHandler); - nodeHealthChecker.init(conf); - + setNodeHealthCheckerService(new NodeHealthCheckerServiceImpl(dirsHandler), + conf); } @Test diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/health/TestNodeHealthCheckerService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/health/TestNodeHealthCheckerService.java index 2b40fa8d4be..01c950e3b8f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/health/TestNodeHealthCheckerService.java 
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/health/TestNodeHealthCheckerService.java @@ -22,8 +22,12 @@ import java.io.FileOutputStream; import java.io.IOException; import java.io.PrintWriter; +import java.nio.charset.StandardCharsets; +import java.util.HashMap; +import org.apache.commons.io.FileUtils; import org.apache.hadoop.service.AbstractService; +import org.apache.hadoop.yarn.server.api.records.NodeHealthDetails; import org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -45,6 +49,8 @@ import org.junit.Test; import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; import static org.mockito.Mockito.doReturn; import static org.mockito.Mockito.spy; @@ -67,9 +73,13 @@ private File nodeHealthScriptFile = new File(TEST_ROOT_DIR, Shell.appendScriptExtension("failingscript")); + private File nodeHealthDetailsFile = new File(TEST_ROOT_DIR, + "nodeScore.xml"); + @Before - public void setup() { + public void setup() throws IOException { TEST_ROOT_DIR.mkdirs(); + writeNodeHealthScoreFile(); } @After @@ -80,6 +90,15 @@ public void tearDown() throws Exception { } } + private void writeNodeHealthScoreFile() throws IOException { + String xmlData = "" + + "resource135" + + "resource265" + + ""; + FileUtils.writeStringToFile(nodeHealthDetailsFile, xmlData, + StandardCharsets.UTF_8); + } + private void writeNodeHealthScriptFile() throws IOException, InterruptedException { try (PrintWriter pw = new PrintWriter( @@ -140,8 +159,9 @@ public void testNodeHealthService() throws Exception { } nodeHealthScriptRunner = spy(nodeHealthScriptRunner); NodeHealthCheckerService nodeHealthChecker = - new NodeHealthCheckerService(dirsHandler); - 
nodeHealthChecker.addHealthReporter(nodeHealthScriptRunner); + new NodeHealthCheckerServiceImpl(dirsHandler); + ((NodeHealthCheckerServiceImpl) nodeHealthChecker) + .addHealthReporter(nodeHealthScriptRunner); nodeHealthChecker.init(conf); doReturn(true).when(nodeHealthScriptRunner).isHealthy(); @@ -151,9 +171,9 @@ public void testNodeHealthService() throws Exception { nodeHealthChecker.getLastHealthReportTime()); LOG.info("Checking initial healthy condition"); // Check proper report conditions. - Assert.assertTrue("Node health status reported unhealthy", healthStatus + assertTrue("Node health status reported unhealthy", healthStatus .getIsNodeHealthy()); - Assert.assertTrue("Node health status reported unhealthy", healthStatus + assertTrue("Node health status reported unhealthy", healthStatus .getHealthReport().equals(nodeHealthChecker.getHealthReport())); doReturn(false).when(nodeHealthScriptRunner).isHealthy(); @@ -164,7 +184,7 @@ public void testNodeHealthService() throws Exception { LOG.info("Checking Healthy--->Unhealthy"); Assert.assertFalse("Node health status reported healthy", healthStatus .getIsNodeHealthy()); - Assert.assertTrue("Node health status reported healthy", healthStatus + assertTrue("Node health status reported healthy", healthStatus .getHealthReport().equals(nodeHealthChecker.getHealthReport())); doReturn(true).when(nodeHealthScriptRunner).isHealthy(); @@ -173,9 +193,9 @@ public void testNodeHealthService() throws Exception { nodeHealthChecker.getLastHealthReportTime()); LOG.info("Checking UnHealthy--->healthy"); // Check proper report conditions. - Assert.assertTrue("Node health status reported unhealthy", healthStatus + assertTrue("Node health status reported unhealthy", healthStatus .getIsNodeHealthy()); - Assert.assertTrue("Node health status reported unhealthy", healthStatus + assertTrue("Node health status reported unhealthy", healthStatus .getHealthReport().equals(nodeHealthChecker.getHealthReport())); // Healthy to timeout transition. 
@@ -188,9 +208,9 @@ public void testNodeHealthService() throws Exception { LOG.info("Checking Healthy--->timeout"); Assert.assertFalse("Node health status reported healthy even after timeout", healthStatus.getIsNodeHealthy()); - Assert.assertTrue("Node script time out message not propagated", + assertTrue("Node script time out message not propagated", healthStatus.getHealthReport().equals( - Joiner.on(NodeHealthCheckerService.SEPARATOR).skipNulls().join( + Joiner.on(NodeHealthCheckerServiceImpl.SEPARATOR).skipNulls().join( NodeHealthScriptRunner.NODE_HEALTH_SCRIPT_TIMED_OUT_MSG, Strings.emptyToNull( nodeHealthChecker.getDiskHandler() @@ -230,8 +250,9 @@ public long getLastHealthReportTime() { Configuration conf = new Configuration(); LocalDirsHandlerService dirsHandler = new LocalDirsHandlerService(); NodeHealthCheckerService nodeHealthChecker = - new NodeHealthCheckerService(dirsHandler); - nodeHealthChecker.addHealthReporter(customHealthReporter); + new NodeHealthCheckerServiceImpl(dirsHandler); + ((NodeHealthCheckerServiceImpl) nodeHealthChecker) + .addHealthReporter(customHealthReporter); nodeHealthChecker.init(conf); assertThat(nodeHealthChecker.isHealthy()).isTrue(); @@ -246,7 +267,7 @@ public void testExceptionReported() { Configuration conf = new Configuration(); LocalDirsHandlerService dirsHandler = new LocalDirsHandlerService(); NodeHealthCheckerService nodeHealthChecker = - new NodeHealthCheckerService(dirsHandler); + new NodeHealthCheckerServiceImpl(dirsHandler); nodeHealthChecker.init(conf); assertThat(nodeHealthChecker.isHealthy()).isTrue(); @@ -256,4 +277,61 @@ public void testExceptionReported() { assertThat(nodeHealthChecker.isHealthy()).isFalse(); assertThat(nodeHealthChecker.getHealthReport()).isEqualTo(message); } + + /** + * Test NodeHealthDetails when its configured. 
The resourceScore file is + * {@link #nodeHealthDetailsFile} + */ + @Test + public void testUpdateNodeHealthDetailsEnabled() { + + Configuration conf = new Configuration(); + + LocalDirsHandlerService dirsHandler = new LocalDirsHandlerService(); + NodeHealthCheckerService nodeHealthChecker = + new NodeHealthCheckerServiceImpl(dirsHandler); + + + conf.setBoolean(YarnConfiguration.NM_HEALTH_CHECK_SCORE_ENABLED, true); + conf.set(YarnConfiguration.NM_HEALTH_CHECK_SCORE_FILE, + nodeHealthDetailsFile.getAbsolutePath()); + nodeHealthChecker.init(conf); + + HashMap resourcesScrore = new HashMap<>(); + resourcesScrore.put("resource1", 35); + resourcesScrore.put("resource2", 65); + NodeHealthDetails nodeHealthDetails = NodeHealthDetails + .newInstance(100, resourcesScrore); + + NodeHealthStatus nodeHealthStatus = NodeHealthStatus.newInstance(true, + "", System.currentTimeMillis()); + nodeHealthChecker.updateNodeHealthDetails(nodeHealthStatus); + assertEquals(100, (int) nodeHealthStatus.getNodeHealthDetails() + .getOverallScore()); + assertEquals(nodeHealthStatus.getNodeHealthDetails().toString(), + nodeHealthDetails.toString()); + + } + + @Test + public void testNodeHealthDetailsWhenDisabled() { + Configuration conf = new Configuration(); + + LocalDirsHandlerService dirsHandler = new LocalDirsHandlerService(); + NodeHealthCheckerService nodeHealthChecker = + new NodeHealthCheckerServiceImpl(dirsHandler); + + // Testing with the default conf. The score should return 0. 
+ nodeHealthChecker.init(conf); + + NodeHealthStatus nodeHealthStatus = NodeHealthStatus.newInstance(true, + "", System.currentTimeMillis()); + nodeHealthChecker.updateNodeHealthDetails(nodeHealthStatus); + assertEquals(0, (int) nodeHealthStatus.getNodeHealthDetails() + .getOverallScore()); + assertEquals(nodeHealthStatus.getNodeHealthDetails().toString(), + "[Overall Score = 0]"); + + } + } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestContainerLogsPage.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestContainerLogsPage.java index 71716da37e7..a81ef59ce5d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestContainerLogsPage.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestContainerLogsPage.java @@ -64,6 +64,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerState; import org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch; import org.apache.hadoop.yarn.server.nodemanager.health.NodeHealthCheckerService; +import org.apache.hadoop.yarn.server.nodemanager.health.NodeHealthCheckerServiceImpl; import org.apache.hadoop.yarn.server.nodemanager.recovery.NMNullStateStoreService; import org.apache.hadoop.yarn.server.nodemanager.webapp.ContainerLogsPage.ContainersLogsBlock; import org.apache.hadoop.yarn.server.security.ApplicationACLsManager; @@ -78,9 +79,9 @@ public class TestContainerLogsPage { - private NodeHealthCheckerService createNodeHealthCheckerService() { + private NodeHealthCheckerServiceImpl createNodeHealthCheckerService() { LocalDirsHandlerService 
dirsHandler = new LocalDirsHandlerService(); - return new NodeHealthCheckerService(dirsHandler); + return new NodeHealthCheckerServiceImpl(dirsHandler); } @Test(timeout=30000) @@ -90,7 +91,8 @@ public void testContainerLogDirs() throws IOException, YarnException { String logdirwithFile = absLogDir.toURI().toString(); Configuration conf = new Configuration(); conf.set(YarnConfiguration.NM_LOG_DIRS, logdirwithFile); - NodeHealthCheckerService healthChecker = createNodeHealthCheckerService(); + NodeHealthCheckerService healthChecker = + createNodeHealthCheckerService(); healthChecker.init(conf); LocalDirsHandlerService dirsHandler = healthChecker.getDiskHandler(); NMContext nmContext = new NodeManager.NMContext(null, null, dirsHandler, @@ -213,7 +215,8 @@ public void testContainerLogPageAccess() throws IOException { "kerberos"); UserGroupInformation.setConfiguration(conf); - NodeHealthCheckerService healthChecker = createNodeHealthCheckerService(); + NodeHealthCheckerService healthChecker = + createNodeHealthCheckerService(); healthChecker.init(conf); LocalDirsHandlerService dirsHandler = healthChecker.getDiskHandler(); // Add an application and the corresponding containers diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMContainerWebSocket.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMContainerWebSocket.java index 1e636650463..0a61666b43c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMContainerWebSocket.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMContainerWebSocket.java @@ -27,6 +27,7 @@ import 
org.apache.hadoop.yarn.server.nodemanager.ResourceView; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; import org.apache.hadoop.yarn.server.nodemanager.health.NodeHealthCheckerService; +import org.apache.hadoop.yarn.server.nodemanager.health.NodeHealthCheckerServiceImpl; import org.apache.hadoop.yarn.server.security.ApplicationACLsManager; import org.eclipse.jetty.websocket.api.Session; import org.eclipse.jetty.websocket.api.UpgradeRequest; @@ -104,7 +105,8 @@ public boolean isPmemCheckEnabled() { }; conf.set(YarnConfiguration.NM_LOCAL_DIRS, TESTROOTDIR.getAbsolutePath()); conf.set(YarnConfiguration.NM_LOG_DIRS, testLogDir.getAbsolutePath()); - NodeHealthCheckerService healthChecker = createNodeHealthCheckerService(); + NodeHealthCheckerService healthChecker = + createNodeHealthCheckerService(); healthChecker.init(conf); LocalDirsHandlerService dirsHandler = healthChecker.getDiskHandler(); conf.set(YarnConfiguration.NM_WEBAPP_ADDRESS, webAddr); @@ -118,9 +120,9 @@ public boolean isPmemCheckEnabled() { } } - private NodeHealthCheckerService createNodeHealthCheckerService() { + private NodeHealthCheckerServiceImpl createNodeHealthCheckerService() { LocalDirsHandlerService dirsHandler = new LocalDirsHandlerService(); - return new NodeHealthCheckerService(dirsHandler); + return new NodeHealthCheckerServiceImpl(dirsHandler); } @Test diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServer.java index cbfaa177921..fe8c7fde535 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServer.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServer.java @@ -48,6 +48,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerImpl; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerState; import org.apache.hadoop.yarn.server.nodemanager.health.NodeHealthCheckerService; +import org.apache.hadoop.yarn.server.nodemanager.health.NodeHealthCheckerServiceImpl; import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics; import org.apache.hadoop.yarn.server.nodemanager.recovery.NMNullStateStoreService; import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService; @@ -78,9 +79,9 @@ public void tearDown() { FileUtil.fullyDelete(testLogDir); } - private NodeHealthCheckerService createNodeHealthCheckerService() { + private NodeHealthCheckerServiceImpl createNodeHealthCheckerService() { LocalDirsHandlerService dirsHandler = new LocalDirsHandlerService(); - return new NodeHealthCheckerService(dirsHandler); + return new NodeHealthCheckerServiceImpl(dirsHandler); } private int startNMWebAppServer(String webAddr) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServices.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServices.java index 30b73c1acc7..522daa9b414 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServices.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServices.java @@ -45,6 +45,7 @@ import 
org.apache.hadoop.yarn.logaggregation.ContainerLogAggregationType; import org.apache.hadoop.yarn.logaggregation.ContainerLogFileInfo; import org.apache.hadoop.yarn.logaggregation.TestContainerLogsUtils; +import org.apache.hadoop.yarn.server.api.records.NodeHealthDetails; import org.apache.hadoop.yarn.server.nodemanager.Context; import org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService; import org.apache.hadoop.yarn.server.nodemanager.NodeManager; @@ -57,6 +58,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu.AssignedGpuDevice; import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu.GpuDevice; import org.apache.hadoop.yarn.server.nodemanager.health.NodeHealthCheckerService; +import org.apache.hadoop.yarn.server.nodemanager.health.NodeHealthCheckerServiceImpl; import org.apache.hadoop.yarn.server.nodemanager.webapp.WebServer.NMWebApp; import org.apache.hadoop.yarn.server.nodemanager.webapp.dao.NMResourceInfo; import org.apache.hadoop.yarn.server.nodemanager.webapp.dao.gpu.GpuDeviceInformation; @@ -143,13 +145,15 @@ protected void configureServlets() { LOGSERVICEWSADDR); dirsHandler = new LocalDirsHandlerService(); NodeHealthCheckerService healthChecker = - new NodeHealthCheckerService(dirsHandler); + new NodeHealthCheckerServiceImpl(dirsHandler); healthChecker.init(conf); aclsManager = new ApplicationACLsManager(conf); nmContext = new NodeManager.NMContext(null, null, dirsHandler, aclsManager, null, false, conf); NodeId nodeId = NodeId.newInstance("testhost.foo.com", 8042); ((NodeManager.NMContext)nmContext).setNodeId(nodeId); + nmContext.getNodeHealthStatus().setNodeHealthDetails(NodeHealthDetails + .newInstance(90, Collections.singletonMap("TEST", 90))); resourceView = new ResourceView() { @Override public long getVmemAllocatedForContainers() { @@ -808,14 +812,15 @@ public void verifyNodesXML(NodeList nodes) throws JSONException, Exception { "nodeManagerVersionBuiltOn"), 
WebServicesTestUtils.getXmlString( element, "nodeManagerBuildVersion"), WebServicesTestUtils.getXmlString(element, "nodeManagerVersion"), - WebServicesTestUtils.getXmlString(element, "resourceTypes")); + WebServicesTestUtils.getXmlString(element, "resourceTypes"), + WebServicesTestUtils.getXmlString(element, "nodeHealthDetails")); } } public void verifyNodeInfo(JSONObject json) throws JSONException, Exception { assertEquals("incorrect number of elements", 1, json.length()); JSONObject info = json.getJSONObject("nodeInfo"); - assertEquals("incorrect number of elements", 18, info.length()); + assertEquals("incorrect number of elements", 19, info.length()); verifyNodeInfoGeneric(info.getString("id"), info.getString("healthReport"), info.getLong("totalVmemAllocatedContainersMB"), info.getLong("totalPmemAllocatedContainersMB"), @@ -827,8 +832,8 @@ public void verifyNodeInfo(JSONObject json) throws JSONException, Exception { info.getString("hadoopBuildVersion"), info.getString("hadoopVersion"), info.getString("nodeManagerVersionBuiltOn"), info.getString("nodeManagerBuildVersion"), - info.getString("nodeManagerVersion"), - info.getString("resourceTypes") + info.getString("nodeManagerVersion"), info.getString("resourceTypes"), + info.getString("nodeHealthDetails") ); } @@ -841,7 +846,7 @@ public void verifyNodeInfoGeneric(String id, String healthReport, String hadoopVersionBuiltOn, String hadoopBuildVersion, String hadoopVersion, String resourceManagerVersionBuiltOn, String resourceManagerBuildVersion, String resourceManagerVersion, - String resourceTypes) { + String resourceTypes, String nodeHealthDetails) { WebServicesTestUtils.checkStringMatch("id", "testhost.foo.com:8042", id); WebServicesTestUtils.checkStringMatch("healthReport", "Healthy", @@ -856,6 +861,8 @@ public void verifyNodeInfoGeneric(String id, String healthReport, assertEquals("pmemCheckEnabled incorrect", true, pmemCheckEnabled); assertTrue("lastNodeUpdateTime incorrect", lastNodeUpdateTime == nmContext 
.getNodeHealthStatus().getLastHealthReportTime()); + assertEquals("nodeHealthDetails incorrect", nodeHealthDetails, nmContext + .getNodeHealthStatus().getNodeHealthDetails().toString()); assertTrue("nodeHealthy isn't true", nodeHealthy); WebServicesTestUtils.checkStringMatch("nodeHostName", "testhost.foo.com", nodeHostName); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServicesApps.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServicesApps.java index ab06c0f9f33..c7c9beb64af 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServicesApps.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServicesApps.java @@ -54,6 +54,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationState; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; import org.apache.hadoop.yarn.server.nodemanager.health.NodeHealthCheckerService; +import org.apache.hadoop.yarn.server.nodemanager.health.NodeHealthCheckerServiceImpl; import org.apache.hadoop.yarn.server.nodemanager.webapp.WebServer.NMWebApp; import org.apache.hadoop.yarn.server.nodemanager.webapp.dao.AppsInfo; import org.apache.hadoop.yarn.server.security.ApplicationACLsManager; @@ -105,7 +106,7 @@ protected void configureServlets() { conf.set(YarnConfiguration.NM_LOG_DIRS, testLogDir.getAbsolutePath()); LocalDirsHandlerService dirsHandler = new LocalDirsHandlerService(); NodeHealthCheckerService healthChecker = - new NodeHealthCheckerService(dirsHandler); + new NodeHealthCheckerServiceImpl(dirsHandler); 
healthChecker.init(conf); dirsHandler = healthChecker.getDiskHandler(); aclsManager = new ApplicationACLsManager(conf); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServicesAuxServices.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServicesAuxServices.java index 7ec8fcd47d3..c7ec5d6d74d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServicesAuxServices.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServicesAuxServices.java @@ -49,6 +49,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.AuxServices; import org.apache.hadoop.yarn.server.nodemanager.containermanager.records.AuxServiceRecord; import org.apache.hadoop.yarn.server.nodemanager.health.NodeHealthCheckerService; +import org.apache.hadoop.yarn.server.nodemanager.health.NodeHealthCheckerServiceImpl; import org.apache.hadoop.yarn.server.nodemanager.webapp.WebServer.NMWebApp; import org.apache.hadoop.yarn.server.security.ApplicationACLsManager; import org.apache.hadoop.yarn.webapp.GenericExceptionHandler; @@ -125,7 +126,7 @@ public boolean isPmemCheckEnabled() { conf.set(YarnConfiguration.NM_LOG_DIRS, testLogDir.getAbsolutePath()); LocalDirsHandlerService dirsHandler = new LocalDirsHandlerService(); NodeHealthCheckerService healthChecker = - new NodeHealthCheckerService(dirsHandler); + new NodeHealthCheckerServiceImpl(dirsHandler); healthChecker.init(conf); dirsHandler = healthChecker.getDiskHandler(); ApplicationACLsManager aclsManager = new ApplicationACLsManager(conf); diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServicesContainers.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServicesContainers.java index 175a0b02470..4a218fe0a19 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServicesContainers.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServicesContainers.java @@ -53,6 +53,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerState; import org.apache.hadoop.yarn.server.nodemanager.health.NodeHealthCheckerService; +import org.apache.hadoop.yarn.server.nodemanager.health.NodeHealthCheckerServiceImpl; import org.apache.hadoop.yarn.server.nodemanager.webapp.WebServer.NMWebApp; import org.apache.hadoop.yarn.server.security.ApplicationACLsManager; import org.apache.hadoop.yarn.server.utils.BuilderUtils; @@ -130,7 +131,7 @@ public boolean isPmemCheckEnabled() { conf.set(YarnConfiguration.NM_LOG_DIRS, testLogDir.getAbsolutePath()); LocalDirsHandlerService dirsHandler = new LocalDirsHandlerService(); NodeHealthCheckerService healthChecker = - new NodeHealthCheckerService(dirsHandler); + new NodeHealthCheckerServiceImpl(dirsHandler); healthChecker.init(conf); dirsHandler = healthChecker.getDiskHandler(); aclsManager = new ApplicationACLsManager(conf); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebTerminal.java 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebTerminal.java index d4180e48251..f1dcc957771 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebTerminal.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebTerminal.java @@ -32,6 +32,7 @@ import org.apache.hadoop.yarn.server.nodemanager.NodeManager; import org.apache.hadoop.yarn.server.nodemanager.ResourceView; import org.apache.hadoop.yarn.server.nodemanager.health.NodeHealthCheckerService; +import org.apache.hadoop.yarn.server.nodemanager.health.NodeHealthCheckerServiceImpl; import org.apache.hadoop.yarn.server.security.ApplicationACLsManager; import org.junit.After; import org.junit.Before; @@ -53,9 +54,9 @@ private WebServer server; private int port; - private NodeHealthCheckerService createNodeHealthCheckerService() { + private NodeHealthCheckerServiceImpl createNodeHealthCheckerService() { LocalDirsHandlerService dirsHandler = new LocalDirsHandlerService(); - return new NodeHealthCheckerService(dirsHandler); + return new NodeHealthCheckerServiceImpl(dirsHandler); } private int startNMWebAppServer(String webAddr) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMNMInfo.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMNMInfo.java index 8f1cda94a7c..3b5d3828b21 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMNMInfo.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMNMInfo.java @@ -96,6 +96,9 @@ public String getLiveNodeManagers() { ni.getLastHealthReportTime()); info.put("HealthReport", ni.getHealthReport()); + info.put("HealthDetails", + ni.getNodeHealthDetails() != null ? + ni.getNodeHealthDetails().toString() : ""); info.put("NodeManagerVersion", ni.getNodeManagerVersion()); if(report != null) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNode.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNode.java index e6205d2dac6..d008f23f1bf 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNode.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNode.java @@ -33,6 +33,7 @@ import org.apache.hadoop.yarn.api.records.ResourceUtilization; import org.apache.hadoop.yarn.api.records.NodeAttribute; import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse; +import org.apache.hadoop.yarn.server.api.records.NodeHealthDetails; import org.apache.hadoop.yarn.server.api.records.OpportunisticContainersStatus; import org.apache.hadoop.yarn.server.resourcemanager.RMContext; @@ -85,13 +86,19 @@ * @return the latest health report received from this node. */ public String getHealthReport(); - + /** * the time of the latest health report received from this node. * @return the time of the latest health report received from this node. 
*/ public long getLastHealthReportTime(); + /** + * the latest health details received from this node. + * @return the health details of the node, or null if none were received. + */ + NodeHealthDetails getNodeHealthDetails(); + /** * the node manager version of the node received as part of the * registration with the resource manager diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java index fc7e88ba123..b70613a0119 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java @@ -36,6 +36,7 @@ import java.util.concurrent.locks.ReentrantReadWriteLock.WriteLock; import org.apache.commons.collections.keyvalue.DefaultMapEntry; +import org.apache.hadoop.yarn.server.api.records.NodeHealthDetails; import org.apache.hadoop.yarn.server.api.records.NodeStatus; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -132,6 +133,7 @@ private String healthReport; private long lastHealthReportTime; + private NodeHealthDetails nodeHealthDetails; private String nodeManagerVersion; private Integer decommissioningTimeout; @@ -513,7 +515,27 @@ public void setLastHealthReportTime(long lastHealthReportTime) { this.writeLock.unlock(); } } - + + public void setNodeHealthDetails(NodeHealthDetails nhd) { + this.writeLock.lock(); + + try { + this.nodeHealthDetails = nhd; + } finally { + this.writeLock.unlock(); + } + } + + public NodeHealthDetails getNodeHealthDetails() { + this.readLock.lock(); + + try { + return
this.nodeHealthDetails; + } finally { + this.readLock.unlock(); + } + } + @Override public long getLastHealthReportTime() { this.readLock.lock(); @@ -922,6 +944,7 @@ private static NodeHealthStatus updateRMNodeFromStatusEvents( rmNode.setHealthReport(remoteNodeHealthStatus.getHealthReport()); rmNode.setLastHealthReportTime(remoteNodeHealthStatus .getLastHealthReportTime()); + rmNode.setNodeHealthDetails(remoteNodeHealthStatus.getNodeHealthDetails()); rmNode.setAggregatedContainersUtilization(statusEvent .getAggregatedContainersUtilization()); rmNode.setNodeUtilization(statusEvent.getNodeUtilization()); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockNodes.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockNodes.java index 0de6c572a2c..8c27b0fb703 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockNodes.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockNodes.java @@ -38,6 +38,7 @@ import org.apache.hadoop.yarn.nodelabels.CommonNodeLabelsManager; import org.apache.hadoop.yarn.resourcetypes.ResourceTypesTestHelper; import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse; +import org.apache.hadoop.yarn.server.api.records.NodeHealthDetails; import org.apache.hadoop.yarn.server.api.records.OpportunisticContainersStatus; import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode; import org.apache.hadoop.yarn.server.resourcemanager.rmnode.UpdatedContainerInfo; @@ -127,6 +128,7 @@ public static Resource newAvailResource(Resource total, Resource used) { private ResourceUtilization nodeUtilization; private 
Resource physicalResource; private RMContext rmContext; + private NodeHealthDetails nodeHealthDetails; MockRMNodeImpl(NodeId nodeId, String nodeAddr, String httpAddress, Resource perNode, String rackName, String healthReport, @@ -270,6 +272,11 @@ public long getLastHealthReportTime() { return lastHealthReportTime; } + @Override + public NodeHealthDetails getNodeHealthDetails() { + return nodeHealthDetails; + } + @Override public Set getNodeLabels() { if (labels != null) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/NodeManagerRest.md b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/NodeManagerRest.md index b088c48dac7..1a0456d35d2 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/NodeManagerRest.md +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/NodeManagerRest.md @@ -99,6 +99,7 @@ Response Body: "pmemCheckEnabled": true, "lastNodeUpdateTime": 1485814574224, "nodeHealthy": true, + "nodeHealthDetails": "[Overall Score = 100,SSD = 100]", "nodeManagerVersion": "3.0.0", "nodeManagerBuildVersion": "3.0.0", "nodeManagerVersionBuiltOn": "2017-01-30T17:42Z", @@ -140,6 +141,7 @@ Response Body: true 1485815774203 true + [Overall Score = 100,SSD = 100] 3.0.0 3.0.0 2017-01-30T17:42Z diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/models/yarn-node.js b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/models/yarn-node.js index 4753983b36c..090530f8004 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/models/yarn-node.js +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/models/yarn-node.js @@ -27,6 +27,7 @@ export default DS.Model.extend({ nodeHealthy: DS.attr('boolean'), lastNodeUpdateTime: DS.attr('string'), healthReport: DS.attr('string'), + nodeHealthDetails: DS.attr('string'), nmStartupTime: DS.attr('string'), nodeManagerBuildVersion: DS.attr('string'), 
hadoopBuildVersion: DS.attr('string'), diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/serializers/yarn-node.js b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/serializers/yarn-node.js index 10521e62a83..8d1fae77c5d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/serializers/yarn-node.js +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/serializers/yarn-node.js @@ -37,6 +37,7 @@ export default DS.JSONAPISerializer.extend({ nodeHealthy: payload.nodeHealthy, lastNodeUpdateTime: Converter.timeStampToDate(payload.lastNodeUpdateTime), healthReport: payload.healthReport || 'N/A', + nodeHealthDetails: payload.nodeHealthDetails || 'N/A', nmStartupTime: payload.nmStartupTime? Converter.timeStampToDate(payload.nmStartupTime) : '', nodeManagerBuildVersion: payload.nodeManagerBuildVersion, hadoopBuildVersion: payload.hadoopBuildVersion diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/templates/yarn-node/info.hbs b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/templates/yarn-node/info.hbs index 59cbbf0abd2..13e6b1b5634 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/templates/yarn-node/info.hbs +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/templates/yarn-node/info.hbs @@ -67,6 +67,10 @@ Node Health Report {{model.node.healthReport}} + + Node Health Details + {{model.node.nodeHealthDetails}} + {{#if model.node.nmStartupTime}} Node Manager Start Time