diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java index 05efc69..a6b993d 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java @@ -897,6 +897,10 @@ private NMDistributedNodeLabelsHandler( private final NodeLabelsProvider nodeLabelsProvider; private Set previousNodeLabels; private boolean updatedLabelsSentToRM; + private long lastNodeLabelSendFailMills = 0l; + // TODO : Need to check which conf to use.Currently setting as 1 min + // Planning to use DEFAULT_NM_NODE_LABELS_PROVIDER_FETCH_INTERVAL_MS/3 + private long failLabelResendInterval = 60000; @Override public Set getNodeLabelsForRegistration() { @@ -938,7 +942,8 @@ public String verifyRMRegistrationResponseForNodeLabels( // take some action only on modification of labels boolean areNodeLabelsUpdated = nodeLabelsForHeartbeat.size() != previousNodeLabels.size() - || !previousNodeLabels.containsAll(nodeLabelsForHeartbeat); + || !previousNodeLabels.containsAll(nodeLabelsForHeartbeat) + || checkResendLabelOnFailure(); updatedLabelsSentToRM = false; if (areNodeLabelsUpdated) { @@ -980,16 +985,33 @@ private void validateNodeLabels(Set nodeLabelsForHeartbeat) } } + /* + * In case of failure when RM doesnt accept labels need to resend Labels to + * RM. This method checks whether we need to resend + */ + public boolean checkResendLabelOnFailure() { + if (lastNodeLabelSendFailMills > 0l) { + long lastFailTimePassed = + System.currentTimeMillis() - lastNodeLabelSendFailMills; + if (lastFailTimePassed > failLabelResendInterval) { + return true; + } + } + return false; + } + @Override public void verifyRMHeartbeatResponseForNodeLabels( NodeHeartbeatResponse response) { if (updatedLabelsSentToRM) { if (response.getAreNodeLabelsAcceptedByRM()) { + lastNodeLabelSendFailMills = 0l; LOG.info("Node Labels {" + StringUtils.join(",", previousNodeLabels) + "} were Accepted by RM "); } else { // case where updated labels from NodeLabelsProvider is sent to RM and // RM rejected the labels + lastNodeLabelSendFailMills = System.currentTimeMillis(); LOG.error( "NM node labels {" + StringUtils.join(",", previousNodeLabels) + "} were not accepted by RM and message from RM : "