diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/NodesListManager.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/NodesListManager.java index 1e8b98a..53b7fad 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/NodesListManager.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/NodesListManager.java @@ -132,9 +132,24 @@ public void refreshNodes(Configuration yarnConf) throws IOException, refreshHostsReader(yarnConf); for (NodeId nodeId: rmContext.getRMNodes().keySet()) { - if (!isValidNode(nodeId.getHost())) { + if (!isValidNode(nodeId.getHost()) && + !isInvalidAndAbsent(nodeId.getHost())) { this.rmContext.getDispatcher().getEventHandler().handle( new RMNodeEvent(nodeId, RMNodeEventType.DECOMMISSION)); + } else if (isInvalidAndAbsent(nodeId.getHost())) { + //this.rmContext.getRMNodes().remove(nodeId); + this.rmContext.getDispatcher().getEventHandler().handle( + new RMNodeEvent(nodeId,RMNodeEventType.SHUTDOWN)); + } + } + for(NodeId nodeId: rmContext.getInactiveRMNodes().keySet()) { + RMNode rmNode = rmContext.getInactiveRMNodes().get(nodeId); + if(isInvalidAndAbsent(nodeId.getHost()) && + rmNode.getState() + == NodeState.DECOMMISSIONED) { + rmContext.getInactiveRMNodes().remove(nodeId); + ClusterMetrics metrics = ClusterMetrics.getMetrics(); + metrics.decrDecommisionedNMs(); } } } @@ -290,6 +305,14 @@ public boolean isValidNode(String hostName) { } } + public boolean isInvalidAndAbsent(String hostName) { + if (!includesFile.isEmpty() && !hostsReader.getHosts().contains(hostName) + && !hostsReader.getExcludedHosts().contains(hostName)) { + return true; + } + return false; + } + /** * Provides the currently unusable nodes. Copies it into provided collection. * @param unUsableNodes diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceTrackerService.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceTrackerService.java index 3638a19..b560db1 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceTrackerService.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceTrackerService.java @@ -280,7 +280,8 @@ public RegisterNodeManagerResponse registerNodeManager( } // Check if this node is a 'valid' node - if (!this.nodesListManager.isValidNode(host)) { + if (!this.nodesListManager.isValidNode(host) || + this.nodesListManager.isInvalidAndAbsent(host)) { String message = "Disallowed NodeManager from " + host + ", Sending SHUTDOWN signal to the NodeManager."; @@ -398,8 +399,9 @@ public NodeHeartbeatResponse nodeHeartbeat(NodeHeartbeatRequest request) // 1. Check if it's a valid (i.e. not excluded) node, if not, see if it is // in decommissioning. - if (!this.nodesListManager.isValidNode(nodeId.getHost()) - && !isNodeInDecommissioning(nodeId)) { + if ((!this.nodesListManager.isValidNode(nodeId.getHost()) + && !isNodeInDecommissioning(nodeId)) || + this.nodesListManager.isInvalidAndAbsent(nodeId.getHost())) { String message = "Disallowed NodeManager nodeId: " + nodeId + " hostname: " + nodeId.getHost(); diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java index e0d27d6..d99c646 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java @@ -915,12 +915,18 @@ public void transition(RMNodeImpl rmNode, RMNodeEvent event) { public static void deactivateNode(RMNodeImpl rmNode, NodeState finalState) { reportNodeUnusable(rmNode, finalState); - - // Deactivate the node - rmNode.context.getRMNodes().remove(rmNode.nodeId); - LOG.info("Deactivating Node " + rmNode.nodeId + " as it is now " - + finalState); - rmNode.context.getInactiveRMNodes().put(rmNode.nodeId, rmNode); + if(!rmNode.context.getNodesListManager().isInvalidAndAbsent( + rmNode.nodeId.getHost())) { + // Deactivate the node + rmNode.context.getRMNodes().remove(rmNode.nodeId); + LOG.info("Deactivating Node " + rmNode.nodeId + " as it is now " + + finalState); + rmNode.context.getInactiveRMNodes().put(rmNode.nodeId, rmNode); + } else { + rmNode.context.getRMNodes().remove(rmNode.nodeId); + LOG.info("Deactivating Node " + rmNode.nodeId + " as it is now " + + finalState); + } } /** diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceTrackerService.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceTrackerService.java index e42ed91..d62bb57 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceTrackerService.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceTrackerService.java @@ -159,6 +159,51 @@ public void testDecommissionWithIncludeHosts() throws Exception { } /** + * Remove a node from all lists and check if its forgotten + */ + @Test + public void testNodeRemoval() throws Exception { + + writeToHostsFile("localhost", "host1", "host2"); + Configuration conf = new Configuration(); + conf.set(YarnConfiguration.RM_NODES_INCLUDE_FILE_PATH, hostFile + .getAbsolutePath()); + + rm = new MockRM(conf); + rm.start(); + + MockNM nm1 = rm.registerNode("host1:1234", 5120); + MockNM nm2 = rm.registerNode("host2:5678", 10240); + MockNM nm3 = rm.registerNode("localhost:4433", 1024); + + ClusterMetrics metrics = ClusterMetrics.getMetrics(); + assert(metrics != null); + + NodeHeartbeatResponse nodeHeartbeat = nm1.nodeHeartbeat(true); + Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat.getNodeAction())); + nodeHeartbeat = nm2.nodeHeartbeat(true); + Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat.getNodeAction())); + nodeHeartbeat = nm3.nodeHeartbeat(true); + Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat.getNodeAction())); + + // To test that IPs also work + String ip = NetUtils.normalizeHostName("localhost"); + writeToHostsFile("host1", ip); + + rm.getNodesListManager().refreshNodes(conf); + + nodeHeartbeat = nm1.nodeHeartbeat(true); + Thread.sleep(100); + Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat.getNodeAction())); + Assert.assertTrue("Node should not be in active node list", + !rm.getRMContext().getRMNodes().containsKey(nm2.getNodeId())); + Assert.assertTrue("Node should not be in inactive node list", + !rm.getRMContext().getInactiveRMNodes().containsKey(nm2.getNodeId())); + int metricCount = metrics.getNumDecommisionedNMs(); + Assert.assertTrue("The node should not be decommissioned!", + metricCount == 0); + } + /** * Decommissioning using a pre-configured exclude hosts file */ @Test