diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java index 3ce6416..611f988 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java @@ -419,7 +419,12 @@ private void updateMetricsForRejoinedNode(NodeState previousNodeState) { metrics.decrNumRebootedNMs(); break; case DECOMMISSIONED: - metrics.decrDecommisionedNMs(); + Set ecludedHosts = + context.getNodesListManager().getHostsReader().getExcludedHosts(); + if (!ecludedHosts.contains(hostName) + && !ecludedHosts.contains(NetUtils.normalizeHostName(hostName))) { + metrics.decrDecommisionedNMs(); + } break; case UNHEALTHY: metrics.decrNumUnhealthyNMs(); diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceTrackerService.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceTrackerService.java index 4827620..1eef5e4 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceTrackerService.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceTrackerService.java @@ -156,18 +156,27 @@ public void testDecommissionWithExcludeHosts() throws Exception { .getAbsolutePath()); writeToHostsFile(""); - rm = new MockRM(conf); + + final DrainDispatcher dispatcher = new DrainDispatcher(); + rm = new MockRM(conf) { + @Override + protected Dispatcher createDispatcher() { + return dispatcher; + } + }; rm.start(); MockNM nm1 = rm.registerNode("host1:1234", 5120); MockNM nm2 = rm.registerNode("host2:5678", 10240); MockNM nm3 = rm.registerNode("localhost:4433", 1024); + dispatcher.await(); int metricCount = ClusterMetrics.getMetrics().getNumDecommisionedNMs(); NodeHeartbeatResponse nodeHeartbeat = nm1.nodeHeartbeat(true); Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat.getNodeAction())); nodeHeartbeat = nm2.nodeHeartbeat(true); Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat.getNodeAction())); + dispatcher.await(); // To test that IPs also work String ip = NetUtils.normalizeHostName("localhost"); @@ -178,6 +187,7 @@ public void testDecommissionWithExcludeHosts() throws Exception { nodeHeartbeat = nm1.nodeHeartbeat(true); Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat.getNodeAction())); + nodeHeartbeat = nm2.nodeHeartbeat(true); Assert.assertTrue("The decommisioned metrics are not updated", NodeAction.SHUTDOWN.equals(nodeHeartbeat.getNodeAction())); @@ -185,6 +195,20 @@ public void testDecommissionWithExcludeHosts() throws Exception { nodeHeartbeat = nm3.nodeHeartbeat(true); Assert.assertTrue("The decommisioned metrics are not updated", NodeAction.SHUTDOWN.equals(nodeHeartbeat.getNodeAction())); + + dispatcher.await(); + + writeToHostsFile(""); + rm.getNodesListManager().refreshNodes(conf); + checkDecommissionedNMCount(rm, metricCount); + + nm3 = rm.registerNode("localhost:4433", 1024); + dispatcher.await(); + nodeHeartbeat = nm3.nodeHeartbeat(true); + dispatcher.await(); + Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat.getNodeAction())); + + checkDecommissionedNMCount(rm, metricCount); } /** @@ -630,8 +654,6 @@ private void checkDecommissionedNMCount(MockRM rm, int count) wait(100); } } - Assert.assertEquals(count, ClusterMetrics.getMetrics() - .getNumDecommisionedNMs()); Assert.assertEquals("The decommisioned metrics are not updated", count, ClusterMetrics.getMetrics().getNumDecommisionedNMs()); }