diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java index b996952..4343f0c 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java @@ -688,6 +688,10 @@ private synchronized void addNode(RMNode node) { private synchronized void removeNode(RMNode rmNode) { FSSchedulerNode node = nodes.get(rmNode.getNodeID()); + if (node == null) { + LOG.warn(rmNode.getNodeAddress() + " has already been removed."); + return; + } Resources.subtractFrom(clusterCapacity, rmNode.getTotalCapability()); updateRootQueueMetrics(); diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java index bfda5e5..dbee521 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java @@ -270,6 +270,33 @@ public void testAggregateCapacityTracking() throws Exception { } @Test + public void testNodeDeleteAfterDelete() throws Exception { + // Add a node + RMNode node1 = + MockNodes + .newNodeInfo(1, Resources.createResource(1024), 1, "127.0.0.1"); + NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node1); + scheduler.handle(nodeEvent1); + assertEquals(1, scheduler.getNumClusterNodes()); + + // Add another node + RMNode node2 = + MockNodes.newNodeInfo(1, Resources.createResource(512), 2, "127.0.0.2"); + NodeAddedSchedulerEvent nodeEvent2 = new NodeAddedSchedulerEvent(node2); + scheduler.handle(nodeEvent2); + assertEquals(2, scheduler.getNumClusterNodes()); + + // Remove the first node + NodeRemovedSchedulerEvent nodeEvent3 = new NodeRemovedSchedulerEvent(node1); + scheduler.handle(nodeEvent3); + assertEquals(1, scheduler.getNumClusterNodes()); + + // Remove the first node again + scheduler.handle(nodeEvent3); + assertEquals(1, scheduler.getNumClusterNodes()); + } + + @Test public void testSimpleFairShareCalculation() { // Add one big node (only care about aggregate capacity) RMNode node1 =