diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java index 9701775..52cdf88 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java @@ -520,11 +520,11 @@ public void transition(RMNodeImpl rmNode, RMNodeEvent event) { RMNodeStartedEvent startEvent = (RMNodeStartedEvent) event; List containers = null; - String host = rmNode.nodeId.getHost(); - if (rmNode.context.getInactiveRMNodes().containsKey(host)) { + String node = rmNode.toString(); + if (rmNode.context.getInactiveRMNodes().containsKey(node)) { // Old node rejoining - RMNode previouRMNode = rmNode.context.getInactiveRMNodes().get(host); - rmNode.context.getInactiveRMNodes().remove(host); + RMNode previouRMNode = rmNode.context.getInactiveRMNodes().get(node); + rmNode.context.getInactiveRMNodes().remove(node); rmNode.updateMetricsForRejoinedNode(previouRMNode.getState()); } else { // Increment activeNodes explicitly because this is a new node. @@ -731,7 +731,7 @@ public void transition(RMNodeImpl rmNode, RMNodeEvent event) { rmNode.context.getRMNodes().remove(rmNode.nodeId); LOG.info("Deactivating Node " + rmNode.nodeId + " as it is now " + finalState); - rmNode.context.getInactiveRMNodes().put(rmNode.nodeId.getHost(), rmNode); + rmNode.context.getInactiveRMNodes().put(rmNode.toString(), rmNode); //Update the metrics rmNode.updateMetricsForDeactivatedNode(initialState, finalState); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMWebServices.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMWebServices.java index 1834b6a..06e005c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMWebServices.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMWebServices.java @@ -292,7 +292,7 @@ public NodeInfo getNode(@PathParam("nodeId") String nodeId) { RMNode ni = this.rm.getRMContext().getRMNodes().get(nid); boolean isInactive = false; if (ni == null) { - ni = this.rm.getRMContext().getInactiveRMNodes().get(nid.getHost()); + ni = this.rm.getRMContext().getInactiveRMNodes().get(nid.toString()); if (ni == null) { throw new NotFoundException("nodeId, " + nodeId + ", is not found"); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMNodeTransitions.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMNodeTransitions.java index c6da3fd..7da945f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMNodeTransitions.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMNodeTransitions.java @@ -307,6 +307,47 @@ public void testRunningExpire() { } @Test + public void testRunningExpireMultiple() { + RMNodeImpl node1 = getRunningNode(null, 10001); + RMNodeImpl node2 = getRunningNode(null, 10002); + ClusterMetrics cm = ClusterMetrics.getMetrics(); + int initialActive = cm.getNumActiveNMs(); + int initialLost = cm.getNumLostNMs(); + int initialUnhealthy = cm.getUnhealthyNMs(); + int initialDecommissioned = cm.getNumDecommisionedNMs(); + int initialRebooted = cm.getNumRebootedNMs(); + node1.handle(new RMNodeEvent(node1.getNodeID(), RMNodeEventType.EXPIRE)); + Assert.assertEquals("Active Nodes", initialActive - 1, cm.getNumActiveNMs()); + Assert.assertEquals("Lost Nodes", initialLost + 1, cm.getNumLostNMs()); + Assert.assertEquals("Unhealthy Nodes", initialUnhealthy, + cm.getUnhealthyNMs()); + Assert.assertEquals("Decommissioned Nodes", initialDecommissioned, + cm.getNumDecommisionedNMs()); + Assert.assertEquals("Rebooted Nodes", initialRebooted, + cm.getNumRebootedNMs()); + Assert.assertEquals(NodeState.LOST, node1.getState()); + Assert.assertTrue("Node " + node1.toString() + " should be inactive", + rmContext.getInactiveRMNodes().containsKey(node1.toString())); + Assert.assertFalse("Node " + node2.toString() + " should not be inactive", + rmContext.getInactiveRMNodes().containsKey(node2.toString())); + + node2.handle(new RMNodeEvent(node1.getNodeID(), RMNodeEventType.EXPIRE)); + Assert.assertEquals("Active Nodes", initialActive - 2, cm.getNumActiveNMs()); + Assert.assertEquals("Lost Nodes", initialLost + 2, cm.getNumLostNMs()); + Assert.assertEquals("Unhealthy Nodes", initialUnhealthy, + cm.getUnhealthyNMs()); + Assert.assertEquals("Decommissioned Nodes", initialDecommissioned, + cm.getNumDecommisionedNMs()); + Assert.assertEquals("Rebooted Nodes", initialRebooted, + cm.getNumRebootedNMs()); + Assert.assertEquals(NodeState.LOST, node2.getState()); + Assert.assertTrue("Node " + node1.toString() + " should be inactive", + rmContext.getInactiveRMNodes().containsKey(node1.toString())); + Assert.assertTrue("Node " + node2.toString() + " should be inactive", + rmContext.getInactiveRMNodes().containsKey(node2.toString())); + } + + @Test public void testUnhealthyExpire() { RMNodeImpl node = getUnhealthyNode(); ClusterMetrics cm = ClusterMetrics.getMetrics(); @@ -458,14 +499,18 @@ public void testUpdateHeartbeatResponseForCleanup() { } private RMNodeImpl getRunningNode() { - return getRunningNode(null); + return getRunningNode(null, 0); } private RMNodeImpl getRunningNode(String nmVersion) { - NodeId nodeId = BuilderUtils.newNodeId("localhost", 0); + return getRunningNode(nmVersion, 0); + } + + private RMNodeImpl getRunningNode(String nmVersion, int port) { + NodeId nodeId = BuilderUtils.newNodeId("localhost", port); Resource capability = Resource.newInstance(4096, 4); - RMNodeImpl node = new RMNodeImpl(nodeId, rmContext,null, 0, 0, - null, capability, nmVersion); + RMNodeImpl node = new RMNodeImpl(nodeId, rmContext, null, 0, 0, null, + capability, nmVersion); node.handle(new RMNodeStartedEvent(node.getNodeID(), null, null)); Assert.assertEquals(NodeState.RUNNING, node.getState()); return node; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesNodes.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesNodes.java index f507e17..92c6ecb 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesNodes.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesNodes.java @@ -263,8 +263,8 @@ public void testNodesQueryStateLost() throws JSONException, Exception { assertEquals("incorrect number of elements", 2, nodeArray.length()); for (int i = 0; i < nodeArray.length(); ++i) { JSONObject info = nodeArray.getJSONObject(i); - String host = info.get("id").toString().split(":")[0]; - RMNode rmNode = rm.getRMContext().getInactiveRMNodes().get(host); + String node = info.get("id").toString(); + RMNode rmNode = rm.getRMContext().getInactiveRMNodes().get(node); WebServicesTestUtils.checkStringMatch("nodeHTTPAddress", "", info.getString("nodeHTTPAddress")); WebServicesTestUtils.checkStringMatch("state", rmNode.getState() @@ -295,7 +295,7 @@ public void testSingleNodeQueryStateLost() throws JSONException, Exception { assertEquals("Incorrect Node Information.", "h2:1234", id); - RMNode rmNode = rm.getRMContext().getInactiveRMNodes().get("h2"); + RMNode rmNode = rm.getRMContext().getInactiveRMNodes().get("h2:1234"); WebServicesTestUtils.checkStringMatch("nodeHTTPAddress", "", info.getString("nodeHTTPAddress")); WebServicesTestUtils.checkStringMatch("state",