diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java index 2ebf79cfae3..47722935857 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java @@ -1168,6 +1168,15 @@ public static boolean isAclEnabled(Configuration conf) { public static final int DEFAULT_RM_NODEMANAGER_UNTRACKED_REMOVAL_TIMEOUT_MSEC = 60000; + /** + * Whether the RM may mark an inactive node that is in neither the include + * file nor the exclude file as untracked when no include path is configured. + */ + public static final String RM_ENABLE_NODE_UNTRACKED_WITHOUT_INCLUDE_PATH = + RM_PREFIX + "enable-node-untracked-without-include-path"; + public static final boolean + DEFAULT_RM_ENABLE_NODE_UNTRACKED_WITHOUT_INCLUDE_PATH = false; + /** * RM proxy users' prefix */ diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml index b64efba5dca..f6e5cef93b8 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml @@ -4779,4 +4779,13 @@ yarn.resourcemanager.application-tag-based-placement.force-lowercase true + + + + Whether the RM may mark an inactive node that is in neither the include file + nor the exclude file as untracked when no include path is configured. 
+ + yarn.resourcemanager.enable-node-untracked-without-include-path + false + diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/NodesListManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/NodesListManager.java index 07d78cb1c1c..ae601b50712 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/NodesListManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/NodesListManager.java @@ -85,6 +85,7 @@ private Timer removalTimer; private int nodeRemovalCheckInterval; private Set gracefulDecommissionableNodes; + private boolean enableNodeUntrackedWithoutIncludePath; public NodesListManager(RMContext rmContext) { super(NodesListManager.class.getName()); @@ -124,6 +125,9 @@ protected void serviceInit(Configuration conf) throws Exception { disableHostsFileReader(ioe); } + enableNodeUntrackedWithoutIncludePath = conf.getBoolean( + YarnConfiguration.RM_ENABLE_NODE_UNTRACKED_WITHOUT_INCLUDE_PATH, + YarnConfiguration.DEFAULT_RM_ENABLE_NODE_UNTRACKED_WITHOUT_INCLUDE_PATH); final int nodeRemovalTimeout = conf.getInt( YarnConfiguration.RM_NODEMANAGER_UNTRACKED_REMOVAL_TIMEOUT_MSEC, @@ -605,7 +609,10 @@ public boolean isUntrackedNode(String hostName) { Set hostsList = hostDetails.getIncludedHosts(); Set excludeList = hostDetails.getExcludedHosts(); - return !hostsList.isEmpty() && !hostsList.contains(hostName) + return (!hostsList.isEmpty() || (enableNodeUntrackedWithoutIncludePath + && (hostDetails.getIncludesFile() == null + || hostDetails.getIncludesFile().isEmpty()))) + && !hostsList.contains(hostName) && !hostsList.contains(ip) && 
!excludeList.contains(hostName) && !excludeList.contains(ip); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceTrackerService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceTrackerService.java index 5e3e67e6eca..a8410c34ad0 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceTrackerService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceTrackerService.java @@ -18,6 +18,7 @@ package org.apache.hadoop.yarn.server.resourcemanager; +import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.thirdparty.com.google.common.collect.ImmutableMap; import org.apache.hadoop.thirdparty.com.google.common.collect.ImmutableSet; import org.apache.hadoop.net.ServerSocketUtil; @@ -3063,4 +3064,43 @@ public void testSystemCredentialsAfterTokenSequenceNoChange() resourceTrackerService.close(); } + + /** + * Decommissioning without pre-configured include hosts file. + */ + @Test + public void testDecommissionWithoutIncludeFile() throws Exception { + // init conf: + // (1) set untracked removal timeout to 500ms + // (2) set exclude path (no include path) + // (3) enable node untracked without pre-configured include path + Configuration conf = new Configuration(); + conf.setInt(YarnConfiguration.RM_NODEMANAGER_UNTRACKED_REMOVAL_TIMEOUT_MSEC, + 500); + conf.setBoolean( + YarnConfiguration.RM_ENABLE_NODE_UNTRACKED_WITHOUT_INCLUDE_PATH, true); + conf.set(YarnConfiguration.RM_NODES_EXCLUDE_FILE_PATH, + excludeHostFile.getAbsolutePath()); + + rm = new MockRM(conf); + rm.start(); + MockNM nm1 = 
rm.registerNode("host1:1234", 5120); + MockNM nm2 = rm.registerNode("host2:5678", 10240); + nm1.nodeHeartbeat(true); + nm2.nodeHeartbeat(true); + + // add nm1 into exclude hosts + writeToHostsFile(excludeHostFile, "host1"); + rm.getNodesListManager().refreshNodes(conf); + // wait for nm1 to be decommissioned + rm.waitForState(nm1.getNodeId(), NodeState.DECOMMISSIONED); + + // remove nm1 from exclude hosts + writeToHostsFile(excludeHostFile, ""); + rm.getNodesListManager().refreshNodes(conf); + // confirm that nm1 is removed from inactive nodes within 1 second + GenericTestUtils.waitFor( + () -> rm.getRMContext().getInactiveRMNodes().get(nm1.getNodeId()) + == null, 100, 1000); + } }