diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java index 5d75a21..e89ff36 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java @@ -143,6 +143,16 @@ private static void addDeprecatedKeys() { RM_PREFIX + "client.thread-count"; public static final int DEFAULT_RM_CLIENT_THREAD_COUNT = 50; + /** Number of threads ApplicationMasterLauncher uses.*/ + public static final String RM_AMLAUNCHER_THREAD_COUNT = + RM_PREFIX + "amlauncher.thread-count"; + public static final int DEFAULT_RM_AMLAUNCHER_THREAD_COUNT = 50; + + /** Retry times on socket timeout for container management.*/ + public static final String RM_CONTAINER_MANAGEMENT_RETRIES = + RM_PREFIX + "container-management.retries"; + public static final int DEFAULT_RM_CONTAINER_MANAGEMENT_RETRIES = 10; + /** The Kerberos principal for the resource manager.*/ public static final String RM_PRINCIPAL = RM_PREFIX + "principal"; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/impl/pb/client/ContainerManagementProtocolPBClientImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/impl/pb/client/ContainerManagementProtocolPBClientImpl.java index 15397e3..f26708f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/impl/pb/client/ContainerManagementProtocolPBClientImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/impl/pb/client/ContainerManagementProtocolPBClientImpl.java @@ -24,6 +24,7 @@ import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.apache.hadoop.ipc.ProtobufRpcEngine; import org.apache.hadoop.ipc.RPC; import org.apache.hadoop.net.NetUtils; @@ -73,10 +74,15 @@ public ContainerManagementProtocolPBClientImpl(long clientVersion, UserGroupInformation ugi = UserGroupInformation.getCurrentUser(); int expireIntvl = conf.getInt(NM_COMMAND_TIMEOUT, DEFAULT_COMMAND_TIMEOUT); + Configuration newConf = new YarnConfiguration(conf); + newConf.setInt(CommonConfigurationKeysPublic. + IPC_CLIENT_CONNECT_MAX_RETRIES_ON_SOCKET_TIMEOUTS_KEY, + conf.getInt(YarnConfiguration.RM_CONTAINER_MANAGEMENT_RETRIES, + YarnConfiguration.DEFAULT_RM_CONTAINER_MANAGEMENT_RETRIES)); proxy = (ContainerManagementProtocolPB) RPC.getProxy(ContainerManagementProtocolPB.class, - clientVersion, addr, ugi, conf, - NetUtils.getDefaultSocketFactory(conf), expireIntvl); + clientVersion, addr, ugi, newConf, + NetUtils.getDefaultSocketFactory(newConf), expireIntvl); } @Override diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/amlauncher/ApplicationMasterLauncher.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/amlauncher/ApplicationMasterLauncher.java index 5fc39fd..6715455 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/amlauncher/ApplicationMasterLauncher.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/amlauncher/ApplicationMasterLauncher.java @@ -24,7 +24,9 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.service.AbstractService; +import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.event.EventHandler; import org.apache.hadoop.yarn.server.resourcemanager.RMContext; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt; @@ -34,7 +36,7 @@ EventHandler { private static final Log LOG = LogFactory.getLog( ApplicationMasterLauncher.class); - private final ThreadPoolExecutor launcherPool; + private ThreadPoolExecutor launcherPool; private LauncherThread launcherHandlingThread; private final BlockingQueue masterEvents @@ -45,12 +47,20 @@ public ApplicationMasterLauncher(RMContext context) { super(ApplicationMasterLauncher.class.getName()); this.context = context; - this.launcherPool = new ThreadPoolExecutor(10, 10, 1, - TimeUnit.HOURS, new LinkedBlockingQueue()); this.launcherHandlingThread = new LauncherThread(); } @Override + protected void serviceInit(Configuration conf) throws Exception { + int threadCount = conf.getInt( + YarnConfiguration.RM_AMLAUNCHER_THREAD_COUNT, + YarnConfiguration.DEFAULT_RM_AMLAUNCHER_THREAD_COUNT); + this.launcherPool = new ThreadPoolExecutor(threadCount, threadCount, 1, + TimeUnit.HOURS, new LinkedBlockingQueue()); + super.serviceInit(conf); + } + + @Override protected void serviceStart() throws Exception { launcherHandlingThread.start(); super.serviceStart();