diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java index 52fff14..9519e50 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java @@ -1239,17 +1239,66 @@ private static void addDeprecatedKeys() { public static final long DEFAULT_NM_PROCESS_KILL_WAIT_MS = 2000; - /** Max time to wait to establish a connection to RM */ + /*is replaced by ApplicationMaster/ResourceTracker/ApplicationClient setting*/ + @Deprecated public static final String RESOURCEMANAGER_CONNECT_MAX_WAIT_MS = RM_PREFIX + "connect.max-wait.ms"; - public static final long DEFAULT_RESOURCEMANAGER_CONNECT_MAX_WAIT_MS = - 15 * 60 * 1000; - - /** Time interval between each attempt to connect to RM */ + @Deprecated public static final String RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_MS = RM_PREFIX + "connect.retry-interval.ms"; - public static final long DEFAULT_RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_MS - = 30 * 1000; + + /** Max time for ResourceTracker to establish a connection to RM */ + public static final String + RESOURCETRACKER_RESOURCEMANAGER_CONNECT_MAX_WAIT_MS = + "yarn.resourcetracker.rm.connect.max-wait.ms"; + public static final long + DEFAULT_RESOURCETRACKER_RESOURCEMANAGER_CONNECT_MAX_WAIT_MS = + 10 * 60 * 1000; + + /** Time interval between each attempt in + * ResourceTracker to connect to RM */ + public static final String + RESOURCETRACKER_RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_MS = + "yarn.resourcetracker.rm.connect.retry-interval.ms"; + public static final long + DEFAULT_RESOURCETRACKER_RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_MS = + 30 * 1000; + + /** Max time for ApplicationMasterProtocol + * to wait to establish a connection to RM */ + public static final String + APPLICATIONMASTER_RESOURCEMANAGER_CONNECT_MAX_WAIT_MS = + "yarn.applicationmaster.rm.connect.max-wait.ms"; + public static final long + DEFAULT_APPLICATIONMASTER_RESOURCEMANAGER_CONNECT_MAX_WAIT_MS = + 10 * 60 * 1000; + + /** Time interval between each attempt in + * ApplicationMasterProtocol to connect to RM */ + public static final String + APPLICATIONMASTER_RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_MS = + "yarn.applicationmaster.rm.connect.retry-interval.ms"; + public static final long + DEFAULT_APPLICATIONMASTER_RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_MS = + 30 * 1000; + + /** Max time for ApplicationClientProtocol to + * wait to establish a connection to RM */ + public static final String + APPLICATIONCLIENT_RESOURCEMANAGER_CONNECT_MAX_WAIT_MS = + "yarn.applicationclient.rm.connect.max-wait.ms"; + public static final long + DEFAULT_APPLICATIONCLIENT_RESOURCEMANAGER_CONNECT_MAX_WAIT_MS = + 15 * 60 * 1000; + + /** Time interval between each attempt in + * ApplicationClientProtocol to connect to RM */ + public static final String + APPLICATIONCLIENT_RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_MS = + "yarn.applicationclient.rm.connect.retry-interval.ms"; + public static final long + DEFAULT_APPLICATIONCLIENT_RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_MS = + 30 * 1000; /** * CLASSPATH for YARN applications. A comma-separated list of CLASSPATH diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/RMProxy.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/RMProxy.java index 28628f3..04945d6 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/RMProxy.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/RMProxy.java @@ -88,7 +88,7 @@ protected InetSocketAddress getRMAddress( YarnConfiguration conf = (configuration instanceof YarnConfiguration) ? (YarnConfiguration) configuration : new YarnConfiguration(configuration); - RetryPolicy retryPolicy = createRetryPolicy(conf); + RetryPolicy retryPolicy = createRetryPolicy(conf, protocol); if (HAUtil.isHAEnabled(conf)) { RMFailoverProxyProvider provider = instance.createRMFailoverProxyProvider(conf, protocol); @@ -119,7 +119,7 @@ protected InetSocketAddress getRMAddress( @Deprecated public static T createRMProxy(final Configuration conf, final Class protocol, InetSocketAddress rmAddress) throws IOException { - RetryPolicy retryPolicy = createRetryPolicy(conf); + RetryPolicy retryPolicy = createRetryPolicy(conf, protocol); T proxy = RMProxy.getProxy(conf, protocol, rmAddress); LOG.info("Connecting to ResourceManager at " + rmAddress); return (T) RetryProxy.create(protocol, proxy, retryPolicy); @@ -164,36 +164,58 @@ public T run() { return provider; } + private static long getRmConnectWaitMS(Configuration conf, final Class protocol) { + if (protocol.getSimpleName().equals("ApplicationMasterProtocol")) { + return conf.getLong(YarnConfiguration.APPLICATIONMASTER_RESOURCEMANAGER_CONNECT_MAX_WAIT_MS, + YarnConfiguration.DEFAULT_APPLICATIONMASTER_RESOURCEMANAGER_CONNECT_MAX_WAIT_MS); + } else if (protocol.getSimpleName().equals("ApplicationClientProtocol")) { + return conf.getLong(YarnConfiguration.APPLICATIONCLIENT_RESOURCEMANAGER_CONNECT_MAX_WAIT_MS, + YarnConfiguration.DEFAULT_APPLICATIONCLIENT_RESOURCEMANAGER_CONNECT_MAX_WAIT_MS); + } else if (protocol.getSimpleName().equals("ResourceTracker")) { + return conf.getLong(YarnConfiguration.RESOURCETRACKER_RESOURCEMANAGER_CONNECT_MAX_WAIT_MS, + YarnConfiguration.DEFAULT_RESOURCETRACKER_RESOURCEMANAGER_CONNECT_MAX_WAIT_MS); + } else { + throw new YarnRuntimeException("unknown protocol," + protocol.getName()); + } + } + + private static long getRmConnectRetryIntervalMS(Configuration conf, final Class protocol) { + if (protocol.getSimpleName().equals("ApplicationMasterProtocol")) { + return conf.getLong(YarnConfiguration.APPLICATIONMASTER_RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_MS, + YarnConfiguration.DEFAULT_APPLICATIONMASTER_RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_MS); + } else if (protocol.getSimpleName().equals("ApplicationClientProtocol")) { + return conf.getLong(YarnConfiguration.APPLICATIONCLIENT_RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_MS, + YarnConfiguration.DEFAULT_APPLICATIONCLIENT_RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_MS); + } else if (protocol.getSimpleName().equals("ResourceTracker")) { + return conf.getLong(YarnConfiguration.RESOURCETRACKER_RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_MS, + YarnConfiguration.DEFAULT_RESOURCETRACKER_RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_MS); + } else { + throw new YarnRuntimeException("unknown protocol," + protocol.getName()); + } + } + /** * Fetch retry policy from Configuration */ @Private @VisibleForTesting - public static RetryPolicy createRetryPolicy(Configuration conf) { - long rmConnectWaitMS = - conf.getLong( - YarnConfiguration.RESOURCEMANAGER_CONNECT_MAX_WAIT_MS, - YarnConfiguration.DEFAULT_RESOURCEMANAGER_CONNECT_MAX_WAIT_MS); - long rmConnectionRetryIntervalMS = - conf.getLong( - YarnConfiguration.RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_MS, - YarnConfiguration - .DEFAULT_RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_MS); + public static RetryPolicy createRetryPolicy(Configuration conf, final Class protocol) { + long rmConnectWaitMS = getRmConnectWaitMS(conf, protocol); + long rmConnectionRetryIntervalMS = getRmConnectRetryIntervalMS(conf, protocol); boolean waitForEver = (rmConnectWaitMS == -1); if (!waitForEver) { if (rmConnectWaitMS < 0) { throw new YarnRuntimeException("Invalid Configuration. " - + YarnConfiguration.RESOURCEMANAGER_CONNECT_MAX_WAIT_MS + + "rm.connect.max-wait.ms for " + protocol.getSimpleName() + " can be -1, but can not be other negative numbers"); } // try connect once if (rmConnectWaitMS < rmConnectionRetryIntervalMS) { - LOG.warn(YarnConfiguration.RESOURCEMANAGER_CONNECT_MAX_WAIT_MS - + " is smaller than " - + YarnConfiguration.RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_MS - + ". Only try connect once."); + LOG.warn("rm.connect.max-wait.ms for" + protocol.getSimpleName() + + " is smaller than rm.connect.retry-interval.ms for" + + protocol.getSimpleName() + ". Only try connect once."); rmConnectWaitMS = 0; } } @@ -226,7 +248,7 @@ public static RetryPolicy createRetryPolicy(Configuration conf) { if (rmConnectionRetryIntervalMS < 0) { throw new YarnRuntimeException("Invalid Configuration. " + - YarnConfiguration.RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_MS + + "rm.connect.retry-interval.ms for " + protocol.getSimpleName() + " should not be negative."); } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml index 1dd88bd..24e9162 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml @@ -160,16 +160,44 @@ - Maximum time to wait to establish connection to + Maximum time for ResourceTracker establish connection to ResourceManager. - yarn.resourcemanager.connect.max-wait.ms + yarn.resourcetracker.rm.connect.max-wait.ms + 600000 + + + + How often for ResourceTracker to try connecting to the + ResourceManager. + yarn.resourcetracker.rm.connect.retry-interval.ms + 30000 + + + + Maximum time for ApplicationMasterProtocol to wait to establish + connection to ResourceManager. + yarn.applicationmaster.rm.connect.max-wait.ms + 600000 + + + + How often for ApplicationMasterProtocol to try connecting to the + ResourceManager. + yarn.applicationmaster.rm.connect.retry-interval.ms + 30000 + + + + Maximum time for ApplicationClientProtocol to wait to establish + connection to ResourceManager. + yarn.applicationclient.rm.connect.max-wait.ms 900000 - How often to try connecting to the + How often for ApplicationClientProtocol to try connecting to the ResourceManager. - yarn.resourcemanager.connect.retry-interval.ms + yarn.applicationclient.rm.connect.retry-interval.ms 30000 diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java index fc404de..5149bc9 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java @@ -389,7 +389,7 @@ protected void serviceStart() throws Exception { @Override protected ResourceTracker getRMClient() throws IOException { - RetryPolicy retryPolicy = RMProxy.createRetryPolicy(conf); + RetryPolicy retryPolicy = RMProxy.createRetryPolicy(conf, ResourceTracker.class); resourceTracker = (ResourceTracker) RetryProxy.create(ResourceTracker.class, new MyResourceTracker6(rmStartIntervalMS, rmNeverStart), @@ -422,7 +422,7 @@ public MyNodeStatusUpdater5(Context context, Dispatcher dispatcher, @Override protected ResourceTracker getRMClient() { - RetryPolicy retryPolicy = RMProxy.createRetryPolicy(conf); + RetryPolicy retryPolicy = RMProxy.createRetryPolicy(conf, ResourceTracker.class); return (ResourceTracker) RetryProxy.create(ResourceTracker.class, resourceTracker, retryPolicy); } @@ -1194,9 +1194,9 @@ public void testNMConnectionToRM() throws Exception { final long connectionRetryIntervalMs = 1000; //Waiting for rmStartIntervalMS, RM will be started final long rmStartIntervalMS = 2*1000; - conf.setLong(YarnConfiguration.RESOURCEMANAGER_CONNECT_MAX_WAIT_MS, + conf.setLong(YarnConfiguration.RESOURCETRACKER_RESOURCEMANAGER_CONNECT_MAX_WAIT_MS, connectionWaitMs); - conf.setLong(YarnConfiguration.RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_MS, + conf.setLong(YarnConfiguration.RESOURCETRACKER_RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_MS, connectionRetryIntervalMs); //Test NM try to connect to RM Several times, but finally fail @@ -1392,10 +1392,10 @@ public void testNodeStatusUpdaterRetryAndNMShutdown() final long connectionWaitSecs = 1000; final long connectionRetryIntervalMs = 1000; YarnConfiguration conf = createNMConfig(); - conf.setLong(YarnConfiguration.RESOURCEMANAGER_CONNECT_MAX_WAIT_MS, + conf.setLong(YarnConfiguration.RESOURCETRACKER_RESOURCEMANAGER_CONNECT_MAX_WAIT_MS, connectionWaitSecs); conf.setLong(YarnConfiguration - .RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_MS, + .RESOURCETRACKER_RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_MS, connectionRetryIntervalMs); conf.setLong(YarnConfiguration.NM_SLEEP_DELAY_BEFORE_SIGKILL_MS, 5000); conf.setLong(YarnConfiguration.NM_LOG_RETAIN_SECONDS, 1);