// Patch summary (descriptive preamble placed before the first "diff --git" header).
//
// Purpose: make the proxy built on ServerProxy.createRetryPolicy treat
// NMNotYetReadyException as a retriable failure, and extend TestNMProxy to
// exercise that path.
//
// File 1 - hadoop-yarn-common/.../yarn/client/ServerProxy.java
//   * Adds the import for org.apache.hadoop.yarn.exceptions.NMNotYetReadyException.
//   * In createRetryPolicy, registers NMNotYetReadyException.class in
//     exceptionToPolicyMap with the same retryPolicy already registered for
//     UnknownHostException, RetriableException and SocketException.
//   * The map is still handed to RetryPolicies.retryByException together with
//     RetryPolicies.TRY_ONCE_THEN_FAIL (presumably the policy applied to
//     exception types that are not in the map - confirm against the
//     RetryPolicies Javadoc).
//
// File 2 - hadoop-yarn-server-nodemanager/.../containermanager/TestNMProxy.java
//   * Adds the import for NMNotYetReadyException and a new field
//     "boolean isExpectingNMNotYetReadyException = false".
//   * Rewrites the overridden startContainers(StartContainersRequest):
//       - flag set, retryCount < 5: increments retryCount, delegates to
//         super.startContainers(requests), asserts that any caught exception
//         is an NMNotYetReadyException and rethrows it;
//       - flag set, retryCount >= 5: calls
//         containerManager.setBlockNewContainerRequests(false) and then
//         delegates to super.startContainers(requests);
//       - flag clear: keeps the previous behaviour - throws
//         java.net.ConnectException("start container exception") for the first
//         five calls, then delegates to super.startContainers(requests).
//   * In testNMProxyRetry, resets retryCount and clears the flag before the
//     first startContainers call, and clears the flag again before the
//     stopContainers and getContainerStatuses calls.
//   * Appends a final phase that calls
//     containerManager.setBlockNewContainerRequests(true), sets the flag,
//     resets retryCount, calls proxy.startContainers(allRequests) and expects
//     retryCount == 5.
//     NOTE(review): this relies on super.startContainers throwing
//     NMNotYetReadyException while new container requests are blocked - that
//     behaviour is not visible in this patch; confirm against ContainerManagerImpl.
//
// NOTE(review): as stored here the hunks are collapsed onto a few very long
// lines (line breaks and indentation inside the hunks are not preserved), so
// the patch will likely not apply as-is - restore the original line structure
// from the upstream patch before applying.
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/ServerProxy.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/ServerProxy.java index 6024560..e9bcf8d 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/ServerProxy.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/ServerProxy.java @@ -38,6 +38,7 @@ import org.apache.hadoop.ipc.RetriableException; import org.apache.hadoop.net.ConnectTimeoutException; import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.yarn.exceptions.NMNotYetReadyException; import org.apache.hadoop.yarn.ipc.YarnRPC; import com.google.common.base.Preconditions; @@ -74,6 +75,7 @@ protected static RetryPolicy createRetryPolicy(Configuration conf, exceptionToPolicyMap.put(UnknownHostException.class, retryPolicy); exceptionToPolicyMap.put(RetriableException.class, retryPolicy); exceptionToPolicyMap.put(SocketException.class, retryPolicy); + exceptionToPolicyMap.put(NMNotYetReadyException.class, retryPolicy); return RetryPolicies.retryByException(RetryPolicies.TRY_ONCE_THEN_FAIL, exceptionToPolicyMap); diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestNMProxy.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestNMProxy.java index 67f540c..7e51fb3 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestNMProxy.java +++ 
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestNMProxy.java @@ -36,6 +36,7 @@ import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.client.NMProxy; import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.exceptions.NMNotYetReadyException; import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.ipc.YarnRPC; import org.apache.hadoop.yarn.security.NMTokenIdentifier; @@ -54,6 +55,7 @@ public TestNMProxy() throws UnsupportedFileSystemException { } int retryCount = 0; + boolean isExpectingNMNotYetReadyException = false; @Before public void setUp() throws Exception { @@ -70,11 +72,26 @@ public void setUp() throws Exception { @Override public StartContainersResponse startContainers( StartContainersRequest requests) throws YarnException, IOException { - if (retryCount < 5) { - retryCount++; - throw new java.net.ConnectException("start container exception"); + if (isExpectingNMNotYetReadyException) { + if (retryCount < 5) { + retryCount++; + try { + return super.startContainers(requests); + } catch (Exception e) { + Assert.assertTrue(e instanceof NMNotYetReadyException); + throw e; + } + } else { + containerManager.setBlockNewContainerRequests(false); + return super.startContainers(requests); + } + } else { + if (retryCount < 5) { + retryCount++; + throw new java.net.ConnectException("start container exception"); + } + return super.startContainers(requests); } - return super.startContainers(requests); } @Override @@ -126,16 +143,26 @@ public void testNMProxyRetry() throws Exception { NMProxy.createNMProxy(conf, ContainerManagementProtocol.class, ugi, YarnRPC.create(conf), address); + retryCount = 0; + isExpectingNMNotYetReadyException = false; proxy.startContainers(allRequests); Assert.assertEquals(5, retryCount); retryCount = 0; + isExpectingNMNotYetReadyException = false; 
proxy.stopContainers(Records.newRecord(StopContainersRequest.class)); Assert.assertEquals(5, retryCount); retryCount = 0; + isExpectingNMNotYetReadyException = false; proxy.getContainerStatuses(Records .newRecord(GetContainerStatusesRequest.class)); Assert.assertEquals(5, retryCount); + + containerManager.setBlockNewContainerRequests(true); + isExpectingNMNotYetReadyException = true; + retryCount = 0; + proxy.startContainers(allRequests); + Assert.assertEquals(5, retryCount); } }