diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/NMClientImpl.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/NMClientImpl.java
index 2bf9c93..1e638f2 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/NMClientImpl.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/NMClientImpl.java
@@ -65,6 +65,17 @@
* continue to run even after this client is stopped and till the application
* runs at which point ResourceManager will forcefully kill them.
*
+ *
+ *
+ * Note that the blocking APIs ensure the RPC calls to NodeManager
+ * are executed immediately, and the responses are received before these APIs
+ * return. However, when {@link #startContainer} or {@link #stopContainer}
+ * returns, NodeManager may still need some time to actually start or
+ * stop the container because of its asynchronous
+ * implementation. Therefore, {@link #getContainerStatus} is likely to return a
+ * transitional container status if it is executed immediately after
+ * {@link #startContainer} or {@link #stopContainer}.
+ *
*/
public class NMClientImpl extends AbstractService implements NMClient {
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestNMClient.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestNMClient.java
index 3b4439e..1e3561a 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestNMClient.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestNMClient.java
@@ -269,16 +269,24 @@ private void testContainerManagement(NMClientImpl nmClient,
// leave one container unclosed
if (++i < size) {
- try {
- ContainerStatus status = nmClient.getContainerStatus(container.getId(),
- container.getNodeId(), container.getContainerToken());
- // verify the container is started and in good shape
- assertEquals(container.getId(), status.getContainerId());
- assertEquals(ContainerState.RUNNING, status.getState());
- assertEquals("", status.getDiagnostics());
- assertEquals(-1000, status.getExitStatus());
- } catch (YarnRemoteException e) {
- fail("Exception is not expected");
+ // NodeManager may still need some time to start the container
+ while (true) {
+ try {
+ ContainerStatus status = nmClient.getContainerStatus
+ (container.getId(), container.getNodeId(),
+ container.getContainerToken());
+ if (status.getState() == ContainerState.RUNNING) {
+ assertEquals(container.getId(), status.getContainerId());
+ assertEquals("", status.getDiagnostics());
+ assertEquals(-1000, status.getExitStatus());
+ break;
+ }
+ Thread.sleep(100);
+ } catch (YarnRemoteException e) {
+ fail("Exception is not expected");
+ } catch (InterruptedException e) {
+ e.printStackTrace();
+ }
}
try {
@@ -289,17 +297,27 @@ private void testContainerManagement(NMClientImpl nmClient,
}
// getContainerStatus can be called after stopContainer
- try {
- ContainerStatus status = nmClient.getContainerStatus(
- container.getId(), container.getNodeId(),
- container.getContainerToken());
- assertEquals(container.getId(), status.getContainerId());
- assertEquals(ContainerState.RUNNING, status.getState());
- assertTrue("" + i, status.getDiagnostics().contains(
- "Container killed by the ApplicationMaster."));
- assertEquals(-1000, status.getExitStatus());
- } catch (YarnRemoteException e) {
- fail("Exception is not expected");
+ while (true) {
+ try {
+ ContainerStatus status = nmClient.getContainerStatus(
+ container.getId(), container.getNodeId(),
+ container.getContainerToken());
+ // NodeManager may still need some time to finish stopping the
+ // container
+ if (status.getState() == ContainerState.COMPLETE) {
+ assertEquals(container.getId(), status.getContainerId());
+ assertTrue("" + i + ": " + status.getDiagnostics(),
+ status.getDiagnostics().contains(
+ "Container killed by the ApplicationMaster."));
+ assertEquals(143, status.getExitStatus());
+ break;
+ }
+ Thread.sleep(100);
+ } catch (YarnRemoteException e) {
+ fail("Exception is not expected");
+ } catch (InterruptedException e) {
+ e.printStackTrace();
+ }
}
}
}