Index: hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java =================================================================== --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java (revision 1564637) +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java (working copy) @@ -226,11 +226,15 @@ new Thread() { @Override public void run() { - LOG.info("Notifying ContainerManager to block new container-requests"); - containerManager.setBlockNewContainerRequests(true); - LOG.info("Cleaning up running containers on resync"); - containerManager.cleanupContainersOnNMResync(); - ((NodeStatusUpdaterImpl) nodeStatusUpdater ).rebootNodeStatusUpdater(); + try { + LOG.info("Notifying ContainerManager to block new container-requests"); + containerManager.setBlockNewContainerRequests(true); + LOG.info("Cleaning up running containers on resync"); + containerManager.cleanupContainersOnNMResync(); + ((NodeStatusUpdaterImpl) nodeStatusUpdater).rebootNodeStatusUpdater(); + } catch (YarnRuntimeException e) { + shutDown(); + } } }.start(); } Index: hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerResync.java =================================================================== --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerResync.java (revision 1564637) +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerResync.java (working copy) @@ -40,6 +40,7 @@ import org.apache.hadoop.yarn.event.Dispatcher; import org.apache.hadoop.yarn.exceptions.NMNotYetReadyException; import org.apache.hadoop.yarn.exceptions.YarnException; +import org.apache.hadoop.yarn.exceptions.YarnRuntimeException; import org.apache.hadoop.yarn.factories.RecordFactory; import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; import org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl; @@ -137,6 +138,24 @@ Assert.assertFalse(assertionFailedInThread.get()); nm.stop(); } + + @SuppressWarnings("unchecked") + @Test + public void testNMshutdownWhenResyncThrowException() throws IOException, + InterruptedException, YarnException { + NodeManager nm = new TestNodeManager3(); + YarnConfiguration conf = createNMConfig(); + nm.init(conf); + nm.start(); + Assert.assertEquals(1, ((TestNodeManager3) nm).getNMRegistrationCount()); + nm.getNMDispatcher().getEventHandler() + .handle(new NodeManagerEvent(NodeManagerEventType.RESYNC)); + try { + Thread.sleep(2 * 1000); + } catch (InterruptedException e) { + } + Assert.assertTrue(assertionFailedInThread.get()); + } private YarnConfiguration createNMConfig() { YarnConfiguration conf = new YarnConfiguration(); @@ -322,4 +341,43 @@ } } } + + class TestNodeManager3 extends NodeManager { + + private int registrationCount=0; + @Override + protected NodeStatusUpdater createNodeStatusUpdater(Context context, + Dispatcher dispatcher, NodeHealthCheckerService healthChecker) { + return new TestNodeStatusUpdaterImpl3(context, dispatcher, + healthChecker, metrics); + } + + public int getNMRegistrationCount() { + return registrationCount; + } + + @Override + protected void shutDown() { + assertionFailedInThread.set(true); + } + + class TestNodeStatusUpdaterImpl3 extends MockNodeStatusUpdater { + + public TestNodeStatusUpdaterImpl3(Context context, Dispatcher dispatcher, + NodeHealthCheckerService healthChecker, NodeManagerMetrics metrics) { + super(context, dispatcher, healthChecker, metrics); + } + + @Override + protected void registerWithRM() throws YarnException, IOException { + super.registerWithRM(); + registrationCount++; + } + + @Override + protected void rebootNodeStatusUpdater() { + throw new YarnRuntimeException("Registration with RM failed."); + } + } + } }