diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java
index e2548873a6..d56b3eaa3d 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java
@@ -1393,16 +1393,16 @@ public static String getExitCodeFile(String pidFile) {
   private void recordContainerLogDir(ContainerId containerId,
       String logDir) throws IOException{
     container.setLogDir(logDir);
-    if (container.isRetryContextSet()) {
-      context.getNMStateStore().storeContainerLogDir(containerId, logDir);
-    }
+    // Always persist the log dir; previously this was skipped unless a
+    // retry context was set on the container.
+    context.getNMStateStore().storeContainerLogDir(containerId, logDir);
   }
 
   private void recordContainerWorkDir(ContainerId containerId,
       String workDir) throws IOException{
     container.setWorkDir(workDir);
-    if (container.isRetryContextSet()) {
-      context.getNMStateStore().storeContainerWorkDir(containerId, workDir);
-    }
+    // Always persist the work dir (not only when a retry context is set) so
+    // it can be recovered for a running container after an NM restart.
+    context.getNMStateStore().storeContainerWorkDir(containerId, workDir);
   }
 }
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java
index 6eea77b5a1..0f6bd354bd 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java
@@ -61,6 +61,7 @@
 import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetContainerStatusesRequestPBImpl;
 import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.StartContainersRequestPBImpl;
 import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.StopContainersRequestPBImpl;
+import org.apache.hadoop.yarn.api.records.ApplicationAccessType;
 import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
 import org.apache.hadoop.yarn.api.records.ApplicationId;
 import org.apache.hadoop.yarn.api.records.ContainerExitStatus;
@@ -87,6 +88,8 @@
 import org.apache.hadoop.yarn.security.ContainerTokenIdentifier;
 import org.apache.hadoop.yarn.security.NMTokenIdentifier;
 import org.apache.hadoop.yarn.server.api.ResourceManagerConstants;
+import org.apache.hadoop.yarn.server.api.records.MasterKey;
+import org.apache.hadoop.yarn.server.api.records.impl.pb.MasterKeyPBImpl;
 import org.apache.hadoop.yarn.server.nodemanager.CMgrCompletedAppsEvent;
 import org.apache.hadoop.yarn.server.nodemanager.CMgrSignalContainersEvent;
 import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor;
@@ -103,6 +106,11 @@
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.ResourceLocalizationService;
 import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerSignalContext;
 import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerStartContext;
+import org.apache.hadoop.yarn.server.nodemanager.recovery.NMMemoryStateStoreService;
+import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService;
+import org.apache.hadoop.yarn.server.nodemanager.security.NMContainerTokenSecretManager;
+import org.apache.hadoop.yarn.server.nodemanager.security.NMTokenSecretManagerInNM;
+import org.apache.hadoop.yarn.server.security.ApplicationACLsManager;
 import org.apache.hadoop.yarn.server.utils.BuilderUtils;
 import org.junit.Assert;
 import org.junit.Before;
@@ -1021,6 +1029,109 @@ public Boolean get() {
     }
   }
 
+  /**
+   * Verifies that, with NM recovery enabled, a new resource can still be
+   * localized for a running container after the container manager is
+   * restarted, which relies on the container work dir being recovered.
+   */
+  @Test
+  public void testLocalingResourceWhileContainerRunningAfterNMRestart()
+      throws Exception {
+    conf.setBoolean(YarnConfiguration.NM_RECOVERY_ENABLED, true);
+    conf.setBoolean(YarnConfiguration.NM_RECOVERY_SUPERVISED, true);
+    NMStateStoreService stateStore = new NMMemoryStateStoreService();
+    stateStore.init(conf);
+    stateStore.start();
+    context = new NodeManager.NMContext(new NMContainerTokenSecretManager(
+        conf), new NMTokenSecretManagerInNM(), null,
+        new ApplicationACLsManager(conf), stateStore, false,
+        conf) {
+      public int getHttpPort() {
+        return HTTP_PORT;
+      }
+      @Override
+      public ContainerExecutor getContainerExecutor() {
+        return exec;
+      }
+    };
+    // simulate registration with RM
+    MasterKey masterKey = new MasterKeyPBImpl();
+    masterKey.setKeyId(123);
+    masterKey.setBytes(ByteBuffer.wrap(new byte[] {(byte) 123}));
+    context.getContainerTokenSecretManager().setMasterKey(masterKey);
+    context.getNMTokenSecretManager().setMasterKey(masterKey);
+
+    // Real del service
+    delSrvc = new DeletionService(exec);
+    delSrvc.init(conf);
+    ((NodeManager.NMContext)context).setContainerExecutor(exec);
+    // Create the manager before registering it, so the context references
+    // the new instance rather than whatever containerManager held before.
+    containerManager = createContainerManager(delSrvc);
+    ((NodeManager.NMContext)context).setContainerManager(containerManager);
+    containerManager.init(conf);
+    containerManager.start();
+    // set up local resources
+    Map<String, LocalResource> localResource =
+        setupLocalResources("file", "symLink1");
+    ContainerLaunchContext context =
+        recordFactory.newRecordInstance(ContainerLaunchContext.class);
+    context.setLocalResources(localResource);
+
+    // a long running container - sleep
+    context.setCommands(Arrays.asList("sleep 15"));
+    ContainerId cId = createContainerId(0);
+
+    // start the container
+    StartContainerRequest scRequest = StartContainerRequest.newInstance(context,
+        createContainerToken(cId, DUMMY_RM_IDENTIFIER, this.context.getNodeId(),
+            user, this.context.getContainerTokenSecretManager()));
+    StartContainersRequest allRequests =
+        StartContainersRequest.newInstance(Arrays.asList(scRequest));
+    containerManager.startContainers(allRequests);
+    BaseContainerManagerTest
+        .waitForContainerState(containerManager, cId, ContainerState.RUNNING);
+
+    BaseContainerManagerTest.waitForApplicationState(containerManager,
+        cId.getApplicationAttemptId().getApplicationId(),
+        ApplicationState.RUNNING);
+    checkResourceLocalized(cId, "symLink1");
+
+    // restart and localize a new resource
+    containerManager.stop();
+    containerManager = createContainerManager(delSrvc);
+    containerManager.init(conf);
+    containerManager.start();
+    assertEquals(1, this.context.getApplications().size());
+    BaseContainerManagerTest
+        .waitForContainerState(containerManager, cId, ContainerState.RUNNING);
+    BaseContainerManagerTest.waitForApplicationState(containerManager,
+        cId.getApplicationAttemptId().getApplicationId(),
+        ApplicationState.RUNNING);
+    // Localize new local resources while container is running
+    Map<String, LocalResource> localResource2 =
+        setupLocalResources("file2", "symLink2");
+    ResourceLocalizationRequest request =
+        ResourceLocalizationRequest.newInstance(cId, localResource2);
+    // Container work dir should be recovered and not null
+    Assert.assertNotNull(this.context.getContainers().get(cId).getWorkDir());
+    containerManager.localize(request);
+
+    // Verify resource is localized and symlink is created.
+    GenericTestUtils.waitFor(new Supplier<Boolean>() {
+      public Boolean get() {
+        try {
+          checkResourceLocalized(cId, "symLink2");
+          return true;
+        } catch (Throwable e) {
+          // Check failed (e.g. not localized yet); report false to waitFor.
+          return false;
+        }
+      }
+    }, 500, 20000);
+    containerManager.stop();
+  }
+
   private void checkResourceLocalized(ContainerId containerId, String symLink) {
     String appId =
         containerId.getApplicationAttemptId().getApplicationId().toString();