diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java index fa778994c8..2aacf8a696 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java @@ -1849,17 +1849,13 @@ public static String getExitCodeFile(String pidFile) { private void recordContainerLogDir(ContainerId containerId, String logDir) throws IOException{ container.setLogDir(logDir); - if (container.isRetryContextSet()) { - context.getNMStateStore().storeContainerLogDir(containerId, logDir); - } + context.getNMStateStore().storeContainerLogDir(containerId, logDir); } private void recordContainerWorkDir(ContainerId containerId, String workDir) throws IOException{ container.setWorkDir(workDir); - if (container.isRetryContextSet()) { - context.getNMStateStore().storeContainerWorkDir(containerId, workDir); - } + context.getNMStateStore().storeContainerWorkDir(containerId, workDir); } protected Path getContainerWorkDir() throws IOException { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/BaseContainerManagerTest.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/BaseContainerManagerTest.java index 93d0afb118..ccd9c10330 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/BaseContainerManagerTest.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/BaseContainerManagerTest.java @@ -31,6 +31,7 @@ import java.io.File; import java.io.IOException; +import java.io.PrintWriter; import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Arrays; @@ -60,6 +61,10 @@ import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.Token; +import org.apache.hadoop.yarn.api.records.URL; +import org.apache.hadoop.yarn.api.records.LocalResource; +import org.apache.hadoop.yarn.api.records.LocalResourceType; +import org.apache.hadoop.yarn.api.records.LocalResourceVisibility; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.event.AsyncDispatcher; import org.apache.hadoop.yarn.exceptions.YarnException; @@ -90,6 +95,8 @@ import org.apache.hadoop.yarn.server.nodemanager.recovery.NMNullStateStoreService; import org.apache.hadoop.yarn.server.nodemanager.security.NMContainerTokenSecretManager; import org.apache.hadoop.yarn.server.nodemanager.security.NMTokenSecretManagerInNM; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.ContainerLocalizer; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.ResourceLocalizationService; import org.apache.hadoop.yarn.server.security.ApplicationACLsManager; import org.apache.hadoop.yarn.server.utils.BuilderUtils; import org.junit.After; @@ -499,4 +506,64 @@ public static ContainerId createContainerId(int cId, int aId) { ContainerId.newContainerId(appAttemptId, cId); return containerId; } + + Map setupLocalResources(String fileName, + String symLink) throws Exception { + // ////// Create the resources for the container + File dir = new File(tmpDir, "dir"); + dir.mkdirs(); + File file = new File(dir, fileName); + PrintWriter fileWriter = new PrintWriter(file); + fileWriter.write("Hello World!"); + fileWriter.close(); + + URL resourceURL = URL.fromPath(FileContext.getLocalFSFileContext() + .makeQualified(new Path(file.getAbsolutePath()))); + LocalResource resource = + recordFactory.newRecordInstance(LocalResource.class); + resource.setResource(resourceURL); + resource.setSize(-1); + resource.setVisibility(LocalResourceVisibility.APPLICATION); + resource.setType(LocalResourceType.FILE); + resource.setTimestamp(file.lastModified()); + Map localResources = + new HashMap(); + localResources.put(symLink, resource); + return localResources; + } + + void checkResourceLocalized(ContainerId containerId, String symLink) { + String appId = + containerId.getApplicationAttemptId().getApplicationId().toString(); + File userCacheDir = new File(localDir, ContainerLocalizer.USERCACHE); + File userDir = new File(userCacheDir, user); + File appCache = new File(userDir, ContainerLocalizer.APPCACHE); + // localDir/usercache/nobody/appcache/application_0_0000 + File appDir = new File(appCache, appId); + // localDir/usercache/nobody/appcache/application_0_0000/container_0_0000_01_000000 + File containerDir = new File(appDir, containerId.toString()); + // localDir/usercache/nobody/appcache/application_0_0000/container_0_0000_01_000000/symLink1 + File targetFile = new File(containerDir, symLink); + + File sysDir = + new File(localDir, ResourceLocalizationService.NM_PRIVATE_DIR); + // localDir/nmPrivate/application_0_0000 + File appSysDir = new File(sysDir, appId); + // localDir/nmPrivate/application_0_0000/container_0_0000_01_000000 + File containerSysDir = new File(appSysDir, containerId.toString()); + + Assert.assertTrue("AppDir " + appDir.getAbsolutePath() + " doesn't exist!!", + appDir.exists()); + Assert.assertTrue( + "AppSysDir " + appSysDir.getAbsolutePath() + " doesn't exist!!", + appSysDir.exists()); + Assert.assertTrue( + "containerDir " + containerDir.getAbsolutePath() + " doesn't exist !", + containerDir.exists()); + Assert.assertTrue("containerSysDir " + containerSysDir.getAbsolutePath() + + " doesn't exist !", containerDir.exists()); + Assert.assertTrue( + "targetFile " + targetFile.getAbsolutePath() + " doesn't exist !!", + targetFile.exists()); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java index 6d198a444b..40fd011b5d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java @@ -1172,31 +1172,6 @@ public void testContainerLaunchAndExitFailure() throws IOException, testContainerLaunchAndExit(exitCode); } - private Map setupLocalResources(String fileName, - String symLink) throws Exception { - // ////// Create the resources for the container - File dir = new File(tmpDir, "dir"); - dir.mkdirs(); - File file = new File(dir, fileName); - PrintWriter fileWriter = new PrintWriter(file); - fileWriter.write("Hello World!"); - fileWriter.close(); - - URL resourceURL = URL.fromPath(FileContext.getLocalFSFileContext() - .makeQualified(new Path(file.getAbsolutePath()))); - LocalResource resource = - recordFactory.newRecordInstance(LocalResource.class); - resource.setResource(resourceURL); - resource.setSize(-1); - resource.setVisibility(LocalResourceVisibility.APPLICATION); - resource.setType(LocalResourceType.FILE); - resource.setTimestamp(file.lastModified()); - Map localResources = - new HashMap(); - localResources.put(symLink, resource); - return localResources; - } - // Start the container // While the container is running, localize new resources. // Verify the symlink is created properly @@ -1268,41 +1243,6 @@ public Boolean get() { } } - private void checkResourceLocalized(ContainerId containerId, String symLink) { - String appId = - containerId.getApplicationAttemptId().getApplicationId().toString(); - File userCacheDir = new File(localDir, ContainerLocalizer.USERCACHE); - File userDir = new File(userCacheDir, user); - File appCache = new File(userDir, ContainerLocalizer.APPCACHE); - // localDir/usercache/nobody/appcache/application_0_0000 - File appDir = new File(appCache, appId); - // localDir/usercache/nobody/appcache/application_0_0000/container_0_0000_01_000000 - File containerDir = new File(appDir, containerId.toString()); - // localDir/usercache/nobody/appcache/application_0_0000/container_0_0000_01_000000/symLink1 - File targetFile = new File(containerDir, symLink); - - File sysDir = - new File(localDir, ResourceLocalizationService.NM_PRIVATE_DIR); - // localDir/nmPrivate/application_0_0000 - File appSysDir = new File(sysDir, appId); - // localDir/nmPrivate/application_0_0000/container_0_0000_01_000000 - File containerSysDir = new File(appSysDir, containerId.toString()); - - Assert.assertTrue("AppDir " + appDir.getAbsolutePath() + " doesn't exist!!", - appDir.exists()); - Assert.assertTrue( - "AppSysDir " + appSysDir.getAbsolutePath() + " doesn't exist!!", - appSysDir.exists()); - Assert.assertTrue( - "containerDir " + containerDir.getAbsolutePath() + " doesn't exist !", - containerDir.exists()); - Assert.assertTrue("containerSysDir " + containerSysDir.getAbsolutePath() - + " doesn't exist !", containerDir.exists()); - Assert.assertTrue( - "targetFile " + targetFile.getAbsolutePath() + " doesn't exist !!", - targetFile.exists()); - } - @Test public void testLocalFilesCleanup() throws InterruptedException, IOException, YarnException { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManagerRecovery.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManagerRecovery.java index bf8b500b87..c99909e852 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManagerRecovery.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManagerRecovery.java @@ -41,6 +41,7 @@ import java.util.List; import java.util.Map; +import com.google.common.base.Supplier; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileContext; import org.apache.hadoop.fs.Path; @@ -49,10 +50,13 @@ import org.apache.hadoop.net.ServerSocketUtil; import org.apache.hadoop.security.Credentials; import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.util.Shell; import org.apache.hadoop.yarn.api.protocolrecords.ContainerUpdateRequest; import org.apache.hadoop.yarn.api.protocolrecords.ContainerUpdateResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesRequest; +import org.apache.hadoop.yarn.api.protocolrecords.ResourceLocalizationRequest; +import org.apache.hadoop.yarn.api.protocolrecords.ResourceLocalizationResponse; import org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest; import org.apache.hadoop.yarn.api.protocolrecords.StartContainersRequest; import org.apache.hadoop.yarn.api.protocolrecords.StartContainersResponse; @@ -491,6 +495,61 @@ public void testContainerSchedulerRecovery() throws Exception { cm.stop(); } + @Test + public void testContainerWorkDirRecovery() + throws Exception { + conf.setBoolean(YarnConfiguration.NM_RECOVERY_ENABLED, true); + conf.setBoolean(YarnConfiguration.NM_RECOVERY_SUPERVISED, true); + NMStateStoreService stateStore = new NMMemoryStateStoreService(); + stateStore.init(conf); + stateStore.start(); + context = createContext(conf, stateStore); + ContainerManagerImpl cm = createContainerManager(context, delSrvc); + ((NMContext) context).setContainerManager(cm); + cm.init(conf); + cm.start(); + + ApplicationId appId = ApplicationId.newInstance(0, 1); + ApplicationAttemptId attemptId = + ApplicationAttemptId.newInstance(appId, 1); + ContainerId cid = ContainerId.newContainerId(attemptId, 1); + commonLaunchContainer(appId, cid, cm); + + // restart and check workDir + cm.stop(); + context = createContext(conf, stateStore); + ((NodeManager.NMContext)context).setContainerExecutor(exec); + cm = createContainerManager(context, delSrvc); + ((NMContext) context).setContainerManager(cm); + cm.init(conf); + cm.start(); + assertEquals(1, this.context.getApplications().size()); + waitForNMContainerState(cm, cid, ContainerState.RUNNING); + waitForApplicationState(cm, appId, ApplicationState.RUNNING); + // Container work dir should be recovered and not null + assertNotNull(this.context.getContainers().get(cid).getWorkDir()); + // Localize new local resources while container is running + // and make sure no Exception will be thrown + Map localResource2 = + setupLocalResources("file2", "symLink2"); + ResourceLocalizationRequest request = + ResourceLocalizationRequest.newInstance(cid, localResource2); + localize(context, cm, cid, request); + + // Verify resource is localized and symlink is created. + GenericTestUtils.waitFor(new Supplier() { + public Boolean get() { + try { + checkResourceLocalized(cid, "symLink2"); + return true; + } catch (Throwable e) { + return false; + } + } + }, 500, 20000); + cm.stop(); + } + @Test public void testResourceMappingRecoveryForContainer() throws Exception { conf.setBoolean(YarnConfiguration.NM_RECOVERY_ENABLED, true); @@ -742,7 +801,7 @@ private StartContainersResponse startContainer(Context context, cid.getApplicationAttemptId().toString()); StartContainerRequest scReq = StartContainerRequest.newInstance( clc, TestContainerManager.createContainerToken(cid, 0, - context.getNodeId(), user.getShortUserName(), + context.getNodeId(), this.user, context.getContainerTokenSecretManager(), logAggregationContext)); final List scReqList = new ArrayList(); @@ -761,6 +820,26 @@ public StartContainersResponse run() throws Exception { }); } + private ResourceLocalizationResponse localize(Context context, + final ContainerManagerImpl cm, ContainerId cid, + ResourceLocalizationRequest request) + throws Exception { + UserGroupInformation ugi = UserGroupInformation.createRemoteUser( + cid.getApplicationAttemptId().toString()); + NMTokenIdentifier nmToken = new NMTokenIdentifier( + cid.getApplicationAttemptId(), context.getNodeId(), + ugi.getShortUserName(), + context.getNMTokenSecretManager().getCurrentKey().getKeyId()); + ugi.addTokenIdentifier(nmToken); + return ugi.doAs( + new PrivilegedExceptionAction() { + @Override + public ResourceLocalizationResponse run() throws Exception { + return cm.localize(request); + } + }); + } + private ContainerUpdateResponse updateContainers( Context context, final ContainerManagerImpl cm, ContainerId cid, Resource capability) throws Exception {