diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/FileSystemRMStateStore.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/FileSystemRMStateStore.java
index a5262b0..5ac1124 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/FileSystemRMStateStore.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/FileSystemRMStateStore.java
@@ -22,6 +22,7 @@
 import java.io.ByteArrayOutputStream;
 import java.io.DataInputStream;
 import java.io.DataOutputStream;
+import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
 
@@ -69,7 +70,8 @@
   private static final String DELEGATION_TOKEN_PREFIX = "RMDelegationToken_";
   private static final String DELEGATION_TOKEN_SEQUENCE_NUMBER_PREFIX =
       "RMDTSequenceNumber_";
-
+  private static final int HDFS_SAFEMODE_MAX_TIME_WAIT_MS = 60000;
+  private static final int HDFS_SAFEMODE_WAIT_INTERVAL_MS = 5000;
   private FileSystem fs;
 
   private Path rootDirPath;
@@ -90,6 +92,41 @@ public synchronized void initInternal(Configuration conf)
 
     // create filesystem
     fs = fsWorkingPath.getFileSystem(conf);
+    long time = System.currentTimeMillis();
+    IOException safeModeException = null;
+    boolean initialized = false;
+    while (System.currentTimeMillis() - time < HDFS_SAFEMODE_MAX_TIME_WAIT_MS) {
+      try {
+        createRootDirs(fs);
+        initialized = true;
+        break;
+      } catch (IOException exception) {
+        // If we are using HDFS and HDFS is in SafeMode, wait and retry.
+        // getMessage() may return null, so guard before matching on it.
+        String message = exception.getMessage();
+        if (message != null && message.contains("Name node is in safe mode.")) {
+          safeModeException = exception;
+          LOG.info("HDFS is still in safe mode, waiting...");
+          Thread.sleep(HDFS_SAFEMODE_WAIT_INTERVAL_MS);
+          continue;
+        }
+        throw exception;
+      }
+    }
+    if (!initialized) {
+      // Wait budget exhausted while still in safe mode; rethrow last failure.
+      throw safeModeException;
+    }
+  }
+
+  /**
+   * Creates the rmDTSecretManagerRoot and rmAppRoot directories.
+   * Protected so that subclasses (for example tests) can override it.
+   *
+   * @param fs file system on which the directories are created
+   * @throws IOException if a mkdirs call fails
+   */
+  protected void createRootDirs(FileSystem fs) throws IOException {
     fs.mkdirs(rmDTSecretManagerRoot);
     fs.mkdirs(rmAppRoot);
   }
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/TestRMStateStore.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/TestRMStateStore.java
index a24af25..4c836db 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/TestRMStateStore.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/TestRMStateStore.java
@@ -25,6 +25,7 @@
 import static org.mockito.Mockito.mock;
 import static org.mockito.Mockito.when;
 
+import java.io.IOException;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.HashSet;
@@ -121,6 +122,25 @@ public void testFSRMStateStore() throws Exception {
     }
   }
 
+  @Test
+  public void testRMRetryHDFSInSafeMode() throws Exception {
+    YarnConfiguration yarnConf = new YarnConfiguration();
+    RMStateStore store = new FileSystemRMStateStore() {
+      int num = 0;
+
+      @Override
+      protected void createRootDirs(FileSystem fs) throws IOException {
+        // Fail the first two attempts with a safe-mode error, then succeed.
+        if (num < 2) {
+          num++;
+          throw new IOException("Name node is in safe mode.");
+        }
+      }
+    };
+    // init should succeed
+    store.init(yarnConf);
+  }
+
   class TestFSRMStateStoreTester implements RMStateStoreHelper {
     Path workingDirPathURI;
     FileSystemRMStateStore store;