diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/FileSystemRMStateStore.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/FileSystemRMStateStore.java
index a5262b0..103042d 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/FileSystemRMStateStore.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/FileSystemRMStateStore.java
@@ -22,6 +22,7 @@
 import java.io.ByteArrayOutputStream;
 import java.io.DataInputStream;
 import java.io.DataOutputStream;
+import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
 
@@ -35,6 +36,7 @@
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.server.namenode.SafeModeException;
 import org.apache.hadoop.io.DataInputByteBuffer;
 import org.apache.hadoop.security.Credentials;
 import org.apache.hadoop.security.token.delegation.DelegationKey;
@@ -69,7 +71,10 @@
   private static final String DELEGATION_TOKEN_PREFIX = "RMDelegationToken_";
   private static final String DELEGATION_TOKEN_SEQUENCE_NUMBER_PREFIX =
       "RMDTSequenceNumber_";
-
+  // Total time to keep retrying root dir creation during HDFS safe mode.
+  private static final int HDFS_SAFEMODE_MAX_TIME_WAIT_MS = 60000;
+  // Pause between two consecutive root dir creation attempts.
+  private static final int HDFS_SAFEMODE_WAIT_INTERVAL_MS = 5000;
   private FileSystem fs;
 
   private Path rootDirPath;
@@ -90,6 +95,38 @@ public synchronized void initInternal(Configuration conf)
 
     // create filesystem
     fs = fsWorkingPath.getFileSystem(conf);
+    // HDFS may still be in safe mode at this point; keep retrying the root
+    // directory creation until it succeeds or the wait budget is spent.
+    long startTime = System.currentTimeMillis();
+    SafeModeException safeModeException = null;
+    boolean initialized = false;
+    while (System.currentTimeMillis() - startTime
+        < HDFS_SAFEMODE_MAX_TIME_WAIT_MS) {
+      try {
+        createRootDirs(fs);
+        initialized = true;
+        break;
+      } catch (SafeModeException exception) {
+        // If we are using HDFS and HDFS is in SafeMode, wait and retry.
+        safeModeException = exception;
+        LOG.info("HDFS is still in safe mode, waiting...");
+        Thread.sleep(HDFS_SAFEMODE_WAIT_INTERVAL_MS);
+      }
+    }
+    if (!initialized) {
+      // Every attempt within the wait budget failed: surface the last one.
+      throw safeModeException;
+    }
+  }
+
+  /**
+   * Creates the rmDTSecretManagerRoot and rmAppRoot directories. Kept
+   * protected so that a subclass (e.g. a test) can override it.
+   *
+   * @param fs file system on which the directories are created
+   * @throws IOException if creating either directory fails
+   */
+  protected void createRootDirs(FileSystem fs) throws IOException {
     fs.mkdirs(rmDTSecretManagerRoot);
     fs.mkdirs(rmAppRoot);
   }
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/TestRMStateStore.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/TestRMStateStore.java
index a24af25..182212b 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/TestRMStateStore.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/TestRMStateStore.java
@@ -41,6 +41,7 @@
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hdfs.HdfsConfiguration;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.hdfs.server.namenode.SafeModeException;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.security.token.Token;
 import org.apache.hadoop.security.token.delegation.DelegationKey;
@@ -121,6 +122,26 @@ public void testFSRMStateStore() throws Exception {
     }
   }
 
+  @Test
+  public void testRMRetryHDFSInSafeMode() throws Exception {
+    YarnConfiguration yarnConf = new YarnConfiguration();
+    RMStateStore store = new FileSystemRMStateStore() {
+      // Number of simulated safe-mode failures thrown so far.
+      int num = 0;
+
+      @Override
+      protected void createRootDirs(FileSystem fs) throws SafeModeException {
+        // Fail the first two attempts, then let the third one succeed.
+        if (num < 2) {
+          num++;
+          throw new SafeModeException();
+        }
+      }
+    };
+    // init should succeed
+    store.init(yarnConf);
+  }
+
   class TestFSRMStateStoreTester implements RMStateStoreHelper {
     Path workingDirPathURI;
     FileSystemRMStateStore store;