diff --git hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMCommunicator.java hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMCommunicator.java index ad1c9f1..65c1548 100644 --- hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMCommunicator.java +++ hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMCommunicator.java @@ -237,15 +237,6 @@ public void run() { } catch (YarnRuntimeException e) { LOG.error("Error communicating with RM: " + e.getMessage() , e); return; - } catch (InvalidToken e) { - // This can happen if the RM has been restarted, since currently - // when RM restarts AMRMToken is not populated back to - // AMRMTokenSecretManager yet. Once this is fixed, no need - // to send JOB_AM_REBOOT event in this method any more. - eventHandler.handle(new JobEvent(job.getID(), - JobEventType.JOB_AM_REBOOT)); - LOG.error("Error in authencating with RM: " ,e); - return; } catch (Exception e) { LOG.error("ERROR IN CONTACTING RM. ", e); continue; diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java index c69aed3..f1af127 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java @@ -569,7 +569,7 @@ public void handle(RMAppEvent event) { } @Override - public void recover(RMState state) { + public void recover(RMState state) throws Exception{ ApplicationState appState = state.getApplicationState().get(getApplicationId()); LOG.info("Recovering app: " + getApplicationId() + " with " + + appState.getAttemptCount() + " attempts"); diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java index 00397cf..9026e64 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java @@ -20,6 +20,7 @@ import static org.apache.hadoop.yarn.util.StringHelper.pjoin; +import java.io.IOException; import java.net.URI; import java.net.URISyntaxException; import java.util.ArrayList; @@ -652,7 +653,7 @@ public ApplicationResourceUsageReport getApplicationResourceUsageReport() { } @Override - public void recover(RMState state) { + public void recover(RMState state) throws Exception{ ApplicationState appState = state.getApplicationState().get(getAppAttemptId().getApplicationId()); ApplicationAttemptState attemptState = appState.getAttempt(getAppAttemptId()); @@ -667,7 +668,8 @@ public void recover(RMState state) { RMAppAttemptEventType.RECOVER)); } - private void recoverAppAttemptCredentials(Credentials appAttemptTokens) { + private void recoverAppAttemptCredentials(Credentials appAttemptTokens) + throws IOException { if (appAttemptTokens == null) { return; } @@ -684,11 +686,7 @@ private void recoverAppAttemptCredentials(Credentials appAttemptTokens) { this.amrmToken = (Token) appAttemptTokens .getToken(RMStateStore.AM_RM_TOKEN_SERVICE); - - // For now, no need to populate tokens back to AMRMTokenSecretManager, - // because running attempts are rebooted. Later in work-preserve restart, - // we'll create NEW->RUNNING transition in which the restored tokens will be - // added to the secret manager + rmContext.getAMRMTokenSecretManager().addPersistedPassword(this.amrmToken); } private static class BaseTransition implements diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/security/AMRMTokenSecretManager.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/security/AMRMTokenSecretManager.java index 3f50e1b..5d21ec0 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/security/AMRMTokenSecretManager.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/security/AMRMTokenSecretManager.java @@ -18,6 +18,7 @@ package org.apache.hadoop.yarn.server.resourcemanager.security; +import java.io.IOException; import java.util.HashMap; import java.util.Map; import java.util.Timer; @@ -30,6 +31,7 @@ import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.security.token.SecretManager; +import org.apache.hadoop.security.token.Token; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.security.AMRMTokenIdentifier; @@ -124,6 +126,19 @@ synchronized void rollMasterKey() { } /** + * Populate persisted password of AMRMToken back to AMRMTokenSecretManager. + */ + public synchronized void + addPersistedPassword(Token token) throws IOException { + AMRMTokenIdentifier identifier = token.decodeIdentifier(); + if (LOG.isDebugEnabled()) { + LOG.debug("Adding password for " + identifier.getApplicationAttemptId()); + } + this.passwords.put(identifier.getApplicationAttemptId(), + token.getPassword()); + } + + /** * Retrieve the password for the given {@link AMRMTokenIdentifier}. * Used by RPC layer to validate a remote {@link AMRMTokenIdentifier}. */ diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java index c0f480b..ee970be 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java @@ -52,6 +52,7 @@ import org.apache.hadoop.yarn.api.records.ContainerState; import org.apache.hadoop.yarn.api.records.ResourceRequest; import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.security.AMRMTokenIdentifier; import org.apache.hadoop.yarn.security.client.RMDelegationTokenIdentifier; import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse; import org.apache.hadoop.yarn.server.api.records.NodeAction; @@ -577,14 +578,16 @@ public void testAppAttemptTokensRestoredOnRMRestart() throws Exception { attempt1.getClientTokenMasterKey(), loadedAttempt1.getClientTokenMasterKey()); - // assert secret manager also knows about the key + // assert ClientTokenSecretManager also knows about the key Assert.assertArrayEquals(clientTokenMasterKey, rm2.getClientToAMTokenSecretManager().getMasterKey(attemptId1) .getEncoded()); - // Not testing ApplicationTokenSecretManager has the password populated back, - // that is needed in work-preserving restart - + // assert AMRMTokenSecretManager also knows about the AMRMToken password + Token amrmToken = loadedAttempt1.getAMRMToken(); + Assert.assertArrayEquals(amrmToken.getPassword(), + rm2.getAMRMTokenSecretManager().retrievePassword( + amrmToken.decodeIdentifier())); rm1.stop(); rm2.stop(); }