diff --git hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMCommunicator.java hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMCommunicator.java index ad1c9f1..d098493 100644 --- hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMCommunicator.java +++ hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMCommunicator.java @@ -45,6 +45,7 @@ import org.apache.hadoop.service.AbstractService; import org.apache.hadoop.yarn.api.ApplicationMasterProtocol; import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterRequest; +import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterResponse; import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterRequest; import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse; import org.apache.hadoop.yarn.api.records.ApplicationAccessType; @@ -57,6 +58,8 @@ import org.apache.hadoop.yarn.factories.RecordFactory; import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; +import com.sun.research.ws.wadl.Response; + /** * Registers/unregisters to RM and sends heartbeats to RM. */ @@ -194,7 +197,11 @@ protected void unregister() { FinishApplicationMasterRequest request = FinishApplicationMasterRequest.newInstance(finishState, sb.toString(), historyUrl); - scheduler.finishApplicationMaster(request); + FinishApplicationMasterResponse response = null; + do { + response = scheduler.finishApplicationMaster(request); + Thread.sleep(rmPollInterval); + } while (response.getIsRemovedFromRMStateSore()); } catch(Exception are) { LOG.error("Exception while unregistering ", are); } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/FinishApplicationMasterResponse.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/FinishApplicationMasterResponse.java index 4317b67..08e1220 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/FinishApplicationMasterResponse.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/FinishApplicationMasterResponse.java @@ -23,13 +23,25 @@ import org.apache.hadoop.classification.InterfaceStability.Stable; import org.apache.hadoop.classification.InterfaceStability.Unstable; import org.apache.hadoop.yarn.api.ApplicationMasterProtocol; +import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.util.Records; /** - *

The response sent by the ResourceManager to a - * ApplicationMaster on it's completion.

+ *

+ * The response sent by the ResourceManager to a + * ApplicationMaster on it's completion. + *

* - *

Currently, this is empty.

+ *

+ * The response, includes: + *

+ *

+ * Note: If RM recovery is enabled, user is expected to retry this flag until it + * becomes true. Otherwise, will risk, though not likely, restarting the already + * finished application after RM restarts. * * @see ApplicationMasterProtocol#finishApplicationMaster(FinishApplicationMasterRequest) */ @@ -38,9 +50,27 @@ public abstract class FinishApplicationMasterResponse { @Private @Unstable - public static FinishApplicationMasterResponse newInstance() { + public static FinishApplicationMasterResponse newInstance(boolean isRemovedFromRMStateStore) { FinishApplicationMasterResponse response = Records.newRecord(FinishApplicationMasterResponse.class); + response.setIsRemovedFromRMStateSore(isRemovedFromRMStateStore); return response; } + + /** + * Get the flag which indicates that application information has been removed from + * RMStateStore. + */ + @Public + @Stable + public abstract boolean getIsRemovedFromRMStateSore(); + + /** + * Set the flag which indicates that application information has been removed from + * RMStateStore. + */ + @Private + @Unstable + public abstract boolean setIsRemovedFromRMStateSore( + boolean isRemovedFromRMStateStore); } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_service_protos.proto hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_service_protos.proto index bd009e0..0bf9c72 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_service_protos.proto +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_service_protos.proto @@ -52,6 +52,7 @@ message FinishApplicationMasterRequestProto { } message FinishApplicationMasterResponseProto { + optional bool is_removed_from_RMStateStore = 1 [default = false]; } message AllocateRequestProto { diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/AMRMClientImpl.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/AMRMClientImpl.java index 99e896e..4ee0721 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/AMRMClientImpl.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/AMRMClientImpl.java @@ -44,6 +44,7 @@ import org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest; import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse; import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterRequest; +import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterResponse; import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterRequest; import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse; import org.apache.hadoop.yarn.api.records.ContainerId; @@ -278,11 +279,20 @@ public void unregisterApplicationMaster(FinalApplicationStatus appStatus, String appMessage, String appTrackingUrl) throws YarnException, IOException { Preconditions.checkArgument(appStatus != null, - "AppStatus should not be null."); + "AppStatus should not be null."); FinishApplicationMasterRequest request = FinishApplicationMasterRequest.newInstance(appStatus, appMessage, appTrackingUrl); - rmClient.finishApplicationMaster(request); + FinishApplicationMasterResponse response = null; + try { + do { + response = rmClient.finishApplicationMaster(request); + Thread.sleep(1000); + } while (response.getIsRemovedFromRMStateSore()); + } catch (InterruptedException e) { + LOG.info("Interrupted while waiting for application" + + " to be removed from RMStateStore"); + } } @Override diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/FinishApplicationMasterResponsePBImpl.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/FinishApplicationMasterResponsePBImpl.java index ff57eb4..f4eb7dd 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/FinishApplicationMasterResponsePBImpl.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/FinishApplicationMasterResponsePBImpl.java @@ -22,7 +22,9 @@ import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.classification.InterfaceStability.Unstable; import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterResponse; +import org.apache.hadoop.yarn.proto.YarnProtos.ResourceRequestProto; import org.apache.hadoop.yarn.proto.YarnServiceProtos.FinishApplicationMasterResponseProto; +import org.apache.hadoop.yarn.proto.YarnServiceProtos.FinishApplicationMasterResponseProtoOrBuilder; import com.google.protobuf.TextFormat; @@ -67,4 +69,25 @@ public boolean equals(Object other) { public String toString() { return TextFormat.shortDebugString(getProto()); } + + private void maybeInitBuilder() { + if (viaProto || builder == null) { + builder = FinishApplicationMasterResponseProto.newBuilder(proto); + } + viaProto = false; + } + + @Override + public boolean getIsRemovedFromRMStateSore() { + FinishApplicationMasterResponseProtoOrBuilder p = + viaProto ? proto : builder; + return p.getIsRemovedFromRMStateStore(); + } + + @Override + public boolean setIsRemovedFromRMStateSore(boolean isRemovedFromRMStateStore) { + maybeInitBuilder(); + builder.setIsRemovedFromRMStateStore(isRemovedFromRMStateStore); + return false; + } } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java index a41792d..c8b2db2 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java @@ -72,6 +72,7 @@ import org.apache.hadoop.yarn.security.AMRMTokenIdentifier; import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger.AuditConstants; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.AMLivelinessMonitor; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptRegistrationEvent; @@ -298,13 +299,19 @@ public FinishApplicationMasterResponse finishApplicationMaster( this.amLivelinessMonitor.receivedPing(applicationAttemptId); + // App is in finishing state, that means app info has been deleted from + // RMStateStore. + if (RMAppState.FINISHING.equals(rmContext.getRMApps() + .get(applicationAttemptId.getApplicationId()).getState())) { + return FinishApplicationMasterResponse.newInstance(true); + } rmContext.getDispatcher().getEventHandler().handle( new RMAppAttemptUnregistrationEvent(applicationAttemptId, request .getTrackingUrl(), request.getFinalApplicationStatus(), request .getDiagnostics())); - FinishApplicationMasterResponse response = recordFactory - .newRecordInstance(FinishApplicationMasterResponse.class); + FinishApplicationMasterResponse response = + FinishApplicationMasterResponse.newInstance(false); return response; } } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java index 6439df1..634ea36 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java @@ -187,7 +187,8 @@ protected synchronized void finishApplication(ApplicationId applicationId) { completedApps.add(applicationId); writeAuditLog(applicationId); - // application completely done. Remove from state + // application completely done and remove from state store. + // App state may be already removed during RMAppRemovingTransition. RMStateStore store = rmContext.getStateStore(); store.removeApplication(rmContext.getRMApps().get(applicationId)); } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/FileSystemRMStateStore.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/FileSystemRMStateStore.java index de1f65a..8516ff7 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/FileSystemRMStateStore.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/FileSystemRMStateStore.java @@ -336,6 +336,8 @@ private Path getAppDir(Path root, String appId) { // FileSystem related code private void deleteFile(Path deletePath) throws Exception { + if(!fs.exists(deletePath)) + return; if(!fs.delete(deletePath, true)) { throw new Exception("Failed to delete " + deletePath); } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStore.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStore.java index 865e726..6c7c55c 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStore.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStore.java @@ -50,6 +50,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.impl.pb.ApplicationStateDataPBImpl; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppStoredEvent; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppRemovedEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptStoredEvent; @@ -477,12 +478,15 @@ private synchronized void handleStoreEvent(RMStateStoreEvent event) { ApplicationState appState = ((RMStateStoreRemoveAppEvent) event).getAppState(); ApplicationId appId = appState.getAppId(); - + Exception removedException = null; LOG.info("Removing info for app: " + appId); try { removeApplicationState(appState); } catch (Exception e) { LOG.error("Error removing app: " + appId, e); + removedException = e; + } finally { + notifyDoneRemovingApplcation(appId, removedException); } } break; @@ -516,7 +520,18 @@ private void notifyDoneStoringApplicationAttempt(ApplicationAttemptId attemptId, rmDispatcher.getEventHandler().handle( new RMAppAttemptStoredEvent(attemptId, storedException)); } - + + @SuppressWarnings("unchecked") + /** + * This is to notify RMApp that this application has been removed from + * RMStateStore + */ + private void notifyDoneRemovingApplcation(ApplicationId appId, + Exception removedException) { + rmDispatcher.getEventHandler().handle( + new RMAppRemovedEvent(appId, removedException)); + } + /** * EventHandler implementation which forward events to the FSRMStateStore * This hides the EventHandle methods of the store from its public interface diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppEventType.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppEventType.java index d15e12e..b77b08a 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppEventType.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppEventType.java @@ -27,11 +27,14 @@ // Source: RMAppAttempt APP_REJECTED, APP_ACCEPTED, - APP_SAVED, ATTEMPT_REGISTERED, ATTEMPT_FINISHING, ATTEMPT_FINISHED, // Will send the final state ATTEMPT_FAILED, ATTEMPT_KILLED, - NODE_UPDATE + NODE_UPDATE, + + // Source: RMStateStore + APP_SAVED, + APP_REMOVED } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java index a11b05e..8f5ef9d 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java @@ -57,6 +57,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.RMContext; import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.ApplicationState; import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState; +import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore; import org.apache.hadoop.yarn.server.resourcemanager.recovery.Recoverable; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppNodeUpdateEvent.RMAppNodeUpdateType; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt; @@ -168,8 +169,10 @@ // Transitions from RUNNING state .addTransition(RMAppState.RUNNING, RMAppState.RUNNING, RMAppEventType.NODE_UPDATE, new RMAppNodeUpdateTransition()) - .addTransition(RMAppState.RUNNING, RMAppState.FINISHING, - RMAppEventType.ATTEMPT_FINISHING, new RMAppFinishingTransition()) + .addTransition(RMAppState.RUNNING, + EnumSet.of(RMAppState.FINISHING, RMAppState.APP_REMOVING), + RMAppEventType.ATTEMPT_FINISHING, + new RMAppFinishingOrRemovingTransition()) .addTransition(RMAppState.RUNNING, RMAppState.FINISHED, RMAppEventType.ATTEMPT_FINISHED, FINISHED_TRANSITION) .addTransition(RMAppState.RUNNING, @@ -179,6 +182,19 @@ .addTransition(RMAppState.RUNNING, RMAppState.KILLED, RMAppEventType.KILL, new KillAppAndAttemptTransition()) + // Transitions from APP_REMOVING state + .addTransition(RMAppState.APP_REMOVING, RMAppState.FINISHING, + RMAppEventType.APP_REMOVED, new RMAppFinishingTransition()) + .addTransition(RMAppState.APP_REMOVING, RMAppState.FINISHED, + RMAppEventType.ATTEMPT_FINISHED, FINISHED_TRANSITION) + .addTransition(RMAppState.APP_REMOVING, RMAppState.KILLED, + RMAppEventType.KILL, new KillAppAndAttemptTransition()) + // ignorable transitions + .addTransition(RMAppState.APP_REMOVING, RMAppState.APP_REMOVING, + RMAppEventType.ATTEMPT_FINISHING) + .addTransition(RMAppState.APP_REMOVING, RMAppState.APP_REMOVING, + RMAppEventType.NODE_UPDATE) + // Transitions from FINISHING state .addTransition(RMAppState.FINISHING, RMAppState.FINISHED, RMAppEventType.ATTEMPT_FINISHED, FINISHED_TRANSITION) @@ -186,7 +202,7 @@ RMAppEventType.KILL, new KillAppAndAttemptTransition()) // ignorable transitions .addTransition(RMAppState.FINISHING, RMAppState.FINISHING, - RMAppEventType.NODE_UPDATE) + EnumSet.of(RMAppEventType.NODE_UPDATE, RMAppEventType.APP_REMOVED)) // Transitions from FINISHED state .addTransition(RMAppState.FINISHED, RMAppState.FINISHED, @@ -196,11 +212,13 @@ EnumSet.of( RMAppEventType.NODE_UPDATE, RMAppEventType.ATTEMPT_FINISHING, - RMAppEventType.ATTEMPT_FINISHED)) + RMAppEventType.ATTEMPT_FINISHED, + RMAppEventType.APP_REMOVED)) // Transitions from FAILED state .addTransition(RMAppState.FAILED, RMAppState.FAILED, - EnumSet.of(RMAppEventType.KILL, RMAppEventType.APP_SAVED)) + EnumSet.of(RMAppEventType.KILL, RMAppEventType.APP_SAVED, + RMAppEventType.APP_REMOVED)) // ignorable transitions .addTransition(RMAppState.FAILED, RMAppState.FAILED, RMAppEventType.NODE_UPDATE) @@ -212,7 +230,8 @@ EnumSet.of(RMAppEventType.APP_ACCEPTED, RMAppEventType.APP_REJECTED, RMAppEventType.KILL, RMAppEventType.ATTEMPT_FINISHED, RMAppEventType.ATTEMPT_FAILED, - RMAppEventType.ATTEMPT_KILLED, RMAppEventType.APP_SAVED)) + RMAppEventType.ATTEMPT_KILLED, RMAppEventType.APP_SAVED, + RMAppEventType.APP_REMOVED)) // ignorable transitions .addTransition(RMAppState.KILLED, RMAppState.KILLED, RMAppEventType.NODE_UPDATE) @@ -682,6 +701,23 @@ public void transition(RMAppImpl app, RMAppEvent event) { } } + private static final class RMAppFinishingOrRemovingTransition implements + MultipleArcTransition { + @Override + public RMAppState transition(RMAppImpl app, RMAppEvent event) { + boolean isRecoveryEnabled = + app.conf.getBoolean(YarnConfiguration.RECOVERY_ENABLED, + YarnConfiguration.DEFAULT_RM_RECOVERY_ENABLED); + if (isRecoveryEnabled) { + LOG.info("Removing application with id " + app.applicationId); + app.rmContext.getStateStore().removeApplication(app); + return RMAppState.APP_REMOVING; + } else + new RMAppFinishingTransition().transition(app, event); + return RMAppState.FINISHING; + } + } + private static class AppFinishedTransition extends FinalTransition { public void transition(RMAppImpl app, RMAppEvent event) { RMAppFinishedAttemptEvent finishedEvent = diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppRemovedEvent.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppRemovedEvent.java new file mode 100644 index 0000000..b780f39 --- /dev/null +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppRemovedEvent.java @@ -0,0 +1,36 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.server.resourcemanager.rmapp; + +import org.apache.hadoop.yarn.api.records.ApplicationId; + +public class RMAppRemovedEvent extends RMAppEvent { + + private final Exception removedException; + + public RMAppRemovedEvent(ApplicationId appId, Exception storedException) { + super(appId, RMAppEventType.APP_REMOVED); + this.removedException = storedException; + } + + public Exception getStoredException() { + return removedException; + } + +} \ No newline at end of file diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppState.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppState.java index b7f9325..fa6e25f 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppState.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppState.java @@ -24,6 +24,7 @@ SUBMITTED, ACCEPTED, RUNNING, + APP_REMOVING, FINISHING, FINISHED, FAILED, diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/TestRMAppTransitions.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/TestRMAppTransitions.java index a4b7404..9649211 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/TestRMAppTransitions.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/TestRMAppTransitions.java @@ -19,6 +19,7 @@ package org.apache.hadoop.yarn.server.resourcemanager.rmapp; import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.verify; import java.io.IOException; @@ -55,6 +56,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.security.ClientToAMTokenSecretManagerInRM; import org.apache.hadoop.yarn.server.resourcemanager.security.RMContainerTokenSecretManager; import org.apache.hadoop.yarn.server.resourcemanager.security.NMTokenSecretManagerInRM; +import org.junit.After; import org.junit.Before; import org.junit.Test; @@ -67,6 +69,8 @@ YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS; private static int appId = 1; private DrainDispatcher rmDispatcher; + private RMStateStore store; + private Configuration conf; // ignore all the RM application attempt events private static final class TestApplicationAttemptEventDispatcher implements @@ -135,13 +139,13 @@ public void handle(SchedulerEvent event) { @Before public void setUp() throws Exception { - Configuration conf = new Configuration(); + conf = new Configuration(); rmDispatcher = new DrainDispatcher(); ContainerAllocationExpirer containerAllocationExpirer = mock(ContainerAllocationExpirer.class); AMLivelinessMonitor amLivelinessMonitor = mock(AMLivelinessMonitor.class); AMLivelinessMonitor amFinishingMonitor = mock(AMLivelinessMonitor.class); - RMStateStore store = mock(RMStateStore.class); + store = mock(RMStateStore.class); this.rmContext = new RMContextImpl(rmDispatcher, store, containerAllocationExpirer, amLivelinessMonitor, amFinishingMonitor, @@ -166,12 +170,16 @@ public void setUp() throws Exception { rmDispatcher.start(); } + @After + public void tearDown() throws Exception { + conf = new Configuration(); + } + protected RMApp createNewTestApp(ApplicationSubmissionContext submissionContext) { ApplicationId applicationId = MockApps.newAppID(appId++); String user = MockApps.newUserName(); String name = MockApps.newAppName(); String queue = MockApps.newQueue(); - Configuration conf = new YarnConfiguration(); // ensure max application attempts set to known value conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, maxAppAttempts); YarnScheduler scheduler = mock(YarnScheduler.class); @@ -247,6 +255,10 @@ private static void assertTimesAtFinish(RMApp application) { (application.getFinishTime() >= application.getStartTime())); } + private void assertAppRemoved(RMApp application){ + verify(store).removeApplication(application); + } + private static void assertKilled(RMApp application) { assertTimesAtFinish(application); assertAppState(RMAppState.KILLED, application); @@ -335,6 +347,22 @@ protected RMApp testCreateAppRunning( return application; } + protected RMApp testCreateAppRemoving( + ApplicationSubmissionContext submissionContext) throws IOException { + conf.setBoolean(YarnConfiguration.RECOVERY_ENABLED, true); + RMApp application = testCreateAppRunning(submissionContext); + // If RM recovery is enabled: + // RUNNING -> APP_REMOVING event RMAppEventType.ATTEMPT_FINISHING + RMAppEvent finishingEvent = + new RMAppEvent(application.getApplicationId(), + RMAppEventType.ATTEMPT_FINISHING); + application.handle(finishingEvent); + assertAppState(RMAppState.APP_REMOVING, application); + assertAppRemoved(application); + return application; + } + + protected RMApp testCreateAppFinishing( ApplicationSubmissionContext submissionContext) throws IOException { // unmanaged AMs don't use the FINISHING state @@ -608,6 +636,43 @@ public void testAppRunningFailed() throws IOException { } @Test + public void testAppRemovingFinishing() throws IOException { + LOG.info("--- START: testAppRemovingFinishing ---"); + RMApp application = testCreateAppRemoving(null); + // APP_REMOVING => FINISHING event RMAppEventType.APP_REMOVED + RMAppEvent event = + new RMAppEvent(application.getApplicationId(), + RMAppEventType.APP_REMOVED); + application.handle(event); + rmDispatcher.await(); + assertAppState(RMAppState.FINISHING, application); + } + + @Test + public void testAppRemovingFinished() throws IOException { + LOG.info("--- START: testAppRemovingFINISHED ---"); + RMApp application = testCreateAppRemoving(null); + // APP_REMOVING => FINISHED event RMAppEventType.ATTEMPT_FINISHED + RMAppEvent finishedEvent = new RMAppFinishedAttemptEvent( + application.getApplicationId(), null); + application.handle(finishedEvent); + rmDispatcher.await(); + assertAppState(RMAppState.FINISHED, application); + } + + @Test + public void testAppRemovingKilledD() throws IOException { + LOG.info("--- START: testAppRemovingKilledD ---"); + RMApp application = testCreateAppRemoving(null); + // APP_REMOVING => KILLED event RMAppEventType.KILL + RMAppEvent event = + new RMAppEvent(application.getApplicationId(), RMAppEventType.KILL); + application.handle(event); + rmDispatcher.await(); + assertAppState(RMAppState.KILLED, application); + } + + @Test public void testAppFinishingKill() throws IOException { LOG.info("--- START: testAppFinishedFinished ---");