diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStore.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStore.java index 0fd346f..b28511d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStore.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStore.java @@ -70,10 +70,13 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEventType; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptMetrics; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FSOpDurations; import org.apache.hadoop.yarn.state.InvalidStateTransitionException; import org.apache.hadoop.yarn.state.MultipleArcTransition; import org.apache.hadoop.yarn.state.StateMachine; import org.apache.hadoop.yarn.state.StateMachineFactory; +import org.apache.hadoop.yarn.util.Clock; +import org.apache.hadoop.yarn.util.SystemClock; @Private @Unstable @@ -102,6 +105,7 @@ protected ResourceManager resourceManager; private final ReadLock readLock; private final WriteLock writeLock; + static FSOpDurations fsOpDurations; public static final Log LOG = LogFactory.getLog(RMStateStore.class); @@ -216,7 +220,10 @@ public RMStateStoreState transition(RMStateStore store, appState.getApplicationSubmissionContext().getApplicationId(); LOG.info("Storing info for app: " + appId); try { + long start = getClock().getTime(); store.storeApplicationStateInternal(appId, appState); + fsOpDurations.addStoreAppCallDuration( + getClock().getTime() - start); store.notifyApplication(new RMAppEvent(appId, RMAppEventType.APP_NEW_SAVED)); } catch (Exception e) { @@ -247,7 +254,10 @@ public RMStateStoreState transition(RMStateStore store, appState.getApplicationSubmissionContext().getApplicationId(); LOG.info("Updating info for app: " + appId); try { + long start = getClock().getTime(); store.updateApplicationStateInternal(appId, appState); + fsOpDurations.addUpdateStoreAppCallDuration( + getClock().getTime() - start); if (((RMStateUpdateAppEvent) event).isNotifyApplication()) { store.notifyApplication(new RMAppEvent(appId, RMAppEventType.APP_UPDATE_SAVED)); @@ -287,7 +297,10 @@ public RMStateStoreState transition(RMStateStore store, appState.getApplicationSubmissionContext().getApplicationId(); LOG.info("Removing info for app: " + appId); try { + long start = getClock().getTime(); store.removeApplicationStateInternal(appState); + fsOpDurations.addRemoveStoreAppCallDuration( + getClock().getTime() - start); } catch (Exception e) { LOG.error("Error removing app: " + appId, e); isFenced = store.notifyStoreOperationFailedInternal(e); @@ -314,8 +327,11 @@ public RMStateStoreState transition(RMStateStore store, if (LOG.isDebugEnabled()) { LOG.debug("Storing info for attempt: " + attemptState.getAttemptId()); } + long start = getClock().getTime(); store.storeApplicationAttemptStateInternal(attemptState.getAttemptId(), attemptState); + fsOpDurations.addStoreAppAttemptCallDuration( + getClock().getTime() - start); store.notifyApplicationAttempt(new RMAppAttemptEvent (attemptState.getAttemptId(), RMAppAttemptEventType.ATTEMPT_NEW_SAVED)); @@ -345,8 +361,11 @@ public RMStateStoreState transition(RMStateStore store, if (LOG.isDebugEnabled()) { LOG.debug("Updating info for attempt: " + attemptState.getAttemptId()); } + long start = getClock().getTime(); store.updateApplicationAttemptStateInternal(attemptState.getAttemptId(), attemptState); + fsOpDurations.addUpdateAppAttemptCallDuration( + getClock().getTime() - start); store.notifyApplicationAttempt(new RMAppAttemptEvent (attemptState.getAttemptId(), RMAppAttemptEventType.ATTEMPT_UPDATE_SAVED)); @@ -373,8 +392,11 @@ public RMStateStoreState transition(RMStateStore store, RMStateStoreRMDTEvent dtEvent = (RMStateStoreRMDTEvent) event; try { LOG.info("Storing RMDelegationToken and SequenceNumber"); + long start = getClock().getTime(); store.storeRMDelegationTokenState( dtEvent.getRmDTIdentifier(), dtEvent.getRenewDate()); + fsOpDurations.addStoreDelegationTokenCallDuration( + getClock().getTime() - start); } catch (Exception e) { LOG.error("Error While Storing RMDelegationToken and SequenceNumber ", e); @@ -399,7 +421,10 @@ public RMStateStoreState transition(RMStateStore store, RMStateStoreRMDTEvent dtEvent = (RMStateStoreRMDTEvent) event; try { LOG.info("Removing RMDelegationToken and SequenceNumber"); + long start = getClock().getTime(); store.removeRMDelegationTokenState(dtEvent.getRmDTIdentifier()); + fsOpDurations.addRemoveDelegationTokenCalllDuration( + getClock().getTime() - start); } catch (Exception e) { LOG.error("Error While Removing RMDelegationToken and SequenceNumber ", e); @@ -424,8 +449,11 @@ public RMStateStoreState transition(RMStateStore store, RMStateStoreRMDTEvent dtEvent = (RMStateStoreRMDTEvent) event; try { LOG.info("Updating RMDelegationToken and SequenceNumber"); + long start = getClock().getTime(); store.updateRMDelegationTokenState( dtEvent.getRmDTIdentifier(), dtEvent.getRenewDate()); + fsOpDurations.addUpdateDelegationTokenCallDuration( + getClock().getTime() - start); } catch (Exception e) { LOG.error("Error While Updating RMDelegationToken and SequenceNumber ", e); @@ -451,7 +479,10 @@ public RMStateStoreState transition(RMStateStore store, (RMStateStoreRMDTMasterKeyEvent) event; try { LOG.info("Storing RMDTMasterKey."); + long start = getClock().getTime(); store.storeRMDTMasterKeyState(dtEvent.getDelegationKey()); + fsOpDurations.addStoreMasterKeyCallDuration( + getClock().getTime() - start); } catch (Exception e) { LOG.error("Error While Storing RMDTMasterKey.", e); isFenced = store.notifyStoreOperationFailedInternal(e); @@ -476,7 +507,10 @@ public RMStateStoreState transition(RMStateStore store, (RMStateStoreRMDTMasterKeyEvent) event; try { LOG.info("Removing RMDTMasterKey."); + long start = getClock().getTime(); store.removeRMDTMasterKeyState(dtEvent.getDelegationKey()); + fsOpDurations.addRemoveMasterKeyCallDuration( + getClock().getTime() - start); } catch (Exception e) { LOG.error("Error While Removing RMDTMasterKey.", e); isFenced = store.notifyStoreOperationFailedInternal(e); @@ -500,8 +534,11 @@ public RMStateStoreState transition(RMStateStore store, boolean isFenced = false; try { LOG.info("Updating AMRMToken"); + long start = getClock().getTime(); store.storeOrUpdateAMRMTokenSecretManagerState( amrmEvent.getAmrmTokenSecretManagerState(), amrmEvent.isUpdate()); + fsOpDurations.addUpdateAMRMTokenCallDuration( + getClock().getTime() - start); } catch (Exception e) { LOG.error("Error storing info for AMRMTokenSecretManager", e); isFenced = store.notifyStoreOperationFailedInternal(e); @@ -527,10 +564,13 @@ public RMStateStoreState transition(RMStateStore store, try { LOG.info("Storing reservation allocation." + reservationEvent .getReservationIdName()); + long start = getClock().getTime(); store.storeReservationState( reservationEvent.getReservationAllocation(), reservationEvent.getPlanName(), reservationEvent.getReservationIdName()); + fsOpDurations.addStoreReservationAllocationCallDuration( + getClock().getTime() - start); } catch (Exception e) { LOG.error("Error while storing reservation allocation.", e); isFenced = store.notifyStoreOperationFailedInternal(e); @@ -556,9 +596,12 @@ public RMStateStoreState transition(RMStateStore store, try { LOG.info("Removing reservation allocation." + reservationEvent .getReservationIdName()); + long start = getClock().getTime(); store.removeReservationState( reservationEvent.getPlanName(), reservationEvent.getReservationIdName()); + fsOpDurations.addRemoveReservationAllocationCallDuration( + getClock().getTime() - start); } catch (Exception e) { LOG.error("Error while removing reservation allocation.", e); isFenced = store.notifyStoreOperationFailedInternal(e); @@ -603,6 +646,11 @@ public RMStateStore() { this.readLock = lock.readLock(); this.writeLock = lock.writeLock(); stateMachine = stateMachineFactory.make(this); + fsOpDurations = FSOpDurations.getInstance(true); + } + + public static Clock getClock() { + return SystemClock.getInstance(); } public static class RMDTSecretManagerState { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSOpDurations.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSOpDurations.java index 20d2af9..a629bd5 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSOpDurations.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSOpDurations.java @@ -56,6 +56,46 @@ @Metric("Duration for a preempt call") MutableRate preemptCall; + //RMStateStore Metric + @Metric("Duration for a storeApp call") + MutableRate storeAppCall; + + @Metric("Duration for a updateApp call") + MutableRate updateAppCall; + + @Metric("Duration for a removeApp call") + MutableRate removeAppCall; + + @Metric("Duration for a storeAppAttempt call") + MutableRate storeAppAttemptCall; + + @Metric("Duration for a updateAppAttempt call") + MutableRate updateAppAttemptCall; + + @Metric("Duration for a storeMasterKey call") + MutableRate storeMasterKeyCall; + + @Metric("Duration for a removeMasterKey call") + MutableRate removeMasterKeyCall; + + @Metric("Duration for a storeDelegationToken call") + MutableRate storeDelegationTokenCall; + + @Metric("Duration for a removeDelegationToken call") + MutableRate removeDelegationTokenCall; + + @Metric("Duration for a updateDelegationToken call") + MutableRate updateDelegationTokenCall; + + @Metric("Duration for a updateAMRMToken call") + MutableRate updateAMRMTokenCall; + + @Metric("Duration for a storeReservationAllocation call") + MutableRate storeReservationAllocationCall; + + @Metric("Duration for a storeReservationAllocation call") + MutableRate removeReservationAllocationCall; + private static final MetricsInfo RECORD_INFO = info("FSOpDurations", "Durations of FairScheduler calls or thread-runs"); @@ -90,6 +130,20 @@ private synchronized void setExtended(boolean isExtended) { updateCall.setExtended(isExtended); preemptCall.setExtended(isExtended); + storeAppCall.setExtended(isExtended); + updateAppCall.setExtended(isExtended); + removeAppCall.setExtended(isExtended); + storeAppAttemptCall.setExtended(isExtended); + updateAppAttemptCall.setExtended(isExtended); + storeMasterKeyCall.setExtended(isExtended); + removeMasterKeyCall.setExtended(isExtended); + storeDelegationTokenCall.setExtended(isExtended); + removeDelegationTokenCall.setExtended(isExtended); + updateDelegationTokenCall.setExtended(isExtended); + updateAMRMTokenCall.setExtended(isExtended); + storeReservationAllocationCall.setExtended(isExtended); + removeReservationAllocationCall.setExtended(isExtended); + INSTANCE.isExtended = isExtended; } @@ -118,6 +172,58 @@ public void addPreemptCallDuration(long value) { preemptCall.add(value); } + public void addStoreAppCallDuration(long value) { + storeAppCall.add(value); + } + + public void addUpdateStoreAppCallDuration(long value) { + updateAppCall.add(value); + } + + public void addRemoveStoreAppCallDuration(long value) { + removeAppCall.add(value); + } + + public void addStoreAppAttemptCallDuration(long value) { + storeAppAttemptCall.add(value); + } + + public void addUpdateAppAttemptCallDuration(long value) { + updateAppAttemptCall.add(value); + } + + public void addStoreMasterKeyCallDuration(long value) { + storeMasterKeyCall.add(value); + } + + public void addRemoveMasterKeyCallDuration(long value) { + removeMasterKeyCall.add(value); + } + + public void addStoreDelegationTokenCallDuration(long value) { + storeDelegationTokenCall.add(value); + } + + public void addRemoveDelegationTokenCalllDuration(long value) { + removeDelegationTokenCall.add(value); + } + + public void addUpdateDelegationTokenCallDuration(long value) { + updateDelegationTokenCall.add(value); + } + + public void addUpdateAMRMTokenCallDuration(long value) { + updateAMRMTokenCall.add(value); + } + + public void addStoreReservationAllocationCallDuration(long value) { + storeReservationAllocationCall.add(value); + } + + public void addRemoveReservationAllocationCallDuration(long value) { + storeReservationAllocationCall.add(value); + } + @VisibleForTesting public boolean hasUpdateThreadRunChanged() { return updateThreadRun.changed();