diff --git hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/ResourceSchedulerWrapper.java hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/ResourceSchedulerWrapper.java index a65f776..50b9d0c 100644 --- hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/ResourceSchedulerWrapper.java +++ hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/ResourceSchedulerWrapper.java @@ -940,4 +940,9 @@ protected void completedContainer(RMContainer rmContainer, ContainerStatus containerStatus, RMContainerEventType event) { // do nothing } + + @Override + public QueueMetrics getQueueMetrics(ApplicationId appId) { + return scheduler.getQueueMetrics(appId); + } } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java index b5a6237..a4a73ad 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java @@ -89,6 +89,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerImpl; import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeFinishedContainersPulledByAMEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Allocation; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.YarnScheduler; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptAddedSchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptRemovedSchedulerEvent; @@ -125,6 +126,7 @@ private final EventHandler eventHandler; private final YarnScheduler scheduler; private final ApplicationMasterService masterService; + private final QueueMetrics queueMetrics; private final ReadLock readLock; private final WriteLock writeLock; @@ -152,8 +154,9 @@ private String proxiedTrackingUrl = "N/A"; private long startTime = 0; private long finishTime = 0; + private long launchAMStartTime = 0; - // Set to null initially. Will eventually get set + // Set to null initially. Will eventually get set // if an RMAppAttemptUnregistrationEvent occurs private FinalApplicationStatus finalStatus = null; private final StringBuilder diagnostics = new StringBuilder(); @@ -447,6 +450,8 @@ public RMAppAttemptImpl(ApplicationAttemptId appAttemptId, new RMAppAttemptMetrics(applicationAttemptId, rmContext); this.amReq = amReq; + this.queueMetrics = + scheduler.getQueueMetrics(appAttemptId.getApplicationId()); } @Override @@ -1255,6 +1260,15 @@ public void transition(RMAppAttemptImpl appAttempt, @Override public void transition(RMAppAttemptImpl appAttempt, RMAppAttemptEvent event) { + if (event.getType() == RMAppAttemptEventType.LAUNCHED && + appAttempt.queueMetrics != null) { + String user = appAttempt.rmContext.getRMApps().get( + appAttempt.getAppAttemptId().getApplicationId()).getUser(); + long delay = System.currentTimeMillis() - + appAttempt.launchAMStartTime; + appAttempt.queueMetrics.addAMLaunchDelay(user, delay); + appAttempt.launchAMStartTime = System.currentTimeMillis(); + } // Register with AMLivelinessMonitor appAttempt.attemptLaunched(); @@ -1339,7 +1353,13 @@ public void transition(RMAppAttemptImpl appAttempt, @Override public void transition(RMAppAttemptImpl appAttempt, RMAppAttemptEvent event) { - + if (appAttempt.queueMetrics != null) { + String user = appAttempt.rmContext.getRMApps().get( + appAttempt.getAppAttemptId().getApplicationId()).getUser(); + long delay = System.currentTimeMillis() - + appAttempt.launchAMStartTime; + appAttempt.queueMetrics.addAMRegisterDelay(user, delay); + } RMAppAttemptRegistrationEvent registrationEvent = (RMAppAttemptRegistrationEvent) event; appAttempt.host = registrationEvent.getHost(); @@ -1816,6 +1836,7 @@ public YarnApplicationAttemptState createApplicationAttemptState() { } private void launchAttempt(){ + launchAMStartTime = System.currentTimeMillis(); // Send event to launch the AM Container eventHandler.handle(new AMLauncherEvent(AMLauncherEventType.LAUNCH, this)); } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/QueueMetrics.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/QueueMetrics.java index 507b798..cf0260b 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/QueueMetrics.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/QueueMetrics.java @@ -38,6 +38,7 @@ import org.apache.hadoop.metrics2.lib.MutableCounterInt; import org.apache.hadoop.metrics2.lib.MutableCounterLong; import org.apache.hadoop.metrics2.lib.MutableGaugeInt; +import org.apache.hadoop.metrics2.lib.MutableRate; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.conf.YarnConfiguration; @@ -74,6 +75,8 @@ @Metric("# of reserved containers") MutableGaugeInt reservedContainers; @Metric("# of active users") MutableGaugeInt activeUsers; @Metric("# of active applications") MutableGaugeInt activeApplications; + @Metric("AM container launch delay") MutableRate aMLaunchDelay; + @Metric("AM register delay") MutableRate aMRegisterDelay; private final MutableGaugeInt[] runningTime; private TimeBucketMetrics runBuckets; @@ -433,6 +436,29 @@ public void unreserveResource(String user, Resource res) { } } + public void addAMLaunchDelay(String user, long delay) { + addAMdelay(user, delay, true); + } + + public void addAMRegisterDelay(String user, long delay) { + addAMdelay(user, delay, false); + } + + private void addAMdelay(String user, long delay, boolean isLaunchDelay) { + if (isLaunchDelay) { + aMLaunchDelay.add(delay); + } else { + aMRegisterDelay.add(delay); + } + QueueMetrics userMetrics = getUserMetrics(user); + if (userMetrics != null) { + userMetrics.addAMdelay(user, delay, isLaunchDelay); + } + if (parent != null) { + parent.addAMdelay(user, delay, isLaunchDelay); + } + } + public void incrActiveUsers() { activeUsers.incr(); } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/YarnScheduler.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/YarnScheduler.java index d1b5275..f9d1405 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/YarnScheduler.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/YarnScheduler.java @@ -44,6 +44,7 @@ import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.QueueEntitlement; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEvent; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics; import org.apache.hadoop.yarn.proto.YarnServiceProtos.SchedulerResourceTypes; /** @@ -272,4 +273,10 @@ void setEntitlement(String queue, QueueEntitlement entitlement) * @return an EnumSet containing the resource types */ public EnumSet getSchedulingResourceTypes(); + + /** + * Get metrics for the queue the given application assigned + * @return the QueueMetrics + */ + public QueueMetrics getQueueMetrics(ApplicationId appId); } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java index 9332228..c8f811f 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java @@ -1531,4 +1531,14 @@ private String handleMoveToPlanQueue(String targetQueueName) { } return ret; } + + @Override + public QueueMetrics getQueueMetrics(ApplicationId appId) { + SchedulerApplication application = + applications.get(appId); + if (application == null) { + return null; + } + return application.getQueue().getMetrics(); + } } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java index 3fc3019..7db1de1 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java @@ -1539,4 +1539,14 @@ public synchronized void updateNodeResource(RMNode nm, return EnumSet .of(SchedulerResourceTypes.MEMORY, SchedulerResourceTypes.CPU); } + + @Override + public QueueMetrics getQueueMetrics(ApplicationId appId) { + SchedulerApplication application = + applications.get(appId); + if (application == null) { + return null; + } + return application.getQueue().getMetrics(); + } } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java index 532edc7..a54e9ec 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java @@ -961,4 +961,9 @@ public synchronized boolean checkAccess(UserGroupInformation callerUGI, public Resource getUsedResource() { return usedResource; } + + @Override + public QueueMetrics getQueueMetrics(ApplicationId appId) { + return metrics; + } } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestQueueMetrics.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestQueueMetrics.java index 8ad71d2..d9fbfc3 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestQueueMetrics.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestQueueMetrics.java @@ -70,6 +70,8 @@ public void setUp() { MetricsSource userSource = userSource(ms, queueName, user); checkApps(queueSource, 1, 0, 0, 0, 0, 0, true); metrics.submitAppAttempt(user); + metrics.addAMLaunchDelay(user, 1); + metrics.addAMRegisterDelay(user, 1); checkApps(queueSource, 1, 1, 0, 0, 0, 0, true); metrics.setAvailableResourcesToQueue(Resources.createResource(100*GB, 100)); @@ -109,6 +111,8 @@ public void testQueueAppMetricsForMultipleFailures() { MetricsSource userSource = userSource(ms, queueName, user); checkApps(queueSource, 1, 0, 0, 0, 0, 0, true); metrics.submitAppAttempt(user); + metrics.addAMLaunchDelay(user, 1); + metrics.addAMRegisterDelay(user, 1); checkApps(queueSource, 1, 1, 0, 0, 0, 0, true); metrics.runAppAttempt(app.getApplicationId(), user); @@ -166,6 +170,8 @@ public void testQueueAppMetricsForMultipleFailures() { checkApps(userSource, 1, 0, 0, 0, 0, 0, true); metrics.submitAppAttempt(user); + metrics.addAMLaunchDelay(user, 1); + metrics.addAMRegisterDelay(user, 1); checkApps(queueSource, 1, 1, 0, 0, 0, 0, true); checkApps(userSource, 1, 1, 0, 0, 0, 0, true); @@ -223,6 +229,8 @@ public void testQueueAppMetricsForMultipleFailures() { checkApps(parentUserSource, 1, 0, 0, 0, 0, 0, true); metrics.submitAppAttempt(user); + metrics.addAMLaunchDelay(user, 1); + metrics.addAMRegisterDelay(user, 1); checkApps(queueSource, 1, 1, 0, 0, 0, 0, true); checkApps(parentQueueSource, 1, 1, 0, 0, 0, 0, true); checkApps(userSource, 1, 1, 0, 0, 0, 0, true);