diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/QueueMetrics.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/QueueMetrics.java index 507b798a56280ed19bcdc6a2fcda254f5114dd37..18f973c4bd33dba39c04490b5b7c09efaa3008e8 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/QueueMetrics.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/QueueMetrics.java @@ -85,7 +85,7 @@ static final Splitter Q_SPLITTER = Splitter.on('.').omitEmptyStrings().trimResults(); - final MetricsRegistry registry; + protected final MetricsRegistry registry; final String queueName; final QueueMetrics parent; final MetricsSystem metricsSystem; diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplicationAttempt.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplicationAttempt.java index 84975b6b84e93b9f983159f5ad53c3f7a143660d..634108ed85522081b5aa82116f903344b9843e22 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplicationAttempt.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplicationAttempt.java @@ -17,6 +17,7 @@ */ package 
org.apache.hadoop.yarn.server.resourcemanager.scheduler; +import java.util.concurrent.atomic.AtomicLong; import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; @@ -93,6 +94,8 @@ private boolean unmanagedAM = true; private boolean amRunning = false; private LogAggregationContext logAggregationContext; + private final AtomicLong allocationRequestStart = new AtomicLong(0); + private final AtomicLong firstContainerAllocation = new AtomicLong(0); protected List newlyAllocatedContainers = new ArrayList(); @@ -246,6 +249,41 @@ public synchronized int getReReservations(Priority priority) { return reReservations.count(priority); } + /** + * Records the time of this attempt's first allocation request. The value + * is stored only while the field still holds 0; later calls are ignored. + * + * @param value timestamp of the allocation request + */ + public void setAllocationRequestStart(long value) { + allocationRequestStart.compareAndSet(0, value); + } + + /** + * @return the stored allocation request start time, or 0 if none was set + */ + public long getAllocationRequestStart() { + return allocationRequestStart.get(); + } + + /** + * Records the time this attempt first received a container. The value is + * stored only while the field still holds 0. + * + * @param value timestamp of the container allocation + * @return true if this call stored the value, false if one was already set + */ + public boolean setContainerFirstAllocation(long value) { + return firstContainerAllocation.compareAndSet(0, value); + } + + /** + * @return the stored first container allocation time, or 0 if none was set + */ + public long getContainerFirstAllocation() { + return firstContainerAllocation.get(); + } + /** * Get total current reservations. 
* Used only by unit tests diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSQueueMetrics.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSQueueMetrics.java index 82c422b820781486d225a8b07154b0121056a0a4..e310ee5ff7acfd6ac52fee25a58b99a00610015c 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSQueueMetrics.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSQueueMetrics.java @@ -23,8 +23,11 @@ import org.apache.hadoop.metrics2.annotation.Metric; import org.apache.hadoop.metrics2.annotation.Metrics; import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; +import org.apache.hadoop.metrics2.lib.MetricsRegistry; import org.apache.hadoop.metrics2.lib.MutableGaugeInt; +import org.apache.hadoop.metrics2.lib.MutableRate; import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Queue; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics; @@ -39,6 +42,9 @@ @Metric("Minimum share of CPU in vcores") MutableGaugeInt minShareVCores; @Metric("Maximum share of memory in MB") MutableGaugeInt maxShareMB; @Metric("Maximum share of CPU in vcores") MutableGaugeInt maxShareVCores; + + @Metric("Container First Attempt Allocation Delay") MutableRate + containerFirstAttemptAllocationDelay; FSQueueMetrics(MetricsSystem ms, String queueName, Queue parent, boolean enableUserMetrics, Configuration conf) { @@ -96,6 +102,10 @@ public int getMaxShareMB() { public int 
getMaxShareVirtualCores() { return maxShareVCores.value(); } + + public void addContainerFirstAttemptAllocationDelay(long latency) { + containerFirstAttemptAllocationDelay.add(latency); + } public synchronized static FSQueueMetrics forQueue(String queueName, Queue parent, diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java index 3fc3019a8b58ac8979e544f4876919c9de0a43e7..65854b85ed6246c48a32f316ae62a97a256319bf 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java @@ -66,6 +66,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Allocation; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplication; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt.ContainersAndNMTokensAllocation; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerUtils; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAddedSchedulerEvent; @@ -879,6 +880,10 @@ public Allocation allocate(ApplicationAttemptId appAttemptId, SchedulerUtils.normalizeRequests(ask, new DominantResourceCalculator(), clusterResource, minimumAllocation, maximumAllocation, incrAllocation); + // Record 
container allocation start time + long start = clock.getTime(); + application.setAllocationRequestStart(start); + // Set amResource for this app if (!application.getUnmanagedAM() && ask.size() == 1 && application.getLiveContainers().isEmpty()) { @@ -921,6 +926,19 @@ public Allocation allocate(ApplicationAttemptId appAttemptId, application.updateBlacklist(blacklistAdditions, blacklistRemovals); ContainersAndNMTokensAllocation allocation = application.pullNewlyAllocatedContainersAndNMTokens(); + + // Sample the delay between this attempt's first allocation request and + // its first granted container. setContainerFirstAllocation() stores the + // time only while none is recorded, so the metric is updated once per + // attempt (assuming the clock never reports 0, the "unset" marker). + if (!allocation.getContainerList().isEmpty()) { + long firstAllocationTime = clock.getTime(); + if (application.setContainerFirstAllocation(firstAllocationTime)) { + rootMetrics.addContainerFirstAttemptAllocationDelay( + firstAllocationTime - application.getAllocationRequestStart()); + } + } + return new Allocation(allocation.getContainerList(), application.getHeadroom(), preemptionContainerIds, null, null, allocation.getNMTokenList());