diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplicationAttempt.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplicationAttempt.java
index d5b6ce669873646173fb4501594e10db677b4698..ea80907d700699603172945355e8f2658ca76595 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplicationAttempt.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplicationAttempt.java
@@ -93,6 +93,8 @@
   private boolean unmanagedAM = true;
   private boolean amRunning = false;
   private LogAggregationContext logAggregationContext;
+  private volatile long firstAllocationRequestSentTime = 0;
+  private volatile long firstContainerAllocatedTime = 0;
 
   protected List newlyAllocatedContainers =
       new ArrayList();
@@ -247,6 +249,52 @@ public synchronized int getReReservations(Priority priority) {
   }
 
   /**
+   * Records the time of the first allocation request if none is recorded yet.
+   * The check and the write are separate steps, so the caller must hold a
+   * lock that serializes calls for this attempt.
+   *
+   * @param value time to record; 0 is also the "not yet set" marker
+   * @return true if value was stored, false if a time was already recorded
+   */
+  public boolean trySetFirstAllocationRequestSentTime(long value) {
+    if (firstAllocationRequestSentTime != 0) {
+      return false;
+    }
+    firstAllocationRequestSentTime = value;
+    return true;
+  }
+
+  /**
+   * @return the recorded first allocation request time, or 0 if none is set
+   */
+  public long getFirstAllocationRequestSentTime() {
+    return firstAllocationRequestSentTime;
+  }
+
+  /**
+   * Records the time of the first container allocation if none is recorded
+   * yet. The check and the write are separate steps, so the caller must hold
+   * a lock that serializes calls for this attempt.
+   *
+   * @param value time to record; 0 is also the "not yet set" marker
+   * @return true if value was stored, false if a time was already recorded
+   */
+  public boolean trySetFirstContainerAllocatedTime(long value) {
+    if (firstContainerAllocatedTime != 0) {
+      return false;
+    }
+    firstContainerAllocatedTime = value;
+    return true;
+  }
+
+  /**
+   * @return the recorded first container allocation time, or 0 if none is set
+   */
+  public long getFirstContainerAllocatedTime() {
+    return firstContainerAllocatedTime;
+  }
+
+  /**
    * Get total current reservations.
    * Used only by unit tests
    * @return total current reservations
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSQueueMetrics.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSQueueMetrics.java
index 82c422b820781486d225a8b07154b0121056a0a4..054930da8f929d3760970681a4d049359cfcabf6 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSQueueMetrics.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSQueueMetrics.java
@@ -24,6 +24,7 @@
 import org.apache.hadoop.metrics2.annotation.Metrics;
 import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
 import org.apache.hadoop.metrics2.lib.MutableGaugeInt;
+import org.apache.hadoop.metrics2.lib.MutableRate;
 import org.apache.hadoop.yarn.api.records.Resource;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Queue;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics;
@@ -39,6 +40,9 @@
   @Metric("Minimum share of CPU in vcores") MutableGaugeInt minShareVCores;
   @Metric("Maximum share of memory in MB") MutableGaugeInt maxShareMB;
   @Metric("Maximum share of CPU in vcores") MutableGaugeInt maxShareVCores;
+
+  @Metric("Container First Attempt Allocation Delay") MutableRate
+      containerFirstAttemptAllocationDelay;
 
   FSQueueMetrics(MetricsSystem ms, String queueName, Queue parent,
       boolean enableUserMetrics, Configuration conf) {
@@ -96,6 +100,15 @@ public int getMaxShareMB() {
   public int getMaxShareVirtualCores() {
     return maxShareVCores.value();
   }
+
+  /**
+   * Adds one sample to the container first-attempt allocation delay metric.
+   *
+   * @param latency delay sample to record
+   */
+  public void addContainerFirstAttemptAllocationDelay(long latency) {
+    containerFirstAttemptAllocationDelay.add(latency);
+  }
 
   public synchronized static FSQueueMetrics forQueue(String queueName,
       Queue parent,
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java
index 1ace6040b25bb512575ac8863b780ddddbac505a..9e83f715b70eb0d460fd52cae2b5c994da25fd6d 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java
@@ -68,6 +68,7 @@
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Allocation;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplication;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt.ContainersAndNMTokensAllocation;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerUtils;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.QueueEntitlement;
@@ -896,6 +897,13 @@ public Allocation allocate(ApplicationAttemptId appAttemptId,
         clusterResource, minimumAllocation, getMaximumResourceCapability(),
         incrAllocation);
 
+    // Record the time of the first allocate call for this attempt; later
+    // calls do not overwrite it.
+    long start = getClock().getTime();
+    synchronized (application) {
+      application.trySetFirstAllocationRequestSentTime(start);
+    }
+
     // Set amResource for this app
     if (!application.getUnmanagedAM() && ask.size() == 1
         && application.getLiveContainers().isEmpty()) {
@@ -938,6 +946,32 @@ public Allocation allocate(ApplicationAttemptId appAttemptId,
       application.updateBlacklist(blacklistAdditions, blacklistRemovals);
       ContainersAndNMTokensAllocation allocation =
           application.pullNewlyAllocatedContainersAndNMTokens();
+
+      // Record the delay between the first allocate call and the first
+      // non-empty allocation, once per attempt.
+      // NOTE(review): trySetFirstContainerAllocatedTime expects the caller to
+      // hold a lock; confirm this runs inside synchronized (application).
+      if (!allocation.getContainerList().isEmpty()
+          && application.trySetFirstContainerAllocatedTime(
+              getClock().getTime())) {
+        long timediff = application.getFirstContainerAllocatedTime()
+            - application.getFirstAllocationRequestSentTime();
+        // Skip non-positive delays for every queue, not only for the root.
+        if (timediff > 0) {
+          rootMetrics.addContainerFirstAttemptAllocationDelay(timediff);
+          RMApp rmApp =
+              rmContext.getRMApps().get(appAttemptId.getApplicationId());
+          String metricsQueue = (rmApp == null) ? null : rmApp.getQueue();
+          FSQueue queue =
+              (metricsQueue == null) ? null : queueMgr.getQueue(metricsQueue);
+          // Guard against a failed lookup instead of dereferencing null.
+          if (queue != null) {
+            queue.getMetrics()
+                .addContainerFirstAttemptAllocationDelay(timediff);
+          }
+        }
+      }
+
       return new Allocation(allocation.getContainerList(),
         application.getHeadroom(), preemptionContainerIds, null, null,
         allocation.getNMTokenList());