Add AM container launch delay and AM register delay rates to ClusterMetrics.

Review notes for this revision of the patch:
  * The launch timestamp and the launched timestamp are kept in two fields
    (launchAMStartTime / launchAMEndTime) instead of overwriting one field.
  * The clock is read once in the LAUNCHED branch, so the launch delay ends
    exactly where the register delay starts.
  * The register delay is skipped when no launched timestamp was recorded;
    otherwise the sample would be the raw epoch time in milliseconds.
  * Whitespace-only churn unrelated to the feature was dropped from the hunks.
  * The test uses assertFalse(x) instead of assertTrue(!x).

How to apply: paths carry no a/ b/ prefix, so use -p0. Context-line
indentation was re-derived from a whitespace-collapsed copy of the patch, so
apply with whitespace tolerance (git apply --ignore-whitespace, or patch -l).
The post-image hashes on the "index" lines are those of the earlier revision.

diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClusterMetrics.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClusterMetrics.java
index 942ec81..5fa36bc 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClusterMetrics.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClusterMetrics.java
@@ -30,6 +30,7 @@
 import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
 import org.apache.hadoop.metrics2.lib.MetricsRegistry;
 import org.apache.hadoop.metrics2.lib.MutableGaugeInt;
+import org.apache.hadoop.metrics2.lib.MutableRate;
 import com.google.common.annotations.VisibleForTesting;
 
 @InterfaceAudience.Private
@@ -43,7 +44,9 @@
   @Metric("# of lost NMs") MutableGaugeInt numLostNMs;
   @Metric("# of unhealthy NMs") MutableGaugeInt numUnhealthyNMs;
   @Metric("# of Rebooted NMs") MutableGaugeInt numRebootedNMs;
+  @Metric("AM container launch delay") MutableRate aMLaunchDelay;
+  @Metric("AM register delay") MutableRate aMRegisterDelay;
 
   private static final MetricsInfo RECORD_INFO = info("ClusterMetrics",
   "Metrics for the Yarn Cluster");
 
@@ -147,4 +150,14 @@ public void decrNumActiveNodes() {
     numActiveNMs.decr();
   }
 
+  /** Adds one AM container launch delay sample, in milliseconds. */
+  public void addAMLaunchDelay(long delay) {
+    aMLaunchDelay.add(delay);
+  }
+
+  /** Adds one AM register delay sample, in milliseconds. */
+  public void addAMRegisterDelay(long delay) {
+    aMRegisterDelay.add(delay);
+  }
+
 }
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java
index 0d7e334..87f67e9 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java
@@ -65,6 +65,7 @@
 import org.apache.hadoop.yarn.security.AMRMTokenIdentifier;
 import org.apache.hadoop.yarn.security.client.ClientToAMTokenIdentifier;
 import org.apache.hadoop.yarn.server.resourcemanager.ApplicationMasterService;
+import org.apache.hadoop.yarn.server.resourcemanager.ClusterMetrics;
 import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
 import org.apache.hadoop.yarn.server.resourcemanager.RMServerUtils;
 import org.apache.hadoop.yarn.server.resourcemanager.amlauncher.AMLauncherEvent;
@@ -152,8 +153,12 @@
   private String proxiedTrackingUrl = "N/A";
   private long startTime = 0;
   private long finishTime = 0;
+  // Wall-clock ms when the AM container launch was requested; 0 if never set.
+  private long launchAMStartTime = 0;
+  // Wall-clock ms when the LAUNCHED event was processed; 0 if never set.
+  private long launchAMEndTime = 0;
 
   // Set to null initially. Will eventually get set
   // if an RMAppAttemptUnregistrationEvent occurs
   private FinalApplicationStatus finalStatus = null;
   private final StringBuilder diagnostics = new StringBuilder();
@@ -1261,6 +1266,13 @@ public void transition(RMAppAttemptImpl appAttempt,
     @Override
     public void transition(RMAppAttemptImpl appAttempt,
                             RMAppAttemptEvent event) {
+      if (event.getType() == RMAppAttemptEventType.LAUNCHED) {
+        // Read the clock once: the launch delay ends exactly where the
+        // register delay starts.
+        appAttempt.launchAMEndTime = System.currentTimeMillis();
+        ClusterMetrics.getMetrics().addAMLaunchDelay(
+            appAttempt.launchAMEndTime - appAttempt.launchAMStartTime);
+      }
       // Register with AMLivelinessMonitor
       appAttempt.attemptLaunched();
 
@@ -1345,7 +1357,14 @@ public void transition(RMAppAttemptImpl appAttempt,
     @Override
     public void transition(RMAppAttemptImpl appAttempt,
         RMAppAttemptEvent event) {
+      // Register delay runs from the LAUNCHED event to AM registration. Skip
+      // the sample when no launched timestamp was recorded (still 0): the
+      // difference would be the raw epoch time and would skew the average.
+      if (appAttempt.launchAMEndTime > 0) {
+        ClusterMetrics.getMetrics().addAMRegisterDelay(
+            System.currentTimeMillis() - appAttempt.launchAMEndTime);
+      }
 
       RMAppAttemptRegistrationEvent registrationEvent
           = (RMAppAttemptRegistrationEvent) event;
       appAttempt.host = registrationEvent.getHost();
@@ -1822,6 +1841,7 @@ public YarnApplicationAttemptState createApplicationAttemptState() {
   }
 
   private void launchAttempt(){
+    launchAMStartTime = System.currentTimeMillis();
     // Send event to launch the AM Container
     eventHandler.handle(new AMLauncherEvent(AMLauncherEventType.LAUNCH, this));
   }
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceTrackerService.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceTrackerService.java
index 7c12848..8239574 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceTrackerService.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceTrackerService.java
@@ -116,6 +116,13 @@ public void testDecommissionWithIncludeHosts() throws Exception {
     ClusterMetrics metrics = ClusterMetrics.getMetrics();
     assert(metrics != null);
     int metricCount = metrics.getNumDecommisionedNMs();
+    // The AM delay rates must report a change only after a sample is added.
+    Assert.assertFalse(metrics.aMLaunchDelay.changed());
+    Assert.assertFalse(metrics.aMRegisterDelay.changed());
+    metrics.addAMLaunchDelay(1);
+    metrics.addAMRegisterDelay(1);
+    Assert.assertTrue(metrics.aMLaunchDelay.changed());
+    Assert.assertTrue(metrics.aMRegisterDelay.changed());
 
     NodeHeartbeatResponse nodeHeartbeat = nm1.nodeHeartbeat(true);
     Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat.getNodeAction()));