Index: hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/RouterMetrics.java IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8 =================================================================== diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/RouterMetrics.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/RouterMetrics.java --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/RouterMetrics.java (revision 877ef944f96eb986d0816f8a0491981f31858289) +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/RouterMetrics.java (revision 3134c90201cf703368b93e8166afe5a80d8bf696) @@ -51,6 +51,8 @@ private MutableGaugeInt numAppsFailedRetrieved; @Metric("# of multiple applications reports failed to be retrieved") private MutableGaugeInt numMultipleAppsFailedRetrieved; + @Metric("# of cluster metrics failed to be retrieved") + private MutableGaugeInt numClusterMetricsFailedRetrieved; // Aggregate metrics are shared, and don't have to be looked up per call @Metric("Total number of successful Submitted apps and latency(ms)") @@ -64,6 +66,9 @@ @Metric("Total number of successful Retrieved multiple apps reports and " + "latency(ms)") private MutableRate totalSucceededMultipleAppsRetrieved; + @Metric("Total number of successful Retrieved cluster metrics and " + + "latency(ms)") + private MutableRate totalSucceededClusterMetricsRetrieved; /** * Provide quantile counters for all latencies. 
@@ -73,6 +78,7 @@ private MutableQuantiles killApplicationLatency; private MutableQuantiles getApplicationReportLatency; private MutableQuantiles getApplicationsReportLatency; + private MutableQuantiles getClusterMetricsLatency; private static volatile RouterMetrics INSTANCE = null; private static MetricsRegistry registry; @@ -92,6 +98,8 @@ getApplicationsReportLatency = registry.newQuantiles("getApplicationsReportLatency", "latency of get applications report", "ops", "latency", 10); + getClusterMetricsLatency = registry.newQuantiles("getClusterMetricsLatency", + "latency of get cluster metrics", "ops", "latency", 10); } public static RouterMetrics getMetrics() { @@ -213,6 +221,11 @@ getApplicationsReportLatency.add(duration); } + public void successedClusterMetricsRetrieved(long duration) { + totalSucceededClusterMetricsRetrieved.add(duration); + getClusterMetricsLatency.add(duration); + } + public void incrAppsFailedCreated() { numAppsFailedCreated.incr(); } @@ -233,4 +246,8 @@ numMultipleAppsFailedRetrieved.incr(); } + public void incrClusterNodesFailedRetrieved() { + numClusterMetricsFailedRetrieved.incr(); + } + } \ No newline at end of file Index: hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/clientrm/FederationClientInterceptor.java IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8 =================================================================== diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/clientrm/FederationClientInterceptor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/clientrm/FederationClientInterceptor.java --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/clientrm/FederationClientInterceptor.java (revision 877ef944f96eb986d0816f8a0491981f31858289) +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/clientrm/FederationClientInterceptor.java (revision 3134c90201cf703368b93e8166afe5a80d8bf696) @@ -608,15 +608,29 @@ @Override public GetClusterMetricsResponse getClusterMetrics( GetClusterMetricsRequest request) throws YarnException, IOException { - Map subclusters = - federationFacade.getSubClusters(true); - ClientMethod remoteMethod = new ClientMethod("getClusterMetrics", - new Class[] {GetClusterMetricsRequest.class}, new Object[] {request}); - ArrayList clusterList = new ArrayList<>(subclusters.keySet()); - Map clusterMetrics = - invokeConcurrent(clusterList, remoteMethod, - GetClusterMetricsResponse.class); - return RouterYarnClientUtils.merge(clusterMetrics.values()); + if (request == null) { + RouterServerUtil.logAndThrowException("Missing getClusterMetrics request.", null); + } + GetClusterMetricsResponse response = null; + long startTime = clock.getTime(); + try { + Map subclusters = + federationFacade.getSubClusters(true); + ClientMethod remoteMethod = new ClientMethod("getClusterMetrics", + new Class[] {GetClusterMetricsRequest.class}, new Object[] {request}); + ArrayList clusterList = new ArrayList<>(subclusters.keySet()); + Map clusterMetrics = + invokeConcurrent(clusterList, remoteMethod, + GetClusterMetricsResponse.class); + response = RouterYarnClientUtils.merge(clusterMetrics.values()); + } catch (YarnException | IOException ex) { + routerMetrics.incrClusterNodesFailedRetrieved(); + LOG.error("No response when get cluster metrics.", ex); + throw ex; + } + long stopTime = clock.getTime(); + routerMetrics.successedClusterMetricsRetrieved(stopTime - startTime); + return response; } Map 
invokeConcurrent(ArrayList clusterIds,