.../nodemanager/containermanager/ContainerManagerImpl.java | 2 ++ .../containermanager/application/ApplicationImpl.java | 3 +++ .../yarn/server/nodemanager/metrics/NodeManagerMetrics.java | 10 ++++++++++ .../containermanager/TestContainerManagerRecovery.java | 9 ++++----- .../server/nodemanager/metrics/TestNodeManagerMetrics.java | 12 ++++++++---- 5 files changed, 27 insertions(+), 9 deletions(-) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java index 5eb36ba558e..ee1a5bf2852 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java @@ -442,6 +442,7 @@ private void recoverApplication(ContainerManagerApplicationProto p) ApplicationImpl app = new ApplicationImpl(dispatcher, p.getUser(), fc, appId, creds, context, p.getAppLogAggregationInitedTime()); context.getApplications().put(appId, app); + metrics.runningApplication(); app.handle(new ApplicationInitEvent(appId, acls, logAggregationContext)); } @@ -1137,6 +1138,7 @@ protected void startContainerInternal( applicationID, credentials, context); if (context.getApplications().putIfAbsent(applicationID, application) == null) { + metrics.runningApplication(); LOG.info("Creating a new application reference for app " + applicationID); LogAggregationContext logAggregationContext = diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/application/ApplicationImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/application/ApplicationImpl.java index 8fe9651045c..d42097430cc 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/application/ApplicationImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/application/ApplicationImpl.java @@ -623,6 +623,9 @@ public void transition(ApplicationImpl app, ApplicationEvent event) { public void transition(ApplicationImpl app, ApplicationEvent event) { ApplicationId appId = event.getApplicationID(); app.context.getApplications().remove(appId); + if (null != app.context.getNodeManagerMetrics()) { + app.context.getNodeManagerMetrics().endRunningApplication(); + } app.aclsManager.removeApplication(appId); try { app.context.getNMStateStore().removeApplication(appId); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/metrics/NodeManagerMetrics.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/metrics/NodeManagerMetrics.java index 848b9445289..a469f653ebc 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/metrics/NodeManagerMetrics.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/metrics/NodeManagerMetrics.java @@ -100,6 +100,8 @@ MutableGaugeFloat nodeCpuUtilization; @Metric("Current GPU utilization") MutableGaugeFloat nodeGpuUtilization; + @Metric("Current running apps") + MutableGaugeInt applicationsRunning; @Metric("Missed localization requests in bytes") MutableCounterLong localizedCacheMissBytes; @@ -187,6 +189,14 @@ public void endReInitingContainer() { containersReIniting.decr(); } + public void runningApplication() { + applicationsRunning.incr(); + } + + public void endRunningApplication() { + applicationsRunning.decr(); + } + public void pausedContainer() { containersPaused.incr(); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManagerRecovery.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManagerRecovery.java index c67ae86f95e..9f21d646bd7 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManagerRecovery.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManagerRecovery.java @@ -85,7 +85,6 @@ import org.apache.hadoop.yarn.server.nodemanager.Context; import org.apache.hadoop.yarn.server.nodemanager.DeletionService; import org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService; -import org.apache.hadoop.yarn.server.nodemanager.health.NodeHealthCheckerService; import org.apache.hadoop.yarn.server.nodemanager.NodeManager.NMContext; import org.apache.hadoop.yarn.server.nodemanager.NodeStatusUpdater; import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application; @@ -105,7 +104,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitor; import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitorImpl; import org.apache.hadoop.yarn.server.nodemanager.containermanager.scheduler.ContainerScheduler; - +import org.apache.hadoop.yarn.server.nodemanager.health.NodeHealthCheckerService; import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics; import org.apache.hadoop.yarn.server.nodemanager.metrics.TestNodeManagerMetrics; import org.apache.hadoop.yarn.server.nodemanager.recovery.NMMemoryStateStoreService; @@ -438,7 +437,7 @@ public void testNodeManagerMetricsRecovery() throws Exception { org.apache.hadoop.yarn.server.nodemanager .containermanager.container.ContainerState.RUNNING); TestNodeManagerMetrics.checkMetrics(1, 0, 0, 0, 0, - 1, 1, 1, 9, 1, 7, 0F); + 1, 1, 1, 9, 1, 7, 0F, 1); // restart and verify metrics could be recovered cm.stop(); @@ -446,7 +445,7 @@ public void testNodeManagerMetricsRecovery() throws Exception { metrics = NodeManagerMetrics.create(); metrics.addResource(Resource.newInstance(10240, 8)); TestNodeManagerMetrics.checkMetrics(0, 0, 0, 0, 0, 0, - 0, 0, 10, 0, 8, 0F); + 0, 0, 10, 0, 8, 0F, 1); context = createContext(conf, stateStore); cm = createContainerManager(context, delSrvc); cm.init(conf); @@ -455,7 +454,7 @@ public void testNodeManagerMetricsRecovery() throws Exception { app = context.getApplications().get(appId); assertNotNull(app); TestNodeManagerMetrics.checkMetrics(1, 0, 0, 0, 0, - 1, 1, 1, 9, 1, 7, 0F); + 1, 1, 1, 9, 1, 7, 0F, 1); cm.stop(); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/metrics/TestNodeManagerMetrics.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/metrics/TestNodeManagerMetrics.java index 37454747c92..d612b2617f1 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/metrics/TestNodeManagerMetrics.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/metrics/TestNodeManagerMetrics.java @@ -102,13 +102,17 @@ public void testReferenceOfSingletonJvmMetrics() { // Set node gpu utilization metrics.setNodeGpuUtilization(35.5F); + + // ApplicationsRunning expected to be 1 + metrics.runningApplication(); + metrics.runningApplication(); + metrics.endRunningApplication(); // availableGB is expected to be floored, // while allocatedGB is expected to be ceiled. // allocatedGB: 3.75GB allocated memory is shown as 4GB // availableGB: 4.25GB available memory is shown as 4GB - checkMetrics(10, 1, 1, 1, 1, - 1, 4, 7, 4, 13, 3, 35.5F); + checkMetrics(10, 1, 1, 1, 1, 1, 4, 7, 4, 13, 3, 35.5F, 1); // Update resource and check available resource again metrics.addResource(total); @@ -120,7 +124,7 @@ public void testReferenceOfSingletonJvmMetrics() { public static void checkMetrics(int launched, int completed, int failed, int killed, int initing, int running, int allocatedGB, int allocatedContainers, int availableGB, int allocatedVCores, - int availableVCores, Float nodeGpuUtilization) { + int availableVCores, Float nodeGpuUtilization, int applicationsRunning) { MetricsRecordBuilder rb = getMetrics("NodeManagerMetrics"); assertCounter("ContainersLaunched", launched, rb); assertCounter("ContainersCompleted", completed, rb); @@ -134,6 +138,6 @@ public static void checkMetrics(int launched, int completed, int failed, assertGauge("AvailableGB", availableGB, rb); assertGauge("AvailableVCores",availableVCores, rb); assertGauge("NodeGpuUtilization", nodeGpuUtilization, rb); - + assertGauge("ApplicationsRunning", applicationsRunning, rb); } }