.../nodemanager/containermanager/ContainerManagerImpl.java | 2 ++ .../containermanager/application/ApplicationImpl.java | 1 + .../yarn/server/nodemanager/metrics/NodeManagerMetrics.java | 12 +++++++++++- .../containermanager/TestContainerManagerRecovery.java | 6 +++--- .../server/nodemanager/metrics/TestNodeManagerMetrics.java | 11 ++++++++--- 5 files changed, 25 insertions(+), 7 deletions(-) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java index 5eb36ba558e..668b210fea4 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java @@ -442,6 +442,7 @@ private void recoverApplication(ContainerManagerApplicationProto p) ApplicationImpl app = new ApplicationImpl(dispatcher, p.getUser(), fc, appId, creds, context, p.getAppLogAggregationInitedTime()); context.getApplications().put(appId, app); + metrics.runningApplication(); app.handle(new ApplicationInitEvent(appId, acls, logAggregationContext)); } @@ -1137,6 +1138,7 @@ protected void startContainerInternal( applicationID, credentials, context); if (context.getApplications().putIfAbsent(applicationID, application) == null) { + metrics.runningApplication(); LOG.info("Creating a new application reference for app " + applicationID); LogAggregationContext logAggregationContext = diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/application/ApplicationImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/application/ApplicationImpl.java index 8fe9651045c..dfa8f410abf 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/application/ApplicationImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/application/ApplicationImpl.java @@ -623,6 +623,7 @@ public void transition(ApplicationImpl app, ApplicationEvent event) { public void transition(ApplicationImpl app, ApplicationEvent event) { ApplicationId appId = event.getApplicationID(); app.context.getApplications().remove(appId); + app.context.getNodeManagerMetrics().endRunningApplication(); app.aclsManager.removeApplication(appId); try { app.context.getNMStateStore().removeApplication(appId); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/metrics/NodeManagerMetrics.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/metrics/NodeManagerMetrics.java index abe45298168..501405a820e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/metrics/NodeManagerMetrics.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/metrics/NodeManagerMetrics.java @@ -98,7 +98,9 @@ MutableGaugeInt nodeUsedVMemGB; @Metric("Current CPU utilization") MutableGaugeFloat nodeCpuUtilization; - + @Metric("Current running apps") + MutableGaugeInt applicationsRunning; + @Metric("Missed localization requests in bytes") MutableCounterLong localizedCacheMissBytes; @Metric("Cached localization requests in bytes") @@ -185,6 +187,14 @@ public void endReInitingContainer() { containersReIniting.decr(); } + public void runningApplication() { + applicationsRunning.incr(); + } + + public void endRunningApplication() { + applicationsRunning.decr(); + } + public void pausedContainer() { containersPaused.incr(); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManagerRecovery.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManagerRecovery.java index 826cc02219b..22e2f8961dd 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManagerRecovery.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManagerRecovery.java @@ -437,14 +437,14 @@ public void testNodeManagerMetricsRecovery() throws Exception { waitForNMContainerState(cm, cid, org.apache.hadoop.yarn.server.nodemanager .containermanager.container.ContainerState.RUNNING); - TestNodeManagerMetrics.checkMetrics(1, 0, 0, 0, 0, 1, 1, 1, 9, 1, 7); + TestNodeManagerMetrics.checkMetrics(1, 0, 0, 0, 0, 1, 1, 1, 9, 1, 7, 1); // restart and verify metrics could be recovered cm.stop(); DefaultMetricsSystem.shutdown(); metrics = NodeManagerMetrics.create(); metrics.addResource(Resource.newInstance(10240, 8)); - TestNodeManagerMetrics.checkMetrics(0, 0, 0, 0, 0, 0, 0, 0, 10, 0, 8); + TestNodeManagerMetrics.checkMetrics(0, 0, 0, 0, 0, 0, 0, 0, 10, 0, 8, 1); context = createContext(conf, stateStore); cm = createContainerManager(context, delSrvc); cm.init(conf); @@ -452,7 +452,7 @@ public void testNodeManagerMetricsRecovery() throws Exception { assertEquals(1, context.getApplications().size()); app = context.getApplications().get(appId); assertNotNull(app); - TestNodeManagerMetrics.checkMetrics(1, 0, 0, 0, 0, 1, 1, 1, 9, 1, 7); + TestNodeManagerMetrics.checkMetrics(1, 0, 0, 0, 0, 1, 1, 1, 9, 1, 7, 1); cm.stop(); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/metrics/TestNodeManagerMetrics.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/metrics/TestNodeManagerMetrics.java index c5f80ba958a..ab5b01fdfec 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/metrics/TestNodeManagerMetrics.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/metrics/TestNodeManagerMetrics.java @@ -100,11 +100,16 @@ public void testReferenceOfSingletonJvmMetrics() { metrics.addContainerLaunchDuration(1); Assert.assertTrue(metrics.containerLaunchDuration.changed()); + // ApplicationsRunning expected to be 1 + metrics.runningApplication(); + metrics.runningApplication(); + metrics.endRunningApplication(); + // availableGB is expected to be floored, // while allocatedGB is expected to be ceiled. // allocatedGB: 3.75GB allocated memory is shown as 4GB // availableGB: 4.25GB available memory is shown as 4GB - checkMetrics(10, 1, 1, 1, 1, 1, 4, 7, 4, 13, 3); + checkMetrics(10, 1, 1, 1, 1, 1, 4, 7, 4, 13, 3, 1); // Update resource and check available resource again metrics.addResource(total); @@ -116,7 +121,7 @@ public void testReferenceOfSingletonJvmMetrics() { public static void checkMetrics(int launched, int completed, int failed, int killed, int initing, int running, int allocatedGB, int allocatedContainers, int availableGB, int allocatedVCores, - int availableVCores) { + int availableVCores, int applicationsRunning) { MetricsRecordBuilder rb = getMetrics("NodeManagerMetrics"); assertCounter("ContainersLaunched", launched, rb); assertCounter("ContainersCompleted", completed, rb); @@ -129,6 +134,6 @@ public static void checkMetrics(int launched, int completed, int failed, assertGauge("AllocatedContainers", allocatedContainers, rb); assertGauge("AvailableGB", availableGB, rb); assertGauge("AvailableVCores",availableVCores, rb); - + assertGauge("ApplicationsRunning", applicationsRunning, rb); } }