diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/Context.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/Context.java index a1c474f958d..6774cf0e5d2 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/Context.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/Context.java @@ -35,6 +35,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.ResourcePluginManager; +import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics; import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService; import org.apache.hadoop.yarn.server.scheduler.OpportunisticContainerAllocator; import org.apache.hadoop.yarn.server.nodemanager.security.NMContainerTokenSecretManager; @@ -125,4 +126,6 @@ ContainerStateTransitionListener getContainerStateTransitionListener(); ResourcePluginManager getResourcePluginManager(); + + NodeManagerMetrics getNodeManagerMetrics(); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java index bddc7c34ee7..b55edcf44d3 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java @@ -205,7 +205,7 @@ protected NodeLabelsProvider createNodeLabelsProvider(Configuration conf) } protected NodeResourceMonitor createNodeResourceMonitor() { - return new NodeResourceMonitorImpl(); + return new NodeResourceMonitorImpl(context); } protected ContainerManagerImpl createContainerManager(Context context, @@ -241,7 +241,7 @@ protected NMContext createNMContext( ContainerStateTransitionListener.class); NMContext nmContext = new NMContext(containerTokenSecretManager, nmTokenSecretManager, dirsHandler, aclsManager, stateStore, - isDistSchedulerEnabled, conf); + metrics, isDistSchedulerEnabled, conf); DefaultContainerStateListener defaultListener = new DefaultContainerStateListener(); nmContext.setContainerStateTransitionListener(defaultListener); @@ -574,6 +574,8 @@ protected void reregisterCollectors() { private Configuration conf = null; + private NodeManagerMetrics metrics = null; + protected final ConcurrentMap applications = new ConcurrentHashMap(); @@ -621,8 +623,8 @@ protected void reregisterCollectors() { public NMContext(NMContainerTokenSecretManager containerTokenSecretManager, NMTokenSecretManagerInNM nmTokenSecretManager, LocalDirsHandlerService dirsHandler, ApplicationACLsManager aclsManager, - NMStateStoreService stateStore, boolean isDistSchedulingEnabled, - Configuration conf) { + NMStateStoreService stateStore, NodeManagerMetrics nmMetrics, + boolean isDistSchedulingEnabled, Configuration conf) { if (YarnConfiguration.timelineServiceV2Enabled(conf)) { this.registeringCollectors = new ConcurrentHashMap<>(); this.knownCollectors = new ConcurrentHashMap<>(); @@ -639,6 +641,17 @@ public NMContext(NMContainerTokenSecretManager containerTokenSecretManager, LogAggregationReport>(); this.isDistSchedulingEnabled = isDistSchedulingEnabled; this.conf = conf; + this.metrics = nmMetrics; + } + + public NMContext(NMContainerTokenSecretManager containerTokenSecretManager, + NMTokenSecretManagerInNM nmTokenSecretManager, + LocalDirsHandlerService dirsHandler, ApplicationACLsManager aclsManager, + NMStateStoreService stateStore, + boolean isDistSchedulingEnabled, Configuration conf) { + this(containerTokenSecretManager, nmTokenSecretManager, + dirsHandler, aclsManager, stateStore, null, + isDistSchedulingEnabled, conf); } /** @@ -823,6 +836,11 @@ public ResourcePluginManager getResourcePluginManager() { return resourcePluginManager; } + @Override + public NodeManagerMetrics getNodeManagerMetrics() { + return metrics; + } + public void setResourcePluginManager( ResourcePluginManager resourcePluginManager) { this.resourcePluginManager = resourcePluginManager; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeResourceMonitorImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeResourceMonitorImpl.java index 8b96ba58749..850ae694cc0 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeResourceMonitorImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeResourceMonitorImpl.java @@ -22,6 +22,7 @@ import org.apache.hadoop.service.AbstractService; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.api.records.ResourceUtilization; +import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics; import org.apache.hadoop.yarn.util.ResourceCalculatorPlugin; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -48,12 +49,14 @@ /** Current resource utilization of the node. */ private ResourceUtilization nodeUtilization; + private Context nmContext; + /** * Initialize the node resource monitor. */ - public NodeResourceMonitorImpl() { + public NodeResourceMonitorImpl(Context context) { super(NodeResourceMonitorImpl.class.getName()); - + this.nmContext = context; this.monitoringThread = new MonitoringThread(); } @@ -149,6 +152,13 @@ public void run() { (int) (vmem >> 20), // B -> MB vcores); // Used Virtual Cores + // Publish the node utilization metrics to node manager + // metrics system. + NodeManagerMetrics nmMetrics = nmContext.getNodeManagerMetrics(); + nmMetrics.setNodeUsedMemGB(nodeUtilization.getPhysicalMemory()); + nmMetrics.setNodeUsedVMemGB(nodeUtilization.getVirtualMemory()); + nmMetrics.setNodeCpuUtilization(nodeUtilization.getCPU()); + try { Thread.sleep(monitoringInterval); } catch (InterruptedException e) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java index 2b99cc7254b..52e716ac5ac 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java @@ -20,6 +20,7 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; +import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.classification.InterfaceAudience.Private; @@ -488,6 +489,16 @@ public void run() { // Save the aggregated utilization of the containers setContainersUtilization(trackedContainersUtilization); + // Publish the container utilization metrics to node manager + // metrics system. + NodeManagerMetrics nmMetrics = context.getNodeManagerMetrics(); + nmMetrics.setContainerUsedMemGB(trackedContainersUtilization + .getPhysicalMemory()); + nmMetrics.setContainerUsedVMemGB(trackedContainersUtilization + .getVirtualMemory()); + nmMetrics.setContainerCpuUtilization(trackedContainersUtilization + .getCPU()); + try { Thread.sleep(monitoringInterval); } catch (InterruptedException e) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/metrics/NodeManagerMetrics.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/metrics/NodeManagerMetrics.java index a59bb5c6098..f0abfd4ff75 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/metrics/NodeManagerMetrics.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/metrics/NodeManagerMetrics.java @@ -24,6 +24,7 @@ import org.apache.hadoop.metrics2.lib.MutableCounterInt; import org.apache.hadoop.metrics2.lib.MutableGaugeInt; import org.apache.hadoop.metrics2.lib.MutableGaugeLong; +import org.apache.hadoop.metrics2.lib.MutableGaugeFloat; import org.apache.hadoop.metrics2.lib.MutableRate; import org.apache.hadoop.metrics2.source.JvmMetrics; import org.apache.hadoop.yarn.api.records.Resource; @@ -77,6 +78,18 @@ MutableGaugeLong publicBytesDeleted; @Metric("# of bytes deleted from the private local cache") MutableGaugeLong privateBytesDeleted; + @Metric("Current used physical memory by all containers in GB") + MutableGaugeInt containerUsedMemGB; + @Metric("Current used virtual memory by all containers in GB") + MutableGaugeInt containerUsedVMemGB; + @Metric("Aggregated CPU utilization of all containers") + MutableGaugeFloat containerCpuUtilization; + @Metric("Current used memory by this node in GB") + MutableGaugeInt nodeUsedMemGB; + @Metric("Current used virtual memory by this node in GB") + MutableGaugeInt nodeUsedVMemGB; + @Metric("Current CPU utilization") + MutableGaugeFloat nodeCpuUtilization; // CHECKSTYLE:ON:VisibilityModifier @@ -316,4 +329,52 @@ public long getPublicBytesDeleted() { public long getPrivateBytesDeleted() { return this.privateBytesDeleted.value(); } + + public void setContainerUsedMemGB(long usedMem) { + this.containerUsedMemGB.set((int)Math.floor(usedMem/1024d)); + } + + public int getContainerUsedMemGB() { + return this.containerUsedMemGB.value(); + } + + public void setContainerUsedVMemGB(long usedVMem) { + this.containerUsedVMemGB.set((int)Math.floor(usedVMem/1024d)); + } + + public int getContainerUsedVMemGB() { + return this.containerUsedVMemGB.value(); + } + + public void setContainerCpuUtilization(float cpuUtilization) { + this.containerCpuUtilization.set(cpuUtilization); + } + + public float getContainerCpuUtilization() { + return this.containerCpuUtilization.value(); + } + + public void setNodeUsedMemGB(long totalUsedMemGB) { + this.nodeUsedMemGB.set((int)Math.floor(totalUsedMemGB/1024d)); + } + + public int getNodeUsedMemGB() { + return nodeUsedMemGB.value(); + } + + public void setNodeUsedVMemGB(long totalUsedVMemGB) { + this.nodeUsedVMemGB.set((int)Math.floor(totalUsedVMemGB/1024d)); + } + + public int getNodeUsedVMemGB() { + return nodeUsedVMemGB.value(); + } + + public float getNodeCpuUtilization() { + return nodeCpuUtilization.value(); + } + + public void setNodeCpuUtilization(float cpuUtilization) { + this.nodeCpuUtilization.set(cpuUtilization); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeResourceMonitor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeResourceMonitor.java index 3c2c3860ee7..9cf829a6fe5 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeResourceMonitor.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeResourceMonitor.java @@ -18,18 +18,46 @@ package org.apache.hadoop.yarn.server.nodemanager; +import java.io.IOException; + import org.apache.hadoop.fs.UnsupportedFileSystemException; -import org.apache.hadoop.yarn.server.nodemanager.containermanager.BaseContainerManagerTest; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.server.nodemanager.containermanager + .BaseContainerManagerTest; +import org.apache.hadoop.yarn.server.nodemanager.containermanager + .monitor.MockResourceCalculatorPlugin; +import org.junit.Before; import org.junit.Test; +import org.mockito.Mockito; +import static org.mockito.Mockito.atLeastOnce; +import static org.mockito.Mockito.spy; public class TestNodeResourceMonitor extends BaseContainerManagerTest { public TestNodeResourceMonitor() throws UnsupportedFileSystemException { super(); } + @Before + public void setup() throws IOException { + // Enable node resource monitor with a mocked resource calculator. + conf.set( + YarnConfiguration.NM_MON_RESOURCE_CALCULATOR, + MockResourceCalculatorPlugin.class.getCanonicalName()); + super.setup(); + } + @Test - public void testNodeResourceMonitor() { - NodeResourceMonitor nrm = new NodeResourceMonitorImpl(); + public void testMetricsUpdate() throws Exception { + // This test doesn't verify the correction of those metrics + // updated by the monitor, it only verifies that the monitor + // do publish these info to node manager metrics system in + // each monitor interval. + Context spyContext = spy(context); + NodeResourceMonitor nrm = new NodeResourceMonitorImpl(spyContext); + nrm.init(conf); + nrm.start(); + Thread.sleep(500); + Mockito.verify(spyContext, atLeastOnce()).getNodeManagerMetrics(); } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/amrmproxy/BaseAMRMProxyTest.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/amrmproxy/BaseAMRMProxyTest.java index da1d047d957..c81d2e733ed 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/amrmproxy/BaseAMRMProxyTest.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/amrmproxy/BaseAMRMProxyTest.java @@ -55,6 +55,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.ResourcePluginManager; +import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics; import org.apache.hadoop.yarn.server.nodemanager.recovery.NMMemoryStateStoreService; import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService; import org.apache.hadoop.yarn.server.scheduler.OpportunisticContainerAllocator; @@ -787,5 +788,10 @@ public ContainerExecutor getContainerExecutor() { public ResourcePluginManager getResourcePluginManager() { return null; } + + @Override + public NodeManagerMetrics getNodeManagerMetrics() { + return null; + } } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/BaseContainerManagerTest.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/BaseContainerManagerTest.java index fc9e6c417ce..ea44b6da91b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/BaseContainerManagerTest.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/BaseContainerManagerTest.java @@ -121,8 +121,8 @@ public BaseContainerManagerTest() throws UnsupportedFileSystemException { protected Configuration conf = new YarnConfiguration(); protected Context context = new NMContext(new NMContainerTokenSecretManager( conf), new NMTokenSecretManagerInNM(), null, - new ApplicationACLsManager(conf), new NMNullStateStoreService(), false, - conf) { + new ApplicationACLsManager(conf), new NMNullStateStoreService(), + metrics, false, conf) { public int getHttpPort() { return HTTP_PORT; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/MockResourceCalculatorPlugin.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/MockResourceCalculatorPlugin.java index 0dc5c5b3ce7..64d117a5a5f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/MockResourceCalculatorPlugin.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/MockResourceCalculatorPlugin.java @@ -70,4 +70,9 @@ public long getCumulativeCpuTime() { public float getCpuUsagePercentage() { return 0; } + + @Override + public float getNumVCoresUsed() { + return 0; + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainersMonitor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainersMonitor.java index 6f7fadfa059..6f93678019c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainersMonitor.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainersMonitor.java @@ -22,6 +22,8 @@ import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.atLeastOnce; import java.io.BufferedReader; import java.io.File; @@ -67,7 +69,6 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.BaseContainerManagerTest; import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerSignalContext; import org.apache.hadoop.yarn.server.utils.BuilderUtils; -import org.apache.hadoop.yarn.util.ConverterUtils; import org.apache.hadoop.yarn.util.LinuxResourceCalculatorPlugin; import org.apache.hadoop.yarn.util.ProcfsBasedProcessTree; import org.apache.hadoop.yarn.util.ResourceCalculatorPlugin; @@ -75,6 +76,7 @@ import org.junit.Assert; import org.junit.Before; import org.junit.Test; +import org.mockito.Mockito; import org.slf4j.LoggerFactory; public class TestContainersMonitor extends BaseContainerManagerTest { @@ -95,6 +97,22 @@ public void setup() throws IOException { super.setup(); } + @Test + public void testMetricsUpdate() throws Exception { + // This test doesn't verify the correction of those metrics + // updated by the monitor, it only verifies that the monitor + // do publish these info to node manager metrics system in + // each monitor interval. + Context spyContext = spy(context); + ContainersMonitorImpl cm = + new ContainersMonitorImpl(mock(ContainerExecutor.class), + mock(AsyncDispatcher.class), spyContext); + cm.init(getConfForCM(false, true, 1024, 2.1f)); + cm.start(); + Thread.sleep(500); + Mockito.verify(spyContext, atLeastOnce()).getNodeManagerMetrics(); + } + /** * Test to verify the check for whether a process tree is over limit or not. *