diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java index 6b660f7..ffe90c1 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java @@ -955,13 +955,24 @@ private static void addDeprecatedKeys() { public static final int DEFAULT_NM_WEBAPP_HTTPS_PORT = 8044; public static final String DEFAULT_NM_WEBAPP_HTTPS_ADDRESS = "0.0.0.0:" + DEFAULT_NM_WEBAPP_HTTPS_PORT; - + + /** How often to monitor resource in a node.*/ + public final static String NM_RESOURCE_MON_INTERVAL_MS = + NM_PREFIX + "resource-monitor.interval-ms"; + public final static int DEFAULT_NM_RESOURCE_MON_INTERVAL_MS = 3000; + /** How often to monitor containers.*/ + @Deprecated public final static String NM_CONTAINER_MON_INTERVAL_MS = NM_PREFIX + "container-monitor.interval-ms"; + @Deprecated public final static int DEFAULT_NM_CONTAINER_MON_INTERVAL_MS = 3000; + /** Class that calculates current resource utilization.*/ + public static final String NM_RESOURCE_MON_CALCULATOR = + NM_PREFIX + "resource-calculator.class"; /** Class that calculates containers current resource utilization.*/ + @Deprecated public static final String NM_CONTAINER_MON_RESOURCE_CALCULATOR = NM_PREFIX + "container-monitor.resource-calculator.class"; /** Class that calculates process tree resource utilization.*/ diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeResourceMonitor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeResourceMonitor.java index be13d22..6c5a15a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeResourceMonitor.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeResourceMonitor.java @@ -19,7 +19,15 @@ package org.apache.hadoop.yarn.server.nodemanager; import org.apache.hadoop.service.Service; +import org.apache.hadoop.yarn.server.api.records.ResourceUtilization; +/** + * Interface for monitoring the resources of a node. + */ public interface NodeResourceMonitor extends Service { - + /** + * Get the resource utilization of the node. + * @return resource utilization of the node. + */ + public ResourceUtilization getUtilization(); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeResourceMonitorImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeResourceMonitorImpl.java index ea82546..4471c00 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeResourceMonitorImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeResourceMonitorImpl.java @@ -18,13 +18,153 @@ package org.apache.hadoop.yarn.server.nodemanager; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.service.AbstractService; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.server.api.records.ResourceUtilization; +import org.apache.hadoop.yarn.util.ResourceCalculatorPlugin; +/** + * Implementation of the node resource monitor. It periodically tracks the + * resource utilization of the node and reports it to the NM. + */ public class NodeResourceMonitorImpl extends AbstractService implements NodeResourceMonitor { + /** Logging infrastructure. */ + final static Log LOG = LogFactory + .getLog(NodeResourceMonitorImpl.class); + + /** Interval to monitor the node resource utilization. */ + private long monitoringInterval; + /** Thread to monitor the node resource utilization. */ + private MonitoringThread monitoringThread; + + /** Resource calculator. */ + private ResourceCalculatorPlugin resourceCalculatorPlugin; + + /** Current resource utilization of the node. */ + private ResourceUtilization nodeUtilization; + + /** + * Initialize the node resource monitor. + */ public NodeResourceMonitorImpl() { super(NodeResourceMonitorImpl.class.getName()); + + this.monitoringThread = new MonitoringThread(); + } + + /** + * Initialize the service with the proper parameters. + */ + @Override + protected void serviceInit(Configuration conf) throws Exception { + this.monitoringInterval = + conf.getLong(YarnConfiguration.NM_RESOURCE_MON_INTERVAL_MS, + YarnConfiguration.DEFAULT_NM_RESOURCE_MON_INTERVAL_MS); + + Class clazz = + conf.getClass(YarnConfiguration.NM_RESOURCE_MON_CALCULATOR, null, + ResourceCalculatorPlugin.class); + + this.resourceCalculatorPlugin = + ResourceCalculatorPlugin.getResourceCalculatorPlugin(clazz, conf); + + LOG.info(" Using ResourceCalculatorPlugin : " + + this.resourceCalculatorPlugin); } + /** + * Check if we should be monitoring. + * @return true if we can monitor the node resource utilization. + */ + private boolean isEnabled() { + if (resourceCalculatorPlugin == null) { + LOG.info("ResourceCalculatorPlugin is unavailable on this system. " + + this.getClass().getName() + " is disabled."); + return false; + } + return true; + } + + /** + * Start the thread that does the node resource utilization monitoring. + */ + @Override + protected void serviceStart() throws Exception { + if (this.isEnabled()) { + this.monitoringThread.start(); + } + super.serviceStart(); + } + + /** + * Stop the thread that does the node resource utilization monitoring. + */ + @Override + protected void serviceStop() throws Exception { + if (this.isEnabled()) { + this.monitoringThread.interrupt(); + try { + this.monitoringThread.join(10 * 1000); + } catch (InterruptedException e) { + LOG.info("Could not wait for the node resource monitoring thread"); + } + } + super.serviceStop(); + } + + /** + * Thread that monitors the resource utilization of this node. + */ + private class MonitoringThread extends Thread { + /** + * Initialize the node resource monitoring thread. + */ + public MonitoringThread() { + super("Node Resource Monitor"); + this.setDaemon(true); + } + + /** + * Periodically monitor the resource utilization of the node. + */ + @Override + public void run() { + while (true) { + // Get node utilization and save it into the health status + long pmem = resourceCalculatorPlugin.getPhysicalMemorySize() - + resourceCalculatorPlugin.getAvailablePhysicalMemorySize(); + long vmem = + resourceCalculatorPlugin.getVirtualMemorySize() + - resourceCalculatorPlugin.getAvailableVirtualMemorySize(); + float cpu = resourceCalculatorPlugin.getCpuUsage(); + nodeUtilization = + ResourceUtilization.newInstance( + (int) (pmem >> 20), // B -> MB + (int) (vmem >> 20), // B -> MB + cpu); // 1 CPU at 100% is 1 + + try { + Thread.sleep(monitoringInterval); + } catch (InterruptedException e) { + LOG.warn(NodeResourceMonitorImpl.class.getName() + + " is interrupted. Exiting."); + break; + } + } + } + } + + /** + * Get the resource utilization of the node. + * @return resource utilization of the node. + */ + @Override + public ResourceUtilization getUtilization() { + return this.nodeUtilization; + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java index 57d1bad..8929ed1 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java @@ -99,11 +99,11 @@ public ContainersMonitorImpl(ContainerExecutor exec, @Override protected void serviceInit(Configuration conf) throws Exception { this.monitoringInterval = - conf.getLong(YarnConfiguration.NM_CONTAINER_MON_INTERVAL_MS, - YarnConfiguration.DEFAULT_NM_CONTAINER_MON_INTERVAL_MS); + conf.getLong(YarnConfiguration.NM_RESOURCE_MON_INTERVAL_MS, + YarnConfiguration.DEFAULT_NM_RESOURCE_MON_INTERVAL_MS); Class clazz = - conf.getClass(YarnConfiguration.NM_CONTAINER_MON_RESOURCE_CALCULATOR, null, + conf.getClass(YarnConfiguration.NM_RESOURCE_MON_CALCULATOR, null, ResourceCalculatorPlugin.class); this.resourceCalculatorPlugin = ResourceCalculatorPlugin.getResourceCalculatorPlugin(clazz, conf); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeResourceMonitor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeResourceMonitor.java new file mode 100644 index 0000000..3c2c386 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeResourceMonitor.java @@ -0,0 +1,35 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.server.nodemanager; + +import org.apache.hadoop.fs.UnsupportedFileSystemException; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.BaseContainerManagerTest; + +import org.junit.Test; + +public class TestNodeResourceMonitor extends BaseContainerManagerTest { + public TestNodeResourceMonitor() throws UnsupportedFileSystemException { + super(); + } + + @Test + public void testNodeResourceMonitor() { + NodeResourceMonitor nrm = new NodeResourceMonitorImpl(); + } +}