diff --git hadoop-yarn-project/hadoop-yarn/dev-support/findbugs-exclude.xml hadoop-yarn-project/hadoop-yarn/dev-support/findbugs-exclude.xml
index 45aa868830c..e6dcefb2099 100644
--- hadoop-yarn-project/hadoop-yarn/dev-support/findbugs-exclude.xml
+++ hadoop-yarn-project/hadoop-yarn/dev-support/findbugs-exclude.xml
@@ -633,4 +633,12 @@
+
+
+
+
+
+
+
+
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/Resource.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/Resource.java
index 530d8c91ae5..724fd7f14de 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/Resource.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/Resource.java
@@ -18,10 +18,15 @@
package org.apache.hadoop.yarn.api.records;
+import java.util.ArrayList;
import java.util.Arrays;
+import java.util.List;
import java.util.Map;
+import com.google.common.collect.Lists;
+import org.apache.commons.lang.ArrayUtils;
import org.apache.commons.lang.NotImplementedException;
+import org.apache.curator.shaded.com.google.common.reflect.ClassPath;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceAudience.Private;
import org.apache.hadoop.classification.InterfaceAudience.Public;
@@ -232,6 +237,22 @@ public void setMemorySize(long memory) {
return resources;
}
+  /**
+   * Get a list of resource information; this will be used by JAXB.
+   * @return a copy of the list of resources.
+   */
+ @InterfaceAudience.Private
+ @InterfaceStability.Unstable
+ public List getAllResourcesListCopy() {
+ List list = new ArrayList<>();
+ for (ResourceInformation i : resources) {
+ ResourceInformation ri = new ResourceInformation();
+ ResourceInformation.copy(i, ri);
+ list.add(ri);
+ }
+ return list;
+ }
+
/**
* Get ResourceInformation for a specified resource.
*
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ResourceInformation.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ResourceInformation.java
index e8280ba9cac..a4c1f6c6de3 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ResourceInformation.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ResourceInformation.java
@@ -18,10 +18,13 @@
package org.apache.hadoop.yarn.api.records;
+import com.google.common.collect.ImmutableMap;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.yarn.api.protocolrecords.ResourceTypes;
import org.apache.hadoop.yarn.util.UnitsConversionUtil;
+import java.util.Map;
+
/**
* Class to encapsulate information about a Resource - the name of the resource,
* the units(milli, micro, etc), the type(countable), and the value.
@@ -35,13 +38,20 @@
private long minimumAllocation;
private long maximumAllocation;
+ // Known resource types
public static final String MEMORY_URI = "memory-mb";
public static final String VCORES_URI = "vcores";
+ public static final String GPU_URI = "yarn.io/gpu";
public static final ResourceInformation MEMORY_MB =
ResourceInformation.newInstance(MEMORY_URI, "Mi");
public static final ResourceInformation VCORES =
ResourceInformation.newInstance(VCORES_URI);
+ public static final ResourceInformation GPUS =
+ ResourceInformation.newInstance(GPU_URI);
+
+ public static final Map MANDATORY_RESOURCES =
+ ImmutableMap.of(MEMORY_URI, MEMORY_MB, VCORES_URI, VCORES, GPU_URI, GPUS);
/**
* Get the name for the resource.
@@ -215,6 +225,12 @@ public static ResourceInformation newInstance(String name, String units,
Long.MAX_VALUE);
}
+ public static ResourceInformation newInstance(String name, String units,
+ long minRes, long maxRes) {
+ return ResourceInformation.newInstance(name, units, 0L,
+ ResourceTypes.COUNTABLE, minRes, maxRes);
+ }
+
public static ResourceInformation newInstance(String name, long value) {
return ResourceInformation
.newInstance(name, "", value, ResourceTypes.COUNTABLE, 0L,
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
index 6c65b197981..4bde72009ec 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
@@ -1430,6 +1430,39 @@ public static boolean isAclEnabled(Configuration conf) {
public static final String NM_NETWORK_RESOURCE_OUTBOUND_BANDWIDTH_YARN_MBIT =
NM_NETWORK_RESOURCE_PREFIX + "outbound-bandwidth-yarn-mbit";
+  /**
+   * Prefix for computation resources. Examples of computation resources are
+   * GPU / FPGA / TPU, etc.
+   */
+ @Private
+ public static final String NM_RESOURCE_PLUGINS =
+ NM_PREFIX + "resource-plugins";
+
+ /**
+ * Prefix for gpu configurations. Work in progress: This configuration
+ * parameter may be changed/removed in the future.
+ */
+ @Private
+ public static final String NM_GPU_RESOURCE_PREFIX =
+ NM_RESOURCE_PLUGINS + ".gpu.";
+
+ @Private
+ public static final String NM_GPU_ALLOWED_DEVICES =
+ NM_GPU_RESOURCE_PREFIX + "allowed-gpu-devices";
+ @Private
+ public static final String AUTOMATICALLY_DISCOVER_GPU_DEVICES = "auto";
+
+  /**
+   * This setting controls where to find and how to invoke the GPU discovery
+   * executables (e.g. nvidia-smi).
+   */
+ @Private
+ public static final String NM_GPU_PATH_TO_EXEC =
+ NM_GPU_RESOURCE_PREFIX + "path-to-discovery-executables";
+
+ @Private
+ public static final String DEFAULT_NM_GPU_PATH_TO_EXEC = "";
+
+
/** NM Webapp address.**/
public static final String NM_WEBAPP_ADDRESS = NM_PREFIX + "webapp.address";
public static final int DEFAULT_NM_WEBAPP_PORT = 8042;
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/util/resource/ResourceUtils.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/util/resource/ResourceUtils.java
index b1d0b754c4f..6e8eb8115c1 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/util/resource/ResourceUtils.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/util/resource/ResourceUtils.java
@@ -47,6 +47,8 @@
import java.util.regex.Matcher;
import java.util.regex.Pattern;
+import static org.apache.hadoop.yarn.api.records.ResourceInformation.GPU_URI;
+
/**
* Helper class to read the resource-types to be supported by the system.
*/
@@ -89,33 +91,32 @@ private static void checkMandatoryResources(
*/
String key = "memory";
if (resourceInformationMap.containsKey(key)) {
- LOG.warn("Attempt to define resource '" + key +
- "', but it is not allowed.");
- throw new YarnRuntimeException("Attempt to re-define mandatory resource '"
- + key + "'.");
- }
-
- if (resourceInformationMap.containsKey(MEMORY)) {
- ResourceInformation memInfo = resourceInformationMap.get(MEMORY);
- String memUnits = ResourceInformation.MEMORY_MB.getUnits();
- ResourceTypes memType = ResourceInformation.MEMORY_MB.getResourceType();
- if (!memInfo.getUnits().equals(memUnits) || !memInfo.getResourceType()
- .equals(memType)) {
- throw new YarnRuntimeException(
- "Attempt to re-define mandatory resource 'memory-mb'. It can only"
- + " be of type 'COUNTABLE' and have units 'Mi'.");
- }
+ LOG.warn(
+ "Attempt to define resource '" + key + "', but it is not allowed.");
+ throw new YarnRuntimeException(
+ "Attempt to re-define mandatory resource '" + key + "'.");
}
- if (resourceInformationMap.containsKey(VCORES)) {
- ResourceInformation vcoreInfo = resourceInformationMap.get(VCORES);
- String vcoreUnits = ResourceInformation.VCORES.getUnits();
- ResourceTypes vcoreType = ResourceInformation.VCORES.getResourceType();
- if (!vcoreInfo.getUnits().equals(vcoreUnits) || !vcoreInfo
- .getResourceType().equals(vcoreType)) {
- throw new YarnRuntimeException(
- "Attempt to re-define mandatory resource 'vcores'. It can only be"
- + " of type 'COUNTABLE' and have units ''(no units).");
+ for (Map.Entry mandatoryResourceEntry :
+ ResourceInformation.MANDATORY_RESOURCES.entrySet()) {
+ key = mandatoryResourceEntry.getKey();
+ ResourceInformation mandatoryRI = mandatoryResourceEntry.getValue();
+
+ ResourceInformation newDefinedRI = resourceInformationMap.get(key);
+ if (newDefinedRI != null) {
+ String expectedUnit = mandatoryRI.getUnits();
+ ResourceTypes expectedType = mandatoryRI.getResourceType();
+ String actualUnit = newDefinedRI.getUnits();
+ ResourceTypes actualType = newDefinedRI.getResourceType();
+
+ if (!expectedUnit.equals(actualUnit) || !expectedType.equals(
+ actualType)) {
+ throw new YarnRuntimeException("Defined mandatory resource type="
+ + key + " inside resource-types.xml, however its type or "
+ + "unit is conflict to mandatory resource types, expected type="
+ + expectedType + ", unit=" + expectedUnit + "; actual type="
+ + actualType + " actual unit=" + actualUnit);
+ }
}
}
}
@@ -200,9 +201,23 @@ static void validateNameOfResourceNameAndThrowException(String resourceName)
}
}
- @VisibleForTesting
- static void initializeResourcesMap(Configuration conf) {
+  /**
+   * Get the maximum allocation from config. Note that this will NOT update
+   * any internal data structures.
+   * @param conf config
+   * @return maximum allocation
+   */
+ public static Resource fetchMaximumAllocationFromConfig(Configuration conf) {
+ Map resourceInformationMap =
+ getResourceInformationMapFromConfig(conf);
+ Resource ret = Resource.newInstance(0, 0);
+ for (ResourceInformation entry : resourceInformationMap.values()) {
+ ret.setResourceValue(entry.getName(), entry.getMaximumAllocation());
+ }
+ return ret;
+ }
+ private static Map getResourceInformationMapFromConfig(
+ Configuration conf) {
Map resourceInformationMap = new HashMap<>();
String[] resourceNames = conf.getStrings(YarnConfiguration.RESOURCE_TYPES);
@@ -248,6 +263,13 @@ static void initializeResourcesMap(Configuration conf) {
setAllocationForMandatoryResources(resourceInformationMap, conf);
+ return resourceInformationMap;
+ }
+
+ @VisibleForTesting
+ static void initializeResourcesMap(Configuration conf) {
+ Map resourceInformationMap =
+ getResourceInformationMapFromConfig(conf);
initializeResourcesFromResourceInformationMap(resourceInformationMap);
}
@@ -545,19 +567,8 @@ public static Resource getResourceTypesMinimumAllocation() {
public static Resource getResourceTypesMaximumAllocation() {
Resource ret = Resource.newInstance(0, 0);
for (ResourceInformation entry : resourceTypesArray) {
- String name = entry.getName();
- if (name.equals(ResourceInformation.MEMORY_MB.getName())) {
- ret.setMemorySize(entry.getMaximumAllocation());
- } else if (name.equals(ResourceInformation.VCORES.getName())) {
- Long tmp = entry.getMaximumAllocation();
- if (tmp > Integer.MAX_VALUE) {
- tmp = (long) Integer.MAX_VALUE;
- }
- ret.setVirtualCores(tmp.intValue());
- continue;
- } else {
- ret.setResourceValue(name, entry.getMaximumAllocation());
- }
+ ret.setResourceValue(entry.getName(),
+ entry.getMaximumAllocation());
}
return ret;
}
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
index 6d69a10032d..91935adce85 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
@@ -3442,6 +3442,45 @@
+
+
+      When yarn.nodemanager.resource-plugins.gpu.allowed-gpu-devices is set
+      to auto, YARN NodeManager needs to run the GPU discovery binary (now
+      only nvidia-smi is supported) to get GPU-related information.
+      When the value is empty (default), YARN NodeManager will try to locate
+      the discovery executable itself.
+      An example of the config value is: /usr/local/bin/nvidia-smi
+
+ yarn.nodemanager.resource-plugins.gpu.path-to-discovery-executables
+
+
+
+
+
+      Enable additional discovery/isolation of resources on the NodeManager,
+      split by comma. By default, this is empty. Acceptable values: { "yarn.io/gpu" }.
+
+ yarn.nodemanager.resource-plugins
+
+
+
+
+
+      Specify GPU devices which can be managed by YARN NodeManager, split by
+      comma. The number of GPU devices will be reported to the RM to make
+      scheduling decisions. Set to auto (default) to let YARN automatically
+      discover GPU resources from the system.
+      Manually specify GPU devices if auto-detection of GPU devices failed,
+      or if the admin only wants a subset of GPU devices managed by YARN. GPU
+      devices are identified by their minor device number. A common approach
+      to get the minor device numbers of GPUs is using "nvidia-smi -q" and
+      searching for the "Minor Number" output. An example of manual
+      specification is "0,1,2,4", which allows YARN NodeManager
+      to manage GPU devices with minor numbers 0/1/2/4.
+
+ yarn.nodemanager.resource-plugins.gpu.allowed-gpu-devices
+ auto
+
+
Provides an option for client to load supported resource types from RM
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/resource/TestResourceUtils.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/resource/TestResourceUtils.java
index 0e5e8a80d41..0ad029c67d3 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/resource/TestResourceUtils.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/resource/TestResourceUtils.java
@@ -53,6 +53,23 @@ public ResourceFileInformation(String name, int count) {
}
}
+ public static void addNewTypesToResources(String... resourceTypes) {
+ // Initialize resource map
+ Map riMap = new HashMap<>();
+
+ // Initialize mandatory resources
+ riMap.put(ResourceInformation.MEMORY_URI, ResourceInformation.MEMORY_MB);
+ riMap.put(ResourceInformation.VCORES_URI, ResourceInformation.VCORES);
+
+ for (String newResource : resourceTypes) {
+ riMap.put(newResource, ResourceInformation
+ .newInstance(newResource, "", 0, ResourceTypes.COUNTABLE, 0,
+ Integer.MAX_VALUE));
+ }
+
+ ResourceUtils.initializeResourcesFromResourceInformationMap(riMap);
+ }
+
@Before
public void setup() {
ResourceUtils.resetResourceTypes();
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerExecutor.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerExecutor.java
index 3b532c9b430..f43b1ee0105 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerExecutor.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerExecutor.java
@@ -113,9 +113,10 @@ public Configuration getConf() {
* Run the executor initialization steps.
* Verify that the necessary configs and permissions are in place.
*
+ * @param nmContext Context of NM
* @throws IOException if initialization fails
*/
- public abstract void init() throws IOException;
+ public abstract void init(Context nmContext) throws IOException;
/**
* This function localizes the JAR file on-demand.
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/Context.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/Context.java
index a2d00a4cc24..a1c474f958d 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/Context.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/Context.java
@@ -34,6 +34,7 @@
import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.ResourcePluginManager;
import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService;
import org.apache.hadoop.yarn.server.scheduler.OpportunisticContainerAllocator;
import org.apache.hadoop.yarn.server.nodemanager.security.NMContainerTokenSecretManager;
@@ -122,4 +123,6 @@
ContainerExecutor getContainerExecutor();
ContainerStateTransitionListener getContainerStateTransitionListener();
+
+ ResourcePluginManager getResourcePluginManager();
}
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DefaultContainerExecutor.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DefaultContainerExecutor.java
index ac88e8cd336..5772403567e 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DefaultContainerExecutor.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DefaultContainerExecutor.java
@@ -133,7 +133,7 @@ protected void setScriptExecutable(Path script, String owner)
}
@Override
- public void init() throws IOException {
+ public void init(Context nmContext) throws IOException {
// nothing to do or verify here
}
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java
index 64f3d58327e..da1989eaf5c 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java
@@ -20,6 +20,7 @@
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Optional;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandlerChain;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.conf.Configuration;
@@ -282,7 +283,7 @@ protected PrivilegedOperationExecutor getPrivilegedOperationExecutor() {
}
@Override
- public void init() throws IOException {
+ public void init(Context nmContext) throws IOException {
Configuration conf = super.getConf();
// Send command to executor which will just start up,
@@ -306,7 +307,7 @@ public void init() throws IOException {
try {
resourceHandlerChain = ResourceHandlerModule
- .getConfiguredResourceHandlerChain(conf);
+ .getConfiguredResourceHandlerChain(conf, nmContext);
if (LOG.isDebugEnabled()) {
LOG.debug("Resource handler chain enabled = " + (resourceHandlerChain
!= null));
@@ -871,4 +872,9 @@ public void mountCgroups(List cgroupKVs, String hierarchy)
e);
}
}
+
+ @VisibleForTesting
+ public ResourceHandler getResourceHandler() {
+ return resourceHandlerChain;
+ }
}
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java
index 44133dfd59d..54a235c8762 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java
@@ -18,23 +18,7 @@
package org.apache.hadoop.yarn.server.nodemanager;
-import java.io.IOException;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.concurrent.ConcurrentHashMap;
-import java.util.concurrent.ConcurrentLinkedQueue;
-import java.util.concurrent.ConcurrentMap;
-import java.util.concurrent.ConcurrentSkipListMap;
-import java.util.concurrent.atomic.AtomicBoolean;
-
-import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerEvent;
-import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerImpl;
-import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerState;
-import org.apache.hadoop.yarn.state.MultiStateTransitionListener;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
+import com.google.common.annotations.VisibleForTesting;
import org.apache.hadoop.classification.InterfaceAudience.Private;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
@@ -65,12 +49,16 @@
import org.apache.hadoop.yarn.server.api.protocolrecords.LogAggregationReport;
import org.apache.hadoop.yarn.server.api.records.AppCollectorData;
import org.apache.hadoop.yarn.server.api.records.NodeHealthStatus;
-import org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManager;
import org.apache.hadoop.yarn.server.nodemanager.collectormanager.NMCollectorService;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManager;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationState;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerEvent;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerImpl;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerState;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.ResourcePluginManager;
import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics;
import org.apache.hadoop.yarn.server.nodemanager.nodelabels.ConfigurationNodeLabelsProvider;
import org.apache.hadoop.yarn.server.nodemanager.nodelabels.NodeLabelsProvider;
@@ -78,14 +66,25 @@
import org.apache.hadoop.yarn.server.nodemanager.recovery.NMLeveldbStateStoreService;
import org.apache.hadoop.yarn.server.nodemanager.recovery.NMNullStateStoreService;
import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService;
-import org.apache.hadoop.yarn.server.scheduler.OpportunisticContainerAllocator;
import org.apache.hadoop.yarn.server.nodemanager.security.NMContainerTokenSecretManager;
import org.apache.hadoop.yarn.server.nodemanager.security.NMTokenSecretManagerInNM;
import org.apache.hadoop.yarn.server.nodemanager.timelineservice.NMTimelinePublisher;
import org.apache.hadoop.yarn.server.nodemanager.webapp.WebServer;
+import org.apache.hadoop.yarn.server.scheduler.OpportunisticContainerAllocator;
import org.apache.hadoop.yarn.server.security.ApplicationACLsManager;
+import org.apache.hadoop.yarn.state.MultiStateTransitionListener;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
-import com.google.common.annotations.VisibleForTesting;
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ConcurrentLinkedQueue;
+import java.util.concurrent.ConcurrentMap;
+import java.util.concurrent.ConcurrentSkipListMap;
+import java.util.concurrent.atomic.AtomicBoolean;
public class NodeManager extends CompositeService
implements EventHandler {
@@ -332,6 +331,18 @@ public static NodeHealthScriptRunner getNodeHealthScriptRunner(Configuration con
nmCheckintervalTime, scriptTimeout, scriptArgs);
}
+ @VisibleForTesting
+ protected ResourcePluginManager createResourcePluginManager() {
+ return new ResourcePluginManager();
+ }
+
+ @VisibleForTesting
+ protected ContainerExecutor createContainerExecutor(Configuration conf) {
+ return ReflectionUtils.newInstance(
+ conf.getClass(YarnConfiguration.NM_CONTAINER_EXECUTOR,
+ DefaultContainerExecutor.class, ContainerExecutor.class), conf);
+ }
+
@Override
protected void serviceInit(Configuration conf) throws Exception {
rmWorkPreservingRestartEnabled = conf.getBoolean(YarnConfiguration
@@ -357,11 +368,22 @@ protected void serviceInit(Configuration conf) throws Exception {
this.aclsManager = new ApplicationACLsManager(conf);
- ContainerExecutor exec = ReflectionUtils.newInstance(
- conf.getClass(YarnConfiguration.NM_CONTAINER_EXECUTOR,
- DefaultContainerExecutor.class, ContainerExecutor.class), conf);
+ this.dirsHandler = new LocalDirsHandlerService(metrics);
+
+ boolean isDistSchedulingEnabled =
+ conf.getBoolean(YarnConfiguration.DIST_SCHEDULING_ENABLED,
+ YarnConfiguration.DEFAULT_DIST_SCHEDULING_ENABLED);
+
+ this.context = createNMContext(containerTokenSecretManager,
+ nmTokenSecretManager, nmStore, isDistSchedulingEnabled, conf);
+
+ ResourcePluginManager pluginManager = createResourcePluginManager();
+ pluginManager.initialize(context);
+ ((NMContext)context).setResourcePluginManager(pluginManager);
+
+ ContainerExecutor exec = createContainerExecutor(conf);
try {
- exec.init();
+ exec.init(context);
} catch (IOException e) {
throw new YarnRuntimeException("Failed to initialize container executor", e);
}
@@ -371,19 +393,11 @@ protected void serviceInit(Configuration conf) throws Exception {
// NodeManager level dispatcher
this.dispatcher = createNMDispatcher();
- dirsHandler = new LocalDirsHandlerService(metrics);
nodeHealthChecker =
new NodeHealthCheckerService(
getNodeHealthScriptRunner(conf), dirsHandler);
addService(nodeHealthChecker);
- boolean isDistSchedulingEnabled =
- conf.getBoolean(YarnConfiguration.DIST_SCHEDULING_ENABLED,
- YarnConfiguration.DEFAULT_DIST_SCHEDULING_ENABLED);
-
- this.context = createNMContext(containerTokenSecretManager,
- nmTokenSecretManager, nmStore, isDistSchedulingEnabled, conf);
-
((NMContext)context).setContainerExecutor(exec);
@@ -457,6 +471,12 @@ protected void serviceStop() throws Exception {
try {
super.serviceStop();
DefaultMetricsSystem.shutdown();
+
+ // Cleanup ResourcePluginManager
+ ResourcePluginManager rpm = context.getResourcePluginManager();
+ if (rpm != null) {
+ rpm.cleanup();
+ }
} finally {
// YARN-3641: NM's services stop get failed shouldn't block the
// release of NMLevelDBStore.
@@ -604,6 +624,8 @@ protected void reregisterCollectors() {
private ContainerStateTransitionListener containerStateTransitionListener;
+ private ResourcePluginManager resourcePluginManager;
+
public NMContext(NMContainerTokenSecretManager containerTokenSecretManager,
NMTokenSecretManagerInNM nmTokenSecretManager,
LocalDirsHandlerService dirsHandler, ApplicationACLsManager aclsManager,
@@ -804,6 +826,15 @@ public void setContainerStateTransitionListener(
ContainerStateTransitionListener transitionListener) {
this.containerStateTransitionListener = transitionListener;
}
+
+ public ResourcePluginManager getResourcePluginManager() {
+ return resourcePluginManager;
+ }
+
+ public void setResourcePluginManager(
+ ResourcePluginManager resourcePluginManager) {
+ this.resourcePluginManager = resourcePluginManager;
+ }
}
/**
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java
index ee85042979e..91217ddb463 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java
@@ -33,6 +33,9 @@
import java.util.Random;
import java.util.Set;
import java.util.concurrent.ConcurrentLinkedQueue;
+
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.ResourcePlugin;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.ResourcePluginManager;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -178,14 +181,15 @@ protected void serviceInit(Configuration conf) throws Exception {
long memoryMb = totalResource.getMemorySize();
float vMemToPMem =
conf.getFloat(
- YarnConfiguration.NM_VMEM_PMEM_RATIO,
- YarnConfiguration.DEFAULT_NM_VMEM_PMEM_RATIO);
+ YarnConfiguration.NM_VMEM_PMEM_RATIO,
+ YarnConfiguration.DEFAULT_NM_VMEM_PMEM_RATIO);
long virtualMemoryMb = (long)Math.ceil(memoryMb * vMemToPMem);
-
int virtualCores = totalResource.getVirtualCores();
- LOG.info("Nodemanager resources: memory set to " + memoryMb + "MB.");
- LOG.info("Nodemanager resources: vcores set to " + virtualCores + ".");
- LOG.info("Nodemanager resources: " + totalResource);
+
+ // Update configured resources via plugins.
+ updateConfiguredResourcesViaPlugins(totalResource);
+
+ LOG.info("Nodemanager resources is set to: " + totalResource);
metrics.addResource(totalResource);
@@ -342,12 +346,27 @@ protected ResourceTracker getRMClient() throws IOException {
return ServerRMProxy.createRMProxy(conf, ResourceTracker.class);
}
+ private void updateConfiguredResourcesViaPlugins(
+ Resource configuredResource) throws YarnException {
+ ResourcePluginManager pluginManager = context.getResourcePluginManager();
+ if (pluginManager != null && pluginManager.getNameToPlugins() != null) {
+ // Update configured resource
+ for (ResourcePlugin resourcePlugin : pluginManager.getNameToPlugins()
+ .values()) {
+ if (resourcePlugin.getNodeResourceHandlerInstance() != null) {
+ resourcePlugin.getNodeResourceHandlerInstance()
+ .updateConfiguredResource(configuredResource);
+ }
+ }
+ }
+ }
+
@VisibleForTesting
protected void registerWithRM()
throws YarnException, IOException {
RegisterNodeManagerResponse regNMResponse;
Set nodeLabels = nodeLabelsHandler.getNodeLabelsForRegistration();
-
+
// Synchronize NM-RM registration with
// ContainerManagerImpl#increaseContainersResource and
// ContainerManagerImpl#startContainers to avoid race condition
@@ -358,6 +377,7 @@ protected void registerWithRM()
RegisterNodeManagerRequest.newInstance(nodeId, httpPort, totalResource,
nodeManagerVersionId, containerReports, getRunningApplications(),
nodeLabels, physicalResource);
+
if (containerReports != null) {
LOG.info("Registering with RM using containers :" + containerReports);
}
@@ -406,7 +426,7 @@ protected void registerWithRM()
if (masterKey != null) {
this.context.getContainerTokenSecretManager().setMasterKey(masterKey);
}
-
+
masterKey = regNMResponse.getNMTokenMasterKey();
if (masterKey != null) {
this.context.getNMTokenSecretManager().setMasterKey(masterKey);
@@ -738,7 +758,7 @@ public void removeVeryOldStoppedContainersFromCache() {
}
}
}
-
+
@Override
public long getRMIdentifier() {
return this.rmIdentifier;
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/Container.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/Container.java
index ae83b8832a6..ae3552fbebb 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/Container.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/Container.java
@@ -98,4 +98,11 @@
boolean isRecovering();
void sendPauseEvent(String description);
+
+ /**
+ * Get assigned resource mappings to the container.
+ *
+ * @return Resource Mappings of the container
+ */
+ ResourceMappings getResourceMappings();
}
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java
index 91c51c5ea11..a91973f99a1 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java
@@ -188,6 +188,7 @@ private ReInitializationContext createContextForRollback() {
private boolean recoveredAsKilled = false;
private Context context;
private ResourceSet resourceSet;
+ private ResourceMappings resourceMappings;
public ContainerImpl(Configuration conf, Dispatcher dispatcher,
ContainerLaunchContext launchContext, Credentials creds,
@@ -246,6 +247,7 @@ public ContainerImpl(Configuration conf, Dispatcher dispatcher,
context.getContainerStateTransitionListener());
this.context = context;
this.resourceSet = new ResourceSet();
+ this.resourceMappings = new ResourceMappings();
}
private static ContainerRetryContext configureRetryContext(
@@ -286,6 +288,7 @@ public ContainerImpl(Configuration conf, Dispatcher dispatcher,
this.remainingRetryAttempts = rcs.getRemainingRetryAttempts();
this.workDir = rcs.getWorkDir();
this.logDir = rcs.getLogDir();
+ this.resourceMappings = rcs.getResourceMappings();
}
private static final ContainerDiagnosticsUpdateTransition UPDATE_DIAGNOSTICS_TRANSITION =
@@ -2174,4 +2177,14 @@ public boolean isRecovering() {
getContainerState() == ContainerState.NEW);
return isRecovering;
}
+
+ /**
+ * Get assigned resource mappings to the container.
+ *
+ * @return Resource Mappings of the container
+ */
+ @Override
+ public ResourceMappings getResourceMappings() {
+ return resourceMappings;
+ }
}
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ResourceMappings.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ResourceMappings.java
new file mode 100644
index 00000000000..d673341b01c
--- /dev/null
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ResourceMappings.java
@@ -0,0 +1,124 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager.containermanager.container;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.ObjectInputStream;
+import java.io.ObjectOutputStream;
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.commons.io.IOUtils;
+
+/**
+ * This class is used to store assigned resource to a single container by
+ * resource types.
+ *
+ * Assigned resources could be a list of strings.
+ *
+ * For example, we can assign container to:
+ * "numa": ["numa0"]
+ * "gpu": ["0", "1", "2", "3"]
+ * "fpga": ["1", "3"]
+ *
+ * This will be used for NM restart container recovery.
+ */
+public class ResourceMappings {
+
+ private Map assignedResourcesMap = new HashMap<>();
+
+ /**
+ * Get the list of assigned resources for the given resource type.
+ * @param resourceType the resource type
+ * @return list of assigned resources, or an empty list if none
+ */
+ public List getAssignedResources(String resourceType) {
+ AssignedResources ar = assignedResourcesMap.get(resourceType);
+ if (null == ar) {
+ return Collections.emptyList();
+ }
+ return ar.getAssignedResources();
+ }
+
+ /**
+ * Adds the resources for a given resource type.
+ *
+ * @param resourceType Resource Type
+ * @param assigned Assigned resources to add
+ */
+ public void addAssignedResources(String resourceType,
+ AssignedResources assigned) {
+ assignedResourcesMap.put(resourceType, assigned);
+ }
+
+ /**
+ * Stores resources assigned to a container for a given resource type.
+ */
+ public static class AssignedResources implements Serializable {
+ private static final long serialVersionUID = -1059491941955757926L;
+ private List resources = Collections.emptyList();
+
+ public List getAssignedResources() {
+ return Collections.unmodifiableList(resources);
+ }
+
+ public void updateAssignedResources(List list) {
+ this.resources = new ArrayList<>(list);
+ }
+
+ @SuppressWarnings("unchecked")
+ public static AssignedResources fromBytes(byte[] bytes)
+ throws IOException {
+ ObjectInputStream ois = null;
+ List resources;
+ try {
+ ByteArrayInputStream bis = new ByteArrayInputStream(bytes);
+ ois = new ObjectInputStream(bis);
+ resources = (List) ois.readObject();
+ } catch (ClassNotFoundException e) {
+ throw new IOException(e);
+ } finally {
+ IOUtils.closeQuietly(ois);
+ }
+ AssignedResources ar = new AssignedResources();
+ ar.updateAssignedResources(resources);
+ return ar;
+ }
+
+ public byte[] toBytes() throws IOException {
+ ObjectOutputStream oos = null;
+ byte[] bytes;
+ try {
+ ByteArrayOutputStream bos = new ByteArrayOutputStream();
+ oos = new ObjectOutputStream(bos);
+ oos.writeObject(resources);
+ bytes = bos.toByteArray();
+ } finally {
+ IOUtils.closeQuietly(oos);
+ }
+ return bytes;
+ }
+ }
+}
\ No newline at end of file
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/privileged/PrivilegedOperation.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/privileged/PrivilegedOperation.java
index 8402a16339d..db0b2251578 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/privileged/PrivilegedOperation.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/privileged/PrivilegedOperation.java
@@ -51,6 +51,7 @@
TC_READ_STATS("--tc-read-stats"),
ADD_PID_TO_CGROUP(""), //no CLI switch supported yet.
RUN_DOCKER_CMD("--run-docker"),
+ GPU("--module-gpu"),
LIST_AS_USER(""); //no CLI switch supported yet.
private final String option;
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/ResourceHandlerChain.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/ResourceHandlerChain.java
index 955d2169fec..72bf30ce871 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/ResourceHandlerChain.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/ResourceHandlerChain.java
@@ -20,6 +20,7 @@
package org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources;
+import com.google.common.annotations.VisibleForTesting;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
@@ -135,7 +136,8 @@ public ResourceHandlerChain(List resourceHandlers) {
return allOperations;
}
- List getResourceHandlerList() {
+ @VisibleForTesting
+ public List getResourceHandlerList() {
return Collections.unmodifiableList(resourceHandlers);
}
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/ResourceHandlerModule.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/ResourceHandlerModule.java
index 3c61cd4b5be..ce850ab3b7c 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/ResourceHandlerModule.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/ResourceHandlerModule.java
@@ -21,25 +21,28 @@
package org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources;
import com.google.common.annotations.VisibleForTesting;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.server.nodemanager.Context;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationExecutor;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.ResourcePlugin;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.ResourcePluginManager;
import org.apache.hadoop.yarn.server.nodemanager.util.CgroupsLCEResourcesHandler;
import org.apache.hadoop.yarn.server.nodemanager.util.DefaultLCEResourcesHandler;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.IOException;
-import java.util.Set;
-import java.util.HashSet;
-import java.util.Map;
-import java.util.HashMap;
-import java.util.Arrays;
import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.HashSet;
import java.util.List;
+import java.util.Map;
+import java.util.Set;
/**
* Provides mechanisms to get various resource handlers - cpu, memory, network,
@@ -206,22 +209,41 @@ private static void addHandlerIfNotNull(List handlerList,
}
private static void initializeConfiguredResourceHandlerChain(
- Configuration conf) throws ResourceHandlerException {
+ Configuration conf, Context nmContext)
+ throws ResourceHandlerException {
ArrayList handlerList = new ArrayList<>();
addHandlerIfNotNull(handlerList, getOutboundBandwidthResourceHandler(conf));
addHandlerIfNotNull(handlerList, getDiskResourceHandler(conf));
addHandlerIfNotNull(handlerList, getMemoryResourceHandler(conf));
addHandlerIfNotNull(handlerList, getCGroupsCpuResourceHandler(conf));
+ addHandlersFromConfiguredResourcePlugins(handlerList, conf, nmContext);
resourceHandlerChain = new ResourceHandlerChain(handlerList);
}
+ private static void addHandlersFromConfiguredResourcePlugins(
+ List handlerList, Configuration conf,
+ Context nmContext) throws ResourceHandlerException {
+ ResourcePluginManager pluginManager = nmContext.getResourcePluginManager();
+ if (pluginManager != null) {
+ Map pluginMap = pluginManager.getNameToPlugins();
+ if (pluginMap != null) {
+ for (ResourcePlugin plugin : pluginMap.values()) {
+ addHandlerIfNotNull(handlerList, plugin
+ .createResourceHandler(nmContext,
+ getInitializedCGroupsHandler(conf),
+ PrivilegedOperationExecutor.getInstance(conf)));
+ }
+ }
+ }
+ }
+
public static ResourceHandlerChain getConfiguredResourceHandlerChain(
- Configuration conf) throws ResourceHandlerException {
+ Configuration conf, Context nmContext) throws ResourceHandlerException {
if (resourceHandlerChain == null) {
synchronized (ResourceHandlerModule.class) {
if (resourceHandlerChain == null) {
- initializeConfiguredResourceHandlerChain(conf);
+ initializeConfiguredResourceHandlerChain(conf, nmContext);
}
}
}
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/gpu/GpuResourceAllocator.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/gpu/GpuResourceAllocator.java
new file mode 100644
index 00000000000..5bdffc369b2
--- /dev/null
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/gpu/GpuResourceAllocator.java
@@ -0,0 +1,245 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.gpu;
+
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.collect.ImmutableSet;
+import com.google.common.collect.Sets;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.util.StringUtils;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.exceptions.ResourceNotFoundException;
+import org.apache.hadoop.yarn.server.nodemanager.Context;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandlerException;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu.AssignedGpuDevice;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu.GpuDevice;
+
+import java.io.IOException;
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.TreeMap;
+import java.util.TreeSet;
+
+import static org.apache.hadoop.yarn.api.records.ResourceInformation.GPU_URI;
+
+/**
+ * Allocate GPU resources according to requirements
+ */
+public class GpuResourceAllocator {
+ final static Log LOG = LogFactory.getLog(GpuResourceAllocator.class);
+
+ private Set allowedGpuDevices = new TreeSet<>();
+ private Map usedDevices = new TreeMap<>();
+ private Context nmContext;
+
+ public GpuResourceAllocator(Context ctx) {
+ this.nmContext = ctx;
+ }
+
+ /**
+ * Contains allowed and denied devices.
+ * Denied devices will be used by the cgroups devices module for blacklisting.
+ */
+ static class GpuAllocation {
+ private Set allowed = Collections.emptySet();
+ private Set denied = Collections.emptySet();
+
+ GpuAllocation(Set allowed, Set denied) {
+ if (allowed != null) {
+ this.allowed = ImmutableSet.copyOf(allowed);
+ }
+ if (denied != null) {
+ this.denied = ImmutableSet.copyOf(denied);
+ }
+ }
+
+ public Set getAllowedGPUs() {
+ return allowed;
+ }
+
+ public Set getDeniedGPUs() {
+ return denied;
+ }
+ }
+
+ /**
+ * Add GPU to allowed list
+ * @param gpuDevice gpu device
+ */
+ public synchronized void addGpu(GpuDevice gpuDevice) {
+ allowedGpuDevices.add(gpuDevice);
+ }
+
+ private String getResourceHandlerExceptionMessage(int numRequestedGpuDevices,
+ ContainerId containerId) {
+ return "Failed to find enough GPUs, requestor=" + containerId
+ + ", #RequestedGPUs=" + numRequestedGpuDevices + ", #availableGpus="
+ + getAvailableGpus();
+ }
+
+ @VisibleForTesting
+ public synchronized int getAvailableGpus() {
+ return allowedGpuDevices.size() - usedDevices.size();
+ }
+
+ public synchronized void recoverAssignedGpus(ContainerId containerId)
+ throws ResourceHandlerException {
+ Container c = nmContext.getContainers().get(containerId);
+ if (null == c) {
+ throw new ResourceHandlerException(
+ "This shouldn't happen, cannot find container with id="
+ + containerId);
+ }
+
+ for (Serializable gpuDeviceSerializable : c.getResourceMappings()
+ .getAssignedResources(GPU_URI)) {
+ if (!(gpuDeviceSerializable instanceof GpuDevice)) {
+ throw new ResourceHandlerException(
+ "Trying to recover device id, however it"
+ + " is not GpuDevice, this shouldn't happen");
+ }
+
+ GpuDevice gpuDevice = (GpuDevice) gpuDeviceSerializable;
+
+ // Make sure it is in allowed GPU device.
+ if (!allowedGpuDevices.contains(gpuDevice)) {
+ throw new ResourceHandlerException(
+ "Try to recover device = " + gpuDevice
+ + " however it is not in allowed device list:" + StringUtils
+ .join(",", allowedGpuDevices));
+ }
+
+ // Make sure it is not occupied by anybody else
+ if (usedDevices.containsKey(gpuDevice)) {
+ throw new ResourceHandlerException(
+ "Try to recover device id = " + gpuDevice
+ + " however it is already assigned to container=" + usedDevices
+ .get(gpuDevice) + ", please double check what happened.");
+ }
+
+ usedDevices.put(gpuDevice, containerId);
+ }
+ }
+
+ /**
+ * Get number of requested GPUs from resource.
+ * @param requestedResource requested resource
+ * @return #gpus.
+ */
+ public static int getRequestedGpus(Resource requestedResource) {
+ try {
+ return Long.valueOf(requestedResource.getResourceValue(
+ GPU_URI)).intValue();
+ } catch (ResourceNotFoundException e) {
+ return 0;
+ }
+ }
+
+ /**
+ * Assign GPU to requestor
+ * @param container container to allocate
+ * @return allocation results.
+ * @throws ResourceHandlerException When failed to assign GPUs.
+ */
+ public synchronized GpuAllocation assignGpus(Container container)
+ throws ResourceHandlerException {
+ Resource requestedResource = container.getResource();
+ ContainerId containerId = container.getContainerId();
+ int numRequestedGpuDevices = getRequestedGpus(requestedResource);
+ // Assign Gpus to container if requested some.
+ if (numRequestedGpuDevices > 0) {
+ if (numRequestedGpuDevices > getAvailableGpus()) {
+ throw new ResourceHandlerException(
+ getResourceHandlerExceptionMessage(numRequestedGpuDevices,
+ containerId));
+ }
+
+ Set assignedGpus = new TreeSet<>();
+
+ for (GpuDevice gpu : allowedGpuDevices) {
+ if (!usedDevices.containsKey(gpu)) {
+ usedDevices.put(gpu, containerId);
+ assignedGpus.add(gpu);
+ if (assignedGpus.size() == numRequestedGpuDevices) {
+ break;
+ }
+ }
+ }
+
+ // Record in state store if we allocated anything
+ if (!assignedGpus.isEmpty()) {
+ try {
+ // Update state store.
+ nmContext.getNMStateStore().storeAssignedResources(container, GPU_URI,
+ new ArrayList<>(assignedGpus));
+ } catch (IOException e) {
+ cleanupAssignGpus(containerId);
+ throw new ResourceHandlerException(e);
+ }
+ }
+
+ return new GpuAllocation(assignedGpus,
+ Sets.difference(allowedGpuDevices, assignedGpus));
+ }
+ return new GpuAllocation(null, allowedGpuDevices);
+ }
+
+ /**
+ * Clean up all Gpus assigned to containerId
+ * @param containerId containerId
+ */
+ public synchronized void cleanupAssignGpus(ContainerId containerId) {
+ Iterator> iter =
+ usedDevices.entrySet().iterator();
+ while (iter.hasNext()) {
+ if (iter.next().getValue().equals(containerId)) {
+ iter.remove();
+ }
+ }
+ }
+
+ @VisibleForTesting
+ public synchronized Map getDeviceAllocationMappingCopy() {
+ return new HashMap<>(usedDevices);
+ }
+
+ public synchronized List getAllowedGpusCopy() {
+ return new ArrayList<>(allowedGpuDevices);
+ }
+
+ public synchronized List getAssignedGpusCopy() {
+ List assigns = new ArrayList<>();
+ for (Map.Entry entry : usedDevices.entrySet()) {
+ assigns.add(new AssignedGpuDevice(entry.getKey().getIndex(),
+ entry.getKey().getMinorNumber(), entry.getValue()));
+ }
+ return assigns;
+ }
+}
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/gpu/GpuResourceHandlerImpl.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/gpu/GpuResourceHandlerImpl.java
new file mode 100644
index 00000000000..500382162fb
--- /dev/null
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/gpu/GpuResourceHandlerImpl.java
@@ -0,0 +1,160 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.gpu;
+
+import com.google.common.annotations.VisibleForTesting;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.util.StringUtils;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.exceptions.YarnException;
+import org.apache.hadoop.yarn.server.nodemanager.Context;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperation;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationException;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationExecutor;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.CGroupsHandler;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandler;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandlerException;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu.GpuDevice;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu.GpuDiscoverer;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+public class GpuResourceHandlerImpl implements ResourceHandler {
+ final static Log LOG = LogFactory
+ .getLog(GpuResourceHandlerImpl.class);
+
+ // This will be used by container-executor to add necessary clis
+ public static final String EXCLUDED_GPUS_CLI_OPTION = "--excluded_gpus";
+ public static final String CONTAINER_ID_CLI_OPTION = "--container_id";
+
+ private GpuResourceAllocator gpuAllocator;
+ private CGroupsHandler cGroupsHandler;
+ private PrivilegedOperationExecutor privilegedOperationExecutor;
+
+ public GpuResourceHandlerImpl(Context nmContext,
+ CGroupsHandler cGroupsHandler,
+ PrivilegedOperationExecutor privilegedOperationExecutor) {
+ this.cGroupsHandler = cGroupsHandler;
+ this.privilegedOperationExecutor = privilegedOperationExecutor;
+ gpuAllocator = new GpuResourceAllocator(nmContext);
+ }
+
+ @Override
+ public List bootstrap(Configuration configuration)
+ throws ResourceHandlerException {
+ List usableGpus;
+ try {
+ usableGpus = GpuDiscoverer.getInstance()
+ .getGpusUsableByYarn();
+ if (usableGpus == null || usableGpus.isEmpty()) {
+ String message = "GPU is enabled on the NodeManager, but couldn't find "
+ + "any usable GPU devices, please double check configuration.";
+ LOG.error(message);
+ throw new ResourceHandlerException(message);
+ }
+ } catch (YarnException e) {
+ LOG.error("Exception when trying to get usable GPU device", e);
+ throw new ResourceHandlerException(e);
+ }
+
+ for (GpuDevice gpu : usableGpus) {
+ gpuAllocator.addGpu(gpu);
+ }
+
+ // And initialize cgroups
+ this.cGroupsHandler.initializeCGroupController(
+ CGroupsHandler.CGroupController.DEVICES);
+
+ return null;
+ }
+
+ @Override
+ public synchronized List preStart(Container container)
+ throws ResourceHandlerException {
+ String containerIdStr = container.getContainerId().toString();
+
+ // Assign Gpus to container if requested some.
+ GpuResourceAllocator.GpuAllocation allocation = gpuAllocator.assignGpus(
+ container);
+
+ // Create device cgroups for the container
+ cGroupsHandler.createCGroup(CGroupsHandler.CGroupController.DEVICES,
+ containerIdStr);
+ try {
+ // Execute c-e to setup GPU isolation before launch the container
+ PrivilegedOperation privilegedOperation = new PrivilegedOperation(
+ PrivilegedOperation.OperationType.GPU, Arrays
+ .asList(CONTAINER_ID_CLI_OPTION, containerIdStr));
+ if (!allocation.getDeniedGPUs().isEmpty()) {
+ List minorNumbers = new ArrayList<>();
+ for (GpuDevice deniedGpu : allocation.getDeniedGPUs()) {
+ minorNumbers.add(deniedGpu.getMinorNumber());
+ }
+ privilegedOperation.appendArgs(Arrays.asList(EXCLUDED_GPUS_CLI_OPTION,
+ StringUtils.join(",", minorNumbers)));
+ }
+ privilegedOperationExecutor.executePrivilegedOperation(
+ privilegedOperation, true);
+ } catch (PrivilegedOperationException e) {
+ cGroupsHandler.deleteCGroup(CGroupsHandler.CGroupController.DEVICES,
+ containerIdStr);
+ LOG.warn("Could not update cgroup for container", e);
+ throw new ResourceHandlerException(e);
+ }
+
+ List ret = new ArrayList<>();
+ ret.add(new PrivilegedOperation(
+ PrivilegedOperation.OperationType.ADD_PID_TO_CGROUP,
+ PrivilegedOperation.CGROUP_ARG_PREFIX
+ + cGroupsHandler.getPathForCGroupTasks(
+ CGroupsHandler.CGroupController.DEVICES, containerIdStr)));
+
+ return ret;
+ }
+
+ public GpuResourceAllocator getGpuAllocator() {
+ return gpuAllocator;
+ }
+
+ @Override
+ public List reacquireContainer(ContainerId containerId)
+ throws ResourceHandlerException {
+ gpuAllocator.recoverAssignedGpus(containerId);
+ return null;
+ }
+
+ @Override
+ public synchronized List postComplete(
+ ContainerId containerId) throws ResourceHandlerException {
+ gpuAllocator.cleanupAssignGpus(containerId);
+ cGroupsHandler.deleteCGroup(CGroupsHandler.CGroupController.DEVICES,
+ containerId.toString());
+ return null;
+ }
+
+ @Override
+ public List teardown() throws ResourceHandlerException {
+ return null;
+ }
+}
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/NodeResourceUpdaterPlugin.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/NodeResourceUpdaterPlugin.java
new file mode 100644
index 00000000000..88f77ed12ed
--- /dev/null
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/NodeResourceUpdaterPlugin.java
@@ -0,0 +1,52 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin;
+
+import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.exceptions.YarnException;
+
+/**
+ * Plugins to handle resources on a node. This will be used by
+ * {@link org.apache.hadoop.yarn.server.nodemanager.NodeStatusUpdater}
+ */
+public abstract class NodeResourceUpdaterPlugin {
+ /**
+ * Update configured resource for the given component.
+ * @param res resource passed in by external module (such as
+ * {@link org.apache.hadoop.yarn.server.nodemanager.NodeStatusUpdater})
+ * @throws YarnException when any issue happens.
+ */
+ public abstract void updateConfiguredResource(Resource res)
+ throws YarnException;
+
+ /**
+ * This method will be called when the node's resource is loaded from
+ * dynamic-resources.xml in ResourceManager.
+ *
+ * @param newResource newResource reported by RM
+ * @throws YarnException when there is any mismatch between the NM and RM
+ */
+ public void handleUpdatedResourceFromRM(Resource newResource) throws
+ YarnException {
+ // by default do nothing, subclass should implement this method when any
+ // special activities required upon new resource reported by RM.
+ }
+
+ // TODO: add implementation to update node attribute once YARN-3409 merged.
+}
\ No newline at end of file
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/ResourcePlugin.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/ResourcePlugin.java
new file mode 100644
index 00000000000..78167c4ef33
--- /dev/null
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/ResourcePlugin.java
@@ -0,0 +1,94 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin;
+
+import org.apache.hadoop.yarn.exceptions.YarnException;
+import org.apache.hadoop.yarn.server.nodemanager.Context;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationExecutor;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.CGroupsHandler;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandler;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandlerChain;
+import org.apache.hadoop.yarn.server.nodemanager.webapp.dao.NMResourceInfo;
+
+/**
+ * {@link ResourcePlugin} is an interface for the node manager to more easily
+ * support discovery/management/isolation of new resource types.
+ *
+ *
+ * It has two major parts: {@link ResourcePlugin#createResourceHandler(Context,
+ * CGroupsHandler, PrivilegedOperationExecutor)} and
+ * {@link ResourcePlugin#getNodeResourceHandlerInstance()}, see javadocs below
+ * for more details.
+ *
+ */
+public interface ResourcePlugin {
+ /**
+ * Initialize the plugin, this will be invoked during NM startup.
+ * @param context NM Context
+ * @throws YarnException when any issue occurs
+ */
+ void initialize(Context context) throws YarnException;
+
+ /**
+ * Plugin needs to return {@link ResourceHandler} when any special isolation
+ * required for the resource type. This will be added to
+ * {@link ResourceHandlerChain} during NodeManager startup. When no special
+   * isolation is needed, return null.
+ *
+ * @param nmContext NodeManager context.
+ * @param cGroupsHandler CGroupsHandler
+ * @param privilegedOperationExecutor Privileged Operation Executor.
+ * @return ResourceHandler
+ */
+ ResourceHandler createResourceHandler(Context nmContext,
+ CGroupsHandler cGroupsHandler,
+ PrivilegedOperationExecutor privilegedOperationExecutor);
+
+ /**
+ * Plugin needs to return {@link NodeResourceUpdaterPlugin} when any discovery
+   * mechanism is required for the resource type. For example, if we want to set
+   * the resource value during NM registration or send updates during NM-RM
+   * heartbeat, we can implement a {@link NodeResourceUpdaterPlugin} and update fields of
+ * {@link org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatRequest}
+ * or {@link org.apache.hadoop.yarn.server.api.protocolrecords.RegisterNodeManagerRequest}
+ *
+ * This will be invoked during every node status update or node registration,
+   * so please avoid creating a new instance every time.
+ *
+ * @return NodeResourceUpdaterPlugin, could be null when no discovery needed.
+ */
+ NodeResourceUpdaterPlugin getNodeResourceHandlerInstance();
+
+ /**
+ * Do cleanup of the plugin, this will be invoked when
+   * {@link org.apache.hadoop.yarn.server.nodemanager.NodeManager} stops.
+ * @throws YarnException if any issue occurs
+ */
+ void cleanup() throws YarnException;
+
+ /**
+ * Get resource information from this plugin.
+ *
+ * @return NMResourceInfo, an example is
+ * {@link org.apache.hadoop.yarn.server.nodemanager.webapp.dao.gpu.GpuDeviceInformation}
+ *
+ * @throws YarnException when any issue occurs
+ */
+ NMResourceInfo getNMResourceInfo() throws YarnException;
+}
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/ResourcePluginManager.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/ResourcePluginManager.java
new file mode 100644
index 00000000000..73d6038afb1
--- /dev/null
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/ResourcePluginManager.java
@@ -0,0 +1,106 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin;
+
+import com.google.common.collect.ImmutableSet;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.exceptions.YarnException;
+import org.apache.hadoop.yarn.server.nodemanager.Context;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu.GpuResourcePlugin;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Set;
+
+import static org.apache.hadoop.yarn.api.records.ResourceInformation.GPU_URI;
+
+/**
+ * Manages {@link ResourcePlugin} configured on this NodeManager.
+ */
+public class ResourcePluginManager {
+ private static final Logger LOG =
+ LoggerFactory.getLogger(ResourcePluginManager.class);
+ private static final Set SUPPORTED_RESOURCE_PLUGINS = ImmutableSet.of(
+ GPU_URI);
+
+ private Map configuredPlugins = Collections.EMPTY_MAP;
+
+ public synchronized void initialize(Context context)
+ throws YarnException {
+ Configuration conf = context.getConf();
+ String[] plugins = conf.getStrings(YarnConfiguration.NM_RESOURCE_PLUGINS);
+
+ if (plugins != null) {
+ Map pluginMap = new HashMap<>();
+
+      // Initialize each plugin
+ for (String resourceName : plugins) {
+ resourceName = resourceName.trim();
+ if (!SUPPORTED_RESOURCE_PLUGINS.contains(resourceName)) {
+ String msg =
+ "Trying to initialize resource plugin with name=" + resourceName
+ + ", it is not supported, list of supported plugins:"
+ + StringUtils.join(",",
+ SUPPORTED_RESOURCE_PLUGINS);
+ LOG.error(msg);
+ throw new YarnException(msg);
+ }
+
+ if (pluginMap.containsKey(resourceName)) {
+ // Duplicated items, ignore ...
+ continue;
+ }
+
+ ResourcePlugin plugin = null;
+ if (resourceName.equals(GPU_URI)) {
+ plugin = new GpuResourcePlugin();
+ }
+
+ if (plugin == null) {
+ throw new YarnException(
+ "This shouldn't happen, plugin=" + resourceName
+ + " should be loaded and initialized");
+ }
+ plugin.initialize(context);
+ pluginMap.put(resourceName, plugin);
+ }
+
+ configuredPlugins = Collections.unmodifiableMap(pluginMap);
+ }
+ }
+
+ public synchronized void cleanup() throws YarnException {
+ for (ResourcePlugin plugin : configuredPlugins.values()) {
+ plugin.cleanup();
+ }
+ }
+
+ /**
+ * Get resource name (such as gpu/fpga) to plugin references.
+ * @return read-only map of resource name to plugins.
+ */
+ public synchronized Map getNameToPlugins() {
+ return configuredPlugins;
+ }
+}
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/AssignedGpuDevice.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/AssignedGpuDevice.java
new file mode 100644
index 00000000000..26fd9050742
--- /dev/null
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/AssignedGpuDevice.java
@@ -0,0 +1,79 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu;
+
+import org.apache.hadoop.yarn.api.records.ContainerId;
+
+/**
+ * In addition to {@link GpuDevice}, this includes the container id and more
+ * runtime information about who is using the GPU device, when available.
+ */
+public class AssignedGpuDevice extends GpuDevice {
+ private static final long serialVersionUID = -12983712986315L;
+
+ String containerId;
+
+ public AssignedGpuDevice(int index, int minorNumber,
+ ContainerId containerId) {
+ super(index, minorNumber);
+ this.containerId = containerId.toString();
+ }
+
+ public String getContainerId() {
+ return containerId;
+ }
+
+ public void setContainerId(String containerId) {
+ this.containerId = containerId;
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (obj == null || !(obj instanceof AssignedGpuDevice)) {
+ return false;
+ }
+ AssignedGpuDevice other = (AssignedGpuDevice) obj;
+ return index == other.index && minorNumber == other.minorNumber
+ && containerId.equals(other.containerId);
+ }
+
+ @Override
+ public int compareTo(Object obj) {
+ if (obj == null || (!(obj instanceof AssignedGpuDevice))) {
+ return -1;
+ }
+
+ AssignedGpuDevice other = (AssignedGpuDevice) obj;
+
+ int result = Integer.compare(index, other.index);
+ if (0 != result) {
+ return result;
+ }
+ result = Integer.compare(minorNumber, other.minorNumber);
+ if (0 != result) {
+ return result;
+ }
+ return containerId.compareTo(other.containerId);
+ }
+
+ @Override
+ public int hashCode() {
+ final int prime = 47;
+ return prime * (prime * index + minorNumber) + containerId.hashCode();
+ }
+}
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuDevice.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuDevice.java
new file mode 100644
index 00000000000..bce1d9fa480
--- /dev/null
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuDevice.java
@@ -0,0 +1,78 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu;
+
+import java.io.Serializable;
+
+/**
+ * This class is used to represent a GPU device during allocation.
+ */
+public class GpuDevice implements Serializable, Comparable {
+ protected int index;
+ protected int minorNumber;
+ private static final long serialVersionUID = -6812314470754667710L;
+
+ public GpuDevice(int index, int minorNumber) {
+ this.index = index;
+ this.minorNumber = minorNumber;
+ }
+
+ public int getIndex() {
+ return index;
+ }
+
+ public int getMinorNumber() {
+ return minorNumber;
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (obj == null || !(obj instanceof GpuDevice)) {
+ return false;
+ }
+ GpuDevice other = (GpuDevice) obj;
+ return index == other.index && minorNumber == other.minorNumber;
+ }
+
+ @Override
+ public int compareTo(Object obj) {
+ if (obj == null || (!(obj instanceof GpuDevice))) {
+ return -1;
+ }
+
+ GpuDevice other = (GpuDevice) obj;
+
+ int result = Integer.compare(index, other.index);
+ if (0 != result) {
+ return result;
+ }
+ return Integer.compare(minorNumber, other.minorNumber);
+ }
+
+ @Override
+ public int hashCode() {
+ final int prime = 47;
+ return prime * index + minorNumber;
+ }
+
+ @Override
+ public String toString() {
+ return "(index=" + index + ",minor_number=" + minorNumber + ")";
+ }
+}
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuDiscoverer.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuDiscoverer.java
new file mode 100644
index 00000000000..6e3cf1315ce
--- /dev/null
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuDiscoverer.java
@@ -0,0 +1,264 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu;
+
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.ImmutableSet;
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.util.Shell;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.exceptions.YarnException;
+import org.apache.hadoop.yarn.server.nodemanager.webapp.dao.gpu.GpuDeviceInformation;
+import org.apache.hadoop.yarn.server.nodemanager.webapp.dao.gpu.GpuDeviceInformationParser;
+import org.apache.hadoop.yarn.server.nodemanager.webapp.dao.gpu.PerGpuDeviceInformation;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+@InterfaceAudience.Private
+@InterfaceStability.Unstable
+public class GpuDiscoverer {
+ public static final Logger LOG = LoggerFactory.getLogger(
+ GpuDiscoverer.class);
+ @VisibleForTesting
+ protected static final String DEFAULT_BINARY_NAME = "nvidia-smi";
+
+  // When the executable path is not set, try to search default dirs.
+  // By default search /usr/bin, /bin, and /usr/local/nvidia/bin (when
+  // launched by nvidia-docker).
+ private static final Set DEFAULT_BINARY_SEARCH_DIRS = ImmutableSet.of(
+ "/usr/bin", "/bin", "/usr/local/nvidia/bin");
+
+ // command should not run more than 10 sec.
+ private static final int MAX_EXEC_TIMEOUT_MS = 10 * 1000;
+ private static final int MAX_REPEATED_ERROR_ALLOWED = 10;
+ private static GpuDiscoverer instance;
+
+ static {
+ instance = new GpuDiscoverer();
+ }
+
+ private Configuration conf = null;
+ private String pathOfGpuBinary = null;
+ private Map environment = new HashMap<>();
+ private GpuDeviceInformationParser parser = new GpuDeviceInformationParser();
+
+ private int numOfErrorExecutionSinceLastSucceed = 0;
+ GpuDeviceInformation lastDiscoveredGpuInformation = null;
+
+ private void validateConfOrThrowException() throws YarnException {
+ if (conf == null) {
+ throw new YarnException("Please initialize (call initialize) before use "
+ + GpuDiscoverer.class.getSimpleName());
+ }
+ }
+
+ /**
+ * Get GPU device information from system.
+   * This needs to be called after initialize.
+ *
+   * Please note that this only works on *NIX platforms, so the external caller
+   * needs to ensure this.
+ *
+ * @return GpuDeviceInformation
+ * @throws YarnException when any error happens
+ */
+ public synchronized GpuDeviceInformation getGpuDeviceInformation()
+ throws YarnException {
+ validateConfOrThrowException();
+
+ if (null == pathOfGpuBinary) {
+ throw new YarnException(
+ "Failed to find GPU discovery executable, please double check "
+ + YarnConfiguration.NM_GPU_PATH_TO_EXEC + " setting.");
+ }
+
+ if (numOfErrorExecutionSinceLastSucceed == MAX_REPEATED_ERROR_ALLOWED) {
+ String msg =
+ "Failed to execute GPU device information detection script for "
+ + MAX_REPEATED_ERROR_ALLOWED
+ + " times, skip following executions.";
+ LOG.error(msg);
+ throw new YarnException(msg);
+ }
+
+ String output;
+ try {
+ output = Shell.execCommand(environment,
+ new String[] { pathOfGpuBinary, "-x", "-q" }, MAX_EXEC_TIMEOUT_MS);
+ GpuDeviceInformation info = parser.parseXml(output);
+ numOfErrorExecutionSinceLastSucceed = 0;
+ lastDiscoveredGpuInformation = info;
+ return info;
+ } catch (IOException e) {
+ numOfErrorExecutionSinceLastSucceed++;
+ String msg =
+ "Failed to execute " + pathOfGpuBinary + " exception message:" + e
+ .getMessage() + ", continue ...";
+ if (LOG.isDebugEnabled()) {
+ LOG.debug(msg);
+ }
+ throw new YarnException(e);
+ } catch (YarnException e) {
+ numOfErrorExecutionSinceLastSucceed++;
+ String msg = "Failed to parse xml output" + e.getMessage();
+ if (LOG.isDebugEnabled()) {
+ LOG.warn(msg, e);
+ }
+ throw e;
+ }
+ }
+
+ /**
+ * Get list of GPU devices usable by YARN.
+ *
+ * @return List of GPU devices
+ * @throws YarnException when any issue happens
+ */
+ public synchronized List getGpusUsableByYarn()
+ throws YarnException {
+ validateConfOrThrowException();
+
+ String allowedDevicesStr = conf.get(
+ YarnConfiguration.NM_GPU_ALLOWED_DEVICES,
+ YarnConfiguration.AUTOMATICALLY_DISCOVER_GPU_DEVICES);
+
+ List gpuDevices = new ArrayList<>();
+
+ if (allowedDevicesStr.equals(
+ YarnConfiguration.AUTOMATICALLY_DISCOVER_GPU_DEVICES)) {
+ // Get gpu device information from system.
+ if (null == lastDiscoveredGpuInformation) {
+ String msg = YarnConfiguration.NM_GPU_ALLOWED_DEVICES + " is set to "
+ + YarnConfiguration.AUTOMATICALLY_DISCOVER_GPU_DEVICES
+ + ", however automatically discovering "
+ + "GPU information failed, please check NodeManager log for more"
+ + " details, as an alternative, admin can specify "
+ + YarnConfiguration.NM_GPU_ALLOWED_DEVICES
+ + " manually to enable GPU isolation.";
+ LOG.error(msg);
+ throw new YarnException(msg);
+ }
+
+ if (lastDiscoveredGpuInformation.getGpus() != null) {
+ for (int i = 0; i < lastDiscoveredGpuInformation.getGpus().size();
+ i++) {
+ List gpuInfos =
+ lastDiscoveredGpuInformation.getGpus();
+ gpuDevices.add(new GpuDevice(i, gpuInfos.get(i).getMinorNumber()));
+ }
+ }
+ } else{
+ for (String s : allowedDevicesStr.split(",")) {
+ if (s.trim().length() > 0) {
+ String[] kv = s.trim().split(":");
+ if (kv.length != 2) {
+ throw new YarnException(
+ "Illegal format, it should be index:minor_number format, now it="
+ + s);
+ }
+
+ gpuDevices.add(
+ new GpuDevice(Integer.parseInt(kv[0]), Integer.parseInt(kv[1])));
+ }
+ }
+ LOG.info("Allowed GPU devices:" + gpuDevices);
+ }
+
+ return gpuDevices;
+ }
+
+ public synchronized void initialize(Configuration conf) throws YarnException {
+ this.conf = conf;
+ numOfErrorExecutionSinceLastSucceed = 0;
+ String pathToExecutable = conf.get(YarnConfiguration.NM_GPU_PATH_TO_EXEC,
+ YarnConfiguration.DEFAULT_NM_GPU_PATH_TO_EXEC);
+ if (pathToExecutable.isEmpty()) {
+ pathToExecutable = DEFAULT_BINARY_NAME;
+ }
+
+ // Validate file existence
+ File binaryPath = new File(pathToExecutable);
+
+ if (!binaryPath.exists()) {
+ // When binary not exist, use default setting.
+ boolean found = false;
+ for (String dir : DEFAULT_BINARY_SEARCH_DIRS) {
+ binaryPath = new File(dir, DEFAULT_BINARY_NAME);
+ if (binaryPath.exists()) {
+ found = true;
+ pathOfGpuBinary = binaryPath.getAbsolutePath();
+ break;
+ }
+ }
+
+ if (!found) {
+ LOG.warn("Failed to locate binary at:" + binaryPath.getAbsolutePath()
+ + ", please double check [" + YarnConfiguration.NM_GPU_PATH_TO_EXEC
+ + "] setting. Now use " + "default binary:" + DEFAULT_BINARY_NAME);
+ }
+ } else{
+      // If the path specified by the user is a directory, use the default binary name under it.
+ if (binaryPath.isDirectory()) {
+ binaryPath = new File(binaryPath, DEFAULT_BINARY_NAME);
+ LOG.warn("Specified path is a directory, use " + DEFAULT_BINARY_NAME
+ + " under the directory, updated path-to-executable:" + binaryPath
+ .getAbsolutePath());
+ }
+ // Validated
+ pathOfGpuBinary = binaryPath.getAbsolutePath();
+ }
+
+ // Try to discover GPU information once and print
+ try {
+ LOG.info("Trying to discover GPU information ...");
+ GpuDeviceInformation info = getGpuDeviceInformation();
+ LOG.info(info.toString());
+ } catch (YarnException e) {
+ String msg =
+ "Failed to discover GPU information from system, exception message:"
+ + e.getMessage() + " continue...";
+ LOG.warn(msg);
+ }
+ }
+
+ @VisibleForTesting
+ protected Map getEnvironmentToRunCommand() {
+ return environment;
+ }
+
+ @VisibleForTesting
+ protected String getPathOfGpuBinary() {
+ return pathOfGpuBinary;
+ }
+
+ public static GpuDiscoverer getInstance() {
+ return instance;
+ }
+}
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuNodeResourceUpdateHandler.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuNodeResourceUpdateHandler.java
new file mode 100644
index 00000000000..796eb25b431
--- /dev/null
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuNodeResourceUpdateHandler.java
@@ -0,0 +1,68 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu;
+
+import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.api.records.ResourceInformation;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.exceptions.YarnException;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.NodeResourceUpdaterPlugin;
+import org.apache.hadoop.yarn.util.resource.ResourceUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.List;
+import java.util.Map;
+
+import static org.apache.hadoop.yarn.api.records.ResourceInformation.GPU_URI;
+
+public class GpuNodeResourceUpdateHandler extends NodeResourceUpdaterPlugin {
+ private static final Logger LOG =
+ LoggerFactory.getLogger(GpuNodeResourceUpdateHandler.class);
+
+ @Override
+ public void updateConfiguredResource(Resource res) throws YarnException {
+ LOG.info("Initializing configured GPU resources for the NodeManager.");
+
+ List usableGpus =
+ GpuDiscoverer.getInstance().getGpusUsableByYarn();
+ if (null == usableGpus || usableGpus.isEmpty()) {
+ String message = "GPU is enabled, but couldn't find any usable GPUs on the "
+ + "NodeManager.";
+ LOG.error(message);
+ // No gpu can be used by YARN.
+ throw new YarnException(message);
+ }
+
+ long nUsableGpus = usableGpus.size();
+
+ Map configuredResourceTypes =
+ ResourceUtils.getResourceTypes();
+ if (!configuredResourceTypes.containsKey(GPU_URI)) {
+ throw new YarnException("Found " + nUsableGpus + " usable GPUs, however "
+ + GPU_URI
+ + " resource-type is not configured inside"
+ + " resource-types.xml, please configure it to enable GPU feature or"
+ + " remove " + GPU_URI + " from "
+ + YarnConfiguration.NM_RESOURCE_PLUGINS);
+ }
+
+ res.setResourceValue(GPU_URI, nUsableGpus);
+ }
+}
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuResourcePlugin.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuResourcePlugin.java
new file mode 100644
index 00000000000..d294503704e
--- /dev/null
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuResourcePlugin.java
@@ -0,0 +1,83 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu;
+
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.exceptions.YarnException;
+import org.apache.hadoop.yarn.server.nodemanager.Context;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationExecutor;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.CGroupsHandler;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandler;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.gpu.GpuResourceAllocator;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.gpu.GpuResourceHandlerImpl;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.NodeResourceUpdaterPlugin;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.ResourcePlugin;
+import org.apache.hadoop.yarn.server.nodemanager.webapp.dao.NMResourceInfo;
+import org.apache.hadoop.yarn.server.nodemanager.webapp.dao.gpu.GpuDeviceInformation;
+import org.apache.hadoop.yarn.server.nodemanager.webapp.dao.gpu.NMGpuResourceInfo;
+
+import java.util.List;
+import java.util.Map;
+
+public class GpuResourcePlugin implements ResourcePlugin {
+ private GpuResourceHandlerImpl gpuResourceHandler = null;
+ private GpuNodeResourceUpdateHandler resourceDiscoverHandler = null;
+
+ @Override
+ public synchronized void initialize(Context context) throws YarnException {
+ resourceDiscoverHandler = new GpuNodeResourceUpdateHandler();
+ GpuDiscoverer.getInstance().initialize(context.getConf());
+ }
+
+ @Override
+ public synchronized ResourceHandler createResourceHandler(
+ Context context, CGroupsHandler cGroupsHandler,
+ PrivilegedOperationExecutor privilegedOperationExecutor) {
+ if (gpuResourceHandler == null) {
+ gpuResourceHandler = new GpuResourceHandlerImpl(context, cGroupsHandler,
+ privilegedOperationExecutor);
+ }
+
+ return gpuResourceHandler;
+ }
+
+ @Override
+ public synchronized NodeResourceUpdaterPlugin getNodeResourceHandlerInstance() {
+ return resourceDiscoverHandler;
+ }
+
+ @Override
+ public void cleanup() throws YarnException {
+ // Do nothing.
+ }
+
+ @Override
+ public NMResourceInfo getNMResourceInfo() throws YarnException {
+ GpuDeviceInformation gpuDeviceInformation =
+ GpuDiscoverer.getInstance().getGpuDeviceInformation();
+ GpuResourceAllocator gpuResourceAllocator =
+ gpuResourceHandler.getGpuAllocator();
+ List totalGpus = gpuResourceAllocator.getAllowedGpusCopy();
+ List assignedGpuDevices =
+ gpuResourceAllocator.getAssignedGpusCopy();
+
+ return new NMGpuResourceInfo(gpuDeviceInformation, totalGpus,
+ assignedGpuDevices);
+ }
+}
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMLeveldbStateStoreService.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMLeveldbStateStoreService.java
index 08a486e7865..374cc290579 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMLeveldbStateStoreService.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMLeveldbStateStoreService.java
@@ -18,39 +18,25 @@
package org.apache.hadoop.yarn.server.nodemanager.recovery;
-import static org.fusesource.leveldbjni.JniDBFactory.asString;
-import static org.fusesource.leveldbjni.JniDBFactory.bytes;
-
-import org.apache.hadoop.yarn.api.records.Token;
-import org.apache.hadoop.yarn.security.ContainerTokenIdentifier;
-import org.slf4j.LoggerFactory;
-
-import java.io.File;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Map;
-import java.util.Map.Entry;
-import java.util.Timer;
-import java.util.TimerTask;
-import java.util.Set;
-
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.collect.ArrayListMultimap;
+import com.google.common.collect.ListMultimap;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
+import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.Time;
import org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest;
import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.StartContainerRequestPBImpl;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.Token;
import org.apache.hadoop.yarn.api.records.impl.pb.ResourcePBImpl;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.proto.YarnProtos.LocalResourceProto;
+import org.apache.hadoop.yarn.proto.YarnSecurityTokenProtos.ContainerTokenIdentifierProto;
import org.apache.hadoop.yarn.proto.YarnServerCommonProtos.MasterKeyProto;
import org.apache.hadoop.yarn.proto.YarnServerCommonProtos.VersionProto;
import org.apache.hadoop.yarn.proto.YarnServerNodemanagerRecoveryProtos.ContainerManagerApplicationProto;
@@ -58,9 +44,11 @@
import org.apache.hadoop.yarn.proto.YarnServerNodemanagerRecoveryProtos.LocalizedResourceProto;
import org.apache.hadoop.yarn.proto.YarnServerNodemanagerRecoveryProtos.LogDeleterProto;
import org.apache.hadoop.yarn.proto.YarnServiceProtos.StartContainerRequestProto;
-import org.apache.hadoop.yarn.proto.YarnSecurityTokenProtos.ContainerTokenIdentifierProto;
+import org.apache.hadoop.yarn.security.ContainerTokenIdentifier;
import org.apache.hadoop.yarn.server.api.records.MasterKey;
import org.apache.hadoop.yarn.server.api.records.impl.pb.MasterKeyPBImpl;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ResourceMappings;
import org.apache.hadoop.yarn.server.records.Version;
import org.apache.hadoop.yarn.server.records.impl.pb.VersionPBImpl;
import org.apache.hadoop.yarn.server.utils.BuilderUtils;
@@ -72,10 +60,26 @@
import org.iq80.leveldb.DBException;
import org.iq80.leveldb.Options;
import org.iq80.leveldb.WriteBatch;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Set;
+import java.util.Timer;
+import java.util.TimerTask;
+
+import static org.fusesource.leveldbjni.JniDBFactory.asString;
+import static org.fusesource.leveldbjni.JniDBFactory.bytes;
-import com.google.common.annotations.VisibleForTesting;
-import com.google.common.collect.ArrayListMultimap;
-import com.google.common.collect.ListMultimap;
public class NMLeveldbStateStoreService extends NMStateStoreService {
@@ -147,6 +151,9 @@
private static final String AMRMPROXY_KEY_PREFIX = "AMRMProxy/";
+ private static final String CONTAINER_ASSIGNED_RESOURCES_KEY_SUFFIX =
+ "/assignedResources_";
+
private static final byte[] EMPTY_VALUE = new byte[0];
private DB db;
@@ -308,6 +315,13 @@ private RecoveredContainerState loadContainerState(ContainerId containerId,
rcs.setWorkDir(asString(entry.getValue()));
} else if (suffix.equals(CONTAINER_LOG_DIR_KEY_SUFFIX)) {
rcs.setLogDir(asString(entry.getValue()));
+ } else if (suffix.startsWith(CONTAINER_ASSIGNED_RESOURCES_KEY_SUFFIX)) {
+ String resourceType = suffix.substring(
+ CONTAINER_ASSIGNED_RESOURCES_KEY_SUFFIX.length());
+ ResourceMappings.AssignedResources assignedResources =
+ ResourceMappings.AssignedResources.fromBytes(entry.getValue());
+ rcs.getResourceMappings().addAssignedResources(resourceType,
+ assignedResources);
} else {
LOG.warn("the container " + containerId
+ " will be killed because of the unknown key " + key
@@ -1170,6 +1184,41 @@ public void removeLogDeleter(ApplicationId appId) throws IOException {
}
}
+ @Override
+ public void storeAssignedResources(Container container,
+ String resourceType, List assignedResources)
+ throws IOException {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug(
+ "storeAssignedResources: containerId=" + container.getContainerId()
+ + ", assignedResources=" + StringUtils
+ .join(",", assignedResources));
+
+ }
+
+ String keyResChng = CONTAINERS_KEY_PREFIX + container.getContainerId().toString()
+ + CONTAINER_ASSIGNED_RESOURCES_KEY_SUFFIX + resourceType;
+ try {
+ WriteBatch batch = db.createWriteBatch();
+ try {
+ ResourceMappings.AssignedResources res =
+ new ResourceMappings.AssignedResources();
+ res.updateAssignedResources(assignedResources);
+
+ // New value will overwrite old values for the same key
+ batch.put(bytes(keyResChng), res.toBytes());
+ db.write(batch);
+ } finally {
+ batch.close();
+ }
+ } catch (DBException e) {
+ throw new IOException(e);
+ }
+
+ // update container resource mapping.
+ updateContainerResourceMapping(container, resourceType, assignedResources);
+ }
+
@SuppressWarnings("deprecation")
private void cleanupDeprecatedFinishedApps() {
try {
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMNullStateStoreService.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMNullStateStoreService.java
index aaf6fb2cdbc..95ec61ae1a3 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMNullStateStoreService.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMNullStateStoreService.java
@@ -19,6 +19,7 @@
package org.apache.hadoop.yarn.server.nodemanager.recovery;
import java.io.IOException;
+import java.io.Serializable;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
@@ -34,6 +35,7 @@
import org.apache.hadoop.yarn.proto.YarnServerNodemanagerRecoveryProtos.LogDeleterProto;
import org.apache.hadoop.yarn.security.ContainerTokenIdentifier;
import org.apache.hadoop.yarn.server.api.records.MasterKey;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
// The state store to use when state isn't being stored
public class NMNullStateStoreService extends NMStateStoreService {
@@ -266,6 +268,13 @@ public void removeAMRMProxyAppContext(ApplicationAttemptId attempt)
throws IOException {
}
+ @Override
+ public void storeAssignedResources(Container container,
+ String resourceType, List assignedResources)
+ throws IOException {
+ updateContainerResourceMapping(container, resourceType, assignedResources);
+ }
+
@Override
protected void initStorage(Configuration conf) throws IOException {
}
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMStateStoreService.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMStateStoreService.java
index 1cdbd277ff3..350f2423834 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMStateStoreService.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMStateStoreService.java
@@ -19,6 +19,7 @@
package org.apache.hadoop.yarn.server.nodemanager.recovery;
import java.io.IOException;
+import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
@@ -43,6 +44,8 @@
import org.apache.hadoop.yarn.proto.YarnServerNodemanagerRecoveryProtos.LogDeleterProto;
import org.apache.hadoop.yarn.security.ContainerTokenIdentifier;
import org.apache.hadoop.yarn.server.api.records.MasterKey;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ResourceMappings;
@Private
@Unstable
@@ -90,6 +93,7 @@ public NMStateStoreService(String name) {
private RecoveredContainerType recoveryType =
RecoveredContainerType.RECOVER;
private long startTime;
+ private ResourceMappings resMappings = new ResourceMappings();
public RecoveredContainerStatus getStatus() {
return status;
@@ -174,6 +178,14 @@ public RecoveredContainerType getRecoveryType() {
public void setRecoveryType(RecoveredContainerType recoveryType) {
this.recoveryType = recoveryType;
}
+
+ public ResourceMappings getResourceMappings() {
+ return resMappings;
+ }
+
+ public void setResourceMappings(ResourceMappings mappings) {
+ this.resMappings = mappings;
+ }
}
public static class LocalResourceTrackerState {
@@ -718,9 +730,31 @@ public abstract void removeAMRMProxyAppContextEntry(
public abstract void removeAMRMProxyAppContext(ApplicationAttemptId attempt)
throws IOException;
+ /**
+ * Store the assigned resources to a container.
+ *
+ * @param container NMContainer
+ * @param resourceType Resource Type
+ * @param assignedResources Assigned resources
+ * @throws IOException if fails
+ */
+ public abstract void storeAssignedResources(Container container,
+ String resourceType, List assignedResources)
+ throws IOException;
+
protected abstract void initStorage(Configuration conf) throws IOException;
protected abstract void startStorage() throws IOException;
protected abstract void closeStorage() throws IOException;
+
+ protected void updateContainerResourceMapping(Container container,
+ String resourceType, List assignedResources) {
+ // Update Container#getResourceMapping.
+ ResourceMappings.AssignedResources newAssigned =
+ new ResourceMappings.AssignedResources();
+ newAssigned.updateAssignedResources(assignedResources);
+ container.getResourceMappings().addAssignedResources(resourceType,
+ newAssigned);
+ }
}
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/NMWebServices.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/NMWebServices.java
index c5379ccf258..9157374928d 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/NMWebServices.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/NMWebServices.java
@@ -27,6 +27,10 @@
import java.util.List;
import java.util.Map.Entry;
import java.util.Set;
+
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.ResourcePlugin;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.ResourcePluginManager;
+import org.apache.hadoop.yarn.server.nodemanager.webapp.dao.NMResourceInfo;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -496,6 +500,28 @@ public void write(OutputStream os) throws IOException,
}
}
+ @GET
+ @Path("/resources/{resourcename}")
+ @Produces({ MediaType.APPLICATION_JSON + "; " + JettyUtils.UTF_8,
+ MediaType.APPLICATION_XML + "; " + JettyUtils.UTF_8 })
+ public Object getNMResourceInfo(
+ @PathParam("resourcename")
+ String resourceName) throws YarnException {
+ init();
+ ResourcePluginManager rpm = this.nmContext.getResourcePluginManager();
+ if (rpm != null && rpm.getNameToPlugins() != null) {
+ ResourcePlugin plugin = rpm.getNameToPlugins().get(resourceName);
+ if (plugin != null) {
+ NMResourceInfo nmResourceInfo = plugin.getNMResourceInfo();
+ if (nmResourceInfo != null) {
+ return nmResourceInfo;
+ }
+ }
+ }
+
+ return new NMResourceInfo();
+ }
+
private long parseLongParam(String bytes) {
if (bytes == null || bytes.isEmpty()) {
return Long.MAX_VALUE;
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/NMResourceInfo.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/NMResourceInfo.java
new file mode 100644
index 00000000000..18ce8ea7a68
--- /dev/null
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/NMResourceInfo.java
@@ -0,0 +1,28 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager.webapp.dao;
+
+import javax.xml.bind.annotation.XmlAccessType;
+import javax.xml.bind.annotation.XmlAccessorType;
+import javax.xml.bind.annotation.XmlRootElement;
+
/**
 * Base JAXB payload for per-resource information served by the NM web
 * services; resource plugins subclass this to expose plugin-specific data.
 * An instance of this bare class is the "no information" default response.
 */
@XmlRootElement
@XmlAccessorType(XmlAccessType.FIELD)
public class NMResourceInfo {
}
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/gpu/GpuDeviceInformation.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/gpu/GpuDeviceInformation.java
new file mode 100644
index 00000000000..837d5cc99cd
--- /dev/null
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/gpu/GpuDeviceInformation.java
@@ -0,0 +1,72 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager.webapp.dao.gpu;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+
+import javax.xml.bind.annotation.XmlRootElement;
+import java.util.List;
+
+/**
+ * All GPU Device Information in the system, fetched from nvidia-smi.
+ */
+@InterfaceAudience.Private
+@InterfaceStability.Unstable
+@XmlRootElement(name = "nvidia_smi_log")
+public class GpuDeviceInformation {
+ List gpus;
+
+ String driverVersion = "N/A";
+
+ // More fields like topology information could be added when needed.
+ // ...
+
+ @javax.xml.bind.annotation.XmlElement(name = "gpu")
+ public List getGpus() {
+ return gpus;
+ }
+
+ public void setGpus(List gpus) {
+ this.gpus = gpus;
+ }
+
+ @javax.xml.bind.annotation.XmlElement(name = "driver_version")
+ public String getDriverVersion() {
+ return driverVersion;
+ }
+
+ public void setDriverVersion(String driverVersion) {
+ this.driverVersion = driverVersion;
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder sb = new StringBuilder();
+ sb.append("=== Gpus in the system ===\n").append("\tDriver Version:").append(
+ getDriverVersion()).append("\n");
+
+ if (gpus != null) {
+ for (PerGpuDeviceInformation gpu : gpus) {
+ sb.append("\t").append(gpu.toString()).append("\n");
+ }
+ }
+ return sb.toString();
+ }
+}
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/gpu/GpuDeviceInformationParser.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/gpu/GpuDeviceInformationParser.java
new file mode 100644
index 00000000000..1bd92f63a88
--- /dev/null
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/gpu/GpuDeviceInformationParser.java
@@ -0,0 +1,87 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager.webapp.dao.gpu;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.yarn.exceptions.YarnException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+import org.xml.sax.XMLReader;
+
+import javax.xml.bind.JAXBContext;
+import javax.xml.bind.JAXBException;
+import javax.xml.bind.Unmarshaller;
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.parsers.SAXParserFactory;
+import javax.xml.transform.sax.SAXSource;
+import java.io.StringReader;
+
/**
 * Parse XML and get GPU device information.
 */
@InterfaceAudience.Private
@InterfaceStability.Unstable
public class GpuDeviceInformationParser {
  private static final Logger LOG = LoggerFactory.getLogger(
      GpuDeviceInformationParser.class);

  // Lazily created on first parse; access is guarded by synchronized parseXml.
  private Unmarshaller unmarshaller = null;
  private XMLReader xmlReader = null;

  // One-time setup of the SAX reader and the JAXB unmarshaller.
  private void init()
      throws SAXException, ParserConfigurationException, JAXBException {
    SAXParserFactory spf = SAXParserFactory.newInstance();
    // Disable external-dtd loading since by default nvidia-smi output contains
    // a <!DOCTYPE ...> declaration in its header, and we must not try to
    // resolve that DTD while unmarshalling.
    spf.setFeature(
        "http://apache.org/xml/features/nonvalidating/load-external-dtd",
        false);
    spf.setFeature("http://xml.org/sax/features/validation", false);

    JAXBContext jaxbContext = JAXBContext.newInstance(
        GpuDeviceInformation.class);

    this.xmlReader = spf.newSAXParser().getXMLReader();
    this.unmarshaller = jaxbContext.createUnmarshaller();
  }

  /**
   * Parses the given XML content into a {@link GpuDeviceInformation}.
   *
   * @param xmlContent XML document as a string
   * @return parsed GPU device information
   * @throws YarnException if parser setup or unmarshalling fails
   */
  public synchronized GpuDeviceInformation parseXml(String xmlContent)
      throws YarnException {
    if (unmarshaller == null) {
      try {
        init();
      } catch (SAXException | ParserConfigurationException | JAXBException e) {
        LOG.error("Exception while initialize parser", e);
        throw new YarnException(e);
      }
    }

    InputSource inputSource = new InputSource(new StringReader(xmlContent));
    SAXSource source = new SAXSource(xmlReader, inputSource);
    try {
      return (GpuDeviceInformation) unmarshaller.unmarshal(source);
    } catch (JAXBException e) {
      LOG.error("Exception while parsing xml", e);
      throw new YarnException(e);
    }
  }
}
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/gpu/NMGpuResourceInfo.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/gpu/NMGpuResourceInfo.java
new file mode 100644
index 00000000000..bf1d463df7c
--- /dev/null
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/gpu/NMGpuResourceInfo.java
@@ -0,0 +1,71 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager.webapp.dao.gpu;
+
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu.AssignedGpuDevice;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu.GpuDevice;
+import org.apache.hadoop.yarn.server.nodemanager.webapp.dao.NMResourceInfo;
+
+import java.util.List;
+
+/**
+ * Gpu device information return to client when
+ * {@link org.apache.hadoop.yarn.server.nodemanager.webapp.NMWebServices#getNMResourceInfo(String)}
+ * is invoked.
+ */
+public class NMGpuResourceInfo extends NMResourceInfo {
+ GpuDeviceInformation gpuDeviceInformation;
+
+ List totalGpuDevices;
+ List assignedGpuDevices;
+
+ public NMGpuResourceInfo(GpuDeviceInformation gpuDeviceInformation,
+ List totalGpuDevices,
+ List assignedGpuDevices) {
+ this.gpuDeviceInformation = gpuDeviceInformation;
+ this.totalGpuDevices = totalGpuDevices;
+ this.assignedGpuDevices = assignedGpuDevices;
+ }
+
+ public GpuDeviceInformation getGpuDeviceInformation() {
+ return gpuDeviceInformation;
+ }
+
+ public void setGpuDeviceInformation(
+ GpuDeviceInformation gpuDeviceInformation) {
+ this.gpuDeviceInformation = gpuDeviceInformation;
+ }
+
+ public List getTotalGpuDevices() {
+ return totalGpuDevices;
+ }
+
+ public void setTotalGpuDevices(List totalGpuDevices) {
+ this.totalGpuDevices = totalGpuDevices;
+ }
+
+ public List getAssignedGpuDevices() {
+ return assignedGpuDevices;
+ }
+
+ public void setAssignedGpuDevices(
+ List assignedGpuDevices) {
+ this.assignedGpuDevices = assignedGpuDevices;
+ }
+}
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/gpu/PerGpuDeviceInformation.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/gpu/PerGpuDeviceInformation.java
new file mode 100644
index 00000000000..25c2e3a1f1d
--- /dev/null
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/gpu/PerGpuDeviceInformation.java
@@ -0,0 +1,165 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager.webapp.dao.gpu;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+
+import javax.xml.bind.annotation.XmlElement;
+import javax.xml.bind.annotation.XmlRootElement;
+import javax.xml.bind.annotation.adapters.XmlAdapter;
+
+/**
+ * Capture single GPU device information such as memory size, temperature,
+ * utilization.
+ */
+@InterfaceAudience.Private
+@InterfaceStability.Unstable
+@XmlRootElement(name = "gpu")
+public class PerGpuDeviceInformation {
+
+ private String productName = "N/A";
+ private String uuid = "N/A";
+ private int minorNumber = -1;
+
+ private PerGpuUtilizations gpuUtilizations;
+ private PerGpuMemoryUsage gpuMemoryUsage;
+ private PerGpuTemperature temperature;
+
+ /**
+ * Convert formats like "34 C", "75.6 %" to float.
+ */
+ @InterfaceAudience.Private
+ @InterfaceStability.Unstable
+ static class StrToFloatBeforeSpaceAdapter extends
+ XmlAdapter {
+ @Override
+ public String marshal(Float v) throws Exception {
+ if (v == null) {
+ return "";
+ }
+ return String.valueOf(v);
+ }
+
+ @Override
+ public Float unmarshal(String v) throws Exception {
+ if (v == null) {
+ return -1f;
+ }
+
+ return Float.valueOf(v.split(" ")[0]);
+ }
+ }
+
+ /**
+ * Convert formats like "725 MiB" to long.
+ */
+ @InterfaceAudience.Private
+ @InterfaceStability.Unstable
+ static class StrToMemAdapter extends XmlAdapter {
+ @Override
+ public String marshal(Long v) throws Exception {
+ if (v == null) {
+ return "";
+ }
+ return String.valueOf(v) + " MiB";
+ }
+
+ @Override
+ public Long unmarshal(String v) throws Exception {
+ if (v == null) {
+ return -1L;
+ }
+ return Long.valueOf(v.split(" ")[0]);
+ }
+ }
+
+ @XmlElement(name = "temperature")
+ public PerGpuTemperature getTemperature() {
+ return temperature;
+ }
+
+ public void setTemperature(PerGpuTemperature temperature) {
+ this.temperature = temperature;
+ }
+
+ @XmlElement(name = "uuid")
+ public String getUuid() {
+ return uuid;
+ }
+
+ public void setUuid(String uuid) {
+ this.uuid = uuid;
+ }
+
+ @XmlElement(name = "product_name")
+ public String getProductName() {
+ return productName;
+ }
+
+ public void setProductName(String productName) {
+ this.productName = productName;
+ }
+
+ @XmlElement(name = "minor_number")
+ public int getMinorNumber() {
+ return minorNumber;
+ }
+
+ public void setMinorNumber(int minorNumber) {
+ this.minorNumber = minorNumber;
+ }
+
+ @XmlElement(name = "utilization")
+ public PerGpuUtilizations getGpuUtilizations() {
+ return gpuUtilizations;
+ }
+
+ public void setGpuUtilizations(PerGpuUtilizations utilizations) {
+ this.gpuUtilizations = utilizations;
+ }
+
+ @XmlElement(name = "fb_memory_usage")
+ public PerGpuMemoryUsage getGpuMemoryUsage() {
+ return gpuMemoryUsage;
+ }
+
+ public void setGpuMemoryUsage(PerGpuMemoryUsage gpuMemoryUsage) {
+ this.gpuMemoryUsage = gpuMemoryUsage;
+ }
+
+
+ @Override
+ public String toString() {
+ StringBuilder sb = new StringBuilder();
+ sb.append("ProductName=").append(productName).append(", MinorNumber=")
+ .append(minorNumber);
+
+ if (getGpuMemoryUsage() != null) {
+ sb.append(", TotalMemory=").append(
+ getGpuMemoryUsage().getTotalMemoryMiB()).append("MiB");
+ }
+
+ if (getGpuUtilizations() != null) {
+ sb.append(", Utilization=").append(
+ getGpuUtilizations().getOverallGpuUtilization()).append("%");
+ }
+ return sb.toString();
+ }
+}
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/gpu/PerGpuMemoryUsage.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/gpu/PerGpuMemoryUsage.java
new file mode 100644
index 00000000000..afc1a9679b7
--- /dev/null
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/gpu/PerGpuMemoryUsage.java
@@ -0,0 +1,58 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager.webapp.dao.gpu;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+
+import javax.xml.bind.annotation.XmlElement;
+import javax.xml.bind.annotation.XmlRootElement;
+import javax.xml.bind.annotation.adapters.XmlJavaTypeAdapter;
+
/**
 * Frame-buffer memory usage of a single GPU, unmarshalled from the
 * "fb_memory_usage" element of nvidia-smi XML output.
 */
@InterfaceAudience.Private
@InterfaceStability.Unstable
@XmlRootElement(name = "fb_memory_usage")
public class PerGpuMemoryUsage {
  // Sentinel -1 means "not reported"; values are set via the MiB adapter.
  long usedMemoryMiB = -1L;
  long availMemoryMiB = -1L;

  @XmlJavaTypeAdapter(PerGpuDeviceInformation.StrToMemAdapter.class)
  @XmlElement(name = "used")
  public Long getUsedMemoryMiB() {
    return usedMemoryMiB;
  }

  public void setUsedMemoryMiB(Long usedMemoryMiB) {
    this.usedMemoryMiB = usedMemoryMiB;
  }

  @XmlJavaTypeAdapter(PerGpuDeviceInformation.StrToMemAdapter.class)
  @XmlElement(name = "free")
  public Long getAvailMemoryMiB() {
    return availMemoryMiB;
  }

  public void setAvailMemoryMiB(Long availMemoryMiB) {
    this.availMemoryMiB = availMemoryMiB;
  }

  // NOTE(review): when neither field was reported this returns -2, not -1;
  // callers treating negative values as "unknown" should be aware.
  public long getTotalMemoryMiB() {
    return usedMemoryMiB + availMemoryMiB;
  }
}
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/gpu/PerGpuTemperature.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/gpu/PerGpuTemperature.java
new file mode 100644
index 00000000000..ccd60cbf5e5
--- /dev/null
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/gpu/PerGpuTemperature.java
@@ -0,0 +1,80 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *