diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuResourcePlugin.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuResourcePlugin.java
index e49d2f24bd9..0faa26c75fd 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuResourcePlugin.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuResourcePlugin.java
@@ -18,7 +18,9 @@
 
 package org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu;
 
-import org.apache.hadoop.yarn.api.records.ContainerId;
+import java.util.List;
+
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.hadoop.yarn.exceptions.YarnException;
 import org.apache.hadoop.yarn.server.nodemanager.Context;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationExecutor;
@@ -32,11 +34,14 @@
 import org.apache.hadoop.yarn.server.nodemanager.webapp.dao.NMResourceInfo;
 import org.apache.hadoop.yarn.server.nodemanager.webapp.dao.gpu.GpuDeviceInformation;
 import org.apache.hadoop.yarn.server.nodemanager.webapp.dao.gpu.NMGpuResourceInfo;
-
-import java.util.List;
-import java.util.Map;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 public class GpuResourcePlugin implements ResourcePlugin {
+
+  private static final Logger LOGGER =
+      LoggerFactory.getLogger(GpuResourcePlugin.class);
+
   private GpuResourceHandlerImpl gpuResourceHandler = null;
   private GpuNodeResourceUpdateHandler resourceDiscoverHandler = null;
   private DockerCommandPlugin dockerCommandPlugin = null;
@@ -80,8 +85,14 @@ public DockerCommandPlugin getDockerCommandPluginInstance() {
   public NMResourceInfo getNMResourceInfo() throws YarnException {
     GpuDeviceInformation gpuDeviceInformation =
         GpuDiscoverer.getInstance().getGpuDeviceInformation();
+
+    // The GPU plugin is already enabled at this point; fail with a clear
+    // error if its resource handler is still null.
+    checkGpuResourceHandler();
+
     GpuResourceAllocator gpuResourceAllocator =
         gpuResourceHandler.getGpuAllocator();
+
     List totalGpus = gpuResourceAllocator.getAllowedGpusCopy();
     List assignedGpuDevices =
         gpuResourceAllocator.getAssignedGpusCopy();
@@ -90,6 +101,24 @@ public NMResourceInfo getNMResourceInfo() throws YarnException {
         assignedGpuDevices);
   }
 
+  /**
+   * Verifies that the GPU resource handler is set before it is dereferenced.
+   *
+   * @throws YarnException if {@code gpuResourceHandler} is {@code null}; the
+   *     message, which is also logged at WARN level, points at the
+   *     {@link YarnConfiguration#NM_CONTAINER_EXECUTOR} setting
+   */
+  private void checkGpuResourceHandler() throws YarnException {
+    if (gpuResourceHandler == null) {
+      String errorMsg =
+          "Linux Container Executor is not configured for the NodeManager. "
+              + "To fully enable GPU feature on the node also set "
+              + YarnConfiguration.NM_CONTAINER_EXECUTOR + " properly.";
+      LOGGER.warn(errorMsg);
+      throw new YarnException(errorMsg);
+    }
+  }
+
   @Override
   public String toString() {
     return GpuResourcePlugin.class.getName();