GPU plugin: skip device discovery in getNMResourceInfo() when auto discovery
is disabled.

GpuResourcePlugin#getNMResourceInfo() now calls
GpuDiscoverer#getGpuDeviceInformation() only when
GpuDiscoverer#isAutoDiscoveryEnabled() returns true; otherwise the returned
NMGpuResourceInfo is built with a null GpuDeviceInformation.
isAutoDiscoveryEnabled() becomes package-private so that the plugin and the
tests in the same package can call it. TestGpuResourcePlugin gains a shared
mock-discoverer helper and one test per discovery mode.

NOTE(review): this patch was rebuilt from a copy whose line breaks,
indentation and generic type arguments had been lost. Indentation follows the
2-space / 4-space-continuation Java style and the type arguments on the List
declarations were restored; verify both against the target tree before
applying. The post-image blob ids on the "index" lines come from the original
change and were not recomputed after comments were added.

diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuDiscoverer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuDiscoverer.java
index 3f2b65769fd..4133fb49492 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuDiscoverer.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuDiscoverer.java
@@ -136,7 +136,8 @@ public synchronized GpuDeviceInformation getGpuDeviceInformation()
     return lastDiscoveredGpuInformation;
   }
 
-  private boolean isAutoDiscoveryEnabled() {
+  // Package-private: called by GpuResourcePlugin and stubbed in tests.
+  boolean isAutoDiscoveryEnabled() {
     String allowedDevicesStr = getConf().get(
         YarnConfiguration.NM_GPU_ALLOWED_DEVICES,
         YarnConfiguration.AUTOMATICALLY_DISCOVER_GPU_DEVICES);
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuResourcePlugin.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuResourcePlugin.java
index d44160e8271..25ea19396b7 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuResourcePlugin.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuResourcePlugin.java
@@ -94,27 +94,32 @@ public DockerCommandPlugin getDockerCommandPluginInstance() {
 
   @Override
   public synchronized NMResourceInfo getNMResourceInfo() throws YarnException {
-    GpuDeviceInformation gpuDeviceInformation;
-
-    //At this point the gpu plugin is already enabled
-    checkGpuResourceHandler();
-
-    checkErrorCount();
-    try{
-      gpuDeviceInformation = gpuDiscoverer.getGpuDeviceInformation();
-      numOfErrorExecutionSinceLastSucceed = 0;
-    } catch (YarnException e) {
-      LOG.error(e.getMessage(), e);
-      numOfErrorExecutionSinceLastSucceed++;
-      throw e;
+    final GpuDeviceInformation gpuDeviceInformation;
+
+    if (gpuDiscoverer.isAutoDiscoveryEnabled()) {
+      //At this point the gpu plugin is already enabled
+      checkGpuResourceHandler();
+
+      checkErrorCount();
+      try{
+        gpuDeviceInformation = gpuDiscoverer.getGpuDeviceInformation();
+        numOfErrorExecutionSinceLastSucceed = 0;
+      } catch (YarnException e) {
+        LOG.error(e.getMessage(), e);
+        numOfErrorExecutionSinceLastSucceed++;
+        throw e;
+      }
+    } else {
+      // Discovery is skipped, so there is no device information to report.
+      // NOTE(review): checkGpuResourceHandler() does not run on this path;
+      // confirm gpuResourceHandler is non-null before it is used below.
+      gpuDeviceInformation = null;
     }
-
     GpuResourceAllocator gpuResourceAllocator =
         gpuResourceHandler.getGpuAllocator();
     List<GpuDevice> totalGpus = gpuResourceAllocator.getAllowedGpus();
     List<AssignedGpuDevice> assignedGpuDevices =
         gpuResourceAllocator.getAssignedGpus();
-
     return new NMGpuResourceInfo(gpuDeviceInformation, totalGpus,
         assignedGpuDevices);
   }
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/TestGpuResourcePlugin.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/TestGpuResourcePlugin.java
index 888f8999d5d..5e065cb340d 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/TestGpuResourcePlugin.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/TestGpuResourcePlugin.java
@@ -19,15 +19,42 @@
 package org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu;
 
 import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
 
+import com.google.common.collect.Lists;
 import org.apache.hadoop.yarn.exceptions.YarnException;
+import org.apache.hadoop.yarn.server.nodemanager.webapp.dao.gpu.GpuDeviceInformation;
+import org.apache.hadoop.yarn.server.nodemanager.webapp.dao.gpu.NMGpuResourceInfo;
+import org.apache.hadoop.yarn.server.nodemanager.webapp.dao.gpu.PerGpuDeviceInformation;
+import org.junit.Assert;
 import org.junit.Test;
+import java.util.List;
 
 public class TestGpuResourcePlugin {
 
+  /**
+   * Builds a mocked GpuDiscoverer that reports auto discovery as enabled and
+   * returns device information for a single GPU named "testGpu".
+   */
+  private GpuDiscoverer createMockDiscoverer() throws YarnException {
+    GpuDiscoverer gpuDiscoverer = mock(GpuDiscoverer.class);
+    when(gpuDiscoverer.isAutoDiscoveryEnabled()).thenReturn(true);
+
+    PerGpuDeviceInformation gpu =
+        new PerGpuDeviceInformation();
+    gpu.setProductName("testGpu");
+    List<PerGpuDeviceInformation> gpus = Lists.newArrayList();
+    gpus.add(gpu);
+
+    GpuDeviceInformation gpuDeviceInfo = new GpuDeviceInformation();
+    gpuDeviceInfo.setGpus(gpus);
+    when(gpuDiscoverer.getGpuDeviceInformation()).thenReturn(gpuDeviceInfo);
+    return gpuDiscoverer;
+  }
+
   @Test(expected = YarnException.class)
   public void testResourceHandlerNotInitialized() throws YarnException {
-    GpuDiscoverer gpuDiscoverer = mock(GpuDiscoverer.class);
+    GpuDiscoverer gpuDiscoverer = createMockDiscoverer();
     GpuNodeResourceUpdateHandler gpuNodeResourceUpdateHandler =
         mock(GpuNodeResourceUpdateHandler.class);
 
@@ -39,7 +66,7 @@ public void testResourceHandlerNotInitialized() throws YarnException {
 
   @Test
   public void testResourceHandlerIsInitialized() throws YarnException {
-    GpuDiscoverer gpuDiscoverer = mock(GpuDiscoverer.class);
+    GpuDiscoverer gpuDiscoverer = createMockDiscoverer();
     GpuNodeResourceUpdateHandler gpuNodeResourceUpdateHandler =
         mock(GpuNodeResourceUpdateHandler.class);
 
@@ -51,4 +78,52 @@ public void testResourceHandlerIsInitialized() throws YarnException {
     //Not throwing any exception
     target.getNMResourceInfo();
   }
+
+  @Test
+  public void testGetNMResourceInfoAutoDiscoveryEnabled()
+      throws YarnException {
+    GpuDiscoverer gpuDiscoverer = createMockDiscoverer();
+
+    GpuNodeResourceUpdateHandler gpuNodeResourceUpdateHandler =
+        mock(GpuNodeResourceUpdateHandler.class);
+
+    GpuResourcePlugin target =
+        new GpuResourcePlugin(gpuNodeResourceUpdateHandler, gpuDiscoverer);
+
+    target.createResourceHandler(null, null, null);
+
+    NMGpuResourceInfo resourceInfo =
+        (NMGpuResourceInfo) target.getNMResourceInfo();
+    Assert.assertNotNull("GpuDeviceInformation should not be null",
+        resourceInfo.getGpuDeviceInformation());
+
+    List<PerGpuDeviceInformation> gpus =
+        resourceInfo.getGpuDeviceInformation().getGpus();
+    Assert.assertNotNull("List of PerGpuDeviceInformation should not be null",
+        gpus);
+
+    Assert.assertEquals("List of PerGpuDeviceInformation should have a " +
+        "size of 1", 1, gpus.size());
+    Assert.assertEquals("Product name of GPU does not match",
+        "testGpu", gpus.get(0).getProductName());
+  }
+
+  @Test
+  public void testGetNMResourceInfoAutoDiscoveryDisabled()
+      throws YarnException {
+    GpuDiscoverer gpuDiscoverer = createMockDiscoverer();
+    when(gpuDiscoverer.isAutoDiscoveryEnabled()).thenReturn(false);
+
+    GpuNodeResourceUpdateHandler gpuNodeResourceUpdateHandler =
+        mock(GpuNodeResourceUpdateHandler.class);
+
+    GpuResourcePlugin target =
+        new GpuResourcePlugin(gpuNodeResourceUpdateHandler, gpuDiscoverer);
+
+    target.createResourceHandler(null, null, null);
+
+    NMGpuResourceInfo resourceInfo =
+        (NMGpuResourceInfo) target.getNMResourceInfo();
+    Assert.assertNull(resourceInfo.getGpuDeviceInformation());
+  }
 }