diff --git hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskAttemptImpl.java hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskAttemptImpl.java index dfc3adb29cc..3f37d4ddf27 100644 --- hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskAttemptImpl.java +++ hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskAttemptImpl.java @@ -18,6 +18,8 @@ package org.apache.hadoop.mapreduce.v2.app.job.impl; +import static org.apache.commons.lang.StringUtils.isEmpty; + import java.io.IOException; import java.net.InetAddress; import java.net.InetSocketAddress; @@ -126,6 +128,7 @@ import org.apache.hadoop.yarn.api.records.LocalResourceVisibility; import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.api.records.ResourceInformation; import org.apache.hadoop.yarn.api.records.URL; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.event.EventHandler; @@ -139,6 +142,8 @@ import org.apache.hadoop.yarn.state.StateMachineFactory; import org.apache.hadoop.yarn.util.Clock; import org.apache.hadoop.yarn.util.RackResolver; +import org.apache.hadoop.yarn.util.UnitsConversionUtil; +import org.apache.hadoop.yarn.util.resource.ResourceUtils; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; @@ -664,12 +669,8 @@ public TaskAttemptImpl(TaskId taskId, int i, this.jobFile = jobFile; this.partition = partition; - //TODO:create the resource reqt for this Task attempt this.resourceCapability = recordFactory.newRecordInstance(Resource.class); - this.resourceCapability.setMemorySize( - getMemoryRequired(conf, taskId.getTaskType())); - this.resourceCapability.setVirtualCores( - getCpuRequired(conf, taskId.getTaskType())); + populateResourceCapability(taskId.getTaskType()); this.dataLocalHosts = resolveHosts(dataLocalHosts); RackResolver.init(conf); @@ -701,21 +702,133 @@ private int getMemoryRequired(Configuration conf, TaskType taskType) { return memory; } + private void populateResourceCapability(TaskType taskType) { + String resourceTypePrefix = + getResourceTypePrefix(taskType); + boolean memorySet = false; + boolean cpuVcoresSet = false; + if (resourceTypePrefix != null) { + List<ResourceInformation> resourceRequests = + ResourceUtils.getRequestedResourcesFromConfig(conf, + resourceTypePrefix); + for (ResourceInformation resourceRequest : resourceRequests) { + String resourceName = resourceRequest.getName(); + if (MRJobConfig.RESOURCE_TYPE_NAME_MEMORY.equals(resourceName) || + MRJobConfig.RESOURCE_TYPE_ALTERNATIVE_NAME_MEMORY.equals( + resourceName)) { + if (memorySet) { + throw new IllegalArgumentException( + "Only one of the following keys " + + "can be specified for a single job: " + + MRJobConfig.RESOURCE_TYPE_ALTERNATIVE_NAME_MEMORY + ", " + + MRJobConfig.RESOURCE_TYPE_NAME_MEMORY); + } + String units = isEmpty(resourceRequest.getUnits()) ?
+ ResourceUtils.getDefaultUnit(ResourceInformation.MEMORY_URI) : + resourceRequest.getUnits(); + this.resourceCapability.setMemorySize( + UnitsConversionUtil.convert(units, "Mi", + resourceRequest.getValue())); + memorySet = true; + String memoryKey = getMemoryKey(taskType); + if (memoryKey != null && conf.get(memoryKey) != null) { + LOG.warn("Configuration " + resourceTypePrefix + resourceName + + "=" + resourceRequest.getValue() + resourceRequest.getUnits() + + " is overriding the " + memoryKey + "=" + conf.get(memoryKey) + + " configuration"); + } + } else if (MRJobConfig.RESOURCE_TYPE_NAME_VCORE.equals( + resourceName)) { + this.resourceCapability.setVirtualCores( + (int) UnitsConversionUtil.convert(resourceRequest.getUnits(), "", + resourceRequest.getValue())); + cpuVcoresSet = true; + String cpuKey = getCpuVcoresKey(taskType); + if (cpuKey != null && conf.get(cpuKey) != null) { + LOG.warn("Configuration " + resourceTypePrefix + + MRJobConfig.RESOURCE_TYPE_NAME_VCORE + "=" + + resourceRequest.getValue() + resourceRequest.getUnits() + + " is overriding the " + cpuKey + "=" + + conf.get(cpuKey) + " configuration"); + } + } else { + ResourceInformation resourceInformation = + this.resourceCapability.getResourceInformation(resourceName); + resourceInformation.setUnits(resourceRequest.getUnits()); + resourceInformation.setValue(resourceRequest.getValue()); + this.resourceCapability.setResourceInformation(resourceName, + resourceInformation); + } + } + } + if (!memorySet) { + this.resourceCapability.setMemorySize(getMemoryRequired(conf, taskType)); + } + if (!cpuVcoresSet) { + this.resourceCapability.setVirtualCores(getCpuRequired(conf, taskType)); + } + } + + private String getCpuVcoresKey(TaskType taskType) { + switch (taskType) { + case MAP: + return MRJobConfig.MAP_CPU_VCORES; + case REDUCE: + return MRJobConfig.REDUCE_CPU_VCORES; + default: + return null; + } + } + + private String getMemoryKey(TaskType taskType) { + switch (taskType) { + case MAP: + return MRJobConfig.MAP_MEMORY_MB; + case REDUCE: + return MRJobConfig.REDUCE_MEMORY_MB; + default: + return null; + } + } + + private Integer getCpuVcoreDefault(TaskType taskType) { + switch (taskType) { + case MAP: + return MRJobConfig.DEFAULT_MAP_CPU_VCORES; + case REDUCE: + return MRJobConfig.DEFAULT_REDUCE_CPU_VCORES; + default: + return null; + } + } + private int getCpuRequired(Configuration conf, TaskType taskType) { int vcores = 1; - if (taskType == TaskType.MAP) { - vcores = - conf.getInt(MRJobConfig.MAP_CPU_VCORES, - MRJobConfig.DEFAULT_MAP_CPU_VCORES); - } else if (taskType == TaskType.REDUCE) { - vcores = - conf.getInt(MRJobConfig.REDUCE_CPU_VCORES, - MRJobConfig.DEFAULT_REDUCE_CPU_VCORES); + String cpuVcoreKey = getCpuVcoresKey(taskType); + if (cpuVcoreKey != null) { + Integer defaultCpuVcores = getCpuVcoreDefault(taskType); + if (null == defaultCpuVcores) { + defaultCpuVcores = vcores; + } + vcores = conf.getInt(cpuVcoreKey, defaultCpuVcores); } - return vcores; } + private String getResourceTypePrefix(TaskType taskType) { + switch (taskType) { + case MAP: + return MRJobConfig.MAP_RESOURCE_TYPE_PREFIX; + case REDUCE: + return MRJobConfig.REDUCE_RESOURCE_TYPE_PREFIX; + default: + LOG.info("TaskType " + taskType + + " does not support custom resource types - this support can be " + + "added in " + getClass().getSimpleName()); + return null; + } + } + /** * Create a {@link LocalResource} record with all the given parameters. * The NM that hosts AM container will upload resources to shared cache. 
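The populateResourceCapability() logic above is driven entirely by configuration, so a job opts in simply by setting the new prefix keys. A minimal client-side sketch of that usage, assuming only the constants introduced by this patch; the custom resource name "fpga" is hypothetical and would have to be declared on the cluster via yarn.resource-types before the request validates:

    import org.apache.hadoop.mapred.JobConf;
    import org.apache.hadoop.mapreduce.MRJobConfig;

    public class ResourceTypeConfigExample {
      public static void main(String[] args) {
        JobConf conf = new JobConf();
        // New-style memory key for map tasks. The value format is
        // {amount}[ ][{unit}]; "3Gi" is converted to MB (3072) by
        // UnitsConversionUtil before the container request is built.
        conf.set(MRJobConfig.MAP_RESOURCE_TYPE_PREFIX
            + MRJobConfig.RESOURCE_TYPE_NAME_MEMORY, "3Gi");
        // New-style vcores key for reduce tasks; a plain number uses the
        // resource type's default unit.
        conf.set(MRJobConfig.REDUCE_RESOURCE_TYPE_PREFIX
            + MRJobConfig.RESOURCE_TYPE_NAME_VCORE, "4");
        // A custom countable resource ("fpga" is a made-up name for this
        // sketch); handled by the final else branch of
        // populateResourceCapability() via setResourceInformation().
        conf.set(MRJobConfig.MAP_RESOURCE_TYPE_PREFIX + "fpga", "2");
        // Setting both "memory" and "memory-mb" for the same task type would
        // make populateResourceCapability() throw IllegalArgumentException.
      }
    }

The legacy keys (mapreduce.map.memory.mb, mapreduce.reduce.cpu.vcores, and so on) still apply when no new-style key is present; when both are set, the new-style key wins and a WARN is logged, as exercised by the tests below.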
diff --git hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/TestMapreduceConfigFields.java hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/TestMapreduceConfigFields.java index 096cec937d4..f469aad1e6b 100644 --- hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/TestMapreduceConfigFields.java +++ hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/TestMapreduceConfigFields.java @@ -78,6 +78,17 @@ public void initializeMemberVariables() { xmlPropsToSkipCompare.add("mapreduce.local.clientfactory.class.name"); xmlPropsToSkipCompare.add("mapreduce.jobtracker.system.dir"); xmlPropsToSkipCompare.add("mapreduce.jobtracker.staging.root.dir"); + + // Resource type related properties are only prefixes, + // they need to be postfixed with the resource name + // in order to take effect. + // There is nothing to be added to mapred-default.xml + configurationPropsToSkipCompare.add( + MRJobConfig.MR_AM_RESOURCE_PREFIX); + configurationPropsToSkipCompare.add( + MRJobConfig.MAP_RESOURCE_TYPE_PREFIX); + configurationPropsToSkipCompare.add( + MRJobConfig.REDUCE_RESOURCE_TYPE_PREFIX); } } diff --git hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestTaskAttempt.java hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestTaskAttempt.java index 60a2177a88c..e055798640d 100644 --- hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestTaskAttempt.java +++ hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestTaskAttempt.java @@ -28,14 +28,21 @@ import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; +import java.io.ByteArrayInputStream; import java.io.IOException; +import java.io.InputStream; import java.net.InetSocketAddress; +import java.util.ArrayList; import java.util.HashMap; import java.util.Iterator; +import java.util.List; import java.util.Map; +import java.util.concurrent.CopyOnWriteArrayList; import com.google.common.base.Supplier; +import org.junit.After; import org.junit.Assert; +import org.junit.BeforeClass; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; @@ -43,6 +50,7 @@ import org.apache.hadoop.fs.RawLocalFileSystem; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.MapTaskAttemptImpl; +import org.apache.hadoop.mapred.ReduceTaskAttemptImpl; import org.apache.hadoop.mapreduce.Counters; import org.apache.hadoop.mapreduce.JobCounter; import org.apache.hadoop.mapreduce.MRJobConfig; @@ -83,24 +91,36 @@ import org.apache.hadoop.mapreduce.v2.util.MRBuilderUtils; import org.apache.hadoop.security.Credentials; import org.apache.hadoop.security.token.Token; +import org.apache.hadoop.yarn.LocalConfigurationProvider; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerId; import 
org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.api.records.ResourceInformation; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.event.Event; import org.apache.hadoop.yarn.event.EventHandler; +import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.util.Clock; import org.apache.hadoop.yarn.util.ControlledClock; import org.apache.hadoop.yarn.util.SystemClock; +import org.apache.hadoop.yarn.util.resource.ResourceUtils; +import org.apache.log4j.AppenderSkeleton; +import org.apache.log4j.Level; +import org.apache.log4j.Logger; +import org.apache.log4j.spi.LoggingEvent; import org.junit.Test; import org.mockito.ArgumentCaptor; +import com.google.common.collect.ImmutableList; + @SuppressWarnings({"unchecked", "rawtypes"}) public class TestTaskAttempt{ - + + private static final String CUSTOM_RESOURCE_NAME = "a-custom-resource"; + static public class StubbedFS extends RawLocalFileSystem { @Override public FileStatus getFileStatus(Path f) throws IOException { @@ -108,6 +128,63 @@ public FileStatus getFileStatus(Path f) throws IOException { } } + private static class CustomResourceTypesConfigurationProvider + extends LocalConfigurationProvider { + + @Override + public InputStream getConfigurationInputStream(Configuration bootstrapConf, + String name) throws YarnException, IOException { + if (YarnConfiguration.RESOURCE_TYPES_CONFIGURATION_FILE.equals(name)) { + return new ByteArrayInputStream( + ("<configuration>\n" + " <property>\n" + " <name>yarn.resource-types</name>\n" + " <value>a-custom-resource</value>\n" + " </property>\n" + " <property>\n" + " <name>yarn.resource-types.a-custom-resource.units</name>\n" + " <value>G</value>\n" + " </property>\n" + "</configuration>\n").getBytes()); + } else { + return super.getConfigurationInputStream(bootstrapConf, name); + } + } + } + + private static class TestAppender extends AppenderSkeleton { + + private final List<LoggingEvent> logEvents = new CopyOnWriteArrayList<>(); + + @Override + public boolean requiresLayout() { + return false; + } + + @Override + public void close() { + } + + @Override + protected void append(LoggingEvent arg0) { + logEvents.add(arg0); + } + + private List<LoggingEvent> getLogEvents() { + return logEvents; + } + } + + @BeforeClass + public static void setupBeforeClass() { + ResourceUtils.resetResourceTypes(new Configuration()); + } + + @After + public void tearDown() { + ResourceUtils.resetResourceTypes(new Configuration()); + } + @Test public void testMRAppHistoryForMap() throws Exception { MRApp app = new FailingAttemptsMRApp(1, 0); @@ -329,17 +406,18 @@ public void verifyMillisCounters(Resource containerResource, private TaskAttemptImpl createMapTaskAttemptImplForTest( EventHandler eventHandler, TaskSplitMetaInfo taskSplitMetaInfo) { Clock clock = SystemClock.getInstance(); - return createMapTaskAttemptImplForTest(eventHandler, taskSplitMetaInfo, clock); + return createMapTaskAttemptImplForTest(eventHandler, taskSplitMetaInfo, + clock, new JobConf()); } private TaskAttemptImpl createMapTaskAttemptImplForTest( - EventHandler eventHandler, TaskSplitMetaInfo taskSplitMetaInfo, Clock clock) { + EventHandler eventHandler, TaskSplitMetaInfo taskSplitMetaInfo, + Clock clock, JobConf jobConf) { ApplicationId appId = ApplicationId.newInstance(1, 1); JobId jobId = MRBuilderUtils.newJobId(appId, 1); TaskId taskId = MRBuilderUtils.newTaskId(jobId, 1, TaskType.MAP); TaskAttemptListener taListener = mock(TaskAttemptListener.class); Path jobFile = mock(Path.class); - JobConf jobConf = new JobConf(); TaskAttemptImpl taImpl = new
MapTaskAttemptImpl(taskId, 1, eventHandler, jobFile, 1, taskSplitMetaInfo, jobConf, taListener, null, @@ -347,6 +425,20 @@ private TaskAttemptImpl createMapTaskAttemptImplForTest( return taImpl; } + private TaskAttemptImpl createReduceTaskAttemptImplForTest( + EventHandler eventHandler, Clock clock, JobConf jobConf) { + ApplicationId appId = ApplicationId.newInstance(1, 1); + JobId jobId = MRBuilderUtils.newJobId(appId, 1); + TaskId taskId = MRBuilderUtils.newTaskId(jobId, 1, TaskType.REDUCE); + TaskAttemptListener taListener = mock(TaskAttemptListener.class); + Path jobFile = mock(Path.class); + TaskAttemptImpl taImpl = + new ReduceTaskAttemptImpl(taskId, 1, eventHandler, jobFile, 1, + 1, jobConf, taListener, null, + null, clock, null); + return taImpl; + } + private void testMRAppHistory(MRApp app) throws Exception { Configuration conf = new Configuration(); Job job = app.submit(conf); @@ -1423,6 +1515,271 @@ public void testTimeoutWhileFailFinishing() throws Exception { assertFalse("InternalError occurred", eventHandler.internalError); } + @Test + public void testMapperCustomResourceTypes() { + initResourceTypes(); + EventHandler eventHandler = mock(EventHandler.class); + TaskSplitMetaInfo taskSplitMetaInfo = new TaskSplitMetaInfo(); + Clock clock = SystemClock.getInstance(); + JobConf jobConf = new JobConf(); + jobConf.setLong(MRJobConfig.MAP_RESOURCE_TYPE_PREFIX + + CUSTOM_RESOURCE_NAME, 7L); + TaskAttemptImpl taImpl = createMapTaskAttemptImplForTest(eventHandler, + taskSplitMetaInfo, clock, jobConf); + ResourceInformation resourceInfo = + getResourceInfoFromContainerRequest(taImpl, eventHandler). + getResourceInformation(CUSTOM_RESOURCE_NAME); + assertEquals("Expecting the default unit (G)", + "G", resourceInfo.getUnits()); + assertEquals(7L, resourceInfo.getValue()); + } + + @Test + public void testReducerCustomResourceTypes() { + initResourceTypes(); + EventHandler eventHandler = mock(EventHandler.class); + Clock clock = SystemClock.getInstance(); + JobConf jobConf = new JobConf(); + jobConf.set(MRJobConfig.REDUCE_RESOURCE_TYPE_PREFIX + + CUSTOM_RESOURCE_NAME, "3m"); + TaskAttemptImpl taImpl = + createReduceTaskAttemptImplForTest(eventHandler, clock, jobConf); + ResourceInformation resourceInfo = + getResourceInfoFromContainerRequest(taImpl, eventHandler). + getResourceInformation(CUSTOM_RESOURCE_NAME); + assertEquals("Expecting the specified unit (m)", + "m", resourceInfo.getUnits()); + assertEquals(3L, resourceInfo.getValue()); + } + + @Test + public void testReducerMemoryRequestViaMapreduceReduceMemoryMb() { + EventHandler eventHandler = mock(EventHandler.class); + Clock clock = SystemClock.getInstance(); + JobConf jobConf = new JobConf(); + jobConf.setInt(MRJobConfig.REDUCE_MEMORY_MB, 2048); + TaskAttemptImpl taImpl = + createReduceTaskAttemptImplForTest(eventHandler, clock, jobConf); + long memorySize = + getResourceInfoFromContainerRequest(taImpl, eventHandler). + getMemorySize(); + assertEquals(2048, memorySize); + } + + @Test + public void testReducerMemoryRequestViaMapreduceReduceResourceMemory() { + EventHandler eventHandler = mock(EventHandler.class); + Clock clock = SystemClock.getInstance(); + JobConf jobConf = new JobConf(); + jobConf.set(MRJobConfig.REDUCE_RESOURCE_TYPE_PREFIX + + MRJobConfig.RESOURCE_TYPE_NAME_MEMORY, "2 Gi"); + TaskAttemptImpl taImpl = + createReduceTaskAttemptImplForTest(eventHandler, clock, jobConf); + long memorySize = + getResourceInfoFromContainerRequest(taImpl, eventHandler). 
+ getMemorySize(); + assertEquals(2048, memorySize); + } + + @Test + public void testReducerMemoryRequestDefaultMemory() { + EventHandler eventHandler = mock(EventHandler.class); + Clock clock = SystemClock.getInstance(); + TaskAttemptImpl taImpl = + createReduceTaskAttemptImplForTest(eventHandler, clock, new JobConf()); + long memorySize = + getResourceInfoFromContainerRequest(taImpl, eventHandler). + getMemorySize(); + assertEquals(MRJobConfig.DEFAULT_REDUCE_MEMORY_MB, memorySize); + } + + @Test + public void testReducerMemoryRequestWithoutUnits() { + Clock clock = SystemClock.getInstance(); + for (String memoryResourceName : ImmutableList.of( + MRJobConfig.RESOURCE_TYPE_NAME_MEMORY, + MRJobConfig.RESOURCE_TYPE_ALTERNATIVE_NAME_MEMORY)) { + EventHandler eventHandler = mock(EventHandler.class); + JobConf jobConf = new JobConf(); + jobConf.setInt(MRJobConfig.REDUCE_RESOURCE_TYPE_PREFIX + + memoryResourceName, 2048); + TaskAttemptImpl taImpl = + createReduceTaskAttemptImplForTest(eventHandler, clock, jobConf); + long memorySize = + getResourceInfoFromContainerRequest(taImpl, eventHandler). + getMemorySize(); + assertEquals(2048, memorySize); + } + } + + @Test + public void testReducerMemoryRequestOverriding() { + for (String memoryName : ImmutableList.of( + MRJobConfig.RESOURCE_TYPE_NAME_MEMORY, + MRJobConfig.RESOURCE_TYPE_ALTERNATIVE_NAME_MEMORY)) { + TestAppender testAppender = new TestAppender(); + final Logger logger = Logger.getLogger(TaskAttemptImpl.class); + try { + logger.addAppender(testAppender); + EventHandler eventHandler = mock(EventHandler.class); + Clock clock = SystemClock.getInstance(); + JobConf jobConf = new JobConf(); + jobConf.set(MRJobConfig.REDUCE_RESOURCE_TYPE_PREFIX + memoryName, + "3Gi"); + jobConf.setInt(MRJobConfig.REDUCE_MEMORY_MB, 2048); + TaskAttemptImpl taImpl = + createReduceTaskAttemptImplForTest(eventHandler, clock, jobConf); + long memorySize = + getResourceInfoFromContainerRequest(taImpl, eventHandler). + getMemorySize(); + assertEquals(3072, memorySize); + boolean foundLogWarning = false; + for (LoggingEvent e : testAppender.getLogEvents()) { + if (e.getLevel() == Level.WARN && ("Configuration " + + "mapreduce.reduce.resource." + memoryName + "=3Gi is " + + "overriding the mapreduce.reduce.memory.mb=2048 configuration") + .equals(e.getMessage())) { + foundLogWarning = true; + break; + } + } + assertTrue(foundLogWarning); + } finally { + logger.removeAppender(testAppender); + } + } + } + + @Test(expected=IllegalArgumentException.class) + public void testReducerMemoryRequestMultipleName() { + EventHandler eventHandler = mock(EventHandler.class); + Clock clock = SystemClock.getInstance(); + JobConf jobConf = new JobConf(); + for (String memoryName : ImmutableList.of( + MRJobConfig.RESOURCE_TYPE_NAME_MEMORY, + MRJobConfig.RESOURCE_TYPE_ALTERNATIVE_NAME_MEMORY)) { + jobConf.set(MRJobConfig.REDUCE_RESOURCE_TYPE_PREFIX + memoryName, + "3Gi"); + } + createReduceTaskAttemptImplForTest(eventHandler, clock, jobConf); + } + + @Test + public void testReducerCpuRequestViaMapreduceReduceCpuVcores() { + EventHandler eventHandler = mock(EventHandler.class); + Clock clock = SystemClock.getInstance(); + JobConf jobConf = new JobConf(); + jobConf.setInt(MRJobConfig.REDUCE_CPU_VCORES, 3); + TaskAttemptImpl taImpl = + createReduceTaskAttemptImplForTest(eventHandler, clock, jobConf); + int vCores = + getResourceInfoFromContainerRequest(taImpl, eventHandler). 
+ getVirtualCores(); + assertEquals(3, vCores); + } + + @Test + public void testReducerCpuRequestViaMapreduceReduceResourceVcores() { + EventHandler eventHandler = mock(EventHandler.class); + Clock clock = SystemClock.getInstance(); + JobConf jobConf = new JobConf(); + jobConf.set(MRJobConfig.REDUCE_RESOURCE_TYPE_PREFIX + + MRJobConfig.RESOURCE_TYPE_NAME_VCORE, "5"); + TaskAttemptImpl taImpl = + createReduceTaskAttemptImplForTest(eventHandler, clock, jobConf); + int vCores = + getResourceInfoFromContainerRequest(taImpl, eventHandler). + getVirtualCores(); + assertEquals(5, vCores); + } + + @Test + public void testReducerCpuRequestDefaultVcores() { + EventHandler eventHandler = mock(EventHandler.class); + Clock clock = SystemClock.getInstance(); + TaskAttemptImpl taImpl = + createReduceTaskAttemptImplForTest(eventHandler, clock, new JobConf()); + int vCores = + getResourceInfoFromContainerRequest(taImpl, eventHandler). + getVirtualCores(); + assertEquals(MRJobConfig.DEFAULT_REDUCE_CPU_VCORES, vCores); + } + + @Test + public void testReducerCpuRequestOverriding() { + TestAppender testAppender = new TestAppender(); + final Logger logger = Logger.getLogger(TaskAttemptImpl.class); + try { + logger.addAppender(testAppender); + EventHandler eventHandler = mock(EventHandler.class); + Clock clock = SystemClock.getInstance(); + JobConf jobConf = new JobConf(); + jobConf.set(MRJobConfig.REDUCE_RESOURCE_TYPE_PREFIX + + MRJobConfig.RESOURCE_TYPE_NAME_VCORE, "7"); + jobConf.setInt(MRJobConfig.REDUCE_CPU_VCORES, 9); + TaskAttemptImpl taImpl = + createReduceTaskAttemptImplForTest(eventHandler, clock, jobConf); + long vCores = + getResourceInfoFromContainerRequest(taImpl, eventHandler). + getVirtualCores(); + assertEquals(7, vCores); + boolean foundLogWarning = false; + for (LoggingEvent e : testAppender.getLogEvents()) { + if (e.getLevel() == Level.WARN && ("Configuration " + + "mapreduce.reduce.resource.vcores=7 is overriding the " + + "mapreduce.reduce.cpu.vcores=9 configuration" + ).equals(e.getMessage())) { + foundLogWarning = true; + break; + } + } + assertTrue(foundLogWarning); + } finally { + logger.removeAppender(testAppender); + } + } + + private Resource getResourceInfoFromContainerRequest( + TaskAttemptImpl taImpl, EventHandler eventHandler) { + taImpl.handle(new TaskAttemptEvent(taImpl.getID(), + TaskAttemptEventType.TA_SCHEDULE)); + + assertEquals("Task attempt is not in STARTING state", + TaskAttemptState.STARTING, taImpl.getState()); + + ArgumentCaptor<Event> captor = ArgumentCaptor.forClass(Event.class); + verify(eventHandler, times(2)).handle(captor.capture()); + + List<ContainerRequestEvent> containerRequestEvents = new ArrayList<>(); + for (Event e : captor.getAllValues()) { + if (e instanceof ContainerRequestEvent) { + containerRequestEvents.add((ContainerRequestEvent) e); + } + } + assertEquals("Expected one ContainerRequestEvent after scheduling " + + "task attempt", 1, containerRequestEvents.size()); + + return containerRequestEvents.get(0).getCapability(); + } + + @Test(expected=IllegalArgumentException.class) + public void testReducerCustomResourceTypeWithInvalidUnit() { + initResourceTypes(); + EventHandler eventHandler = mock(EventHandler.class); + Clock clock = SystemClock.getInstance(); + JobConf jobConf = new JobConf(); + jobConf.set(MRJobConfig.REDUCE_RESOURCE_TYPE_PREFIX + + CUSTOM_RESOURCE_NAME, "3z"); + createReduceTaskAttemptImplForTest(eventHandler, clock, jobConf); + } + + private void initResourceTypes() { + Configuration conf = new Configuration(); +
conf.set(YarnConfiguration.RM_CONFIGURATION_PROVIDER_CLASS, + CustomResourceTypesConfigurationProvider.class.getName()); + ResourceUtils.resetResourceTypes(conf); + } + private void setupTaskAttemptFinishingMonitor( EventHandler eventHandler, JobConf jobConf, AppContext appCtx) { TaskAttemptFinishingMonitor taskAttemptFinishingMonitor = diff --git hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java index d66612366a7..5a72def63c0 100644 --- hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java +++ hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java @@ -360,12 +360,47 @@ public static final String MAP_INPUT_START = "mapreduce.map.input.start"; + /** + * Configuration key for specifying memory requirement for the mapper. + * Kept for backward-compatibility, mapreduce.map.resource.memory + * is the new preferred way to specify this. + */ public static final String MAP_MEMORY_MB = "mapreduce.map.memory.mb"; public static final int DEFAULT_MAP_MEMORY_MB = 1024; + /** + * Configuration key for specifying CPU requirement for the mapper. + * Kept for backward-compatibility, mapreduce.map.resource.vcores + * is the new preferred way to specify this. + */ public static final String MAP_CPU_VCORES = "mapreduce.map.cpu.vcores"; public static final int DEFAULT_MAP_CPU_VCORES = 1; + /** + * Custom resource names required by the mapper should be + * appended to this prefix, the value's format is {amount}[ ][{unit}]. + * If no unit is defined, the default unit will be used. + * Standard resource names: memory (default unit: Mi), vcores + */ + public static final String MAP_RESOURCE_TYPE_PREFIX = + "mapreduce.map.resource."; + + /** + * Resource type name for CPU vcores. + */ + public static final String RESOURCE_TYPE_NAME_VCORE = "vcores"; + + /** + * Resource type name for memory. + */ + public static final String RESOURCE_TYPE_NAME_MEMORY = "memory"; + + /** + * Alternative resource type name for memory. + */ + public static final String RESOURCE_TYPE_ALTERNATIVE_NAME_MEMORY = + "memory-mb"; + public static final String MAP_ENV = "mapreduce.map.env"; public static final String MAP_JAVA_OPTS = "mapreduce.map.java.opts"; @@ -408,12 +443,31 @@ public static final String REDUCE_MARKRESET_BUFFER_SIZE = "mapreduce.reduce.markreset.buffer.size"; + /** + * Configuration key for specifying memory requirement for the reducer. + * Kept for backward-compatibility, mapreduce.reduce.resource.memory + * is the new preferred way to specify this. + */ public static final String REDUCE_MEMORY_MB = "mapreduce.reduce.memory.mb"; public static final int DEFAULT_REDUCE_MEMORY_MB = 1024; + /** + * Configuration key for specifying CPU requirement for the reducer. + * Kept for backward-compatibility, mapreduce.reduce.resource.vcores + * is the new preferred way to specify this. + */ public static final String REDUCE_CPU_VCORES = "mapreduce.reduce.cpu.vcores"; public static final int DEFAULT_REDUCE_CPU_VCORES = 1; + /** + * Resource names required by the reducer should be + * appended to this prefix, the value's format is {amount}[ ][{unit}]. + * If no unit is defined, the default unit will be used. 
+ * Standard resource names: memory (default unit: Mi), vcores + */ + public static final String REDUCE_RESOURCE_TYPE_PREFIX = + "mapreduce.reduce.resource."; + public static final String REDUCE_MEMORY_TOTAL_BYTES = "mapreduce.reduce.memory.totalbytes"; public static final String SHUFFLE_INPUT_BUFFER_PERCENT = "mapreduce.reduce.shuffle.input.buffer.percent"; @@ -599,7 +653,10 @@ public static final String DEFAULT_MR_AM_STAGING_DIR = "/tmp/hadoop-yarn/staging"; - /** The amount of memory the MR app master needs.*/ + /** The amount of memory the MR app master needs. + * Kept for backward-compatibility, yarn.app.mapreduce.am.resource.memory is + * the new preferred way to specify this. + */ public static final String MR_AM_VMEM_MB = MR_AM_PREFIX+"resource.mb"; public static final int DEFAULT_MR_AM_VMEM_MB = 1536; @@ -609,6 +666,15 @@ MR_AM_PREFIX+"resource.cpu-vcores"; public static final int DEFAULT_MR_AM_CPU_VCORES = 1; + /** + * Resource names required by the MR AM should be + * appended to this prefix, the value's format is {amount}[ ][{unit}]. + * If no unit is defined, the default unit will be used. + * Standard resource names: memory (default unit: Mi), vcores + */ + public static final String MR_AM_RESOURCE_PREFIX = + MR_AM_PREFIX + "resource."; + /** Command line arguments passed to the MR app master.*/ public static final String MR_AM_COMMAND_OPTS = MR_AM_PREFIX+"command-opts"; diff --git hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/ResourceMgrDelegate.java hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/ResourceMgrDelegate.java index 62aa4972929..ae051654f41 100644 --- hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/ResourceMgrDelegate.java +++ hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/ResourceMgrDelegate.java @@ -66,6 +66,7 @@ import org.apache.hadoop.yarn.api.records.NodeState; import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.QueueUserACLInfo; +import org.apache.hadoop.yarn.api.records.ResourceTypeInfo; import org.apache.hadoop.yarn.api.records.SignalContainerCommand; import org.apache.hadoop.yarn.api.records.YarnApplicationState; import org.apache.hadoop.yarn.api.records.YarnClusterMetrics; @@ -517,4 +518,10 @@ public void killApplication(ApplicationId appId, String diagnostics) throws YarnException, IOException { client.killApplication(appId, diagnostics); } + + @Override + public List<ResourceTypeInfo> getResourceTypeInfo() + throws YarnException, IOException { + return client.getResourceTypeInfo(); + } } diff --git hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/YARNRunner.java hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/YARNRunner.java index a23ff34b574..12a307930fc 100644 --- hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/YARNRunner.java +++ hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/YARNRunner.java @@ -18,6 +18,9 @@ package org.apache.hadoop.mapred; +import static org.apache.commons.lang.StringUtils.isEmpty; +import static
org.apache.hadoop.mapreduce.MRJobConfig.MR_AM_RESOURCE_PREFIX; + import java.io.IOException; import java.net.URI; import java.net.URISyntaxException; @@ -84,6 +87,7 @@ import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.ReservationId; import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.api.records.ResourceInformation; import org.apache.hadoop.yarn.api.records.ResourceRequest; import org.apache.hadoop.yarn.api.records.URL; import org.apache.hadoop.yarn.api.records.YarnApplicationState; @@ -93,6 +97,8 @@ import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; import org.apache.hadoop.yarn.security.client.RMDelegationTokenSelector; import org.apache.hadoop.yarn.util.ConverterUtils; +import org.apache.hadoop.yarn.util.UnitsConversionUtil; +import org.apache.hadoop.yarn.util.resource.ResourceUtils; import com.google.common.annotations.VisibleForTesting; @@ -659,16 +665,76 @@ public ApplicationSubmissionContext createApplicationSubmissionContext( private List<ResourceRequest> generateResourceRequests() throws IOException { Resource capability = recordFactory.newRecordInstance(Resource.class); - capability.setMemorySize( - conf.getInt( - MRJobConfig.MR_AM_VMEM_MB, MRJobConfig.DEFAULT_MR_AM_VMEM_MB - ) - ); - capability.setVirtualCores( - conf.getInt( - MRJobConfig.MR_AM_CPU_VCORES, MRJobConfig.DEFAULT_MR_AM_CPU_VCORES - ) - ); + boolean memorySet = false; + boolean cpuVcoresSet = false; + List<ResourceInformation> resourceRequests = ResourceUtils + .getRequestedResourcesFromConfig(conf, MR_AM_RESOURCE_PREFIX); + for (ResourceInformation resourceReq : resourceRequests) { + String resourceName = resourceReq.getName(); + if (MRJobConfig.RESOURCE_TYPE_NAME_MEMORY.equals(resourceName) || + MRJobConfig.RESOURCE_TYPE_ALTERNATIVE_NAME_MEMORY.equals( + resourceName)) { + if (memorySet) { + throw new IllegalArgumentException( + "Only one of the following keys " + + "can be specified for a single job: " + + MRJobConfig.RESOURCE_TYPE_ALTERNATIVE_NAME_MEMORY + ", " + + MRJobConfig.RESOURCE_TYPE_NAME_MEMORY); + } + String units = isEmpty(resourceReq.getUnits()) ?
+ ResourceUtils.getDefaultUnit(ResourceInformation.MEMORY_URI) : + resourceReq.getUnits(); + capability.setMemorySize( + UnitsConversionUtil.convert(units, "Mi", resourceReq.getValue())); + memorySet = true; + if (conf.get(MRJobConfig.MR_AM_VMEM_MB) != null) { + LOG.warn("Configuration " + MR_AM_RESOURCE_PREFIX + + resourceName + "=" + resourceReq.getValue() + + resourceReq.getUnits() + " is overriding the " + + MRJobConfig.MR_AM_VMEM_MB + "=" + + conf.get(MRJobConfig.MR_AM_VMEM_MB) + " configuration"); + } + } else if (MRJobConfig.RESOURCE_TYPE_NAME_VCORE.equals(resourceName)) { + capability.setVirtualCores( + (int) UnitsConversionUtil.convert(resourceReq.getUnits(), "", + resourceReq.getValue())); + cpuVcoresSet = true; + if (conf.get(MRJobConfig.MR_AM_CPU_VCORES) != null) { + LOG.warn("Configuration " + MR_AM_RESOURCE_PREFIX + + resourceName + "=" + resourceReq.getValue() + + resourceReq.getUnits() + " is overriding the " + + MRJobConfig.MR_AM_CPU_VCORES + "=" + + conf.get(MRJobConfig.MR_AM_CPU_VCORES) + " configuration"); + } + } else if (!MRJobConfig.MR_AM_VMEM_MB.equals( + MR_AM_RESOURCE_PREFIX + resourceName) && + !MRJobConfig.MR_AM_CPU_VCORES.equals( + MR_AM_RESOURCE_PREFIX + resourceName)) { + // the "mb", "cpu-vcores" resource types are not processed here + // since the yarn.app.mapreduce.am.resource.mb, + // yarn.app.mapreduce.am.resource.cpu-vcores keys are used for + // backward-compatibility - which is handled after this loop + ResourceInformation resourceInformation = capability + .getResourceInformation(resourceName); + resourceInformation.setUnits(resourceReq.getUnits()); + resourceInformation.setValue(resourceReq.getValue()); + capability.setResourceInformation(resourceName, resourceInformation); + } + } + if (!memorySet) { + capability.setMemorySize( + conf.getInt( + MRJobConfig.MR_AM_VMEM_MB, MRJobConfig.DEFAULT_MR_AM_VMEM_MB + ) + ); + } + if (!cpuVcoresSet) { + capability.setVirtualCores( + conf.getInt( + MRJobConfig.MR_AM_CPU_VCORES, MRJobConfig.DEFAULT_MR_AM_CPU_VCORES + ) + ); + } if (LOG.isDebugEnabled()) { LOG.debug("AppMaster capability = " + capability); } diff --git hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestClientRedirect.java hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestClientRedirect.java index 65eac654845..8b6ea64118c 100644 --- hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestClientRedirect.java +++ hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestClientRedirect.java @@ -72,6 +72,8 @@ import org.apache.hadoop.yarn.api.protocolrecords.CancelDelegationTokenResponse; import org.apache.hadoop.yarn.api.protocolrecords.FailApplicationAttemptRequest; import org.apache.hadoop.yarn.api.protocolrecords.FailApplicationAttemptResponse; +import org.apache.hadoop.yarn.api.protocolrecords.GetAllResourceTypeInfoRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetAllResourceTypeInfoResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationAttemptReportRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationAttemptReportResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationAttemptsRequest; @@ -494,6 +496,13 @@ public UpdateApplicationTimeoutsResponse updateApplicationTimeouts( throws YarnException, 
IOException { return null; } + + @Override + public GetAllResourceTypeInfoResponse getResourceTypeInfo( + GetAllResourceTypeInfoRequest request) + throws YarnException, IOException { + return null; + } } class HistoryService extends AMService implements HSClientProtocol { diff --git hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestYARNRunner.java hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestYARNRunner.java index c79b08e52e4..ecb396e91fd 100644 --- hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestYARNRunner.java +++ hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestYARNRunner.java @@ -32,10 +32,12 @@ import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; +import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; +import java.io.InputStream; import java.io.OutputStream; import java.net.InetSocketAddress; import java.nio.ByteBuffer; @@ -43,6 +45,7 @@ import java.util.Arrays; import java.util.List; import java.util.Map; +import java.util.concurrent.CopyOnWriteArrayList; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -69,6 +72,7 @@ import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.util.Shell; +import org.apache.hadoop.yarn.LocalConfigurationProvider; import org.apache.hadoop.yarn.api.ApplicationClientProtocol; import org.apache.hadoop.yarn.api.ApplicationConstants; import org.apache.hadoop.yarn.api.ApplicationConstants.Environment; @@ -96,28 +100,37 @@ import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.QueueInfo; import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.api.records.ResourceInformation; import org.apache.hadoop.yarn.api.records.ResourceRequest; import org.apache.hadoop.yarn.api.records.YarnApplicationState; import org.apache.hadoop.yarn.api.records.YarnClusterMetrics; import org.apache.hadoop.yarn.client.api.impl.YarnClientImpl; import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.factories.RecordFactory; import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; import org.apache.hadoop.yarn.security.client.RMDelegationTokenIdentifier; import org.apache.hadoop.yarn.server.utils.BuilderUtils; import org.apache.hadoop.yarn.util.Records; +import org.apache.hadoop.yarn.util.resource.ResourceUtils; import org.apache.log4j.Appender; +import org.apache.log4j.AppenderSkeleton; import org.apache.log4j.Layout; +import org.apache.log4j.Level; import org.apache.log4j.Logger; import org.apache.log4j.SimpleLayout; import org.apache.log4j.WriterAppender; +import org.apache.log4j.spi.LoggingEvent; import org.junit.After; import org.junit.Assert; import org.junit.Before; +import org.junit.BeforeClass; import org.junit.Test; import org.mockito.invocation.InvocationOnMock; import org.mockito.stubbing.Answer; +import com.google.common.collect.ImmutableList; + /** * Test YarnRunner and make sure the client side plugin works * fine @@ -131,6 +144,53 @@ 
MRJobConfig.DEFAULT_TASK_PROFILE_PARAMS.substring(0, MRJobConfig.DEFAULT_TASK_PROFILE_PARAMS.lastIndexOf("%")); + private static class CustomResourceTypesConfigurationProvider + extends LocalConfigurationProvider { + + @Override + public InputStream getConfigurationInputStream(Configuration bootstrapConf, + String name) throws YarnException, IOException { + if (YarnConfiguration.RESOURCE_TYPES_CONFIGURATION_FILE.equals(name)) { + return new ByteArrayInputStream( + ("<configuration>\n" + " <property>\n" + " <name>yarn.resource-types</name>\n" + " <value>a-custom-resource</value>\n" + " </property>\n" + " <property>\n" + " <name>yarn.resource-types.a-custom-resource.units</name>\n" + " <value>G</value>\n" + " </property>\n" + "</configuration>\n").getBytes()); + } else { + return super.getConfigurationInputStream(bootstrapConf, name); + } + } + } + + private static class TestAppender extends AppenderSkeleton { + + private final List<LoggingEvent> logEvents = new CopyOnWriteArrayList<>(); + + @Override + public boolean requiresLayout() { + return false; + } + + @Override + public void close() { + } + + @Override + protected void append(LoggingEvent arg0) { + logEvents.add(arg0); + } + + private List<LoggingEvent> getLogEvents() { + return logEvents; + } + } + private YARNRunner yarnRunner; private ResourceMgrDelegate resourceMgrDelegate; private YarnConfiguration conf; @@ -143,6 +203,11 @@ private ClientServiceDelegate clientDelegate; private static final String failString = "Rejected job"; + @BeforeClass + public static void setupBeforeClass() { + ResourceUtils.resetResourceTypes(new Configuration()); + } + @Before public void setUp() throws Exception { resourceMgrDelegate = mock(ResourceMgrDelegate.class); @@ -175,6 +240,7 @@ public ApplicationSubmissionContext answer(InvocationOnMock invocation) @After public void cleanup() { FileUtil.fullyDelete(testWorkDir); + ResourceUtils.resetResourceTypes(new Configuration()); } @Test(timeout=20000) @@ -884,4 +950,105 @@ public void testSendJobConf() throws IOException { .get("hadoop.tmp.dir").equals("testconfdir")); UserGroupInformation.reset(); } + + @Test + public void testCustomAMRMResourceType() throws Exception { + initResourceTypes(); + String customResourceName = "a-custom-resource"; + + JobConf jobConf = new JobConf(); + + jobConf.setInt(MRJobConfig.MR_AM_RESOURCE_PREFIX + + customResourceName, 5); + jobConf.setInt(MRJobConfig.MR_AM_CPU_VCORES, 3); + + yarnRunner = new YARNRunner(jobConf); + + submissionContext = buildSubmitContext(yarnRunner, jobConf); + + List<ResourceRequest> resourceRequests = + submissionContext.getAMContainerResourceRequests(); + + Assert.assertEquals(1, resourceRequests.size()); + ResourceRequest resourceRequest = resourceRequests.get(0); + + ResourceInformation resourceInformation = resourceRequest.getCapability() + .getResourceInformation(customResourceName); + Assert.assertEquals("Expecting the default unit (G)", + "G", resourceInformation.getUnits()); + Assert.assertEquals(5L, resourceInformation.getValue()); + Assert.assertEquals(3, resourceRequest.getCapability().getVirtualCores()); + } + + @Test + public void testAMRMemoryRequest() throws Exception { + for (String memoryName : ImmutableList.of( + MRJobConfig.RESOURCE_TYPE_NAME_MEMORY, + MRJobConfig.RESOURCE_TYPE_ALTERNATIVE_NAME_MEMORY)) { + JobConf jobConf = new JobConf(); + jobConf.set(MRJobConfig.MR_AM_RESOURCE_PREFIX + memoryName, "3 Gi"); + + yarnRunner = new YARNRunner(jobConf); + + submissionContext = buildSubmitContext(yarnRunner, jobConf); + + List<ResourceRequest> resourceRequests = + submissionContext.getAMContainerResourceRequests(); + + Assert.assertEquals(1, resourceRequests.size()); + ResourceRequest resourceRequest
= resourceRequests.get(0); + + long memorySize = resourceRequest.getCapability().getMemorySize(); + Assert.assertEquals(3072, memorySize); + } + } + + @Test + public void testAMRMemoryRequestOverriding() throws Exception { + for (String memoryName : ImmutableList.of( + MRJobConfig.RESOURCE_TYPE_NAME_MEMORY, + MRJobConfig.RESOURCE_TYPE_ALTERNATIVE_NAME_MEMORY)) { + TestAppender testAppender = new TestAppender(); + Logger logger = Logger.getLogger(YARNRunner.class); + logger.addAppender(testAppender); + try { + JobConf jobConf = new JobConf(); + jobConf.set(MRJobConfig.MR_AM_RESOURCE_PREFIX + memoryName, "3 Gi"); + jobConf.setInt(MRJobConfig.MR_AM_VMEM_MB, 2048); + + yarnRunner = new YARNRunner(jobConf); + + submissionContext = buildSubmitContext(yarnRunner, jobConf); + + List<ResourceRequest> resourceRequests = + submissionContext.getAMContainerResourceRequests(); + + Assert.assertEquals(1, resourceRequests.size()); + ResourceRequest resourceRequest = resourceRequests.get(0); + + long memorySize = resourceRequest.getCapability().getMemorySize(); + Assert.assertEquals(3072, memorySize); + boolean foundLogWarning = false; + for (LoggingEvent e : testAppender.getLogEvents()) { + if (e.getLevel() == Level.WARN && ("Configuration " + + "yarn.app.mapreduce.am.resource." + memoryName + "=3Gi is " + + "overriding the yarn.app.mapreduce.am.resource.mb=2048 " + + "configuration").equals(e.getMessage())) { + foundLogWarning = true; + break; + } + } + assertTrue(foundLogWarning); + } finally { + logger.removeAppender(testAppender); + } + } + } + + private void initResourceTypes() { + Configuration configuration = new Configuration(); + configuration.set(YarnConfiguration.RM_CONFIGURATION_PROVIDER_CLASS, + CustomResourceTypesConfigurationProvider.class.getName()); + ResourceUtils.resetResourceTypes(configuration); + } } diff --git hadoop-project/src/site/site.xml hadoop-project/src/site/site.xml index 10cca658733..80bb2e1b69b 100644 --- hadoop-project/src/site/site.xml +++ hadoop-project/src/site/site.xml @@ -142,6 +142,7 @@ + diff --git hadoop-yarn-project/hadoop-yarn/dev-support/findbugs-exclude.xml hadoop-yarn-project/hadoop-yarn/dev-support/findbugs-exclude.xml index b3e04fca060..e6dcefb2099 100644 --- hadoop-yarn-project/hadoop-yarn/dev-support/findbugs-exclude.xml +++ hadoop-yarn-project/hadoop-yarn/dev-support/findbugs-exclude.xml @@ -153,6 +153,10 @@ + + + + @@ -611,4 +615,30 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/ams/ApplicationMasterServiceProcessor.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/ams/ApplicationMasterServiceProcessor.java index b7d925a6592..8e76a11dc27 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/ams/ApplicationMasterServiceProcessor.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/ams/ApplicationMasterServiceProcessor.java @@ -52,11 +52,13 @@ void init(ApplicationMasterServiceContext amsContext, * @param request Register Request. * @param response Register Response. * @throws IOException IOException. + * @throws YarnException in critical situations where invalid + * profiles/resources are added.
*/ - void registerApplicationMaster( - ApplicationAttemptId applicationAttemptId, + void registerApplicationMaster(ApplicationAttemptId applicationAttemptId, RegisterApplicationMasterRequest request, - RegisterApplicationMasterResponse response) throws IOException; + RegisterApplicationMasterResponse response) + throws IOException, YarnException; /** * Allocate call. diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/ApplicationClientProtocol.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/ApplicationClientProtocol.java index 6d39366dccd..1f0a360b5b2 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/ApplicationClientProtocol.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/ApplicationClientProtocol.java @@ -65,6 +65,8 @@ import org.apache.hadoop.yarn.api.protocolrecords.SignalContainerResponse; import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationRequest; import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationResponse; +import org.apache.hadoop.yarn.api.protocolrecords.GetAllResourceTypeInfoRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetAllResourceTypeInfoResponse; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext; @@ -75,6 +77,7 @@ import org.apache.hadoop.yarn.api.records.YarnClusterMetrics; import org.apache.hadoop.yarn.exceptions.ApplicationNotFoundException; import org.apache.hadoop.yarn.exceptions.YarnException; +import org.apache.hadoop.yarn.exceptions.YARNFeatureNotEnabledException; /** * <p>The protocol between clients and the <code>ResourceManager</code> @@ -589,4 +592,18 @@ SignalContainerResponse signalToContainer( public UpdateApplicationTimeoutsResponse updateApplicationTimeouts( UpdateApplicationTimeoutsRequest request) throws YarnException, IOException; + + /** + * <p> + * The interface used by clients to get all the resource types supported + * by the ResourceManager. + * </p> + * @param request request to get all the supported resource types + * @return Response containing all the supported resource types + * @throws YarnException if any error happens inside YARN + * @throws IOException in case of other errors + */ + @Public + @Unstable + GetAllResourceTypeInfoResponse getResourceTypeInfo( + GetAllResourceTypeInfoRequest request) throws YarnException, IOException; } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/GetAllResourceTypeInfoRequest.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/GetAllResourceTypeInfoRequest.java new file mode 100644 index 00000000000..3bda4f54ec5 --- /dev/null +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/GetAllResourceTypeInfoRequest.java @@ -0,0 +1,35 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.api.protocolrecords; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.yarn.util.Records; + +/** + * Request class for getting all the resource types from the RM. + */ +@InterfaceAudience.Public +@InterfaceStability.Unstable +public abstract class GetAllResourceTypeInfoRequest { + + public static GetAllResourceTypeInfoRequest newInstance() { + return Records.newRecord(GetAllResourceTypeInfoRequest.class); + } +} diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/GetAllResourceTypeInfoResponse.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/GetAllResourceTypeInfoResponse.java new file mode 100644 index 00000000000..b57b96df3fd --- /dev/null +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/GetAllResourceTypeInfoResponse.java @@ -0,0 +1,60 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.api.protocolrecords; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.yarn.api.records.ResourceTypeInfo; +import org.apache.hadoop.yarn.util.Records; + +import java.util.List; + +/** + * Response class for getting all the resource types from the RM. + */ +@InterfaceAudience.Public +@InterfaceStability.Unstable +public abstract class GetAllResourceTypeInfoResponse { + + public static GetAllResourceTypeInfoResponse newInstance() { + return Records.newRecord(GetAllResourceTypeInfoResponse.class); + } + + public abstract void setResourceTypeInfo(List<ResourceTypeInfo> resourceTypes); + + public abstract List<ResourceTypeInfo> getResourceTypeInfo(); + + @Override + public boolean equals(Object other) { + if (this == other) { + return true; + } + if (other == null || !(other instanceof GetAllResourceTypeInfoResponse)) { + return false; + } + return ((GetAllResourceTypeInfoResponse) other).getResourceTypeInfo() + .equals(this.getResourceTypeInfo()); + } + + @Override + public int hashCode() { + return this.getResourceTypeInfo().hashCode(); + } + +} diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/ResourceTypes.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/ResourceTypes.java new file mode 100644 index 00000000000..dbd9c37ceec --- /dev/null +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/ResourceTypes.java @@ -0,0 +1,27 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.api.protocolrecords; + +/** + * Enum which represents the resource type. Currently, the only type allowed is + * COUNTABLE.
+ */ +public enum ResourceTypes { + COUNTABLE +} diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ApplicationResourceUsageReport.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ApplicationResourceUsageReport.java index 3cf8f3defa3..d2e33ff9bca 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ApplicationResourceUsageReport.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ApplicationResourceUsageReport.java @@ -24,6 +24,8 @@ import org.apache.hadoop.classification.InterfaceStability.Unstable; import org.apache.hadoop.yarn.util.Records; +import java.util.Map; + /** * Contains various scheduling metrics to be reported by UI and CLI. */ @@ -35,9 +37,9 @@ @Unstable public static ApplicationResourceUsageReport newInstance( int numUsedContainers, int numReservedContainers, Resource usedResources, - Resource reservedResources, Resource neededResources, long memorySeconds, - long vcoreSeconds, float queueUsagePerc, float clusterUsagePerc, - long preemptedMemorySeconds, long preemptedVcoresSeconds) { + Resource reservedResources, Resource neededResources, + Map<String, Long> resourceSecondsMap, float queueUsagePerc, + float clusterUsagePerc, Map<String, Long> preemptedResourceSecondsMap) { ApplicationResourceUsageReport report = Records.newRecord(ApplicationResourceUsageReport.class); report.setNumUsedContainers(numUsedContainers); @@ -45,12 +47,10 @@ public static ApplicationResourceUsageReport newInstance( report.setUsedResources(usedResources); report.setReservedResources(reservedResources); report.setNeededResources(neededResources); - report.setMemorySeconds(memorySeconds); - report.setVcoreSeconds(vcoreSeconds); + report.setResourceSecondsMap(resourceSecondsMap); report.setQueueUsagePercentage(queueUsagePerc); report.setClusterUsagePercentage(clusterUsagePerc); - report.setPreemptedMemorySeconds(preemptedMemorySeconds); - report.setPreemptedVcoreSeconds(preemptedVcoresSeconds); + report.setPreemptedResourceSecondsMap(preemptedResourceSecondsMap); return report; } @@ -229,4 +229,47 @@ public static ApplicationResourceUsageReport newInstance( @Public @Unstable public abstract long getPreemptedVcoreSeconds(); + + /** + * Get the aggregated number of resources that the application has + * allocated times the number of seconds the application has been running. + * @return map containing the resource name and aggregated resource-seconds + */ + @Public + @Unstable + public abstract Map<String, Long> getResourceSecondsMap(); + + /** + * Set the aggregated number of resources that the application has + * allocated times the number of seconds the application has been running. + * @param resourceSecondsMap map containing the resource name and aggregated + * resource-seconds + */ + @Private + @Unstable + public abstract void setResourceSecondsMap( + Map<String, Long> resourceSecondsMap); + + + /** + * Get the aggregated number of resources preempted that the application has + * allocated times the number of seconds the application has been running. + * @return map containing the resource name and aggregated preempted + * resource-seconds + */ + @Public + @Unstable + public abstract Map<String, Long> getPreemptedResourceSecondsMap(); + + /** + * Set the aggregated number of resources preempted that the application has + * allocated times the number of seconds the application has been running.
+ * @param preemptedResourceSecondsMap map containing the resource name and + * aggregated preempted resource-seconds + */ + @Private + @Unstable + public abstract void setPreemptedResourceSecondsMap( + Map preemptedResourceSecondsMap); + } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/Resource.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/Resource.java index 89ca5d62f7e..f7c699f1a01 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/Resource.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/Resource.java @@ -18,12 +18,23 @@ package org.apache.hadoop.yarn.api.records; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import com.google.common.collect.Lists; +import org.apache.commons.lang.ArrayUtils; import org.apache.commons.lang.NotImplementedException; +import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceAudience.Public; +import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.classification.InterfaceStability.Evolving; import org.apache.hadoop.classification.InterfaceStability.Stable; import org.apache.hadoop.yarn.api.ApplicationMasterProtocol; - +import org.apache.hadoop.yarn.api.protocolrecords.ResourceTypes; +import org.apache.hadoop.yarn.api.records.impl.LightWeightResource; +import org.apache.hadoop.yarn.exceptions.ResourceNotFoundException; +import org.apache.hadoop.yarn.util.resource.ResourceUtils; /** *

<p><code>Resource</code> models a set of computer resources in the
@@ -38,10 +49,10 @@
 * the average number of threads it expects to have runnable at a time.</p>
 *
 * <p>Virtual cores take integer values and thus currently CPU-scheduling is
- * very coarse. A complementary axis for CPU requests that represents processing
- * power will likely be added in the future to enable finer-grained resource
- * configuration.</p>
- *
+ * very coarse. A complementary axis for CPU requests that represents
+ * processing power will likely be added in the future to enable finer-grained
+ * resource configuration.</p>
+ *
 * <p>Typically, applications request <code>Resource</code> of suitable
 * capability to run their component tasks.</p>
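To make the extended contract below concrete, a small usage sketch of the new per-resource accessors; it assumes a resource named yarn.io/gpu has been declared in resource-types.xml, otherwise ResourceNotFoundException is thrown:

```java
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.api.records.ResourceInformation;

public class ResourceAccessSketch {
  public static void main(String[] args) {
    // Memory (MB) and vcores keep their dedicated, index-backed accessors.
    Resource capability = Resource.newInstance(4096, 8);

    // Any additional declared resource is addressed by name; the value is
    // interpreted in the unit stored in its ResourceInformation entry.
    capability.setResourceValue(ResourceInformation.GPU_URI, 2);

    ResourceInformation gpu =
        capability.getResourceInformation(ResourceInformation.GPU_URI);
    System.out.println(gpu.getName() + "=" + gpu.getValue());
  }
}
```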
* @@ -52,64 +63,77 @@ @Stable public abstract class Resource implements Comparable { - private static class SimpleResource extends Resource { - private long memory; - private long vcores; - SimpleResource(long memory, long vcores) { - this.memory = memory; - this.vcores = vcores; - } - @Override - public int getMemory() { - return castToIntSafely(memory); - } - @Override - public void setMemory(int memory) { - this.memory = memory; - } - @Override - public long getMemorySize() { - return memory; - } - @Override - public void setMemorySize(long memory) { - this.memory = memory; - } - @Override - public int getVirtualCores() { - return castToIntSafely(vcores); - } - @Override - public void setVirtualCores(int vcores) { - this.vcores = vcores; - } - } + protected ResourceInformation[] resources = null; + + // Number of mandatory resources, this is added to avoid invoke + // MandatoryResources.values().length, since values() internally will + // copy array, etc. + protected static final int NUM_MANDATORY_RESOURCES = 2; + + protected static final int MEMORY_INDEX = 0; + protected static final int VCORES_INDEX = 1; @Public @Stable public static Resource newInstance(int memory, int vCores) { - return new SimpleResource(memory, vCores); + return new LightWeightResource(memory, vCores); } @Public @Stable public static Resource newInstance(long memory, int vCores) { - return new SimpleResource(memory, vCores); + return new LightWeightResource(memory, vCores); + } + + @InterfaceAudience.Private + @InterfaceStability.Unstable + public static Resource newInstance(Resource resource) { + Resource ret; + int numberOfKnownResourceTypes = ResourceUtils + .getNumberOfKnownResourceTypes(); + if (numberOfKnownResourceTypes > 2) { + ret = new LightWeightResource(resource.getMemorySize(), + resource.getVirtualCores(), resource.getResources()); + } else { + ret = new LightWeightResource(resource.getMemorySize(), + resource.getVirtualCores()); + } + return ret; + } + + @InterfaceAudience.Private + @InterfaceStability.Unstable + public static void copy(Resource source, Resource dest) { + for (ResourceInformation entry : source.getResources()) { + dest.setResourceInformation(entry.getName(), entry); + } } /** * This method is DEPRECATED: * Use {@link Resource#getMemorySize()} instead * - * Get memory of the resource. - * @return memory of the resource + * Get memory of the resource. Note - while memory has + * never had a unit specified, all YARN configurations have specified memory + * in MB. The assumption has been that the daemons and applications are always + * using the same units. With the introduction of the ResourceInformation + * class we have support for units - so this function will continue to return + * memory but in the units of MB + * + * @return memory(in MB) of the resource */ @Public @Deprecated public abstract int getMemory(); /** - * Get memory of the resource. + * Get memory of the resource. Note - while memory has + * never had a unit specified, all YARN configurations have specified memory + * in MB. The assumption has been that the daemons and applications are always + * using the same units. With the introduction of the ResourceInformation + * class we have support for units - so this function will continue to return + * memory but in the units of MB + * * @return memory of the resource */ @Public @@ -120,8 +144,14 @@ public long getMemorySize() { } /** - * Set memory of the resource. - * @param memory memory of the resource + * Set memory of the resource. 
Note - while memory has + * never had a unit specified, all YARN configurations have specified memory + * in MB. The assumption has been that the daemons and applications are always + * using the same units. With the introduction of the ResourceInformation + * class we have support for units - so this function will continue to set + * memory but the assumption is that the value passed is in units of MB. + * + * @param memory memory(in MB) of the resource */ @Public @Deprecated @@ -138,73 +168,316 @@ public void setMemorySize(long memory) { "This method is implemented by ResourcePBImpl"); } - /** * Get number of virtual cpu cores of the resource. * * Virtual cores are a unit for expressing CPU parallelism. A node's capacity - * should be configured with virtual cores equal to its number of physical cores. - * A container should be requested with the number of cores it can saturate, i.e. - * the average number of threads it expects to have runnable at a time. - * + * should be configured with virtual cores equal to its number of physical + * cores. A container should be requested with the number of cores it can + * saturate, i.e. the average number of threads it expects to have runnable + * at a time. + * * @return num of virtual cpu cores of the resource */ @Public @Evolving public abstract int getVirtualCores(); - + /** * Set number of virtual cpu cores of the resource. * * Virtual cores are a unit for expressing CPU parallelism. A node's capacity - * should be configured with virtual cores equal to its number of physical cores. - * A container should be requested with the number of cores it can saturate, i.e. - * the average number of threads it expects to have runnable at a time. - * + * should be configured with virtual cores equal to its number of physical + * cores. A container should be requested with the number of cores it can + * saturate, i.e. the average number of threads it expects to have runnable + * at a time. + * * @param vCores number of virtual cpu cores of the resource */ @Public @Evolving public abstract void setVirtualCores(int vCores); - @Override - public int hashCode() { - final int prime = 263167; + /** + * Get ResourceInformation for all resources. + * + * @return Map of resource name to ResourceInformation + */ + @InterfaceAudience.Private + @InterfaceStability.Unstable + public ResourceInformation[] getResources() { + return resources; + } + + /** + * Get list of resource information, this will be used by JAXB. + * @return list of resources copy. + */ + @InterfaceAudience.Private + @InterfaceStability.Unstable + public List getAllResourcesListCopy() { + List list = new ArrayList<>(); + for (ResourceInformation i : resources) { + ResourceInformation ri = new ResourceInformation(); + ResourceInformation.copy(i, ri); + list.add(ri); + } + return list; + } + + /** + * Get ResourceInformation for a specified resource. + * + * @param resource name of the resource + * @return the ResourceInformation object for the resource + * @throws ResourceNotFoundException if the resource can't be found + */ + @Public + @InterfaceStability.Unstable + public ResourceInformation getResourceInformation(String resource) + throws ResourceNotFoundException { + Integer index = ResourceUtils.getResourceTypeIndex().get(resource); + if (index != null) { + return resources[index]; + } + throw new ResourceNotFoundException("Unknown resource '" + resource + + "'. 
Known resources are " + Arrays.toString(resources)); + } + + /** + * Get ResourceInformation for a specified resource from a given index. + * + * @param index + * of the resource + * @return the ResourceInformation object for the resource + * @throws ResourceNotFoundException + * if the resource can't be found + */ + @InterfaceAudience.Private + @InterfaceStability.Unstable + public ResourceInformation getResourceInformation(int index) + throws ResourceNotFoundException { + ResourceInformation ri = null; + try { + ri = resources[index]; + } catch (ArrayIndexOutOfBoundsException e) { + throwExceptionWhenArrayOutOfBound(index); + } + return ri; + } + + /** + * Get the value for a specified resource. No information about the units is + * returned. + * + * @param resource name of the resource + * @return the value for the resource + * @throws ResourceNotFoundException if the resource can't be found + */ + @Public + @InterfaceStability.Unstable + public long getResourceValue(String resource) + throws ResourceNotFoundException { + return getResourceInformation(resource).getValue(); + } + + /** + * Set the ResourceInformation object for a particular resource. + * + * @param resource the resource for which the ResourceInformation is provided + * @param resourceInformation ResourceInformation object + * @throws ResourceNotFoundException if the resource is not found + */ + @Public + @InterfaceStability.Unstable + public void setResourceInformation(String resource, + ResourceInformation resourceInformation) + throws ResourceNotFoundException { + if (resource.equals(ResourceInformation.MEMORY_URI)) { + this.setMemorySize(resourceInformation.getValue()); + return; + } + if (resource.equals(ResourceInformation.VCORES_URI)) { + this.setVirtualCores(castToIntSafely(resourceInformation.getValue())); + return; + } + ResourceInformation storedResourceInfo = getResourceInformation(resource); + ResourceInformation.copy(resourceInformation, storedResourceInfo); + } + + /** + * Set the ResourceInformation object for a particular resource. + * + * @param index + * the resource index for which the ResourceInformation is provided + * @param resourceInformation + * ResourceInformation object + * @throws ResourceNotFoundException + * if the resource is not found + */ + @InterfaceAudience.Private + @InterfaceStability.Unstable + public void setResourceInformation(int index, + ResourceInformation resourceInformation) + throws ResourceNotFoundException { + if (index < 0 || index >= resources.length) { + throw new ResourceNotFoundException("Unknown resource at index '" + index + + "'. Valid resources are " + Arrays.toString(resources)); + } + ResourceInformation.copy(resourceInformation, resources[index]); + } + + /** + * Set the value of a resource in the ResourceInformation object. The unit of + * the value is assumed to be the one in the ResourceInformation object. + * + * @param resource the resource for which the value is provided. 
+ * @param value the value to set + * @throws ResourceNotFoundException if the resource is not found + */ + @Public + @InterfaceStability.Unstable + public void setResourceValue(String resource, long value) + throws ResourceNotFoundException { + if (resource.equals(ResourceInformation.MEMORY_URI)) { + this.setMemorySize(value); + return; + } + if (resource.equals(ResourceInformation.VCORES_URI)) { + this.setVirtualCores(castToIntSafely(value)); + return; + } + + ResourceInformation storedResourceInfo = getResourceInformation(resource); + storedResourceInfo.setValue(value); + } + + /** + * Set the value of a resource in the ResourceInformation object. The unit of + * the value is assumed to be the one in the ResourceInformation object. + * + * @param index + * the resource index for which the value is provided. + * @param value + * the value to set + * @throws ResourceNotFoundException + * if the resource is not found + */ + @InterfaceAudience.Private + @InterfaceStability.Unstable + public void setResourceValue(int index, long value) + throws ResourceNotFoundException { + try { + resources[index].setValue(value); + } catch (ArrayIndexOutOfBoundsException e) { + throwExceptionWhenArrayOutOfBound(index); + } + } - int result = (int) (939769357 - + getMemorySize()); // prime * result = 939769357 initially - result = prime * result + getVirtualCores(); - return result; + private void throwExceptionWhenArrayOutOfBound(int index) { + String exceptionMsg = String.format( + "Trying to access ResourceInformation for given index=%d. " + + "Acceptable index range is [0,%d), please check double check " + + "configured resources in resource-types.xml", + index, ResourceUtils.getNumberOfKnownResourceTypes()); + + throw new ResourceNotFoundException(exceptionMsg); } @Override public boolean equals(Object obj) { - if (this == obj) + if (this == obj) { return true; - if (obj == null) + } + if (obj == null) { return false; - if (!(obj instanceof Resource)) + } + if (!(obj instanceof Resource)) { return false; + } Resource other = (Resource) obj; - if (getMemorySize() != other.getMemorySize() || - getVirtualCores() != other.getVirtualCores()) { + + ResourceInformation[] otherVectors = other.getResources(); + + if (resources.length != otherVectors.length) { return false; } + + for (int i = 0; i < resources.length; i++) { + ResourceInformation a = resources[i]; + ResourceInformation b = otherVectors[i]; + if ((a != b) && ((a == null) || !a.equals(b))) { + return false; + } + } return true; } @Override public int compareTo(Resource other) { - long diff = this.getMemorySize() - other.getMemorySize(); - if (diff == 0) { - diff = this.getVirtualCores() - other.getVirtualCores(); + ResourceInformation[] otherResources = other.getResources(); + + int arrLenThis = this.resources.length; + int arrLenOther = otherResources.length; + + // compare memory and vcores first(in that order) to preserve + // existing behavior. + for (int i = 0; i < arrLenThis; i++) { + ResourceInformation otherEntry; + try { + otherEntry = otherResources[i]; + } catch (ArrayIndexOutOfBoundsException e) { + // For two vectors with different size and same prefix. Shorter vector + // goes first. + return 1; + } + ResourceInformation entry = resources[i]; + + long diff = entry.compareTo(otherEntry); + if (diff > 0) { + return 1; + } else if (diff < 0) { + return -1; + } + } + + if (arrLenThis < arrLenOther) { + return -1; } - return diff == 0 ? 0 : (diff > 0 ? 
1 : -1); + + return 0; } @Override public String toString() { - return ""; + StringBuilder sb = new StringBuilder(); + + sb.append(""); + return sb.toString(); + } + + @Override + public int hashCode() { + final int prime = 47; + long result = 0; + for (ResourceInformation entry : resources) { + result = prime * result + entry.hashCode(); + } + return (int) result; } /** @@ -220,4 +493,23 @@ protected static int castToIntSafely(long value) { } return Long.valueOf(value).intValue(); } + + /** + * Create ResourceInformation with basic fields. + * @param name Resource Type Name + * @param unit Default unit of provided resource type + * @param value Value associated with giveb resource + * @return ResourceInformation object + */ + protected static ResourceInformation newDefaultInformation(String name, + String unit, long value) { + ResourceInformation ri = new ResourceInformation(); + ri.setName(name); + ri.setValue(value); + ri.setResourceType(ResourceTypes.COUNTABLE); + ri.setUnitsWithoutValidation(unit); + ri.setMinimumAllocation(0); + ri.setMaximumAllocation(Long.MAX_VALUE); + return ri; + } } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ResourceInformation.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ResourceInformation.java new file mode 100644 index 00000000000..08746a9c96b --- /dev/null +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ResourceInformation.java @@ -0,0 +1,306 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.api.records; + +import com.google.common.collect.ImmutableMap; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.yarn.api.protocolrecords.ResourceTypes; +import org.apache.hadoop.yarn.util.UnitsConversionUtil; + +import java.util.Map; + +/** + * Class to encapsulate information about a Resource - the name of the resource, + * the units(milli, micro, etc), the type(countable), and the value. 
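Since the class below is the basic building block of the new resource model, a short sketch of its factory overloads and unit-aware equality may help; the values are illustrative only:

```java
import org.apache.hadoop.yarn.api.records.ResourceInformation;

public class ResourceInformationSketch {
  public static void main(String[] args) {
    // Full overload: name, unit, value; the type defaults to COUNTABLE and
    // the allocation bounds to [0, Long.MAX_VALUE] in the shorter overloads.
    ResourceInformation memory =
        ResourceInformation.newInstance("memory-mb", "Mi", 4096L);

    // Name-only overload: empty unit and zero value.
    ResourceInformation gpus = ResourceInformation.newInstance("yarn.io/gpu");
    gpus.setValue(2);

    // setUnits() rejects anything outside UnitsConversionUtil.KNOWN_UNITS.
    memory.setUnits("Gi");

    // equals() converts units, so 1024 Mi and 1 Gi compare equal.
    System.out.println(ResourceInformation.newInstance("memory-mb", "Mi", 1024L)
        .equals(ResourceInformation.newInstance("memory-mb", "Gi", 1L)));
  }
}
```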
+ */ +public class ResourceInformation implements Comparable { + + private String name; + private String units; + private ResourceTypes resourceType; + private long value; + private long minimumAllocation; + private long maximumAllocation; + + // Known resource types + public static final String MEMORY_URI = "memory-mb"; + public static final String VCORES_URI = "vcores"; + public static final String GPU_URI = "yarn.io/gpu"; + + public static final ResourceInformation MEMORY_MB = + ResourceInformation.newInstance(MEMORY_URI, "Mi"); + public static final ResourceInformation VCORES = + ResourceInformation.newInstance(VCORES_URI); + public static final ResourceInformation GPUS = + ResourceInformation.newInstance(GPU_URI); + + public static final Map MANDATORY_RESOURCES = + ImmutableMap.of(MEMORY_URI, MEMORY_MB, VCORES_URI, VCORES, GPU_URI, GPUS); + + /** + * Get the name for the resource. + * + * @return resource name + */ + public String getName() { + return name; + } + + /** + * Set the name for the resource. + * + * @param rName name for the resource + */ + public void setName(String rName) { + this.name = rName; + } + + /** + * Get units for the resource. + * + * @return units for the resource + */ + public String getUnits() { + return units; + } + + /** + * Set the units for the resource. + * + * @param rUnits units for the resource + */ + public void setUnits(String rUnits) { + if (!UnitsConversionUtil.KNOWN_UNITS.contains(rUnits)) { + throw new IllegalArgumentException( + "Unknown unit '" + rUnits + "'. Known units are " + + UnitsConversionUtil.KNOWN_UNITS); + } + this.units = rUnits; + } + + /** + * Checking if a unit included by KNOWN_UNITS is an expensive operation. This + * can be avoided in critical path in RM. + * @param rUnits units for the resource + */ + @InterfaceAudience.Private + public void setUnitsWithoutValidation(String rUnits) { + this.units = rUnits; + } + + /** + * Get the resource type. + * + * @return the resource type + */ + public ResourceTypes getResourceType() { + return resourceType; + } + + /** + * Set the resource type. + * + * @param type the resource type + */ + public void setResourceType(ResourceTypes type) { + this.resourceType = type; + } + + /** + * Get the value for the resource. + * + * @return the resource value + */ + public long getValue() { + return value; + } + + /** + * Set the value for the resource. + * + * @param rValue the resource value + */ + public void setValue(long rValue) { + this.value = rValue; + } + + /** + * Get the minimum allocation for the resource. + * + * @return the minimum allocation for the resource + */ + public long getMinimumAllocation() { + return minimumAllocation; + } + + /** + * Set the minimum allocation for the resource. + * + * @param minimumAllocation the minimum allocation for the resource + */ + public void setMinimumAllocation(long minimumAllocation) { + this.minimumAllocation = minimumAllocation; + } + + /** + * Get the maximum allocation for the resource. + * + * @return the maximum allocation for the resource + */ + public long getMaximumAllocation() { + return maximumAllocation; + } + + /** + * Set the maximum allocation for the resource. + * + * @param maximumAllocation the maximum allocation for the resource + */ + public void setMaximumAllocation(long maximumAllocation) { + this.maximumAllocation = maximumAllocation; + } + + /** + * Create a new instance of ResourceInformation from another object. 
+ * + * @param other the object from which the new object should be created + * @return the new ResourceInformation object + */ + public static ResourceInformation newInstance(ResourceInformation other) { + ResourceInformation ret = new ResourceInformation(); + copy(other, ret); + return ret; + } + + public static ResourceInformation newInstance(String name, String units, + long value, ResourceTypes type, long minimumAllocation, + long maximumAllocation) { + ResourceInformation ret = new ResourceInformation(); + ret.setName(name); + ret.setResourceType(type); + ret.setUnits(units); + ret.setValue(value); + ret.setMinimumAllocation(minimumAllocation); + ret.setMaximumAllocation(maximumAllocation); + return ret; + } + + public static ResourceInformation newInstance(String name, String units, + long value) { + return ResourceInformation + .newInstance(name, units, value, ResourceTypes.COUNTABLE, 0L, + Long.MAX_VALUE); + } + + public static ResourceInformation newInstance(String name, String units) { + return ResourceInformation + .newInstance(name, units, 0L, ResourceTypes.COUNTABLE, 0L, + Long.MAX_VALUE); + } + + public static ResourceInformation newInstance(String name, String units, + long minRes, long maxRes) { + return ResourceInformation.newInstance(name, units, 0L, + ResourceTypes.COUNTABLE, minRes, maxRes); + } + + public static ResourceInformation newInstance(String name, long value) { + return ResourceInformation + .newInstance(name, "", value, ResourceTypes.COUNTABLE, 0L, + Long.MAX_VALUE); + } + + public static ResourceInformation newInstance(String name) { + return ResourceInformation.newInstance(name, ""); + } + + /** + * Copies the content of the source ResourceInformation object to the + * destination object, overwriting all properties of the destination object. 
+ * @param src Source ResourceInformation object + * @param dst Destination ResourceInformation object + */ + + public static void copy(ResourceInformation src, ResourceInformation dst) { + dst.setName(src.getName()); + dst.setResourceType(src.getResourceType()); + dst.setUnits(src.getUnits()); + dst.setValue(src.getValue()); + dst.setMinimumAllocation(src.getMinimumAllocation()); + dst.setMaximumAllocation(src.getMaximumAllocation()); + } + + @Override + public String toString() { + return "name: " + this.name + ", units: " + this.units + ", type: " + + resourceType + ", value: " + value + ", minimum allocation: " + + minimumAllocation + ", maximum allocation: " + maximumAllocation; + } + + public String getShorthandRepresentation() { + return "" + this.value + this.units; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) { + return true; + } + if (obj == null) { + return false; + } + if (!(obj instanceof ResourceInformation)) { + return false; + } + ResourceInformation r = (ResourceInformation) obj; + if (!this.name.equals(r.getName()) + || !this.resourceType.equals(r.getResourceType())) { + return false; + } + if (this.units.equals(r.units)) { + return this.value == r.value; + } + return (UnitsConversionUtil.compare(this.units, this.value, r.units, + r.value) == 0); + } + + @Override + public int hashCode() { + final int prime = 263167; + int result = + 939769357 + name.hashCode(); // prime * result = 939769357 initially + result = prime * result + resourceType.hashCode(); + result = prime * result + units.hashCode(); + result = prime * result + Long.valueOf(value).hashCode(); + return result; + } + + @Override + public int compareTo(ResourceInformation other) { + int diff = this.name.compareTo(other.name); + if (diff == 0) { + diff = UnitsConversionUtil + .compare(this.units, this.value, other.units, other.value); + if (diff == 0) { + diff = this.resourceType.compareTo(other.resourceType); + } + } + return diff; + } +} diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ResourceRequest.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ResourceRequest.java index 94eda7c4826..e1a98ae8045 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ResourceRequest.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ResourceRequest.java @@ -21,6 +21,7 @@ import java.io.Serializable; import org.apache.hadoop.classification.InterfaceAudience.Public; +import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.classification.InterfaceStability.Evolving; import org.apache.hadoop.classification.InterfaceStability.Stable; import org.apache.hadoop.classification.InterfaceStability.Unstable; diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ResourceTypeInfo.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ResourceTypeInfo.java new file mode 100644 index 00000000000..b6f7f147658 --- /dev/null +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ResourceTypeInfo.java @@ -0,0 +1,197 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.api.records; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.yarn.api.protocolrecords.ResourceTypes; +import org.apache.hadoop.yarn.util.Records; + +/** + * Class to encapsulate information about a ResourceType - the name of the + * resource, the units(milli, micro, etc), the type(countable). + */ +public abstract class ResourceTypeInfo implements Comparable { + + /** + * Get the name for the resource. + * + * @return resource name + */ + public abstract String getName(); + + /** + * Set the name for the resource. + * + * @param rName + * name for the resource + */ + public abstract void setName(String rName); + + /** + * Get units for the resource. + * + * @return units for the resource + */ + public abstract String getDefaultUnit(); + + /** + * Set the units for the resource. + * + * @param rUnits + * units for the resource + */ + public abstract void setDefaultUnit(String rUnits); + + /** + * Get the resource type. + * + * @return the resource type + */ + public abstract ResourceTypes getResourceType(); + + /** + * Set the resource type. + * + * @param type + * the resource type + */ + public abstract void setResourceType(ResourceTypes type); + + /** + * Create a new instance of ResourceTypeInfo from another object. + * + * @param other + * the object from which the new object should be created + * @return the new ResourceTypeInfo object + */ + @InterfaceAudience.Public + @InterfaceStability.Unstable + public static ResourceTypeInfo newInstance(ResourceTypeInfo other) { + ResourceTypeInfo resourceType = Records.newRecord(ResourceTypeInfo.class); + copy(other, resourceType); + return resourceType; + } + + /** + * Create a new instance of ResourceTypeInfo from name, units and type. + * + * @param name name of resource type + * @param units units of resource type + * @param type such as countable, etc. + * @return the new ResourceTypeInfo object + */ + @InterfaceAudience.Public + @InterfaceStability.Unstable + public static ResourceTypeInfo newInstance(String name, String units, + ResourceTypes type) { + ResourceTypeInfo resourceType = Records.newRecord(ResourceTypeInfo.class); + resourceType.setName(name); + resourceType.setResourceType(type); + resourceType.setDefaultUnit(units); + return resourceType; + } + + /** + * Create a new instance of ResourceTypeInfo from name, units. 
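For illustration, a sketch of how these factory methods combine with the GetAllResourceTypeInfoResponse record introduced earlier in this patch; instantiation goes through the PB record factory, so this is not standalone-runnable without it:

```java
import java.util.Arrays;

import org.apache.hadoop.yarn.api.protocolrecords.GetAllResourceTypeInfoResponse;
import org.apache.hadoop.yarn.api.records.ResourceTypeInfo;

public class ResourceTypeInfoSketch {
  public static void main(String[] args) {
    // Units default to the empty string and the type to COUNTABLE.
    ResourceTypeInfo memory = ResourceTypeInfo.newInstance("memory-mb", "Mi");
    ResourceTypeInfo gpus = ResourceTypeInfo.newInstance("yarn.io/gpu");

    // The RM reports the registered types through this protocol record.
    GetAllResourceTypeInfoResponse response =
        GetAllResourceTypeInfoResponse.newInstance();
    response.setResourceTypeInfo(Arrays.asList(memory, gpus));
    System.out.println(response.getResourceTypeInfo());
  }
}
```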
+ * + * @param name name of resource type + * @param units units of resource type + * @return the new ResourceTypeInfo object + */ + @InterfaceAudience.Public + @InterfaceStability.Unstable + public static ResourceTypeInfo newInstance(String name, String units) { + return ResourceTypeInfo.newInstance(name, units, ResourceTypes.COUNTABLE); + } + + /** + * Create a new instance of ResourceTypeInfo from name. + * + * @param name name of resource type + * @return the new ResourceTypeInfo object + */ + @InterfaceAudience.Public + @InterfaceStability.Unstable + public static ResourceTypeInfo newInstance(String name) { + return ResourceTypeInfo.newInstance(name, ""); + } + + /** + * Copies the content of the source ResourceTypeInfo object to the + * destination object, overwriting all properties of the destination object. + * + * @param src + * Source ResourceTypeInfo object + * @param dst + * Destination ResourceTypeInfo object + */ + + public static void copy(ResourceTypeInfo src, ResourceTypeInfo dst) { + dst.setName(src.getName()); + dst.setResourceType(src.getResourceType()); + dst.setDefaultUnit(src.getDefaultUnit()); + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append(""); + return sb.toString(); + } + + @Override + public boolean equals(Object obj) { + if (this == obj) { + return true; + } + if (obj == null) { + return false; + } + if (!(obj instanceof ResourceTypeInfo)) { + return false; + } + ResourceTypeInfo r = (ResourceTypeInfo) obj; + return this.getName().equals(r.getName()) + && this.getResourceType().equals(r.getResourceType()) + && this.getDefaultUnit().equals(r.getDefaultUnit()); + } + + @Override + public int hashCode() { + final int prime = 47; + int result = prime + getName().hashCode(); + result = prime * result + getResourceType().hashCode(); + return result; + } + + @Override + public int compareTo(ResourceTypeInfo other) { + int diff = this.getName().compareTo(other.getName()); + if (diff == 0) { + diff = this.getDefaultUnit().compareTo(other.getDefaultUnit()); + if (diff == 0) { + diff = this.getResourceType().compareTo(other.getResourceType()); + } + } + return diff; + } +} diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/impl/LightWeightResource.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/impl/LightWeightResource.java new file mode 100644 index 00000000000..34efb55c8c4 --- /dev/null +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/impl/LightWeightResource.java @@ -0,0 +1,197 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.yarn.api.records.impl; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability.Unstable; +import org.apache.hadoop.yarn.api.protocolrecords.ResourceTypes; +import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.api.records.ResourceInformation; +import org.apache.hadoop.yarn.util.resource.ResourceUtils; + +import static org.apache.hadoop.yarn.api.records.ResourceInformation.*; + +/** + *

<p>
+ * <code>LightWeightResource</code> extends Resource to handle base resources such
+ * as memory and CPU.
+ * TODO: We have a long term plan to use AbstractResource when additional
+ * resource types are to be handled as well.
+ * This will be used to speed up internal calculation to avoid creating
+ * costly PB-backed Resource object: <code>ResourcePBImpl</code>
+ * </p>
+ *
+ * <p>
+ * Currently it models both <em>memory</em> and <em>CPU</em>.
+ * </p>
+ *
+ * <p>
+ * The unit for memory is megabytes. CPU is modeled with virtual cores
+ * (vcores), a unit for expressing parallelism. A node's capacity should be
+ * configured with virtual cores equal to its number of physical cores. A
+ * container should be requested with the number of cores it can saturate,
+ * i.e. the average number of threads it expects to have runnable at a time.
+ * </p>
+ *
+ * <p>
+ * Virtual cores take integer values and thus currently CPU-scheduling is very
+ * coarse. A complementary axis for CPU requests that represents processing
+ * power will likely be added in the future to enable finer-grained resource
+ * configuration.
+ * </p>
+ * + * @see Resource + */ +@InterfaceAudience.Private +@Unstable +public class LightWeightResource extends Resource { + + private ResourceInformation memoryResInfo; + private ResourceInformation vcoresResInfo; + + public LightWeightResource(long memory, int vcores) { + int numberOfKnownResourceTypes = ResourceUtils + .getNumberOfKnownResourceTypes(); + initResourceInformations(memory, vcores, numberOfKnownResourceTypes); + + if (numberOfKnownResourceTypes > 2) { + ResourceInformation[] types = ResourceUtils.getResourceTypesArray(); + for (int i = 2; i < numberOfKnownResourceTypes; i++) { + resources[i] = new ResourceInformation(); + ResourceInformation.copy(types[i], resources[i]); + } + } + } + + public LightWeightResource(long memory, int vcores, + ResourceInformation[] source) { + int numberOfKnownResourceTypes = ResourceUtils + .getNumberOfKnownResourceTypes(); + initResourceInformations(memory, vcores, numberOfKnownResourceTypes); + + for (int i = 2; i < numberOfKnownResourceTypes; i++) { + resources[i] = new ResourceInformation(); + ResourceInformation.copy(source[i], resources[i]); + } + } + + private void initResourceInformations(long memory, int vcores, + int numberOfKnownResourceTypes) { + this.memoryResInfo = newDefaultInformation(MEMORY_URI, MEMORY_MB.getUnits(), + memory); + this.vcoresResInfo = newDefaultInformation(VCORES_URI, VCORES.getUnits(), + vcores); + + resources = new ResourceInformation[numberOfKnownResourceTypes]; + resources[MEMORY_INDEX] = memoryResInfo; + resources[VCORES_INDEX] = vcoresResInfo; + } + + @Override + @SuppressWarnings("deprecation") + public int getMemory() { + return castToIntSafely(memoryResInfo.getValue()); + } + + @Override + @SuppressWarnings("deprecation") + public void setMemory(int memory) { + this.memoryResInfo.setValue(memory); + } + + @Override + public long getMemorySize() { + return memoryResInfo.getValue(); + } + + @Override + public void setMemorySize(long memory) { + this.memoryResInfo.setValue(memory); + } + + @Override + public int getVirtualCores() { + return castToIntSafely(vcoresResInfo.getValue()); + } + + @Override + public void setVirtualCores(int vcores) { + this.vcoresResInfo.setValue(vcores); + } + + @Override + public boolean equals(Object obj) { + if (this == obj) { + return true; + } + if (obj == null || !(obj instanceof Resource)) { + return false; + } + Resource other = (Resource) obj; + if (getMemorySize() != other.getMemorySize() + || getVirtualCores() != other.getVirtualCores()) { + return false; + } + + if (resources.length > 2) { + ResourceInformation[] otherVectors = other.getResources(); + + if (resources.length != otherVectors.length) { + return false; + } + + for (int i = 2; i < resources.length; i++) { + ResourceInformation a = resources[i]; + ResourceInformation b = otherVectors[i]; + if ((a != b) && ((a == null) || !a.equals(b))) { + return false; + } + } + } + + return true; + } + + @Override + public int compareTo(Resource other) { + // compare memory and vcores first(in that order) to preserve + // existing behavior. 
+ if (resources.length <= 2) { + long diff = this.getMemorySize() - other.getMemorySize(); + if (diff == 0) { + return this.getVirtualCores() - other.getVirtualCores(); + } else if (diff > 0) { + return 1; + } else { + return -1; + } + } + + return super.compareTo(other); + } + + @Override + public int hashCode() { + final int prime = 47; + long result = prime + getMemorySize(); + result = prime * result + getVirtualCores(); + + return (int) result; + } +} diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/impl/package-info.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/impl/package-info.java new file mode 100644 index 00000000000..b2420bc50f8 --- /dev/null +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/impl/package-info.java @@ -0,0 +1,26 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * Package org.apache.hadoop.yarn.api.records.impl contains classes + * which define basic resources. 
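The two-resource fast path above preserves the historical ordering; a small sketch of the observable behavior, with illustrative values:

```java
import org.apache.hadoop.yarn.api.records.Resource;

public class ResourceCompareSketch {
  public static void main(String[] args) {
    // With only the two mandatory types configured, compareTo orders by
    // memory first and then by vcores, exactly as before this change.
    Resource a = Resource.newInstance(2048, 2);
    Resource b = Resource.newInstance(2048, 4);

    System.out.println(a.compareTo(b) < 0); // true: equal memory, fewer vcores
    System.out.println(a.equals(b));        // false
  }
}
```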
+ */ +@InterfaceAudience.Public +@InterfaceStability.Unstable +package org.apache.hadoop.yarn.api.records.impl; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; \ No newline at end of file diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java index 668f9940600..ce1b8932995 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java @@ -62,9 +62,28 @@ @Private public static final String CORE_SITE_CONFIGURATION_FILE = "core-site.xml"; + @Private + public static final String RESOURCE_TYPES_CONFIGURATION_FILE = + "resource-types.xml"; + + @Private + public static final String NODE_RESOURCES_CONFIGURATION_FILE = + "node-resources.xml"; + @Private public static final List RM_CONFIGURATION_FILES = Collections.unmodifiableList(Arrays.asList( + RESOURCE_TYPES_CONFIGURATION_FILE, + DR_CONFIGURATION_FILE, + CS_CONFIGURATION_FILE, + HADOOP_POLICY_CONFIGURATION_FILE, + YARN_SITE_CONFIGURATION_FILE, + CORE_SITE_CONFIGURATION_FILE)); + + @Private + public static final List NM_CONFIGURATION_FILES = + Collections.unmodifiableList(Arrays.asList( + NODE_RESOURCES_CONFIGURATION_FILE, DR_CONFIGURATION_FILE, CS_CONFIGURATION_FILE, HADOOP_POLICY_CONFIGURATION_FILE, @@ -108,6 +127,16 @@ private static void addDeprecatedKeys() { public static final String YARN_PREFIX = "yarn."; + ///////////////////////////// + // Resource types configs + //////////////////////////// + + public static final String RESOURCE_TYPES = + YarnConfiguration.YARN_PREFIX + "resource-types"; + + public static final String NM_RESOURCES_PREFIX = + YarnConfiguration.NM_PREFIX + "resource-type."; + /** Delay before deleting resource to ease debugging of NM issues */ public static final String DEBUG_NM_DELETE_DELAY_SEC = YarnConfiguration.NM_PREFIX + "delete.debug-delay-sec"; @@ -1382,6 +1411,39 @@ public static boolean isAclEnabled(Configuration conf) { public static final String NM_NETWORK_RESOURCE_OUTBOUND_BANDWIDTH_YARN_MBIT = NM_NETWORK_RESOURCE_PREFIX + "outbound-bandwidth-yarn-mbit"; + /** + * Prefix for computation resources, example of computation resources like + * GPU / FPGA / TPU, etc. + */ + @Private + public static final String NM_RESOURCE_PLUGINS = + NM_PREFIX + "resource-plugins"; + + /** + * Prefix for gpu configurations. Work in progress: This configuration + * parameter may be changed/removed in the future. 
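A hedged sketch of how the GPU keys defined below might be set programmatically; in a real deployment they would normally live in yarn-site.xml or node-resources.xml, and the path value here is purely hypothetical:

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.yarn.conf.YarnConfiguration;

public class GpuConfigSketch {
  public static void main(String[] args) {
    Configuration conf = new Configuration();

    // Let the NM auto-discover GPU devices rather than listing them by hand.
    conf.set(YarnConfiguration.NM_GPU_ALLOWED_DEVICES,
        YarnConfiguration.AUTOMATICALLY_DISCOVER_GPU_DEVICES);

    // Point discovery at a non-default binary location if needed; the
    // default value is the empty string.
    conf.set(YarnConfiguration.NM_GPU_PATH_TO_EXEC, "/usr/local/bin");
  }
}
```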
+ */ + @Private + public static final String NM_GPU_RESOURCE_PREFIX = + NM_RESOURCE_PLUGINS + ".gpu."; + + @Private + public static final String NM_GPU_ALLOWED_DEVICES = + NM_GPU_RESOURCE_PREFIX + "allowed-gpu-devices"; + @Private + public static final String AUTOMATICALLY_DISCOVER_GPU_DEVICES = "auto"; + + /** + * This setting controls where to how to invoke GPU binaries + */ + @Private + public static final String NM_GPU_PATH_TO_EXEC = + NM_GPU_RESOURCE_PREFIX + "path-to-discovery-executables"; + + @Private + public static final String DEFAULT_NM_GPU_PATH_TO_EXEC = ""; + + /** NM Webapp address.**/ public static final String NM_WEBAPP_ADDRESS = NM_PREFIX + "webapp.address"; public static final int DEFAULT_NM_WEBAPP_PORT = 8042; diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/exceptions/ResourceNotFoundException.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/exceptions/ResourceNotFoundException.java new file mode 100644 index 00000000000..b5fece7dc8c --- /dev/null +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/exceptions/ResourceNotFoundException.java @@ -0,0 +1,45 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.exceptions; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + +/** + * This exception is thrown when details of an unknown resource type + * are requested. + */ +@InterfaceAudience.Public +@InterfaceStability.Unstable +public class ResourceNotFoundException extends YarnRuntimeException { + + private static final long serialVersionUID = 10081982L; + + public ResourceNotFoundException(String message) { + super(message); + } + + public ResourceNotFoundException(Throwable cause) { + super(cause); + } + + public ResourceNotFoundException(String message, Throwable cause) { + super(message, cause); + } +} diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/exceptions/YARNFeatureNotEnabledException.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/exceptions/YARNFeatureNotEnabledException.java new file mode 100644 index 00000000000..62340fea363 --- /dev/null +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/exceptions/YARNFeatureNotEnabledException.java @@ -0,0 +1,45 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.exceptions; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + +/** + * This exception is thrown when a feature is being used which is not enabled + * yet. + */ +@InterfaceAudience.Public +@InterfaceStability.Unstable +public class YARNFeatureNotEnabledException extends YarnException { + private static final long serialVersionUID = 898023752676L; + + public YARNFeatureNotEnabledException(Throwable cause) { + super(cause); + } + + public YARNFeatureNotEnabledException(String message) { + super(message); + } + + public YARNFeatureNotEnabledException(String message, Throwable cause) { + super(message, cause); + } + +} diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/util/UnitsConversionUtil.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/util/UnitsConversionUtil.java new file mode 100644 index 00000000000..7a212e163d9 --- /dev/null +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/util/UnitsConversionUtil.java @@ -0,0 +1,221 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.util; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + +import java.math.BigInteger; +import java.util.*; + +/** + * A util to convert values in one unit to another. Units refers to whether + * the value is expressed in pico, nano, etc. + */ +@InterfaceAudience.Private +@InterfaceStability.Unstable +public class UnitsConversionUtil { + + /** + * Helper class for encapsulating conversion values. 
+ */ + public static class Converter { + private long numerator; + private long denominator; + + Converter(long n, long d) { + this.numerator = n; + this.denominator = d; + } + } + + private static final String[] UNITS = {"p", "n", "u", "m", "", "k", "M", "G", + "T", "P", "Ki", "Mi", "Gi", "Ti", "Pi"}; + private static final List SORTED_UNITS = Arrays.asList(UNITS); + public static final Set KNOWN_UNITS = createKnownUnitsSet(); + private static final Converter PICO = + new Converter(1L, 1000L * 1000L * 1000L * 1000L); + private static final Converter NANO = + new Converter(1L, 1000L * 1000L * 1000L); + private static final Converter MICRO = new Converter(1L, 1000L * 1000L); + private static final Converter MILLI = new Converter(1L, 1000L); + private static final Converter BASE = new Converter(1L, 1L); + private static final Converter KILO = new Converter(1000L, 1L); + private static final Converter MEGA = new Converter(1000L * 1000L, 1L); + private static final Converter GIGA = + new Converter(1000L * 1000L * 1000L, 1L); + private static final Converter TERA = + new Converter(1000L * 1000L * 1000L * 1000L, 1L); + private static final Converter PETA = + new Converter(1000L * 1000L * 1000L * 1000L * 1000L, 1L); + + private static final Converter KILO_BINARY = new Converter(1024L, 1L); + private static final Converter MEGA_BINARY = new Converter(1024L * 1024L, 1L); + private static final Converter GIGA_BINARY = + new Converter(1024L * 1024L * 1024L, 1L); + private static final Converter TERA_BINARY = + new Converter(1024L * 1024L * 1024L * 1024L, 1L); + private static final Converter PETA_BINARY = + new Converter(1024L * 1024L * 1024L * 1024L * 1024L, 1L); + + private static Set createKnownUnitsSet() { + Set ret = new HashSet<>(); + ret.addAll(Arrays.asList(UNITS)); + return ret; + } + + private static Converter getConverter(String unit) { + switch (unit) { + case "p": + return PICO; + case "n": + return NANO; + case "u": + return MICRO; + case "m": + return MILLI; + case "": + return BASE; + case "k": + return KILO; + case "M": + return MEGA; + case "G": + return GIGA; + case "T": + return TERA; + case "P": + return PETA; + case "Ki": + return KILO_BINARY; + case "Mi": + return MEGA_BINARY; + case "Gi": + return GIGA_BINARY; + case "Ti": + return TERA_BINARY; + case "Pi": + return PETA_BINARY; + default: + throw new IllegalArgumentException( + "Unknown unit '" + unit + "'. Known units are " + KNOWN_UNITS); + } + } + + /** + * Converts a value from one unit to another. Supported units can be obtained + * by inspecting the KNOWN_UNITS set. 
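A few concrete conversions, as a sketch of the contract described here; the expected results in the comments follow from the numerator/denominator ratios above, and integer division truncates:

```java
import org.apache.hadoop.yarn.util.UnitsConversionUtil;

public class UnitsConversionSketch {
  public static void main(String[] args) {
    // Binary prefixes convert by ratios of powers of 1024: 2 Gi -> 2048 Mi.
    System.out.println(UnitsConversionUtil.convert("Gi", "Mi", 2L)); // 2048

    // Decimal to binary prefixes also works: 1 G = 10^9 / 2^20 Mi.
    System.out.println(UnitsConversionUtil.convert("G", "Mi", 1L));  // 953

    // compare() normalizes both sides before comparing.
    System.out.println(
        UnitsConversionUtil.compare("Mi", 1024L, "Gi", 1L));         // 0
  }
}
```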
+ * + * @param fromUnit the unit of the from value + * @param toUnit the target unit + * @param fromValue the value you wish to convert + * @return the value in toUnit + */ + public static long convert(String fromUnit, String toUnit, long fromValue) { + if (toUnit == null || fromUnit == null) { + throw new IllegalArgumentException("One or more arguments are null"); + } + + if (fromUnit.equals(toUnit)) { + return fromValue; + } + Converter fc = getConverter(fromUnit); + Converter tc = getConverter(toUnit); + long numerator = fc.numerator * tc.denominator; + long denominator = fc.denominator * tc.numerator; + long numeratorMultiplierLimit = Long.MAX_VALUE / numerator; + if (numerator < denominator) { + if (numeratorMultiplierLimit < fromValue) { + String overflowMsg = + "Converting " + fromValue + " from '" + fromUnit + "' to '" + toUnit + + "' will result in an overflow of Long"; + throw new IllegalArgumentException(overflowMsg); + } + return (fromValue * numerator) / denominator; + } + if (numeratorMultiplierLimit > fromValue) { + return (numerator * fromValue) / denominator; + } + long tmp = numerator / denominator; + if ((Long.MAX_VALUE / tmp) < fromValue) { + String overflowMsg = + "Converting " + fromValue + " from '" + fromUnit + "' to '" + toUnit + + "' will result in an overflow of Long"; + throw new IllegalArgumentException(overflowMsg); + } + return fromValue * tmp; + } + + /** + * Compare a value in a given unit with a value in another unit. The return + * value is equivalent to the value returned by compareTo. + * + * @param unitA first unit + * @param valueA first value + * @param unitB second unit + * @param valueB second value + * @return +1, 0 or -1 depending on whether the relationship is greater than, + * equal to or lesser than + */ + public static int compare(String unitA, long valueA, String unitB, + long valueB) { + if (unitA == null || unitB == null || !KNOWN_UNITS.contains(unitA) + || !KNOWN_UNITS.contains(unitB)) { + throw new IllegalArgumentException("Units cannot be null"); + } + if (!KNOWN_UNITS.contains(unitA)) { + throw new IllegalArgumentException("Unknown unit '" + unitA + "'"); + } + if (!KNOWN_UNITS.contains(unitB)) { + throw new IllegalArgumentException("Unknown unit '" + unitB + "'"); + } + if (unitA.equals(unitB)) { + return Long.compare(valueA, valueB); + } + Converter unitAC = getConverter(unitA); + Converter unitBC = getConverter(unitB); + int unitAPos = SORTED_UNITS.indexOf(unitA); + int unitBPos = SORTED_UNITS.indexOf(unitB); + try { + long tmpA = valueA; + long tmpB = valueB; + if (unitAPos < unitBPos) { + tmpB = convert(unitB, unitA, valueB); + } else { + tmpA = convert(unitA, unitB, valueA); + } + return Long.compare(tmpA, tmpB); + } catch (IllegalArgumentException ie) { + BigInteger tmpA = BigInteger.valueOf(valueA); + BigInteger tmpB = BigInteger.valueOf(valueB); + if (unitAPos < unitBPos) { + tmpB = tmpB.multiply(BigInteger.valueOf(unitBC.numerator)); + tmpB = tmpB.multiply(BigInteger.valueOf(unitAC.denominator)); + tmpB = tmpB.divide(BigInteger.valueOf(unitBC.denominator)); + tmpB = tmpB.divide(BigInteger.valueOf(unitAC.numerator)); + } else { + tmpA = tmpA.multiply(BigInteger.valueOf(unitAC.numerator)); + tmpA = tmpA.multiply(BigInteger.valueOf(unitBC.denominator)); + tmpA = tmpA.divide(BigInteger.valueOf(unitAC.denominator)); + tmpA = tmpA.divide(BigInteger.valueOf(unitBC.numerator)); + } + return tmpA.compareTo(tmpB); + } + } +} diff --git 
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/util/resource/ResourceUtils.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/util/resource/ResourceUtils.java new file mode 100644 index 00000000000..a359ad197b7 --- /dev/null +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/util/resource/ResourceUtils.java @@ -0,0 +1,621 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.util.resource; + +import com.google.common.annotations.VisibleForTesting; +import org.apache.commons.lang.StringUtils; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.yarn.api.protocolrecords.ResourceTypes; +import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.api.records.ResourceInformation; +import org.apache.hadoop.yarn.api.records.ResourceTypeInfo; +import org.apache.hadoop.yarn.conf.ConfigurationProvider; +import org.apache.hadoop.yarn.conf.ConfigurationProviderFactory; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.exceptions.YarnException; +import org.apache.hadoop.yarn.exceptions.YarnRuntimeException; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.concurrent.ConcurrentHashMap; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import static org.apache.hadoop.yarn.api.records.ResourceInformation.GPU_URI; + +/** + * Helper class to read the resource-types to be supported by the system. 
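As a sketch of the configuration surface this helper reads, the per-type keys hang off the yarn.resource-types prefix with the suffixes defined just below; the resource name and values are illustrative:

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.yarn.conf.YarnConfiguration;

public class ResourceTypesConfigSketch {
  public static void main(String[] args) {
    Configuration conf = new Configuration();

    // yarn.resource-types names the extra types; per-type settings use the
    // .units, .type, .minimum-allocation and .maximum-allocation suffixes.
    conf.set(YarnConfiguration.RESOURCE_TYPES, "yarn.io/gpu");
    conf.set(YarnConfiguration.RESOURCE_TYPES + ".yarn.io/gpu.units", "");
    conf.setLong(YarnConfiguration.RESOURCE_TYPES
        + ".yarn.io/gpu.maximum-allocation", 8L);
  }
}
```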
+ */
+public class ResourceUtils {
+
+  public static final String UNITS = ".units";
+  public static final String TYPE = ".type";
+  public static final String MINIMUM_ALLOCATION = ".minimum-allocation";
+  public static final String MAXIMUM_ALLOCATION = ".maximum-allocation";
+
+  private static final String MEMORY = ResourceInformation.MEMORY_MB.getName();
+  private static final String VCORES = ResourceInformation.VCORES.getName();
+  private static final Pattern RESOURCE_REQUEST_VALUE_PATTERN =
+      Pattern.compile("^([0-9]+) ?([a-zA-Z]*)$");
+
+  private static volatile boolean initializedResources = false;
+  private static final Map<String, Integer> RESOURCE_NAME_TO_INDEX =
+      new ConcurrentHashMap<String, Integer>();
+  private static volatile Map<String, ResourceInformation> resourceTypes;
+  private static volatile ResourceInformation[] resourceTypesArray;
+  private static volatile boolean initializedNodeResources = false;
+  private static volatile Map<String, ResourceInformation>
+      readOnlyNodeResources;
+  private static volatile int numKnownResourceTypes = -1;
+
+  static final Log LOG = LogFactory.getLog(ResourceUtils.class);
+
+  private ResourceUtils() {
+  }
+
+  private static void checkMandatoryResources(
+      Map<String, ResourceInformation> resourceInformationMap)
+      throws YarnRuntimeException {
+    /*
+     * Treat 'memory' as an invalid resource name too, in addition to
+     * 'MEMORY', for historical reasons.
+     */
+    String key = "memory";
+    if (resourceInformationMap.containsKey(key)) {
+      LOG.warn(
+          "Attempt to define resource '" + key + "', but it is not allowed.");
+      throw new YarnRuntimeException(
+          "Attempt to re-define mandatory resource '" + key + "'.");
+    }
+
+    for (Map.Entry<String, ResourceInformation> mandatoryResourceEntry :
+        ResourceInformation.MANDATORY_RESOURCES.entrySet()) {
+      key = mandatoryResourceEntry.getKey();
+      ResourceInformation mandatoryRI = mandatoryResourceEntry.getValue();
+
+      ResourceInformation newDefinedRI = resourceInformationMap.get(key);
+      if (newDefinedRI != null) {
+        String expectedUnit = mandatoryRI.getUnits();
+        ResourceTypes expectedType = mandatoryRI.getResourceType();
+        String actualUnit = newDefinedRI.getUnits();
+        ResourceTypes actualType = newDefinedRI.getResourceType();
+
+        if (!expectedUnit.equals(actualUnit) || !expectedType.equals(
+            actualType)) {
+          throw new YarnRuntimeException("Defined mandatory resource type="
+              + key + " inside resource-types.xml, however its type or unit "
+              + "conflicts with the mandatory resource types; expected type="
+              + expectedType + ", unit=" + expectedUnit + "; actual type="
+              + actualType + ", actual unit=" + actualUnit);
+        }
+      }
+    }
+  }
+
+  private static void addMandatoryResources(
+      Map<String, ResourceInformation> res) {
+    ResourceInformation ri;
+    if (!res.containsKey(MEMORY)) {
+      LOG.info("Adding resource type - name = " + MEMORY + ", units = "
+          + ResourceInformation.MEMORY_MB.getUnits() + ", type = "
+          + ResourceTypes.COUNTABLE);
+      ri = ResourceInformation
+          .newInstance(MEMORY, ResourceInformation.MEMORY_MB.getUnits());
+      res.put(MEMORY, ri);
+    }
+    if (!res.containsKey(VCORES)) {
+      LOG.info("Adding resource type - name = " + VCORES
+          + ", units = , type = " + ResourceTypes.COUNTABLE);
+      ri = ResourceInformation.newInstance(VCORES);
+      res.put(VCORES, ri);
+    }
+  }
+
+  private static void setMinimumAllocationForMandatoryResources(
+      Map<String, ResourceInformation> res, Configuration conf) {
+    String[][] resourceTypesKeys = {
+        {ResourceInformation.MEMORY_MB.getName(),
+            YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB,
+            String.valueOf(
+                YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_MB),
+            ResourceInformation.MEMORY_MB.getName()},
+        {ResourceInformation.VCORES.getName(),
+            YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES,
+            String.valueOf(YarnConfiguration
+                .DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES),
+            ResourceInformation.VCORES.getName()}};
+    for (String[] arr : resourceTypesKeys) {
+      String resourceTypesKey =
+          YarnConfiguration.RESOURCE_TYPES + "." + arr[0] + MINIMUM_ALLOCATION;
+      long minimumResourceTypes = conf.getLong(resourceTypesKey, -1);
+      long minimumConf = conf.getLong(arr[1], -1);
+      long minimum;
+      if (minimumResourceTypes != -1) {
+        minimum = minimumResourceTypes;
+        if (minimumConf != -1) {
+          LOG.warn("Using the minimum allocation specified in the "
+              + "resource-types config file with key " + resourceTypesKey
+              + ", ignoring the minimum specified using " + arr[1]);
+        }
+      } else {
+        minimum = conf.getLong(arr[1], Long.parseLong(arr[2]));
+      }
+      ResourceInformation ri = res.get(arr[3]);
+      ri.setMinimumAllocation(minimum);
+    }
+  }
+
+  private static void setMaximumAllocationForMandatoryResources(
+      Map<String, ResourceInformation> res, Configuration conf) {
+    String[][] resourceTypesKeys = {
+        {ResourceInformation.MEMORY_MB.getName(),
+            YarnConfiguration.RM_SCHEDULER_MAXIMUM_ALLOCATION_MB,
+            String.valueOf(
+                YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_MB),
+            ResourceInformation.MEMORY_MB.getName()},
+        {ResourceInformation.VCORES.getName(),
+            YarnConfiguration.RM_SCHEDULER_MAXIMUM_ALLOCATION_VCORES,
+            String.valueOf(YarnConfiguration
+                .DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_VCORES),
+            ResourceInformation.VCORES.getName()}};
+    for (String[] arr : resourceTypesKeys) {
+      String resourceTypesKey =
+          YarnConfiguration.RESOURCE_TYPES + "." + arr[0] + MAXIMUM_ALLOCATION;
+      long maximumResourceTypes = conf.getLong(resourceTypesKey, -1);
+      long maximumConf = conf.getLong(arr[1], -1);
+      long maximum;
+      if (maximumResourceTypes != -1) {
+        maximum = maximumResourceTypes;
+        if (maximumConf != -1) {
+          LOG.warn("Using the maximum allocation specified in the "
+              + "resource-types config file with key " + resourceTypesKey
+              + ", ignoring the maximum specified using " + arr[1]);
+        }
+      } else {
+        maximum = conf.getLong(arr[1], Long.parseLong(arr[2]));
+      }
+      ResourceInformation ri = res.get(arr[3]);
+      ri.setMaximumAllocation(maximum);
+    }
+  }
+
+  /**
+   * Get the maximum allocation from the config. Note that this does not
+   * update the internal resource-type state.
+   * @param conf config
+   * @return maximum allocation
+   */
+  public static Resource fetchMaximumAllocationFromConfig(Configuration conf) {
+    Map<String, ResourceInformation> resourceInformationMap =
+        getResourceInformationMapFromConfig(conf);
+    Resource ret = Resource.newInstance(0, 0);
+    for (ResourceInformation entry : resourceInformationMap.values()) {
+      ret.setResourceValue(entry.getName(), entry.getMaximumAllocation());
+    }
+    return ret;
+  }
+
+  private static Map<String, ResourceInformation>
+      getResourceInformationMapFromConfig(Configuration conf) {
+    Map<String, ResourceInformation> resourceInformationMap = new HashMap<>();
+    String[] resourceNames = conf.getStrings(YarnConfiguration.RESOURCE_TYPES);
+
+    if (resourceNames != null && resourceNames.length != 0) {
+      for (String resourceName : resourceNames) {
+        String resourceUnits = conf.get(
+            YarnConfiguration.RESOURCE_TYPES + "." + resourceName + UNITS, "");
+        String resourceTypeName = conf.get(
+            YarnConfiguration.RESOURCE_TYPES + "." + resourceName + TYPE,
+            ResourceTypes.COUNTABLE.toString());
+        Long minimumAllocation = conf.getLong(
+            YarnConfiguration.RESOURCE_TYPES + "." + resourceName
+                + MINIMUM_ALLOCATION, 0L);
+        Long maximumAllocation = conf.getLong(
+            YarnConfiguration.RESOURCE_TYPES + "."
+                + resourceName + MAXIMUM_ALLOCATION, Long.MAX_VALUE);
+        if (resourceName == null || resourceName.isEmpty()
+            || resourceUnits == null || resourceTypeName == null) {
+          throw new YarnRuntimeException(
+              "Incomplete configuration for resource type '" + resourceName
+                  + "'. One of name, units or type is configured incorrectly.");
+        }
+        ResourceTypes resourceType = ResourceTypes.valueOf(resourceTypeName);
+        LOG.info("Adding resource type - name = " + resourceName
+            + ", units = " + resourceUnits + ", type = " + resourceTypeName);
+        if (resourceInformationMap.containsKey(resourceName)) {
+          throw new YarnRuntimeException(
+              "Error in config, key '" + resourceName + "' specified twice");
+        }
+        resourceInformationMap.put(resourceName, ResourceInformation
+            .newInstance(resourceName, resourceUnits, 0L, resourceType,
+                minimumAllocation, maximumAllocation));
+      }
+    }
+
+    checkMandatoryResources(resourceInformationMap);
+    addMandatoryResources(resourceInformationMap);
+
+    setMinimumAllocationForMandatoryResources(resourceInformationMap, conf);
+    setMaximumAllocationForMandatoryResources(resourceInformationMap, conf);
+
+    return resourceInformationMap;
+  }
+
+  @VisibleForTesting
+  static void initializeResourcesMap(Configuration conf) {
+    Map<String, ResourceInformation> resourceInformationMap =
+        getResourceInformationMapFromConfig(conf);
+    initializeResourcesFromResourceInformationMap(resourceInformationMap);
+  }
+
+  /**
+   * This method is visible for testing: a unit test can construct a
+   * resourceInformationMap and pass it to this method to initialize
+   * multiple resources.
+   * @param resourceInformationMap constructed resource information map.
+   */
+  @VisibleForTesting
+  public static void initializeResourcesFromResourceInformationMap(
+      Map<String, ResourceInformation> resourceInformationMap) {
+    resourceTypes = Collections.unmodifiableMap(resourceInformationMap);
+    updateKnownResources();
+    updateResourceTypeIndex();
+    initializedResources = true;
+  }
+
+  private static void updateKnownResources() {
+    // Update resource names. Slots 0 and 1 are reserved for memory and
+    // vcores; every other resource type follows in map order.
+    resourceTypesArray = new ResourceInformation[resourceTypes.size()];
+
+    int index = 2;
+    for (ResourceInformation resInfo : resourceTypes.values()) {
+      if (resInfo.getName().equals(MEMORY)) {
+        resourceTypesArray[0] = ResourceInformation
+            .newInstance(resourceTypes.get(MEMORY));
+      } else if (resInfo.getName().equals(VCORES)) {
+        resourceTypesArray[1] = ResourceInformation
+            .newInstance(resourceTypes.get(VCORES));
+      } else {
+        resourceTypesArray[index] = ResourceInformation.newInstance(resInfo);
+        index++;
+      }
+    }
+  }
+
+  private static void updateResourceTypeIndex() {
+    RESOURCE_NAME_TO_INDEX.clear();
+
+    for (int index = 0; index < resourceTypesArray.length; index++) {
+      ResourceInformation resInfo = resourceTypesArray[index];
+      RESOURCE_NAME_TO_INDEX.put(resInfo.getName(), index);
+    }
+  }
+
+  /**
+   * Get the index associated with each resource type, such as memory, CPU,
+   * etc. This helps to access each resource type in a Resource faster.
+   * @return Index map for all resource types.
+   */
+  public static Map<String, Integer> getResourceTypeIndex() {
+    return RESOURCE_NAME_TO_INDEX;
+  }
+
+  /**
+   * Get the resource types to be supported by the system.
+   * @return A map of the resource name to a ResourceInformation object
+   *         which contains details such as the unit.
+   */
+  public static Map<String, ResourceInformation> getResourceTypes() {
+    return getResourceTypes(null,
+        YarnConfiguration.RESOURCE_TYPES_CONFIGURATION_FILE);
+  }
+
+  public static ResourceInformation[] getResourceTypesArray() {
+    initializeResourceTypesIfNeeded(null,
+        YarnConfiguration.RESOURCE_TYPES_CONFIGURATION_FILE);
+    return resourceTypesArray;
+  }
+
+  public static int getNumberOfKnownResourceTypes() {
+    if (numKnownResourceTypes < 0) {
+      initializeResourceTypesIfNeeded(null,
+          YarnConfiguration.RESOURCE_TYPES_CONFIGURATION_FILE);
+    }
+    return numKnownResourceTypes;
+  }
+
+  private static Map<String, ResourceInformation> getResourceTypes(
+      Configuration conf) {
+    return getResourceTypes(conf,
+        YarnConfiguration.RESOURCE_TYPES_CONFIGURATION_FILE);
+  }
+
+  private static void initializeResourceTypesIfNeeded(Configuration conf,
+      String resourceFile) {
+    if (!initializedResources) {
+      synchronized (ResourceUtils.class) {
+        if (!initializedResources) {
+          Configuration resConf = conf;
+
+          if (resConf == null) {
+            resConf = new YarnConfiguration();
+          }
+
+          addResourcesFileToConf(resourceFile, resConf);
+          initializeResourcesMap(resConf);
+        }
+      }
+    }
+    numKnownResourceTypes = resourceTypes.size();
+  }
+
+  private static Map<String, ResourceInformation> getResourceTypes(
+      Configuration conf, String resourceFile) {
+    initializeResourceTypesIfNeeded(conf, resourceFile);
+    return resourceTypes;
+  }
+
+  private static InputStream getConfInputStream(String resourceFile,
+      Configuration conf) throws IOException, YarnException {
+
+    ConfigurationProvider provider =
+        ConfigurationProviderFactory.getConfigurationProvider(conf);
+    try {
+      provider.init(conf);
+    } catch (Exception e) {
+      throw new IOException(e);
+    }
+
+    InputStream ris = provider.getConfigurationInputStream(conf, resourceFile);
+    if (ris == null) {
+      if (conf.getResource(resourceFile) == null) {
+        throw new FileNotFoundException("Unable to find " + resourceFile);
+      }
+      throw new IOException(
+          "Unable to open resource types file '" + resourceFile
+              + "'. Using provider " + provider);
+    }
+    return ris;
+  }
+
+  private static void addResourcesFileToConf(String resourceFile,
+      Configuration conf) {
+    try {
+      InputStream ris = getConfInputStream(resourceFile, conf);
+      LOG.debug("Found " + resourceFile + ", adding to configuration");
+      conf.addResource(ris);
+    } catch (FileNotFoundException fe) {
+      LOG.info("Unable to find '" + resourceFile + "'.");
+    } catch (IOException | YarnException ex) {
+      LOG.fatal("Exception trying to read resource types configuration '"
+          + resourceFile + "'.", ex);
+      throw new YarnRuntimeException(ex);
+    }
+  }
+
+  @VisibleForTesting
+  synchronized static void resetResourceTypes() {
+    initializedResources = false;
+  }
+
+  @VisibleForTesting
+  public static Map<String, ResourceInformation> resetResourceTypes(
+      Configuration conf) {
+    synchronized (ResourceUtils.class) {
+      initializedResources = false;
+    }
+    return getResourceTypes(conf);
+  }
+
+  public static String getUnits(String resourceValue) {
+    String units;
+    for (int i = 0; i < resourceValue.length(); i++) {
+      if (Character.isAlphabetic(resourceValue.charAt(i))) {
+        units = resourceValue.substring(i);
+        if (StringUtils.isAlpha(units)) {
+          return units;
+        }
+      }
+    }
+    return "";
+  }
+
+  /**
+   * Function to get the resources for a node. This function will look at the
+   * file {@link YarnConfiguration#NODE_RESOURCES_CONFIGURATION_FILE} to
+   * determine the node resources.
+   *
+   * @param conf configuration file
+   * @return a map from resource name to the ResourceInformation object.
+   *         The map is guaranteed to have entries for memory and vcores.
+   */
+  public static Map<String, ResourceInformation> getNodeResourceInformation(
+      Configuration conf) {
+    if (!initializedNodeResources) {
+      synchronized (ResourceUtils.class) {
+        if (!initializedNodeResources) {
+          Map<String, ResourceInformation> nodeResources =
+              initializeNodeResourceInformation(conf);
+          addMandatoryResources(nodeResources);
+          checkMandatoryResources(nodeResources);
+          setMinimumAllocationForMandatoryResources(nodeResources, conf);
+          setMaximumAllocationForMandatoryResources(nodeResources, conf);
+          readOnlyNodeResources = Collections.unmodifiableMap(nodeResources);
+          initializedNodeResources = true;
+        }
+      }
+    }
+    return readOnlyNodeResources;
+  }
+
+  private static Map<String, ResourceInformation>
+      initializeNodeResourceInformation(Configuration conf) {
+    Map<String, ResourceInformation> nodeResources = new HashMap<>();
+
+    addResourcesFileToConf(YarnConfiguration.NODE_RESOURCES_CONFIGURATION_FILE,
+        conf);
+
+    for (Map.Entry<String, String> entry : conf) {
+      String key = entry.getKey();
+      String value = entry.getValue();
+      addResourceTypeInformation(key, value, nodeResources);
+    }
+
+    return nodeResources;
+  }
+
+  private static void addResourceTypeInformation(String prop, String value,
+      Map<String, ResourceInformation> nodeResources) {
+    if (prop.startsWith(YarnConfiguration.NM_RESOURCES_PREFIX)) {
+      LOG.info("Found resource entry " + prop);
+      String resourceType = prop.substring(
+          YarnConfiguration.NM_RESOURCES_PREFIX.length());
+      if (!nodeResources.containsKey(resourceType)) {
+        nodeResources
+            .put(resourceType, ResourceInformation.newInstance(resourceType));
+      }
+      String units = getUnits(value);
+      Long resourceValue =
+          Long.valueOf(value.substring(0, value.length() - units.length()));
+      nodeResources.get(resourceType).setValue(resourceValue);
+      nodeResources.get(resourceType).setUnits(units);
+      LOG.debug("Setting value for resource type " + resourceType + " to "
+          + resourceValue + " with units " + units);
+    }
+  }
+
+  @VisibleForTesting
+  synchronized public static void resetNodeResources() {
+    initializedNodeResources = false;
+  }
+
+  public static Resource getResourceTypesMinimumAllocation() {
+    Resource ret = Resource.newInstance(0, 0);
+    for (ResourceInformation entry : resourceTypesArray) {
+      String name = entry.getName();
+      if (name.equals(ResourceInformation.MEMORY_MB.getName())) {
+        ret.setMemorySize(entry.getMinimumAllocation());
+      } else if (name.equals(ResourceInformation.VCORES.getName())) {
+        Long tmp = entry.getMinimumAllocation();
+        if (tmp > Integer.MAX_VALUE) {
+          tmp = (long) Integer.MAX_VALUE;
+        }
+        ret.setVirtualCores(tmp.intValue());
+      } else {
+        ret.setResourceValue(name, entry.getMinimumAllocation());
+      }
+    }
+    return ret;
+  }
+
+  /**
+   * Get a Resource object with the maximum allocation possible.
+   * @return a Resource object with the maximum allocation for the scheduler
+   */
+  public static Resource getResourceTypesMaximumAllocation() {
+    Resource ret = Resource.newInstance(0, 0);
+    for (ResourceInformation entry : resourceTypesArray) {
+      ret.setResourceValue(entry.getName(),
+          entry.getMaximumAllocation());
+    }
+    return ret;
+  }
+
+  /**
+   * Get the default unit for a given resource type.
+   * @param resourceType resourceType
+   * @return default unit
+   */
+  public static String getDefaultUnit(String resourceType) {
+    ResourceInformation ri = getResourceTypes().get(resourceType);
+    if (ri != null) {
+      return ri.getUnits();
+    }
+    return "";
+  }
+
+  /**
+   * Get resource type information for all known resource types.
+   * @return List of ResourceTypeInfo
+   */
+  public static List<ResourceTypeInfo> getResourcesTypeInfo() {
+    List<ResourceTypeInfo> array = new ArrayList<>();
+    // Add all resource types
+    Collection<ResourceInformation> resourcesInfo =
+        ResourceUtils.getResourceTypes().values();
+    for (ResourceInformation resourceInfo : resourcesInfo) {
+      array.add(ResourceTypeInfo
+          .newInstance(resourceInfo.getName(), resourceInfo.getUnits(),
+              resourceInfo.getResourceType()));
+    }
+    return array;
+  }
+
+  /**
+   * From a given configuration, get all entries representing requested
+   * resources: entries that match the {prefix}{resourceName}={value}[{units}]
+   * pattern.
+   * @param configuration The configuration
+   * @param prefix Keys with this prefix are considered from the configuration
+   * @return The list of requested resources as described by the configuration
+   */
+  public static List<ResourceInformation> getRequestedResourcesFromConfig(
+      Configuration configuration, String prefix) {
+    List<ResourceInformation> result = new ArrayList<>();
+    Map<String, String> customResourcesMap = configuration
+        .getValByRegex("^" + Pattern.quote(prefix) + "[^.]+$");
+    for (Entry<String, String> resource : customResourcesMap.entrySet()) {
+      String resourceName = resource.getKey().substring(prefix.length());
+      Matcher matcher =
+          RESOURCE_REQUEST_VALUE_PATTERN.matcher(resource.getValue());
+      if (!matcher.matches()) {
+        String errorMsg = "Invalid resource request specified for property "
+            + resource.getKey() + ": \"" + resource.getValue()
+            + "\", expected format is: value[ ][units]";
+        LOG.error(errorMsg);
+        throw new IllegalArgumentException(errorMsg);
+      }
+      long value = Long.parseLong(matcher.group(1));
+      String unit = matcher.group(2);
+      if (unit.isEmpty()) {
+        unit = ResourceUtils.getDefaultUnit(resourceName);
+      }
+      ResourceInformation resourceInformation = new ResourceInformation();
+      resourceInformation.setName(resourceName);
+      resourceInformation.setValue(value);
+      resourceInformation.setUnits(unit);
+      result.add(resourceInformation);
+    }
+    return result;
+  }
+
+}
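For orientation, a sketch (not part of the patch) of how a caller might drive getRequestedResourcesFromConfig. The prefix and resource names below are made up for illustration; any prefix whose keys match the {prefix}{resourceName}={value}[{units}] pattern works. Note also that the "[^.]+" in the key regex means resource names containing a dot will not be picked up by this method.

import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.yarn.api.records.ResourceInformation;
import org.apache.hadoop.yarn.util.resource.ResourceUtils;

public class RequestedResourcesExample {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // A countable resource with no units; the default unit (here "") is used.
    conf.set("mapreduce.map.resource.gpu", "2");
    // A resource whose units ride along with the value, separated by a space.
    conf.set("mapreduce.map.resource.special-memory", "512 Mi");

    List<ResourceInformation> requested = ResourceUtils
        .getRequestedResourcesFromConfig(conf, "mapreduce.map.resource.");
    for (ResourceInformation ri : requested) {
      System.out.println(ri.getName() + " -> " + ri.getValue() + ri.getUnits());
    }
  }
}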
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/util/resource/package-info.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/util/resource/package-info.java
new file mode 100644
index 00000000000..d7c799d7cbf
--- /dev/null
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/util/resource/package-info.java
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * Package org.apache.hadoop.yarn.util.resource contains utility classes
+ * used for resource profile computations.
+ */
+package org.apache.hadoop.yarn.util.resource;
\ No newline at end of file
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/applicationclient_protocol.proto hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/applicationclient_protocol.proto
index ba79db09a6f..512d3a15c1f 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/applicationclient_protocol.proto
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/applicationclient_protocol.proto
@@ -61,4 +61,5 @@ service ApplicationClientProtocolService {
   rpc updateApplicationPriority (UpdateApplicationPriorityRequestProto) returns (UpdateApplicationPriorityResponseProto);
   rpc signalToContainer(SignalContainerRequestProto) returns (SignalContainerResponseProto);
   rpc updateApplicationTimeouts (UpdateApplicationTimeoutsRequestProto) returns (UpdateApplicationTimeoutsResponseProto);
+  rpc getResourceTypeInfo(GetAllResourceTypeInfoRequestProto) returns (GetAllResourceTypeInfoResponseProto);
 }
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto
index 51efd15f8f5..214a2b812f0 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto
@@ -53,9 +53,27 @@ message ContainerIdProto {
   optional int64 id = 3;
 }
 
+enum ResourceTypesProto {
+  COUNTABLE = 0;
+}
+
+message ResourceInformationProto {
+  required string key = 1;
+  optional int64 value = 2;
+  optional string units = 3;
+  optional ResourceTypesProto type = 4;
+}
+
+message ResourceTypeInfoProto {
+  required string name = 1;
+  optional string units = 2;
+  optional ResourceTypesProto type = 3;
+}
+
 message ResourceProto {
   optional int64 memory = 1;
   optional int32 virtual_cores = 2;
+  repeated ResourceInformationProto resource_value_map = 3;
 }
 
 message ResourceUtilizationProto {
@@ -205,6 +223,11 @@ message LocalResourceProto {
   optional bool should_be_uploaded_to_shared_cache = 7;
 }
 
+message StringLongMapProto {
+  required string key = 1;
+  required int64 value = 2;
+}
+
 message ApplicationResourceUsageReportProto {
   optional int32 num_used_containers = 1;
   optional int32 num_reserved_containers = 2;
@@ -217,6 +240,8 @@ message ApplicationResourceUsageReportProto {
   optional float cluster_usage_percentage = 9;
   optional int64 preempted_memory_seconds = 10;
   optional int64 preempted_vcore_seconds = 11;
+  repeated StringLongMapProto application_resource_usage_map = 12;
+  repeated StringLongMapProto application_preempted_resource_usage_map = 13;
 }
 
 message ApplicationReportProto {
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_service_protos.proto hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_service_protos.proto
index a5fc568b34d..038d73aed6a 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_service_protos.proto
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_service_protos.proto
@@ -283,6 +283,13 @@ message UpdateApplicationTimeoutsResponseProto {
   repeated ApplicationUpdateTimeoutMapProto application_timeouts = 1;
 }
 
+message GetAllResourceTypeInfoRequestProto {
+}
+
+message GetAllResourceTypeInfoResponseProto {
+  repeated ResourceTypeInfoProto resource_type_info = 1;
+}
+
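For orientation, a sketch (not part of the patch) of the wire shape the messages above give a Resource, assuming the standard protobuf-java code generation into the YarnProtos outer class that the Java files in this patch reference: memory and vcores keep their dedicated fields, while every other resource travels in resource_value_map.

import org.apache.hadoop.yarn.proto.YarnProtos.ResourceInformationProto;
import org.apache.hadoop.yarn.proto.YarnProtos.ResourceProto;
import org.apache.hadoop.yarn.proto.YarnProtos.ResourceTypesProto;

public class ResourceProtoExample {
  public static void main(String[] args) {
    ResourceProto proto = ResourceProto.newBuilder()
        .setMemory(4096)          // dedicated memory field
        .setVirtualCores(4)       // dedicated vcores field
        .addResourceValueMap(ResourceInformationProto.newBuilder()
            .setKey("gpu")        // any additional resource goes in the map
            .setValue(2L)
            .setUnits("")
            .setType(ResourceTypesProto.COUNTABLE))
        .build();
    System.out.println(proto);
  }
}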
 //////////////////////////////////////////////////////
 /////// client_NM_Protocol ///////////////////////////
 //////////////////////////////////////////////////////
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/test/java/org/apache/hadoop/yarn/conf/TestResourceInformation.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/test/java/org/apache/hadoop/yarn/conf/TestResourceInformation.java
new file mode 100644
index 00000000000..66bf3204bf6
--- /dev/null
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/test/java/org/apache/hadoop/yarn/conf/TestResourceInformation.java
@@ -0,0 +1,73 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.conf;
+
+import org.apache.hadoop.yarn.api.records.ResourceInformation;
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * Test class to verify the various pieces of resource information in a
+ * given resource.
+ */
+public class TestResourceInformation {
+
+  @Test
+  public void testName() {
+    String name = "yarn.io/test";
+    ResourceInformation ri = ResourceInformation.newInstance(name);
+    Assert.assertEquals("Resource name incorrect", name, ri.getName());
+  }
+
+  @Test
+  public void testUnits() {
+    String name = "yarn.io/test";
+    String units = "m";
+    ResourceInformation ri = ResourceInformation.newInstance(name, units);
+    Assert.assertEquals("Resource name incorrect", name, ri.getName());
+    Assert.assertEquals("Resource units incorrect", units, ri.getUnits());
+    units = "z";
+    try {
+      ResourceInformation.newInstance(name, units);
+      Assert.fail(units + " is not a valid unit");
+    } catch (IllegalArgumentException ie) {
+      // do nothing
+    }
+  }
+
+  @Test
+  public void testValue() {
+    String name = "yarn.io/test";
+    long value = 1L;
+    ResourceInformation ri = ResourceInformation.newInstance(name, value);
+    Assert.assertEquals("Resource name incorrect", name, ri.getName());
+    Assert.assertEquals("Resource value incorrect", value, ri.getValue());
+  }
+
+  @Test
+  public void testResourceInformation() {
+    String name = "yarn.io/test";
+    long value = 1L;
+    String units = "m";
+    ResourceInformation ri =
+        ResourceInformation.newInstance(name, units, value);
+    Assert.assertEquals("Resource name incorrect", name, ri.getName());
+    Assert.assertEquals("Resource value incorrect", value, ri.getValue());
+    Assert.assertEquals("Resource units incorrect", units, ri.getUnits());
+  }
+}
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/test/java/org/apache/hadoop/yarn/util/TestUnitsConversionUtil.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/test/java/org/apache/hadoop/yarn/util/TestUnitsConversionUtil.java
new file mode 100644
index 00000000000..a412faebed8
--- /dev/null
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/test/java/org/apache/hadoop/yarn/util/TestUnitsConversionUtil.java
@@ -0,0 +1,139 @@
+/**
+ * Licensed to the Apache Software
Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.util; + +import org.junit.Assert; +import org.junit.Test; + +/** + * Test class to handle all test cases needed to verify basic unit conversion + * scenarios. + */ +public class TestUnitsConversionUtil { + + @Test + public void testUnitsConversion() { + int value = 5; + String fromUnit = ""; + long test = value; + Assert.assertEquals("pico test failed", + value * 1000L * 1000L * 1000L * 1000L, + UnitsConversionUtil.convert(fromUnit, "p", test)); + Assert.assertEquals("nano test failed", + value * 1000L * 1000L * 1000L, + UnitsConversionUtil.convert(fromUnit, "n", test)); + Assert + .assertEquals("micro test failed", value * 1000L * 1000L, + UnitsConversionUtil.convert(fromUnit, "u", test)); + Assert.assertEquals("milli test failed", value * 1000L, + UnitsConversionUtil.convert(fromUnit, "m", test)); + + test = value * 1000L * 1000L * 1000L * 1000L * 1000L; + fromUnit = ""; + Assert.assertEquals("kilo test failed", test / 1000L, + UnitsConversionUtil.convert(fromUnit, "k", test)); + + Assert + .assertEquals("mega test failed", test / (1000L * 1000L), + UnitsConversionUtil.convert(fromUnit, "M", test)); + Assert.assertEquals("giga test failed", + test / (1000L * 1000L * 1000L), + UnitsConversionUtil.convert(fromUnit, "G", test)); + Assert.assertEquals("tera test failed", + test / (1000L * 1000L * 1000L * 1000L), + UnitsConversionUtil.convert(fromUnit, "T", test)); + Assert.assertEquals("peta test failed", + test / (1000L * 1000L * 1000L * 1000L * 1000L), + UnitsConversionUtil.convert(fromUnit, "P", test)); + + Assert.assertEquals("nano to pico test failed", value * 1000L, + UnitsConversionUtil.convert("n", "p", value)); + + Assert.assertEquals("mega to giga test failed", value, + UnitsConversionUtil.convert("M", "G", value * 1000L)); + + Assert.assertEquals("Mi to Gi test failed", value, + UnitsConversionUtil.convert("Mi", "Gi", value * 1024L)); + + Assert.assertEquals("Mi to Ki test failed", value * 1024, + UnitsConversionUtil.convert("Mi", "Ki", value)); + + Assert.assertEquals("Ki to base units test failed", 5 * 1024, + UnitsConversionUtil.convert("Ki", "", 5)); + + Assert.assertEquals("Mi to k test failed", 1073741, + UnitsConversionUtil.convert("Mi", "k", 1024)); + + Assert.assertEquals("M to Mi test failed", 953, + UnitsConversionUtil.convert("M", "Mi", 1000)); + } + + @Test + public void testOverflow() { + long test = 5 * 1000L * 1000L * 1000L * 1000L * 1000L; + try { + UnitsConversionUtil.convert("P", "p", test); + Assert.fail("this operation should result in an overflow"); + } catch (IllegalArgumentException ie) { + // do nothing + } + try { + UnitsConversionUtil.convert("m", "p", Long.MAX_VALUE - 1); + Assert.fail("this operation should result in an overflow"); + } catch 
(IllegalArgumentException ie) { + // do nothing + } + } + + @Test + public void testCompare() { + String unitA = "P"; + long valueA = 1; + String unitB = "p"; + long valueB = 2; + Assert.assertEquals(1, + UnitsConversionUtil.compare(unitA, valueA, unitB, valueB)); + Assert.assertEquals(-1, + UnitsConversionUtil.compare(unitB, valueB, unitA, valueA)); + Assert.assertEquals(0, + UnitsConversionUtil.compare(unitA, valueA, unitA, valueA)); + Assert.assertEquals(-1, + UnitsConversionUtil.compare(unitA, valueA, unitA, valueB)); + Assert.assertEquals(1, + UnitsConversionUtil.compare(unitA, valueB, unitA, valueA)); + + unitB = "T"; + Assert.assertEquals(1, + UnitsConversionUtil.compare(unitA, valueA, unitB, valueB)); + Assert.assertEquals(-1, + UnitsConversionUtil.compare(unitB, valueB, unitA, valueA)); + Assert.assertEquals(0, + UnitsConversionUtil.compare(unitA, valueA, unitB, 1000L)); + + unitA = "p"; + unitB = "n"; + Assert.assertEquals(-1, + UnitsConversionUtil.compare(unitA, valueA, unitB, valueB)); + Assert.assertEquals(1, + UnitsConversionUtil.compare(unitB, valueB, unitA, valueA)); + Assert.assertEquals(0, + UnitsConversionUtil.compare(unitA, 1000L, unitB, valueA)); + + } +} diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/Client.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/Client.java index 50f5bdf7763..b3a6fffb6a0 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/Client.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/Client.java @@ -79,6 +79,7 @@ import org.apache.hadoop.yarn.client.api.YarnClientApplication; import org.apache.hadoop.yarn.client.util.YarnClientUtils; import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.exceptions.YARNFeatureNotEnabledException; import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.util.ConverterUtils; import org.apache.hadoop.yarn.util.timeline.TimelineUtils; diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/YarnClient.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/YarnClient.java index 8c68a31ffe2..29a8dba8e3b 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/YarnClient.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/YarnClient.java @@ -61,6 +61,8 @@ import org.apache.hadoop.yarn.api.records.QueueUserACLInfo; import org.apache.hadoop.yarn.api.records.ReservationDefinition; import org.apache.hadoop.yarn.api.records.ReservationId; +import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.api.records.ResourceTypeInfo; import org.apache.hadoop.yarn.api.records.SignalContainerCommand; import org.apache.hadoop.yarn.api.records.Token; import org.apache.hadoop.yarn.api.records.YarnApplicationState; @@ -70,6 +72,7 @@ import org.apache.hadoop.yarn.exceptions.ApplicationIdNotProvidedException; import 
org.apache.hadoop.yarn.exceptions.ApplicationNotFoundException; import org.apache.hadoop.yarn.exceptions.ContainerNotFoundException; +import org.apache.hadoop.yarn.exceptions.YARNFeatureNotEnabledException; import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.security.AMRMTokenIdentifier; @@ -855,4 +858,17 @@ public UpdateApplicationTimeoutsResponse updateApplicationTimeouts( throw new UnsupportedOperationException("The sub-class extending " + YarnClient.class.getName() + " is expected to implement this !"); } + + /** + *
<p>
+ * Get available resource types supported by RM. + *
</p>
+   * @return list of supported resource types with detailed information
+   * @throws YarnException if any issue happens inside YARN
+   * @throws IOException in case of other errors
+   */
+  @Public
+  @Unstable
+  public abstract List<ResourceTypeInfo> getResourceTypeInfo()
+      throws YarnException, IOException;
 }
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/AMRMClientImpl.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/AMRMClientImpl.java
index 8217e34ba52..de9877b85ae 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/AMRMClientImpl.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/AMRMClientImpl.java
@@ -150,12 +150,7 @@ public int compare(Resource arg0, Resource arg1) {
   }
 
   static boolean canFit(Resource arg0, Resource arg1) {
-    long mem0 = arg0.getMemorySize();
-    long mem1 = arg1.getMemorySize();
-    long cpu0 = arg0.getVirtualCores();
-    long cpu1 = arg1.getVirtualCores();
-
-    return (mem0 <= mem1 && cpu0 <= cpu1);
+    return Resources.fitsIn(arg0, arg1);
   }
 
   private final Map> remoteRequests =
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/YarnClientImpl.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/YarnClientImpl.java
index 7d69a87b2bc..397566a0dbd 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/YarnClientImpl.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/YarnClientImpl.java
@@ -43,6 +43,7 @@ import org.apache.hadoop.security.token.TokenIdentifier;
 import org.apache.hadoop.yarn.api.ApplicationClientProtocol;
 import org.apache.hadoop.yarn.api.protocolrecords.FailApplicationAttemptRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.GetAllResourceTypeInfoRequest;
 import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationAttemptReportRequest;
 import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationAttemptReportResponse;
 import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationAttemptsRequest;
@@ -101,6 +102,8 @@ import org.apache.hadoop.yarn.api.records.Priority;
 import org.apache.hadoop.yarn.api.records.QueueInfo;
 import org.apache.hadoop.yarn.api.records.QueueUserACLInfo;
+import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.api.records.ResourceTypeInfo;
 import org.apache.hadoop.yarn.api.records.SignalContainerCommand;
 import org.apache.hadoop.yarn.api.records.Token;
 import org.apache.hadoop.yarn.api.records.YarnApplicationState;
@@ -931,4 +934,12 @@ public UpdateApplicationTimeoutsResponse updateApplicationTimeouts(
       throws YarnException, IOException {
     return rmClient.updateApplicationTimeouts(request);
   }
+
+  @Override
+  public List<ResourceTypeInfo> getResourceTypeInfo()
+      throws YarnException, IOException {
+    GetAllResourceTypeInfoRequest request =
+        GetAllResourceTypeInfoRequest.newInstance();
+    return rmClient.getResourceTypeInfo(request).getResourceTypeInfo();
+  }
 }
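For orientation, a sketch (not part of the patch) of an end-to-end caller of the new client API. The createYarnClient()/init()/start()/stop() lifecycle is the existing YarnClient contract; ResourceTypeInfo's getName() and getDefaultUnit() accessors are assumed from how the record is used elsewhere in this patch.

import java.util.List;
import org.apache.hadoop.yarn.api.records.ResourceTypeInfo;
import org.apache.hadoop.yarn.client.api.YarnClient;
import org.apache.hadoop.yarn.conf.YarnConfiguration;

public class ListResourceTypes {
  public static void main(String[] args) throws Exception {
    YarnClient client = YarnClient.createYarnClient();
    client.init(new YarnConfiguration());
    client.start();
    try {
      // Ask the RM which resource types it supports.
      List<ResourceTypeInfo> types = client.getResourceTypeInfo();
      for (ResourceTypeInfo type : types) {
        System.out.println(type.getName() + " (" + type.getDefaultUnit() + ")");
      }
    } finally {
      client.stop();
    }
  }
}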
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/cli/ApplicationCLI.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/cli/ApplicationCLI.java
index 5f6b30017d1..2a9b3bcd925 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/cli/ApplicationCLI.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/cli/ApplicationCLI.java
@@ -62,6 +62,8 @@
 import com.google.common.annotations.VisibleForTesting;
 
+import static org.apache.hadoop.yarn.util.StringHelper.getResourceSecondsString;
+
 @Private
 @Unstable
 public class ApplicationCLI extends YarnCLI {
@@ -711,24 +713,9 @@ private int printApplicationReport(String applicationId)
       appReportStr.println(appReport.getRpcPort());
       appReportStr.print("\tAM Host : ");
       appReportStr.println(appReport.getHost());
-      appReportStr.print("\tAggregate Resource Allocation : ");
       ApplicationResourceUsageReport usageReport =
           appReport.getApplicationResourceUsageReport();
-      if (usageReport != null) {
-        //completed app report in the timeline server doesn't have usage report
-        appReportStr.print(usageReport.getMemorySeconds() + " MB-seconds, ");
-        appReportStr.println(usageReport.getVcoreSeconds() + " vcore-seconds");
-        appReportStr.print("\tAggregate Resource Preempted : ");
-        appReportStr.print(usageReport.getPreemptedMemorySeconds() +
-            " MB-seconds, ");
-        appReportStr.println(usageReport.getPreemptedVcoreSeconds() +
-            " vcore-seconds");
-      } else {
-        appReportStr.println("N/A");
-        appReportStr.print("\tAggregate Resource Preempted : ");
-        appReportStr.println("N/A");
-      }
+      printResourceUsage(appReportStr, usageReport);
       appReportStr.print("\tLog Aggregation Status : ");
       appReportStr.println(appReport.getLogAggregationStatus() == null ? "N/A"
           : appReport.getLogAggregationStatus());
@@ -759,6 +746,22 @@ private int printApplicationReport(String applicationId)
     return 0;
   }
 
+  private void printResourceUsage(PrintWriter appReportStr,
+      ApplicationResourceUsageReport usageReport) {
+    appReportStr.print("\tAggregate Resource Allocation : ");
+    if (usageReport != null) {
+      appReportStr.println(
+          getResourceSecondsString(usageReport.getResourceSecondsMap()));
+      appReportStr.print("\tAggregate Resource Preempted : ");
+      appReportStr.println(getResourceSecondsString(
+          usageReport.getPreemptedResourceSecondsMap()));
+    } else {
+      appReportStr.println("N/A");
+      appReportStr.print("\tAggregate Resource Preempted : ");
+      appReportStr.println("N/A");
+    }
+  }
+
   private String getAllValidApplicationStates() {
     StringBuilder sb = new StringBuilder();
     sb.append("The valid application state can be" +
         " one of the following: ");
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestAMRMClient.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestAMRMClient.java
index ff8b27cd9e4..23fe31b3daf 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestAMRMClient.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestAMRMClient.java
@@ -546,7 +546,7 @@ private void verifyMatches(
       List<? extends Collection<ContainerRequest>> matches,
       int matchSize) {
     assertEquals(1, matches.size());
-    assertEquals(matches.get(0).size(), matchSize);
+    assertEquals(matchSize, matches.get(0).size());
   }
 
   @Test (timeout=60000)
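For orientation, a sketch (not part of the patch) of the map-based usage aggregates the report now carries, mirroring the newInstance call the updated TestYarnCLI makes in the next hunk; the maps are keyed by resource name and hold resource-seconds values.

import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.yarn.api.records.ApplicationResourceUsageReport;
import org.apache.hadoop.yarn.api.records.ResourceInformation;

public class UsageReportExample {
  public static ApplicationResourceUsageReport build() {
    Map<String, Long> used = new HashMap<>();
    used.put(ResourceInformation.MEMORY_MB.getName(), 123456L);
    used.put(ResourceInformation.VCORES.getName(), 4567L);
    Map<String, Long> preempted = new HashMap<>();
    preempted.put(ResourceInformation.MEMORY_MB.getName(), 1111L);
    preempted.put(ResourceInformation.VCORES.getName(), 2222L);
    // Arguments, as exercised by the updated test: containers used/reserved,
    // used/reserved/needed Resources, resource-seconds map, queue and cluster
    // usage percentages, preempted resource-seconds map.
    return ApplicationResourceUsageReport.newInstance(
        2, 0, null, null, null, used, 0, 0, preempted);
  }
}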
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/cli/TestYarnCLI.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/cli/TestYarnCLI.java
index bec7e5f974a..bc21fdeca4b 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/cli/TestYarnCLI.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/cli/TestYarnCLI.java
@@ -39,8 +39,10 @@
 import java.util.Collections;
 import java.util.Date;
 import java.util.EnumSet;
+import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
+import java.util.Map;
 import java.util.Set;
 import java.util.regex.Pattern;
@@ -69,6 +71,7 @@
 import org.apache.hadoop.yarn.api.records.QueueInfo;
 import org.apache.hadoop.yarn.api.records.QueueState;
 import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.api.records.ResourceInformation;
 import org.apache.hadoop.yarn.api.records.ResourceUtilization;
 import org.apache.hadoop.yarn.api.records.SignalContainerCommand;
 import org.apache.hadoop.yarn.api.records.YarnApplicationAttemptState;
@@ -118,9 +121,18 @@ public void testGetApplicationReport() throws Exception {
     for (int i = 0; i < 2; ++i) {
       ApplicationCLI cli = createAndGetAppCLI();
       ApplicationId applicationId = ApplicationId.newInstance(1234, 5);
+      Map<String, Long> resourceSecondsMap = new HashMap<>();
+      Map<String, Long> preemptedResourceSecondsMap = new HashMap<>();
+      resourceSecondsMap.put(ResourceInformation.MEMORY_MB.getName(), 123456L);
+      resourceSecondsMap.put(ResourceInformation.VCORES.getName(), 4567L);
+      preemptedResourceSecondsMap
+          .put(ResourceInformation.MEMORY_MB.getName(), 1111L);
+      preemptedResourceSecondsMap
+          .put(ResourceInformation.VCORES.getName(), 2222L);
       ApplicationResourceUsageReport usageReport = i == 0 ? null :
-          ApplicationResourceUsageReport.newInstance(
-              2, 0, null, null, null, 123456, 4567, 0, 0, 1111, 2222);
+          ApplicationResourceUsageReport
+              .newInstance(2, 0, null, null, null, resourceSecondsMap, 0, 0,
+                  preemptedResourceSecondsMap);
       ApplicationReport newApplicationReport = ApplicationReport.newInstance(
           applicationId, ApplicationAttemptId.newInstance(applicationId, 1),
           "user", "queue", "appname", "host", 124, null,
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/pom.xml hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/pom.xml
index 2110a246574..40c70ba0342 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/pom.xml
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/pom.xml
@@ -204,6 +204,14 @@
             true
+    <testResources>
+      <testResource>
+        <directory>${project.basedir}/src/test/resources</directory>
+      </testResource>
+      <testResource>
+        <directory>${project.basedir}/src/test/resources/resource-types</directory>
+      </testResource>
+    </testResources>
         <groupId>org.apache.rat</groupId>
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/FileSystemBasedConfigurationProvider.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/FileSystemBasedConfigurationProvider.java
index cef03b9b052..73c49906c37 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/FileSystemBasedConfigurationProvider.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/FileSystemBasedConfigurationProvider.java
@@ -51,7 +51,8 @@ public synchronized InputStream getConfigurationInputStream(
           "Illegal argument! The parameter should not be null or empty");
     }
     Path filePath;
-    if (YarnConfiguration.RM_CONFIGURATION_FILES.contains(name)) {
+    if (YarnConfiguration.RM_CONFIGURATION_FILES.contains(name) ||
+        YarnConfiguration.NM_CONFIGURATION_FILES.contains(name)) {
       filePath = new Path(this.configDir, name);
       if (!fs.exists(filePath)) {
         LOG.info(filePath + " not found");
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/LocalConfigurationProvider.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/LocalConfigurationProvider.java
index cfa194fb5b2..0cdbd1516d5 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/LocalConfigurationProvider.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/LocalConfigurationProvider.java
@@ -39,7 +39,8 @@ public InputStream getConfigurationInputStream(Configuration bootstrapConf,
     if (name == null || name.isEmpty()) {
       throw new YarnException(
          "Illegal argument!
The parameter should not be null or empty"); - } else if (YarnConfiguration.RM_CONFIGURATION_FILES.contains(name)) { + } else if (YarnConfiguration.RM_CONFIGURATION_FILES.contains(name) || + YarnConfiguration.NM_CONFIGURATION_FILES.contains(name)) { return bootstrapConf.getConfResourceAsInputStream(name); } return new FileInputStream(name); diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/impl/pb/client/ApplicationClientProtocolPBClientImpl.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/impl/pb/client/ApplicationClientProtocolPBClientImpl.java index ad7cb296080..a6ccd2a04f2 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/impl/pb/client/ApplicationClientProtocolPBClientImpl.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/impl/pb/client/ApplicationClientProtocolPBClientImpl.java @@ -89,6 +89,8 @@ import org.apache.hadoop.yarn.api.protocolrecords.SignalContainerResponse; import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationRequest; import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationResponse; +import org.apache.hadoop.yarn.api.protocolrecords.GetAllResourceTypeInfoRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetAllResourceTypeInfoResponse; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.CancelDelegationTokenRequestPBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.CancelDelegationTokenResponsePBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.FailApplicationAttemptRequestPBImpl; @@ -147,6 +149,8 @@ import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.SignalContainerResponsePBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.SubmitApplicationRequestPBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.SubmitApplicationResponsePBImpl; +import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetAllResourceTypeInfoRequestPBImpl; +import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetAllResourceTypeInfoResponsePBImpl; import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.ipc.RPCUtil; import org.apache.hadoop.yarn.proto.YarnServiceProtos; @@ -619,4 +623,18 @@ public UpdateApplicationTimeoutsResponse updateApplicationTimeouts( return null; } } + + @Override + public GetAllResourceTypeInfoResponse getResourceTypeInfo( + GetAllResourceTypeInfoRequest request) throws YarnException, IOException { + YarnServiceProtos.GetAllResourceTypeInfoRequestProto requestProto = + ((GetAllResourceTypeInfoRequestPBImpl) request).getProto(); + try { + return new GetAllResourceTypeInfoResponsePBImpl( + proxy.getResourceTypeInfo(null, requestProto)); + } catch (ServiceException e) { + RPCUtil.unwrapAndThrowException(e); + return null; + } + } } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/impl/pb/service/ApplicationClientProtocolPBServiceImpl.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/impl/pb/service/ApplicationClientProtocolPBServiceImpl.java index 93ce6a343c5..ca4276a92ee 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/impl/pb/service/ApplicationClientProtocolPBServiceImpl.java +++ 
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/impl/pb/service/ApplicationClientProtocolPBServiceImpl.java @@ -58,6 +58,7 @@ import org.apache.hadoop.yarn.api.protocolrecords.UpdateApplicationTimeoutsResponse; import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationResponse; import org.apache.hadoop.yarn.api.protocolrecords.SignalContainerResponse; +import org.apache.hadoop.yarn.api.protocolrecords.GetAllResourceTypeInfoResponse; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.CancelDelegationTokenRequestPBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.CancelDelegationTokenResponsePBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.FailApplicationAttemptRequestPBImpl; @@ -116,6 +117,8 @@ import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.UpdateApplicationTimeoutsResponsePBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.SubmitApplicationRequestPBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.SubmitApplicationResponsePBImpl; +import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetAllResourceTypeInfoRequestPBImpl; +import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetAllResourceTypeInfoResponsePBImpl; import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.proto.YarnServiceProtos; import org.apache.hadoop.yarn.proto.YarnServiceProtos.FailApplicationAttemptRequestProto; @@ -169,6 +172,8 @@ import org.apache.hadoop.yarn.proto.YarnServiceProtos.UpdateApplicationTimeoutsResponseProto; import org.apache.hadoop.yarn.proto.YarnServiceProtos.SubmitApplicationRequestProto; import org.apache.hadoop.yarn.proto.YarnServiceProtos.SubmitApplicationResponseProto; +import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetAllResourceTypeInfoRequestProto; +import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetAllResourceTypeInfoResponseProto; import com.google.protobuf.RpcController; import com.google.protobuf.ServiceException; @@ -631,4 +636,20 @@ public UpdateApplicationTimeoutsResponseProto updateApplicationTimeouts( throw new ServiceException(e); } } + + @Override + public GetAllResourceTypeInfoResponseProto getResourceTypeInfo( + RpcController controller, GetAllResourceTypeInfoRequestProto proto) + throws ServiceException { + GetAllResourceTypeInfoRequestPBImpl req = new GetAllResourceTypeInfoRequestPBImpl( + proto); + try { + GetAllResourceTypeInfoResponse resp = real.getResourceTypeInfo(req); + return ((GetAllResourceTypeInfoResponsePBImpl) resp).getProto(); + } catch (YarnException ye) { + throw new ServiceException(ye); + } catch (IOException ie) { + throw new ServiceException(ie); + } + } } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/GetAllResourceTypeInfoRequestPBImpl.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/GetAllResourceTypeInfoRequestPBImpl.java new file mode 100644 index 00000000000..b3f4692412e --- /dev/null +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/GetAllResourceTypeInfoRequestPBImpl.java @@ -0,0 +1,71 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.api.protocolrecords.impl.pb; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.yarn.api.protocolrecords.GetAllResourceTypeInfoRequest; +import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetAllResourceTypeInfoRequestProto; + +/** + * Protobuf implementation class for GetAllResourceTypeInfoRequest. + */ +@InterfaceAudience.Private +@InterfaceStability.Unstable +public class GetAllResourceTypeInfoRequestPBImpl + extends GetAllResourceTypeInfoRequest { + + private GetAllResourceTypeInfoRequestProto proto = + GetAllResourceTypeInfoRequestProto.getDefaultInstance(); + private GetAllResourceTypeInfoRequestProto.Builder builder = null; + + private boolean viaProto = false; + + public GetAllResourceTypeInfoRequestPBImpl() { + builder = GetAllResourceTypeInfoRequestProto.newBuilder(); + } + + public GetAllResourceTypeInfoRequestPBImpl( + GetAllResourceTypeInfoRequestProto proto) { + this.proto = proto; + viaProto = true; + } + + public GetAllResourceTypeInfoRequestProto getProto() { + proto = viaProto ? proto : builder.build(); + viaProto = true; + return proto; + } + + @Override + public int hashCode() { + return getProto().hashCode(); + } + + @Override + public boolean equals(Object other) { + if (other == null) { + return false; + } + if (other.getClass().isAssignableFrom(this.getClass())) { + return this.getProto().equals(this.getClass().cast(other).getProto()); + } + return false; + } +} diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/GetAllResourceTypeInfoResponsePBImpl.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/GetAllResourceTypeInfoResponsePBImpl.java new file mode 100644 index 00000000000..28decebcabf --- /dev/null +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/GetAllResourceTypeInfoResponsePBImpl.java @@ -0,0 +1,184 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.yarn.api.protocolrecords.impl.pb; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.yarn.api.protocolrecords.GetAllResourceTypeInfoResponse; +import org.apache.hadoop.yarn.api.records.ResourceTypeInfo; +import org.apache.hadoop.yarn.api.records.impl.pb.ResourceTypeInfoPBImpl; +import org.apache.hadoop.yarn.proto.YarnProtos.ResourceTypeInfoProto; +import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetAllResourceTypeInfoResponseProto; +import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetAllResourceTypeInfoResponseProtoOrBuilder; + +import com.google.protobuf.TextFormat; + +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; + +/** + * Protobuf implementation class for the GetAllResourceTypeInfoResponse. + */ +@InterfaceAudience.Private +@InterfaceStability.Unstable +public class GetAllResourceTypeInfoResponsePBImpl + extends GetAllResourceTypeInfoResponse { + + private GetAllResourceTypeInfoResponseProto proto = GetAllResourceTypeInfoResponseProto + .getDefaultInstance(); + private GetAllResourceTypeInfoResponseProto.Builder builder = null; + private boolean viaProto = false; + + private List<ResourceTypeInfo> resourceTypeInfo; + + public GetAllResourceTypeInfoResponsePBImpl() { + builder = GetAllResourceTypeInfoResponseProto.newBuilder(); + } + + public GetAllResourceTypeInfoResponsePBImpl( + GetAllResourceTypeInfoResponseProto proto) { + this.proto = proto; + viaProto = true; + } + + public GetAllResourceTypeInfoResponseProto getProto() { + mergeLocalToProto(); + proto = viaProto ? proto : builder.build(); + viaProto = true; + return proto; + } + + @Override + public int hashCode() { + return getProto().hashCode(); + } + + @Override + public void setResourceTypeInfo(List<ResourceTypeInfo> resourceTypes) { + if (resourceTypeInfo == null) { + builder.clearResourceTypeInfo(); + } + this.resourceTypeInfo = resourceTypes; + } + + @Override + public List<ResourceTypeInfo> getResourceTypeInfo() { + initResourceTypeInfosList(); + return this.resourceTypeInfo; + } + + @Override + public boolean equals(Object other) { + if (other == null) { + return false; + } + if (other.getClass().isAssignableFrom(this.getClass())) { + return this.getProto().equals(this.getClass().cast(other).getProto()); + } + return false; + } + + @Override + public String toString() { + return TextFormat.shortDebugString(getProto()); + } + + private void mergeLocalToBuilder() { + if (this.resourceTypeInfo != null) { + addResourceTypeInfosToProto(); + } + } + + private void mergeLocalToProto() { + if (viaProto) { + maybeInitBuilder(); + } + mergeLocalToBuilder(); + proto = builder.build(); + viaProto = true; + } + + private void maybeInitBuilder() { + if (viaProto || builder == null) { + builder = GetAllResourceTypeInfoResponseProto.newBuilder(proto); + } + viaProto = false; + } + + // Once this is called, resourceTypeInfo will never be null - until + // getProto is called. + private void initResourceTypeInfosList() { + if (this.resourceTypeInfo != null) { + return; + } + GetAllResourceTypeInfoResponseProtoOrBuilder p = viaProto ?
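+    /*
+     * A note on the viaProto/builder dance used throughout this class: reads
+     * go through either the immutable proto or the mutable builder, while
+     * writes are staged in local fields (resourceTypeInfo above) and only
+     * folded into the proto by mergeLocalToProto() when getProto() runs.
+     * A minimal round-trip sketch (the list contents are hypothetical):
+     *
+     *   GetAllResourceTypeInfoResponsePBImpl resp =
+     *       new GetAllResourceTypeInfoResponsePBImpl();
+     *   resp.setResourceTypeInfo(infos);       // staged locally only
+     *   GetAllResourceTypeInfoResponseProto wire = resp.getProto(); // merge
+     *   GetAllResourceTypeInfoResponsePBImpl copy =
+     *       new GetAllResourceTypeInfoResponsePBImpl(wire); // viaProto = true
+     */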
proto : builder; + List<ResourceTypeInfoProto> list = p.getResourceTypeInfoList(); + resourceTypeInfo = new ArrayList<ResourceTypeInfo>(); + + for (ResourceTypeInfoProto a : list) { + resourceTypeInfo.add(convertFromProtoFormat(a)); + } + } + + private void addResourceTypeInfosToProto() { + maybeInitBuilder(); + builder.clearResourceTypeInfo(); + if (resourceTypeInfo == null) { + return; + } + Iterable<ResourceTypeInfoProto> iterable = new Iterable<ResourceTypeInfoProto>() { + @Override + public Iterator<ResourceTypeInfoProto> iterator() { + return new Iterator<ResourceTypeInfoProto>() { + + Iterator<ResourceTypeInfo> iter = resourceTypeInfo.iterator(); + + @Override + public boolean hasNext() { + return iter.hasNext(); + } + + @Override + public ResourceTypeInfoProto next() { + return convertToProtoFormat(iter.next()); + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + + } + }; + + } + }; + builder.addAllResourceTypeInfo(iterable); + } + + private ResourceTypeInfoPBImpl convertFromProtoFormat( + ResourceTypeInfoProto p) { + return new ResourceTypeInfoPBImpl(p); + } + + private ResourceTypeInfoProto convertToProtoFormat(ResourceTypeInfo t) { + return ((ResourceTypeInfoPBImpl) t).getProto(); + } +} diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ApplicationResourceUsageReportPBImpl.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ApplicationResourceUsageReportPBImpl.java index 1c85e28dca8..14ede5dbf34 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ApplicationResourceUsageReportPBImpl.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ApplicationResourceUsageReportPBImpl.java @@ -22,12 +22,15 @@ import org.apache.hadoop.classification.InterfaceStability.Unstable; import org.apache.hadoop.yarn.api.records.ApplicationResourceUsageReport; import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.api.records.ResourceInformation; import org.apache.hadoop.yarn.proto.YarnProtos.ApplicationResourceUsageReportProto; import org.apache.hadoop.yarn.proto.YarnProtos.ApplicationResourceUsageReportProtoOrBuilder; import org.apache.hadoop.yarn.proto.YarnProtos.ResourceProto; import com.google.protobuf.TextFormat; +import java.util.Map; + @Private @Unstable public class ApplicationResourceUsageReportPBImpl @@ -41,6 +44,9 @@ Resource reservedResources; Resource neededResources; + private Map<String, Long> resourceSecondsMap; + private Map<String, Long> preemptedResourceSecondsMap; + public ApplicationResourceUsageReportPBImpl() { builder = ApplicationResourceUsageReportProto.newBuilder(); } @@ -49,6 +55,8 @@ public ApplicationResourceUsageReportPBImpl( ApplicationResourceUsageReportProto proto) { this.proto = proto; viaProto = true; + getResourceSecondsMap(); + getPreemptedResourceSecondsMap(); } public synchronized ApplicationResourceUsageReportProto getProto() { @@ -89,6 +97,23 @@ private void mergeLocalToBuilder() { if (this.neededResources != null) { builder.setNeededResources(convertToProtoFormat(this.neededResources)); } + builder.clearApplicationResourceUsageMap(); + builder.clearApplicationPreemptedResourceUsageMap(); + + if (preemptedResourceSecondsMap != null && !preemptedResourceSecondsMap + .isEmpty()) { + builder.addAllApplicationPreemptedResourceUsageMap(ProtoUtils + .convertMapToStringLongMapProtoList(preemptedResourceSecondsMap)); + } + if (resourceSecondsMap != null && !resourceSecondsMap.isEmpty()) { +
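+      /*
+       * Both usage maps travel on the wire as repeated StringLongMapProto
+       * entries via the two ProtoUtils helpers added in this patch. A small
+       * round-trip sketch (map contents hypothetical; note the reverse
+       * conversion seeds every known resource type with 0L first):
+       *
+       *   Map<String, Long> usage = new HashMap<>();
+       *   usage.put(ResourceInformation.MEMORY_MB.getName(), 1024L);
+       *   usage.put(ResourceInformation.VCORES.getName(), 8L);
+       *   List<YarnProtos.StringLongMapProto> wire =
+       *       ProtoUtils.convertMapToStringLongMapProtoList(usage);
+       *   Map<String, Long> back =
+       *       ProtoUtils.convertStringLongMapProtoListToMap(wire);
+       */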
builder.addAllApplicationResourceUsageMap( + ProtoUtils.convertMapToStringLongMapProtoList(resourceSecondsMap)); + } + + builder.setMemorySeconds(this.getMemorySeconds()); + builder.setVcoreSeconds(this.getVcoreSeconds()); + builder.setPreemptedMemorySeconds(this.getPreemptedMemorySeconds()); + builder.setPreemptedVcoreSeconds(this.getPreemptedVcoreSeconds()); } private void mergeLocalToProto() { @@ -196,54 +221,64 @@ public synchronized void setNeededResources(Resource reserved_resources) { @Override public synchronized void setMemorySeconds(long memory_seconds) { - maybeInitBuilder(); - builder.setMemorySeconds(memory_seconds); + getResourceSecondsMap() + .put(ResourceInformation.MEMORY_MB.getName(), memory_seconds); } - + @Override public synchronized long getMemorySeconds() { - ApplicationResourceUsageReportProtoOrBuilder p = viaProto ? proto : builder; - return p.getMemorySeconds(); + Map<String, Long> tmp = getResourceSecondsMap(); + if (tmp.containsKey(ResourceInformation.MEMORY_MB.getName())) { + return tmp.get(ResourceInformation.MEMORY_MB.getName()); + } + return 0; } @Override public synchronized void setVcoreSeconds(long vcore_seconds) { - maybeInitBuilder(); - builder.setVcoreSeconds(vcore_seconds); + getResourceSecondsMap() + .put(ResourceInformation.VCORES.getName(), vcore_seconds); } @Override public synchronized long getVcoreSeconds() { - ApplicationResourceUsageReportProtoOrBuilder p = viaProto ? proto : builder; - return (p.getVcoreSeconds()); + Map<String, Long> tmp = getResourceSecondsMap(); + if (tmp.containsKey(ResourceInformation.VCORES.getName())) { + return tmp.get(ResourceInformation.VCORES.getName()); + } + return 0; } @Override public synchronized void setPreemptedMemorySeconds( long preemptedMemorySeconds) { - maybeInitBuilder(); - builder.setPreemptedMemorySeconds(preemptedMemorySeconds); + getPreemptedResourceSecondsMap() + .put(ResourceInformation.MEMORY_MB.getName(), preemptedMemorySeconds); } @Override public synchronized long getPreemptedMemorySeconds() { - ApplicationResourceUsageReportProtoOrBuilder p = - viaProto ? proto : builder; - return p.getPreemptedMemorySeconds(); + Map<String, Long> tmp = getPreemptedResourceSecondsMap(); + if (tmp.containsKey(ResourceInformation.MEMORY_MB.getName())) { + return tmp.get(ResourceInformation.MEMORY_MB.getName()); + } + return 0; } @Override public synchronized void setPreemptedVcoreSeconds( long vcoreSeconds) { - maybeInitBuilder(); - builder.setPreemptedVcoreSeconds(vcoreSeconds); + getPreemptedResourceSecondsMap() + .put(ResourceInformation.VCORES.getName(), vcoreSeconds); } @Override public synchronized long getPreemptedVcoreSeconds() { - ApplicationResourceUsageReportProtoOrBuilder p = - viaProto ?
proto : builder; - return (p.getPreemptedVcoreSeconds()); + Map<String, Long> tmp = getPreemptedResourceSecondsMap(); + if (tmp.containsKey(ResourceInformation.VCORES.getName())) { + return tmp.get(ResourceInformation.VCORES.getName()); + } + return 0; } private ResourcePBImpl convertFromProtoFormat(ResourceProto p) { @@ -277,4 +312,81 @@ public synchronized void setClusterUsagePercentage(float clusterUsagePerc) { maybeInitBuilder(); builder.setClusterUsagePercentage((clusterUsagePerc)); } + + @Override + public synchronized void setResourceSecondsMap( + Map<String, Long> resourceSecondsMap) { + this.resourceSecondsMap = resourceSecondsMap; + if (resourceSecondsMap == null) { + return; + } + if (!resourceSecondsMap + .containsKey(ResourceInformation.MEMORY_MB.getName())) { + this.setMemorySeconds(0L); + } + if (!resourceSecondsMap.containsKey(ResourceInformation.VCORES.getName())) { + this.setVcoreSeconds(0L); + } + } + + @Override + public synchronized Map<String, Long> getResourceSecondsMap() { + if (this.resourceSecondsMap != null) { + return this.resourceSecondsMap; + } + ApplicationResourceUsageReportProtoOrBuilder p = viaProto ? proto : builder; + this.resourceSecondsMap = ProtoUtils + .convertStringLongMapProtoListToMap( + p.getApplicationResourceUsageMapList()); + if (!this.resourceSecondsMap + .containsKey(ResourceInformation.MEMORY_MB.getName())) { + this.setMemorySeconds(p.getMemorySeconds()); + } + if (!this.resourceSecondsMap + .containsKey(ResourceInformation.VCORES.getName())) { + this.setVcoreSeconds(p.getVcoreSeconds()); + } + this.setMemorySeconds(p.getMemorySeconds()); + this.setVcoreSeconds(p.getVcoreSeconds()); + return this.resourceSecondsMap; + } + + @Override + public synchronized void setPreemptedResourceSecondsMap( + Map<String, Long> preemptedResourceSecondsMap) { + this.preemptedResourceSecondsMap = preemptedResourceSecondsMap; + if (preemptedResourceSecondsMap == null) { + return; + } + if (!preemptedResourceSecondsMap + .containsKey(ResourceInformation.MEMORY_MB.getName())) { + this.setPreemptedMemorySeconds(0L); + } + if (!preemptedResourceSecondsMap + .containsKey(ResourceInformation.VCORES.getName())) { + this.setPreemptedVcoreSeconds(0L); + } + } + + @Override + public synchronized Map<String, Long> getPreemptedResourceSecondsMap() { + if (this.preemptedResourceSecondsMap != null) { + return this.preemptedResourceSecondsMap; + } + ApplicationResourceUsageReportProtoOrBuilder p = viaProto ?
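+    /*
+     * Compatibility note: when this report was built by an older RM that only
+     * populated the scalar *-seconds proto fields, the map assembled below is
+     * seeded from those fields, so callers can rely on the map alone, e.g.
+     * (a sketch; getOrDefault mirrors the containsKey checks used above):
+     *
+     *   long memSecs = report.getResourceSecondsMap()
+     *       .getOrDefault(ResourceInformation.MEMORY_MB.getName(), 0L);
+     */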
proto : builder; + this.preemptedResourceSecondsMap = ProtoUtils + .convertStringLongMapProtoListToMap( + p.getApplicationPreemptedResourceUsageMapList()); + if (!this.preemptedResourceSecondsMap + .containsKey(ResourceInformation.MEMORY_MB.getName())) { + this.setPreemptedMemorySeconds(p.getPreemptedMemorySeconds()); + } + if (!this.preemptedResourceSecondsMap + .containsKey(ResourceInformation.VCORES.getName())) { + this.setPreemptedVcoreSeconds(p.getPreemptedVcoreSeconds()); + } + this.setPreemptedMemorySeconds(p.getPreemptedMemorySeconds()); + this.setPreemptedVcoreSeconds(p.getPreemptedVcoreSeconds()); + return this.preemptedResourceSecondsMap; + } } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ProtoUtils.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ProtoUtils.java index df766d39472..8176068984f 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ProtoUtils.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ProtoUtils.java @@ -19,10 +19,15 @@ package org.apache.hadoop.yarn.api.records.impl.pb; import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.classification.InterfaceStability.Unstable; import org.apache.hadoop.yarn.api.protocolrecords.ApplicationsRequestScope; +import org.apache.hadoop.yarn.api.protocolrecords.ResourceTypes; import org.apache.hadoop.yarn.api.records.AMCommand; import org.apache.hadoop.yarn.api.records.ApplicationAccessType; import org.apache.hadoop.yarn.api.records.ApplicationResourceUsageReport; @@ -45,6 +50,7 @@ import org.apache.hadoop.yarn.api.records.QueueState; import org.apache.hadoop.yarn.api.records.ReservationRequestInterpreter; import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.api.records.ResourceInformation; import org.apache.hadoop.yarn.api.records.UpdateContainerError; import org.apache.hadoop.yarn.api.records.UpdateContainerRequest; import org.apache.hadoop.yarn.api.records.YarnApplicationAttemptState; @@ -73,6 +79,7 @@ import org.apache.hadoop.yarn.proto.YarnProtos.ContainerTypeProto; import org.apache.hadoop.yarn.proto.YarnProtos.ExecutionTypeProto; import org.apache.hadoop.yarn.proto.YarnProtos.ExecutionTypeRequestProto; +import org.apache.hadoop.yarn.proto.YarnProtos.ResourceTypesProto; import org.apache.hadoop.yarn.proto.YarnServiceProtos; import org.apache.hadoop.yarn.proto.YarnServiceProtos.ContainerUpdateTypeProto; import org.apache.hadoop.yarn.server.api.ContainerType; @@ -449,6 +456,45 @@ public static UpdateContainerErrorPBImpl convertFromProtoFormat( convertToProtoFormat(UpdateContainerError t) { return ((UpdateContainerErrorPBImpl) t).getProto(); } + + /* + * ResourceTypes + */ + public static ResourceTypesProto converToProtoFormat(ResourceTypes e) { + return ResourceTypesProto.valueOf(e.name()); + } + + public static ResourceTypes convertFromProtoFormat(ResourceTypesProto e) { + return ResourceTypes.valueOf(e.name()); + } + + public static Map<String, Long> convertStringLongMapProtoListToMap( + List<YarnProtos.StringLongMapProto> pList) { + Resource tmp = Resource.newInstance(0, 0); + Map<String, Long> ret = new HashMap<>(); + for (ResourceInformation entry : tmp.getResources()) { + ret.put(entry.getName(), 0L); + } + if
(pList != null) { + for (YarnProtos.StringLongMapProto p : pList) { + ret.put(p.getKey(), p.getValue()); + } + } + return ret; + } + + public static List<YarnProtos.StringLongMapProto> convertMapToStringLongMapProtoList( + Map<String, Long> map) { + List<YarnProtos.StringLongMapProto> ret = new ArrayList<>(); + for (Map.Entry<String, Long> entry : map.entrySet()) { + YarnProtos.StringLongMapProto.Builder tmp = + YarnProtos.StringLongMapProto.newBuilder(); + tmp.setKey(entry.getKey()); + tmp.setValue(entry.getValue()); + ret.add(tmp.build()); + } + return ret; + } } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ResourcePBImpl.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ResourcePBImpl.java index e6295bf4313..401e0c00d8c 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ResourcePBImpl.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ResourcePBImpl.java @@ -18,16 +18,29 @@ package org.apache.hadoop.yarn.api.records.impl.pb; - +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.classification.InterfaceStability.Unstable; -import org.apache.hadoop.yarn.api.records.*; +import org.apache.hadoop.yarn.api.protocolrecords.ResourceTypes; +import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.api.records.ResourceInformation; +import org.apache.hadoop.yarn.exceptions.ResourceNotFoundException; +import org.apache.hadoop.yarn.exceptions.YarnRuntimeException; import org.apache.hadoop.yarn.proto.YarnProtos.ResourceProto; import org.apache.hadoop.yarn.proto.YarnProtos.ResourceProtoOrBuilder; +import org.apache.hadoop.yarn.proto.YarnProtos.ResourceInformationProto; +import org.apache.hadoop.yarn.util.UnitsConversionUtil; +import org.apache.hadoop.yarn.util.resource.ResourceUtils; + +import java.util.Map; @Private @Unstable public class ResourcePBImpl extends Resource { + + private static final Log LOG = LogFactory.getLog(ResourcePBImpl.class); + ResourceProto proto = ResourceProto.getDefaultInstance(); ResourceProto.Builder builder = null; boolean viaProto = false; @@ -36,25 +49,31 @@ static ResourceProto getProto(Resource r) { final ResourcePBImpl pb; if (r instanceof ResourcePBImpl) { - pb = (ResourcePBImpl)r; + pb = (ResourcePBImpl) r; } else { pb = new ResourcePBImpl(); pb.setMemorySize(r.getMemorySize()); pb.setVirtualCores(r.getVirtualCores()); + for(ResourceInformation res : r.getResources()) { + pb.setResourceInformation(res.getName(), res); + } } return pb.getProto(); } public ResourcePBImpl() { builder = ResourceProto.newBuilder(); + initResources(); } public ResourcePBImpl(ResourceProto proto) { this.proto = proto; viaProto = true; + initResources(); } - + public ResourceProto getProto() { + mergeLocalToProto(); proto = viaProto ? proto : builder.build(); viaProto = true; return proto; @@ -75,8 +94,14 @@ public int getMemory() { @Override public long getMemorySize() { - ResourceProtoOrBuilder p = viaProto ?
proto : builder; - return p.getMemory(); + // memory should always be present + ResourceInformation ri = resources[MEMORY_INDEX]; + + if (ri.getUnits().isEmpty()) { + return ri.getValue(); + } + return UnitsConversionUtil.convert(ri.getUnits(), + ResourceInformation.MEMORY_MB.getUnits(), ri.getValue()); } @Override @@ -88,18 +113,131 @@ public void setMemory(int memory) { @Override public void setMemorySize(long memory) { maybeInitBuilder(); - builder.setMemory(memory); + resources[MEMORY_INDEX].setValue(memory); } @Override public int getVirtualCores() { - ResourceProtoOrBuilder p = viaProto ? proto : builder; - return p.getVirtualCores(); + // vcores should always be present + return castToIntSafely(resources[VCORES_INDEX].getValue()); } @Override public void setVirtualCores(int vCores) { maybeInitBuilder(); - builder.setVirtualCores(vCores); + resources[VCORES_INDEX].setValue(vCores); + } + + private void initResources() { + if (this.resources != null) { + return; + } + ResourceProtoOrBuilder p = viaProto ? proto : builder; + ResourceInformation[] types = ResourceUtils.getResourceTypesArray(); + Map<String, Integer> indexMap = ResourceUtils.getResourceTypeIndex(); + resources = new ResourceInformation[types.length]; + + for (ResourceInformationProto entry : p.getResourceValueMapList()) { + Integer index = indexMap.get(entry.getKey()); + if (index == null) { + LOG.warn("Got unknown resource type: " + entry.getKey() + "; skipping"); + } else { + resources[index] = newDefaultInformation(types[index], entry); + } + } + + resources[MEMORY_INDEX] = ResourceInformation + .newInstance(ResourceInformation.MEMORY_MB); + resources[VCORES_INDEX] = ResourceInformation + .newInstance(ResourceInformation.VCORES); + this.setMemorySize(p.getMemory()); + this.setVirtualCores(p.getVirtualCores()); + + // Update missing resource information on respective index. + updateResourceInformationMap(types); + } + + private void updateResourceInformationMap(ResourceInformation[] types) { + for (int i = 0; i < types.length; i++) { + if (resources[i] == null) { + resources[i] = ResourceInformation.newInstance(types[i]); + } + } + } + + private static ResourceInformation newDefaultInformation( + ResourceInformation resourceInformation, ResourceInformationProto entry) { + ResourceInformation ri = new ResourceInformation(); + ri.setName(resourceInformation.getName()); + ri.setMinimumAllocation(resourceInformation.getMinimumAllocation()); + ri.setMaximumAllocation(resourceInformation.getMaximumAllocation()); + ri.setResourceType(entry.hasType() + ? ProtoUtils.convertFromProtoFormat(entry.getType()) + : ResourceTypes.COUNTABLE); + ri.setUnits( + entry.hasUnits() ? entry.getUnits() : resourceInformation.getUnits()); + ri.setValue(entry.hasValue() ?
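+        /*
+         * Decoding rule: each ResourceInformationProto entry is matched by
+         * name against the cluster's configured resource types; unknown names
+         * are dropped with the warning in initResources(), and any configured
+         * type absent from the proto is back-filled with its default by
+         * updateResourceInformationMap(). So a hypothetical wire entry
+         * (key: "yarn.io/gpu", value: 2, type: COUNTABLE) only lands in
+         * resources[] if "yarn.io/gpu" is a known type on this cluster.
+         */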
entry.getValue() : 0L); + return ri; + } + + @Override + public void setResourceInformation(String resource, + ResourceInformation resourceInformation) { + maybeInitBuilder(); + if (resource == null || resourceInformation == null) { + throw new IllegalArgumentException( + "resource and/or resourceInformation cannot be null"); + } + ResourceInformation storedResourceInfo = super.getResourceInformation( + resource); + ResourceInformation.copy(resourceInformation, storedResourceInfo); + } + + @Override + public void setResourceValue(String resource, long value) + throws ResourceNotFoundException { + maybeInitBuilder(); + if (resource == null) { + throw new IllegalArgumentException("resource type object cannot be null"); + } + getResourceInformation(resource).setValue(value); + } + + @Override + public ResourceInformation getResourceInformation(String resource) + throws ResourceNotFoundException { + return super.getResourceInformation(resource); + } + + @Override + public long getResourceValue(String resource) + throws ResourceNotFoundException { + return super.getResourceValue(resource); + } + + synchronized private void mergeLocalToBuilder() { + builder.clearResourceValueMap(); + if (resources != null && resources.length != 0) { + for (ResourceInformation resInfo : resources) { + ResourceInformationProto.Builder e = ResourceInformationProto + .newBuilder(); + e.setKey(resInfo.getName()); + e.setUnits(resInfo.getUnits()); + e.setType(ProtoUtils.converToProtoFormat(resInfo.getResourceType())); + e.setValue(resInfo.getValue()); + builder.addResourceValueMap(e); + } + } + builder.setMemory(this.getMemorySize()); + builder.setVirtualCores(this.getVirtualCores()); + } + + private void mergeLocalToProto() { + if (viaProto) { + maybeInitBuilder(); + } + mergeLocalToBuilder(); + proto = builder.build(); + viaProto = true; } -} +} diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ResourceTypeInfoPBImpl.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ResourceTypeInfoPBImpl.java new file mode 100644 index 00000000000..17230e7dfa5 --- /dev/null +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ResourceTypeInfoPBImpl.java @@ -0,0 +1,152 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.yarn.api.records.impl.pb; + +import org.apache.hadoop.classification.InterfaceAudience.Private; +import org.apache.hadoop.classification.InterfaceStability.Unstable; +import org.apache.hadoop.yarn.api.protocolrecords.ResourceTypes; +import org.apache.hadoop.yarn.api.records.ResourceTypeInfo; +import org.apache.hadoop.yarn.proto.YarnProtos; +import org.apache.hadoop.yarn.proto.YarnProtos.ResourceTypeInfoProto; +import org.apache.hadoop.yarn.proto.YarnProtos.ResourceTypesProto; + +/** + * {@code ResourceTypeInfoPBImpl} which implements the + * {@link ResourceTypeInfo} class which represents different resource types + * supported in YARN. + */ +@Private +@Unstable +public class ResourceTypeInfoPBImpl extends ResourceTypeInfo { + + ResourceTypeInfoProto proto = ResourceTypeInfoProto.getDefaultInstance(); + ResourceTypeInfoProto.Builder builder = null; + boolean viaProto = false; + + private String name = null; + private String defaultUnit = null; + private ResourceTypes resourceTypes = null; + + public ResourceTypeInfoPBImpl() { + builder = ResourceTypeInfoProto.newBuilder(); + } + + public ResourceTypeInfoPBImpl(ResourceTypeInfoProto proto) { + this.proto = proto; + viaProto = true; + } + + public ResourceTypeInfoProto getProto() { + mergeLocalToProto(); + return proto; + } + + private void mergeLocalToProto() { + if (viaProto) { + maybeInitBuilder(); + } + mergeLocalToBuilder(); + proto = builder.build(); + viaProto = true; + } + + private void mergeLocalToBuilder() { + if (this.name != null) { + builder.setName(this.name); + } + if (this.defaultUnit != null) { + builder.setUnits(this.defaultUnit); + } + if (this.resourceTypes != null) { + builder.setType(convertToProtoFormat(this.resourceTypes)); + } + } + + private void maybeInitBuilder() { + if (viaProto || builder == null) { + builder = YarnProtos.ResourceTypeInfoProto.newBuilder(proto); + } + viaProto = false; + } + + @Override + public String getName() { + if (this.name != null) { + return this.name; + } + + YarnProtos.ResourceTypeInfoProtoOrBuilder p = viaProto ? proto : builder; + return p.getName(); + } + + @Override + public void setName(String rName) { + maybeInitBuilder(); + if (rName == null) { + builder.clearName(); + } + this.name = rName; + } + + @Override + public String getDefaultUnit() { + if (this.defaultUnit != null) { + return this.defaultUnit; + } + + YarnProtos.ResourceTypeInfoProtoOrBuilder p = viaProto ? proto : builder; + return p.getUnits(); + } + + @Override + public void setDefaultUnit(String rUnits) { + maybeInitBuilder(); + if (rUnits == null) { + builder.clearUnits(); + } + this.defaultUnit = rUnits; + } + + @Override + public ResourceTypes getResourceType() { + if (this.resourceTypes != null) { + return this.resourceTypes; + } + + YarnProtos.ResourceTypeInfoProtoOrBuilder p = viaProto ? 
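+    /*
+     * A minimal usage sketch for this record (the resource name is
+     * hypothetical; COUNTABLE comes from the ResourceTypes enum):
+     *
+     *   ResourceTypeInfoPBImpl info = new ResourceTypeInfoPBImpl();
+     *   info.setName("yarn.io/gpu");
+     *   info.setDefaultUnit("");
+     *   info.setResourceType(ResourceTypes.COUNTABLE);
+     *   ResourceTypeInfoProto wire = info.getProto();
+     */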
proto : builder; + return convertFromProtoFormat(p.getType()); + } + + @Override + public void setResourceType(ResourceTypes type) { + maybeInitBuilder(); + if (type == null) { + builder.clearType(); + } + this.resourceTypes = type; + } + + public static ResourceTypesProto convertToProtoFormat(ResourceTypes e) { + return ResourceTypesProto.valueOf(e.name()); + } + + public static ResourceTypes convertFromProtoFormat(ResourceTypesProto e) { + return ResourceTypes.valueOf(e.name()); + } +} diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/StringHelper.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/StringHelper.java index 0240fbcd59f..01e8951abb4 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/StringHelper.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/StringHelper.java @@ -20,9 +20,15 @@ import com.google.common.base.Joiner; import com.google.common.base.Splitter; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; import java.util.regex.Pattern; import org.apache.hadoop.classification.InterfaceAudience.Private; +import org.apache.hadoop.yarn.api.records.ResourceInformation; +import org.apache.hadoop.yarn.util.resource.ResourceUtils; /** * Common string manipulation helpers @@ -174,4 +180,34 @@ private static void uappend(StringBuilder sb, String part) { } sb.append(part); } + + public static String getResourceSecondsString(Map<String, Long> targetMap) { + List<String> strings = new ArrayList<>(targetMap.size()); + // completed app report in the timeline server doesn't have usage report + Long memorySeconds = 0L; + Long vcoreSeconds = 0L; + if (targetMap.containsKey(ResourceInformation.MEMORY_MB.getName())) { + memorySeconds = targetMap.get(ResourceInformation.MEMORY_MB.getName()); + } + if (targetMap.containsKey(ResourceInformation.VCORES.getName())) { + vcoreSeconds = targetMap.get(ResourceInformation.VCORES.getName()); + } + strings.add(memorySeconds + " MB-seconds"); + strings.add(vcoreSeconds + " vcore-seconds"); + Map<String, ResourceInformation> tmp = ResourceUtils.getResourceTypes(); + if (targetMap.size() > 2) { + for (Map.Entry<String, Long> entry : targetMap.entrySet()) { + if (!entry.getKey().equals(ResourceInformation.MEMORY_MB.getName()) + && !entry.getKey().equals(ResourceInformation.VCORES.getName())) { + String units = ""; + if (tmp.containsKey(entry.getKey())) { + units = tmp.get(entry.getKey()).getUnits(); + } + strings.add(entry.getValue() + " " + entry.getKey() + "-" + units + + "seconds"); + } + } + } + return Joiner.on(", ").join(strings); + } } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/DominantResourceCalculator.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/DominantResourceCalculator.java index 7697e1dfc33..d64f03ec33b 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/DominantResourceCalculator.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/DominantResourceCalculator.java @@ -22,136 +22,395 @@ import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.classification.InterfaceStability.Unstable; import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.api.records.ResourceInformation;
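+/*
+ * For reference, getResourceSecondsString in the StringHelper hunk above
+ * renders a usage map such as {memory-mb=2048, vcores=16, yarn.io/gpu=4}
+ * (hypothetical values) as "2048 MB-seconds, 16 vcore-seconds,
+ * 4 yarn.io/gpu-seconds", splicing a unit string in before "seconds" for
+ * any resource type that declares one.
+ */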
+import org.apache.hadoop.yarn.exceptions.YarnRuntimeException; +import org.apache.hadoop.yarn.util.UnitsConversionUtil; + +import java.io.PrintWriter; +import java.io.StringWriter; +import java.util.Arrays; /** - * A {@link ResourceCalculator} which uses the concept of + * A {@link ResourceCalculator} which uses the concept of * dominant resource to compare multi-dimensional resources. * - * Essentially the idea is that the in a multi-resource environment, - * the resource allocation should be determined by the dominant share - * of an entity (user or queue), which is the maximum share that the - * entity has been allocated of any resource. - * - * In a nutshell, it seeks to maximize the minimum dominant share across - * all entities. - * + * Essentially the idea is that in a multi-resource environment, + * the resource allocation should be determined by the dominant share + * of an entity (user or queue), which is the maximum share that the + * entity has been allocated of any resource. + * + * In a nutshell, it seeks to maximize the minimum dominant share across + * all entities. + * * For example, if user A runs CPU-heavy tasks and user B runs - * memory-heavy tasks, it attempts to equalize CPU share of user A - * with Memory-share of user B. - * + * memory-heavy tasks, it attempts to equalize CPU share of user A + * with Memory-share of user B. + * * In the single resource case, it reduces to max-min fairness for that resource. - * + * * See the Dominant Resource Fairness paper for more details: * www.cs.berkeley.edu/~matei/papers/2011/nsdi_drf.pdf */ @Private @Unstable public class DominantResourceCalculator extends ResourceCalculator { - private static final Log LOG = - LogFactory.getLog(DominantResourceCalculator.class); + static final Log LOG = LogFactory.getLog(DominantResourceCalculator.class); + + public DominantResourceCalculator() { + } + + /** + * Compare two resources - if the value for every resource type for the lhs + * is greater than that of the rhs, return 1. If the value for every resource + * type in the lhs is less than the rhs, return -1.
Otherwise, return 0 + * + * @param lhs resource to be compared + * @param rhs resource to be compared + * @return 0, 1, or -1 + */ + private int compare(Resource lhs, Resource rhs) { + boolean lhsGreater = false; + boolean rhsGreater = false; + int ret = 0; + + int maxLength = ResourceUtils.getNumberOfKnownResourceTypes(); + for (int i = 0; i < maxLength; i++) { + ResourceInformation lhsResourceInformation = lhs + .getResourceInformation(i); + ResourceInformation rhsResourceInformation = rhs + .getResourceInformation(i); + int diff = lhsResourceInformation.compareTo(rhsResourceInformation); + if (diff >= 1) { + lhsGreater = true; + } else if (diff <= -1) { + rhsGreater = true; + } + } + if (lhsGreater && rhsGreater) { + ret = 0; + } else if (lhsGreater) { + ret = 1; + } else if (rhsGreater) { + ret = -1; + } + return ret; + } @Override public int compare(Resource clusterResource, Resource lhs, Resource rhs, boolean singleType) { - if (lhs.equals(rhs)) { return 0; } - + if (isInvalidDivisor(clusterResource)) { - if ((lhs.getMemorySize() < rhs.getMemorySize() && - lhs.getVirtualCores() > rhs.getVirtualCores()) || - (lhs.getMemorySize() > rhs.getMemorySize() && - lhs.getVirtualCores() < rhs.getVirtualCores())) { - return 0; - } else if (lhs.getMemorySize() > rhs.getMemorySize() - || lhs.getVirtualCores() > rhs.getVirtualCores()) { - return 1; - } else if (lhs.getMemorySize() < rhs.getMemorySize() - || lhs.getVirtualCores() < rhs.getVirtualCores()) { - return -1; + return this.compare(lhs, rhs); + } + + // We have to calculate the shares for all resource types for both + // resources and then look for which resource has the biggest + // share overall. + ResourceInformation[] clusterRes = clusterResource.getResources(); + int maxLength = ResourceUtils.getNumberOfKnownResourceTypes(); + + // If array creation shows up as a time sink, these arrays could be cached + // because they're always the same length. + double[] lhsShares = new double[maxLength]; + double[] rhsShares = new double[maxLength]; + double diff; + + try { + if (singleType) { + double[] max = new double[2]; + + calculateShares(clusterRes, lhs, rhs, lhsShares, rhsShares, max); + + diff = max[0] - max[1]; + } else if (maxLength == 2) { + // Special case to handle the common scenario of only CPU and memory + // so that we can optimize for performance + diff = calculateSharesForTwoMandatoryResources(clusterRes, lhs, rhs, + lhsShares, rhsShares); + } else { + calculateShares(clusterRes, lhs, rhs, lhsShares, rhsShares); + + Arrays.sort(lhsShares); + Arrays.sort(rhsShares); + + diff = compareShares(lhsShares, rhsShares); } + } catch (ArrayIndexOutOfBoundsException ex) { + StringWriter out = new StringWriter(); // No need to close a StringWriter + ex.printStackTrace(new PrintWriter(out)); + + LOG.error("A problem was encountered while calculating resource " + + "availability that should not occur under normal circumstances. " + + "Please report this error to the Hadoop community by opening a " + + "JIRA ticket at http://issues.apache.org/jira and including the " + + "following information:\n* Exception encountered: " + out + "* " + + "Cluster resources: " + Arrays.toString(clusterRes) + "\n* " + + "LHS resource: " + Arrays.toString(lhs.getResources()) + "\n* " + + "RHS resource: " + Arrays.toString(rhs.getResources())); + LOG.error("The resource manager is in an inconsistent state. It is safe " + + "for the resource manager to be restarted as the error encountered " + + "should be transitive. 
If high availability is enabled, failing " + + "over to a standby resource manager is also safe."); + throw new YarnRuntimeException("A problem was encountered while " + + "calculating resource availability that should not occur under " + + "normal circumstances. Please see the log for more information.", + ex); } - float l = getResourceAsValue(clusterResource, lhs, true); - float r = getResourceAsValue(clusterResource, rhs, true); - - if (l < r) { + return (int) Math.signum(diff); + } + + /** + * Calculate the shares for {@code first} and {@code second} according to + * {@code clusterRes}, and store the results in {@code firstShares} and + * {@code secondShares}, respectively. All parameters must be non-null. + * @param clusterRes the array of ResourceInformation instances that + * represents the cluster's maximum resources + * @param first the first resource to compare + * @param second the second resource to compare + * @param firstShares an array to store the shares for the first resource + * @param secondShares an array to store the shares for the second resource + * @return -1.0, 0.0, or 1.0, depending on whether the max share of the first + * resource is less than, equal to, or greater than the max share of the + * second resource, respectively + * @throws NullPointerException if any parameter is null + */ + private void calculateShares(ResourceInformation[] clusterRes, Resource first, + Resource second, double[] firstShares, double[] secondShares) { + ResourceInformation[] firstRes = first.getResources(); + ResourceInformation[] secondRes = second.getResources(); + + int maxLength = ResourceUtils.getNumberOfKnownResourceTypes(); + for (int i = 0; i < maxLength; i++) { + firstShares[i] = calculateShare(clusterRes[i], firstRes[i]); + secondShares[i] = calculateShare(clusterRes[i], secondRes[i]); + } + } + + /** + * Calculate the shares for {@code first} and {@code second} according to + * {@code clusterRes}, and store the results in {@code firstShares} and + * {@code secondShares}, respectively. All parameters must be non-null. + * This method assumes that the length of {@code clusterRes} is exactly 2 and + * makes performance optimizations based on that assumption. 
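+   * <p>A worked example with hypothetical numbers: against a cluster of
+   * (8192 MB, 16 vcores), a request of (2048 MB, 12 vcores) has shares
+   * (0.25, 0.75) and therefore a dominant share of 0.75, while a request of
+   * (4096 MB, 4 vcores) has shares (0.5, 0.25) and a dominant share of 0.5,
+   * so the first request compares as larger.</p>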
+ * @param clusterRes the array of ResourceInformation instances that + * represents the cluster's maximum resources + * @param first the first resource to compare + * @param second the second resource to compare + * @param firstShares an array to store the shares for the first resource + * @param secondShares an array to store the shares for the second resource + * @return -1.0, 0.0, or 1.0, depending on whether the max share of the first + * resource is less than, equal to, or greater than the max share of the + * second resource, respectively + * @throws NullPointerException if any parameter is null + */ + private int calculateSharesForTwoMandatoryResources( + ResourceInformation[] clusterRes, Resource first, Resource second, + double[] firstShares, double[] secondShares) { + ResourceInformation[] firstRes = first.getResources(); + ResourceInformation[] secondRes = second.getResources(); + firstShares[0] = calculateShare(clusterRes[0], firstRes[0]); + secondShares[0] = calculateShare(clusterRes[0], secondRes[0]); + firstShares[1] = calculateShare(clusterRes[1], firstRes[1]); + secondShares[1] = calculateShare(clusterRes[1], secondRes[1]); + + int firstDom = 0; + int firstSub = 1; + if (firstShares[1] > firstShares[0]) { + firstDom = 1; + firstSub = 0; + } + int secondDom = 0; + int secondSub = 1; + if (secondShares[1] > secondShares[0]) { + secondDom = 1; + secondSub = 0; + } + + if (firstShares[firstDom] > secondShares[secondDom]) { + return 1; + } else if (firstShares[firstDom] < secondShares[secondDom]) { return -1; - } else if (l > r) { + } else if (firstShares[firstSub] > secondShares[secondSub]) { return 1; - } else if (!singleType) { - l = getResourceAsValue(clusterResource, lhs, false); - r = getResourceAsValue(clusterResource, rhs, false); - if (l < r) { - return -1; - } else if (l > r) { - return 1; + } else if (firstShares[firstSub] < secondShares[secondSub]) { + return -1; + } else { + return 0; + } + } + + /** + * Calculate the shares for {@code first} and {@code second} according to + * {@code clusterRes}, and store the results in {@code firstShares} and + * {@code secondShares}, respectively. {@code max} will be populated with + * the max shares from {@code firstShare} and {@code secondShare} in the + * first and second indices, respectively. All parameters must be non-null, + * and {@code max} must have a length of at least 2. 
+ * @param clusterRes the array of ResourceInformation instances that + * represents the cluster's maximum resources + * @param first the first resource to compare + * @param second the second resource to compare + * @param firstShares an array to store the shares for the first resource + * @param secondShares an array to store the shares for the second resource + * @param max an array to store the max shares of the first and second + * resources + * @return -1.0, 0.0, or 1.0, depending on whether the max share of the first + * resource is less than, equal to, or greater than the max share of the + * second resource, respectively + * @throws NullPointerException if any parameter is null + * @throws ArrayIndexOutOfBoundsException if the length of {@code max} is + * less than 2 + */ + private void calculateShares(ResourceInformation[] clusterRes, Resource first, + Resource second, double[] firstShares, double[] secondShares, + double[] max) { + ResourceInformation[] firstRes = first.getResources(); + ResourceInformation[] secondRes = second.getResources(); + + max[0] = 0.0; + max[1] = 0.0; + + int maxLength = ResourceUtils.getNumberOfKnownResourceTypes(); + for (int i = 0; i < maxLength; i++) { + firstShares[i] = calculateShare(clusterRes[i], firstRes[i]); + secondShares[i] = calculateShare(clusterRes[i], secondRes[i]); + + if (firstShares[i] > max[0]) { + max[0] = firstShares[i]; + } + + if (secondShares[i] > max[1]) { + max[1] = secondShares[i]; } } - - return 0; } /** - * Use 'dominant' for now since we only have 2 resources - gives us a slight - * performance boost. - * - * Once we add more resources, we'll need a more complicated (and slightly - * less performant algorithm). + * Calculate the share for a resource type. + * @param clusterRes the resource type for the cluster maximum + * @param res the resource type for which to calculate the share + * @return the share + */ + private double calculateShare(ResourceInformation clusterRes, + ResourceInformation res) { + // Convert the resources' units into the cluster resource's units + long value = UnitsConversionUtil.convert(res.getUnits(), + clusterRes.getUnits(), res.getValue()); + + return (double) value / clusterRes.getValue(); + } + + /** + * Compare the two shares arrays by comparing the largest elements, then the + * next largest if the previous were equal, etc. The share arrays must be + * sorted in ascending order. + * @param lhsShares the first share array to compare + * @param rhsShares the second share array to compare + * @return a number that is less than 0 if the first array is less than the + * second, equal to 0 if the arrays are equal, and greater than 0 if the + * first array is greater than the second */ - protected float getResourceAsValue( - Resource clusterResource, Resource resource, boolean dominant) { - // Just use 'dominant' resource - return (dominant) ? - Math.max( - (float)resource.getMemorySize() / clusterResource.getMemorySize(), - (float)resource.getVirtualCores() / clusterResource.getVirtualCores() - ) - : - Math.min( - (float)resource.getMemorySize() / clusterResource.getMemorySize(), - (float)resource.getVirtualCores() / clusterResource.getVirtualCores() - ); - } - + private double compareShares(double[] lhsShares, double[] rhsShares) { + double diff = 0.0; + + // lhsShares and rhsShares must necessarily have the same length, because + // everyone uses the same master resource list. 
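+    // Worked example with hypothetical shares: lhs = [0.1, 0.3, 0.8] and
+    // rhs = [0.2, 0.3, 0.8], both sorted ascending. The largest and
+    // second-largest entries tie, so the loop below falls through to index 0,
+    // where 0.1 - 0.2 < 0 marks the lhs as the smaller allocation.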
+ for (int i = lhsShares.length - 1; i >= 0; i--) { + diff = lhsShares[i] - rhsShares[i]; + + if (diff != 0.0) { + break; + } + } + + return diff; + } + @Override - public long computeAvailableContainers(Resource available, Resource required) { - return Math.min( - available.getMemorySize() / required.getMemorySize(), - available.getVirtualCores() / required.getVirtualCores()); + public long computeAvailableContainers(Resource available, + Resource required) { + long min = Long.MAX_VALUE; + int maxLength = ResourceUtils.getNumberOfKnownResourceTypes(); + for (int i = 0; i < maxLength; i++) { + ResourceInformation availableResource = available + .getResourceInformation(i); + ResourceInformation requiredResource = required.getResourceInformation(i); + long requiredResourceValue = UnitsConversionUtil.convert( + requiredResource.getUnits(), availableResource.getUnits(), + requiredResource.getValue()); + if (requiredResourceValue != 0) { + long tmp = availableResource.getValue() / requiredResourceValue; + min = min < tmp ? min : tmp; + } + } + return min > Integer.MAX_VALUE ? Integer.MAX_VALUE : (int) min; } @Override - public float divide(Resource clusterResource, + public float divide(Resource clusterResource, Resource numerator, Resource denominator) { - return - getResourceAsValue(clusterResource, numerator, true) / - getResourceAsValue(clusterResource, denominator, true); + int nKnownResourceTypes = ResourceUtils.getNumberOfKnownResourceTypes(); + ResourceInformation[] clusterRes = clusterResource.getResources(); + // We have to provide the calculateShares() method with somewhere to store + // the shares. We don't actually need these shares afterwards. + double[] numeratorShares = new double[nKnownResourceTypes]; + double[] denominatorShares = new double[nKnownResourceTypes]; + // We also have to provide a place for calculateShares() to store the max + // shares so that we can use them. + double[] max = new double[2]; + + calculateShares(clusterRes, numerator, denominator, numeratorShares, + denominatorShares, max); + + return (float) (max[0] / max[1]); } - + @Override public boolean isInvalidDivisor(Resource r) { - if (r.getMemorySize() == 0.0f || r.getVirtualCores() == 0.0f) { - return true; + for (ResourceInformation res : r.getResources()) { + if (res.getValue() == 0L) { + return true; + } } return false; } @Override public float ratio(Resource a, Resource b) { - return Math.max( - (float)a.getMemorySize()/b.getMemorySize(), - (float)a.getVirtualCores()/b.getVirtualCores() - ); + float ratio = 0.0f; + int maxLength = ResourceUtils.getNumberOfKnownResourceTypes(); + for (int i = 0; i < maxLength; i++) { + ResourceInformation aResourceInformation = a.getResourceInformation(i); + ResourceInformation bResourceInformation = b.getResourceInformation(i); + long bResourceValue = UnitsConversionUtil.convert( + bResourceInformation.getUnits(), aResourceInformation.getUnits(), + bResourceInformation.getValue()); + float tmp = (float) aResourceInformation.getValue() + / (float) bResourceValue; + ratio = ratio > tmp ? 
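+      /*
+       * ratio() stays the max over all resource types of a/b, now with a unit
+       * conversion on b's value. E.g. (hypothetical): a = (4096 MB, 4 vcores)
+       * and b = (8192 MB, 2 vcores) gives max(0.5, 2.0) = 2.0.
+       */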
ratio : tmp; + } + return ratio; } @Override public Resource divideAndCeil(Resource numerator, int denominator) { - return Resources.createResource( - divideAndCeil(numerator.getMemorySize(), denominator), - divideAndCeil(numerator.getVirtualCores(), denominator) - ); + return divideAndCeil(numerator, (long) denominator); + } + + public Resource divideAndCeil(Resource numerator, long denominator) { + Resource ret = Resource.newInstance(numerator); + int maxLength = ResourceUtils.getNumberOfKnownResourceTypes(); + for (int i = 0; i < maxLength; i++) { + ResourceInformation resourceInformation = ret.getResourceInformation(i); + resourceInformation + .setValue(divideAndCeil(resourceInformation.getValue(), denominator)); + } + return ret; } @Override @@ -164,80 +423,136 @@ public Resource divideAndCeil(Resource numerator, float denominator) { @Override public Resource normalize(Resource r, Resource minimumResource, - Resource maximumResource, Resource stepFactor) { - if (stepFactor.getMemorySize() == 0 || stepFactor.getVirtualCores() == 0) { - Resource step = Resources.clone(stepFactor); - if (stepFactor.getMemorySize() == 0) { - LOG.error("Memory cannot be allocated in increments of zero. Assuming " - + minimumResource.getMemorySize() + "MB increment size. " - + "Please ensure the scheduler configuration is correct."); - step.setMemorySize(minimumResource.getMemorySize()); - } + Resource maximumResource, Resource stepFactor) { + Resource ret = Resource.newInstance(r); + int maxLength = ResourceUtils.getNumberOfKnownResourceTypes(); + for (int i = 0; i < maxLength; i++) { + ResourceInformation rResourceInformation = r.getResourceInformation(i); + ResourceInformation minimumResourceInformation = minimumResource + .getResourceInformation(i); + ResourceInformation maximumResourceInformation = maximumResource + .getResourceInformation(i); + ResourceInformation stepFactorResourceInformation = stepFactor + .getResourceInformation(i); + ResourceInformation tmp = ret.getResourceInformation(i); - if (stepFactor.getVirtualCores() == 0) { - LOG.error("VCore cannot be allocated in increments of zero. Assuming " - + minimumResource.getVirtualCores() + "VCores increment size. 
" - + "Please ensure the scheduler configuration is correct."); - step.setVirtualCores(minimumResource.getVirtualCores()); + long rValue = rResourceInformation.getValue(); + long minimumValue = UnitsConversionUtil.convert( + minimumResourceInformation.getUnits(), + rResourceInformation.getUnits(), + minimumResourceInformation.getValue()); + long maximumValue = UnitsConversionUtil.convert( + maximumResourceInformation.getUnits(), + rResourceInformation.getUnits(), + maximumResourceInformation.getValue()); + long stepFactorValue = UnitsConversionUtil.convert( + stepFactorResourceInformation.getUnits(), + rResourceInformation.getUnits(), + stepFactorResourceInformation.getValue()); + long value = Math.max(rValue, minimumValue); + if (stepFactorValue != 0) { + value = roundUp(value, stepFactorValue); } - - stepFactor = step; + tmp.setValue(Math.min(value, maximumValue)); + ret.setResourceInformation(i, tmp); } - - long normalizedMemory = Math.min( - roundUp( - Math.max(r.getMemorySize(), minimumResource.getMemorySize()), - stepFactor.getMemorySize()), - maximumResource.getMemorySize()); - int normalizedCores = Math.min( - roundUp( - Math.max(r.getVirtualCores(), minimumResource.getVirtualCores()), - stepFactor.getVirtualCores()), - maximumResource.getVirtualCores()); - return Resources.createResource(normalizedMemory, - normalizedCores); + return ret; } @Override public Resource roundUp(Resource r, Resource stepFactor) { - return Resources.createResource( - roundUp(r.getMemorySize(), stepFactor.getMemorySize()), - roundUp(r.getVirtualCores(), stepFactor.getVirtualCores()) - ); + return this.rounding(r, stepFactor, true); } @Override public Resource roundDown(Resource r, Resource stepFactor) { - return Resources.createResource( - roundDown(r.getMemorySize(), stepFactor.getMemorySize()), - roundDown(r.getVirtualCores(), stepFactor.getVirtualCores()) - ); + return this.rounding(r, stepFactor, false); + } + + private Resource rounding(Resource r, Resource stepFactor, boolean roundUp) { + Resource ret = Resource.newInstance(r); + int maxLength = ResourceUtils.getNumberOfKnownResourceTypes(); + for (int i = 0; i < maxLength; i++) { + ResourceInformation rResourceInformation = r.getResourceInformation(i); + ResourceInformation stepFactorResourceInformation = stepFactor + .getResourceInformation(i); + + long rValue = rResourceInformation.getValue(); + long stepFactorValue = UnitsConversionUtil.convert( + stepFactorResourceInformation.getUnits(), + rResourceInformation.getUnits(), + stepFactorResourceInformation.getValue()); + long value = rValue; + if (stepFactorValue != 0) { + value = roundUp + ? 
roundUp(rValue, stepFactorValue) + : roundDown(rValue, stepFactorValue); + } + ResourceInformation.copy(rResourceInformation, + ret.getResourceInformation(i)); + ret.getResourceInformation(i).setValue(value); + } + return ret; } @Override public Resource multiplyAndNormalizeUp(Resource r, double by, Resource stepFactor) { - return Resources.createResource( - roundUp((long) Math.ceil((float) (r.getMemorySize() * by)), - stepFactor.getMemorySize()), - roundUp((int) Math.ceil((float) (r.getVirtualCores() * by)), - stepFactor.getVirtualCores())); + return this.multiplyAndNormalize(r, by, stepFactor, true); } @Override public Resource multiplyAndNormalizeDown(Resource r, double by, Resource stepFactor) { - return Resources.createResource( - roundDown((long) (r.getMemorySize() * by), stepFactor.getMemorySize()), - roundDown((int) (r.getVirtualCores() * by), - stepFactor.getVirtualCores())); + return this.multiplyAndNormalize(r, by, stepFactor, false); + } + + private Resource multiplyAndNormalize(Resource r, double by, + Resource stepFactor, boolean roundUp) { + Resource ret = Resource.newInstance(r); + int maxLength = ResourceUtils.getNumberOfKnownResourceTypes(); + for (int i = 0; i < maxLength; i++) { + ResourceInformation rResourceInformation = r.getResourceInformation(i); + ResourceInformation stepFactorResourceInformation = stepFactor + .getResourceInformation(i); + ResourceInformation tmp = ret.getResourceInformation(i); + + long rValue = rResourceInformation.getValue(); + long stepFactorValue = UnitsConversionUtil.convert( + stepFactorResourceInformation.getUnits(), + rResourceInformation.getUnits(), + stepFactorResourceInformation.getValue()); + long value; + if (stepFactorValue != 0) { + value = roundUp + ? roundUp((long) Math.ceil((float) (rValue * by)), stepFactorValue) + : roundDown((long) (rValue * by), stepFactorValue); + } else { + value = roundUp + ? 
(long) Math.ceil((float) (rValue * by)) + : (long) (rValue * by); + } + tmp.setValue(value); + } + return ret; } @Override - public boolean fitsIn(Resource cluster, - Resource smaller, Resource bigger) { - return smaller.getMemorySize() <= bigger.getMemorySize() - && smaller.getVirtualCores() <= bigger.getVirtualCores(); + public boolean fitsIn(Resource cluster, Resource smaller, Resource bigger) { + int maxLength = ResourceUtils.getNumberOfKnownResourceTypes(); + for (int i = 0; i < maxLength; i++) { + ResourceInformation sResourceInformation = smaller + .getResourceInformation(i); + ResourceInformation bResourceInformation = bigger + .getResourceInformation(i); + long sResourceValue = UnitsConversionUtil.convert( + sResourceInformation.getUnits(), bResourceInformation.getUnits(), + sResourceInformation.getValue()); + if (sResourceValue > bResourceInformation.getValue()) { + return false; + } + } + return true; } @Override diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/Resources.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/Resources.java index 932fb821f4b..325bce465b0 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/Resources.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/Resources.java @@ -18,104 +18,124 @@ package org.apache.hadoop.yarn.util.resource; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability.Unstable; -import org.apache.hadoop.yarn.api.records.*; +import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.api.records.ResourceInformation; +import org.apache.hadoop.yarn.exceptions.ResourceNotFoundException; +import org.apache.hadoop.yarn.util.UnitsConversionUtil; -@InterfaceAudience.LimitedPrivate({"YARN", "MapReduce"}) +/** + * Resources is a computation class which provides a set of apis to do + * mathematical operations on Resource object. + */ +@InterfaceAudience.LimitedPrivate({ "YARN", "MapReduce" }) @Unstable public class Resources { - - // Java doesn't have const :( - private static final Resource NONE = new Resource() { - @Override - @SuppressWarnings("deprecation") - public int getMemory() { - return 0; - } + private static final Log LOG = + LogFactory.getLog(Resources.class); - @Override - public long getMemorySize() { - return 0; + /** + * Helper class to create a resource with a fixed value for all resource + * types. For example, a NONE resource which returns 0 for any resource type. + */ + @InterfaceAudience.Private + @Unstable + static class FixedValueResource extends Resource { + + private final long resourceValue; + private String name; + + /** + * Constructor for a fixed value resource. 
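+     * <p>For example, {@code new FixedValueResource("NONE", 0)} reports 0 for
+     * every known resource type and rejects all mutation, standing in for the
+     * hand-written anonymous NONE and UNBOUNDED Resource subclasses that this
+     * hunk removes.</p>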
+     * @param rName the name of the resource
+     * @param value the fixed value to be returned for all resource types
+     */
+    FixedValueResource(String rName, long value) {
+      this.resourceValue = value;
+      this.name = rName;
+      initResourceMap();
     }
 
-    @Override
-    public void setMemorySize(long memory) {
-      throw new RuntimeException("NONE cannot be modified!");
+    private int resourceValueToInt() {
+      if (this.resourceValue > Integer.MAX_VALUE) {
+        return Integer.MAX_VALUE;
+      }
+      return Long.valueOf(this.resourceValue).intValue();
     }
 
     @Override
     @SuppressWarnings("deprecation")
-    public void setMemory(int memory) {
-      throw new RuntimeException("NONE cannot be modified!");
+    public int getMemory() {
+      return resourceValueToInt();
     }
 
     @Override
-    public int getVirtualCores() {
-      return 0;
+    public long getMemorySize() {
+      return this.resourceValue;
     }
 
     @Override
-    public void setVirtualCores(int cores) {
-      throw new RuntimeException("NONE cannot be modified!");
+    @SuppressWarnings("deprecation")
+    public void setMemory(int memory) {
+      throw new RuntimeException(name + " cannot be modified!");
     }
 
     @Override
-    public int compareTo(Resource o) {
-      long diff = 0 - o.getMemorySize();
-      if (diff == 0) {
-        diff = 0 - o.getVirtualCores();
-      }
-      return Long.signum(diff);
+    public void setMemorySize(long memory) {
+      throw new RuntimeException(name + " cannot be modified!");
     }
-
-  };
-
-  private static final Resource UNBOUNDED = new Resource() {
 
     @Override
-    @SuppressWarnings("deprecation")
-    public int getMemory() {
-      return Integer.MAX_VALUE;
+    public int getVirtualCores() {
+      return resourceValueToInt();
     }
 
     @Override
-    public long getMemorySize() {
-      return Long.MAX_VALUE;
+    public void setVirtualCores(int virtualCores) {
+      throw new RuntimeException(name + " cannot be modified!");
     }
 
     @Override
-    @SuppressWarnings("deprecation")
-    public void setMemory(int memory) {
-      throw new RuntimeException("UNBOUNDED cannot be modified!");
+    public void setResourceInformation(int index,
+        ResourceInformation resourceInformation)
+        throws ResourceNotFoundException {
+      throw new RuntimeException(name + " cannot be modified!");
     }
 
     @Override
-    public void setMemorySize(long memory) {
-      throw new RuntimeException("UNBOUNDED cannot be modified!");
+    public void setResourceValue(int index, long value)
+        throws ResourceNotFoundException {
+      throw new RuntimeException(name + " cannot be modified!");
     }
 
     @Override
-    public int getVirtualCores() {
-      return Integer.MAX_VALUE;
+    public void setResourceInformation(String resource,
+        ResourceInformation resourceInformation)
+        throws ResourceNotFoundException {
+      throw new RuntimeException(name + " cannot be modified!");
     }
 
     @Override
-    public void setVirtualCores(int cores) {
-      throw new RuntimeException("UNBOUNDED cannot be modified!");
+    public void setResourceValue(String resource, long value)
+        throws ResourceNotFoundException {
+      throw new RuntimeException(name + " cannot be modified!");
     }
 
-    @Override
-    public int compareTo(Resource o) {
-      long diff = Long.MAX_VALUE - o.getMemorySize();
-      if (diff == 0) {
-        diff = Integer.MAX_VALUE - o.getVirtualCores();
+    private void initResourceMap() {
+      ResourceInformation[] types = ResourceUtils.getResourceTypesArray();
+      if (types != null) {
+        resources = new ResourceInformation[types.length];
+        for (int index = 0; index < types.length; index++) {
+          resources[index] = ResourceInformation.newInstance(types[index]);
+          resources[index].setValue(resourceValue);
+        }
       }
-      return Long.signum(diff);
     }
-
-  };
+  }
 
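FixedValueResource backs the NONE and UNBOUNDED singletons defined a few lines below. A brief sketch of the observable behavior (method names are from this patch; the assertions mirror TestResources further down in this diff):

    Resource none = Resources.none();       // reads as 0 for every resource type
    Resource all = Resources.unbounded();   // reads as Long.MAX_VALUE for every type

    assert none.getMemorySize() == 0 && none.getVirtualCores() == 0;
    assert all.compareTo(
        Resource.newInstance(Long.MAX_VALUE, Integer.MAX_VALUE)) == 0;

    // The singletons are shared, so every mutator throws, e.g.:
    // none.setMemorySize(1024); -> RuntimeException("NONE cannot be modified!")

Because initResourceMap() copies the fixed value into one entry per element of ResourceUtils.getResourceTypesArray(), the singletons automatically cover any custom resource types configured on the cluster, not just memory and vcores.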
   public static Resource createResource(int memory) {
     return createResource(memory, (memory > 0) ? 1 : 0);
   }
 
@@ -125,6 +145,11 @@ public static Resource createResource(int memory, int cores) {
     return Resource.newInstance(memory, cores);
   }
 
+  private static final Resource UNBOUNDED =
+      new FixedValueResource("UNBOUNDED", Long.MAX_VALUE);
+
+  private static final Resource NONE = new FixedValueResource("NONE", 0L);
+
   public static Resource createResource(long memory) {
     return createResource(memory, (memory > 0) ? 1 : 0);
   }
 
@@ -152,12 +177,26 @@ public static Resource unbounded() {
   }
 
   public static Resource clone(Resource res) {
-    return createResource(res.getMemorySize(), res.getVirtualCores());
+    return Resource.newInstance(res);
   }
 
   public static Resource addTo(Resource lhs, Resource rhs) {
-    lhs.setMemorySize(lhs.getMemorySize() + rhs.getMemorySize());
-    lhs.setVirtualCores(lhs.getVirtualCores() + rhs.getVirtualCores());
+    int maxLength = ResourceUtils.getNumberOfKnownResourceTypes();
+    for (int i = 0; i < maxLength; i++) {
+      try {
+        ResourceInformation rhsValue = rhs.getResourceInformation(i);
+        ResourceInformation lhsValue = lhs.getResourceInformation(i);
+
+        long convertedRhs = (rhsValue.getUnits().equals(lhsValue.getUnits()))
+            ? rhsValue.getValue()
+            : UnitsConversionUtil.convert(rhsValue.getUnits(),
+                lhsValue.getUnits(), rhsValue.getValue());
+        lhs.setResourceValue(i, lhsValue.getValue() + convertedRhs);
+      } catch (ResourceNotFoundException ye) {
+        LOG.warn("Resource is missing: " + ye.getMessage());
+        continue;
+      }
+    }
     return lhs;
   }
 
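Note that addTo() now converts the right-hand side into the left-hand side's units before adding. A sketch of the effect (setting "Gi" units on memory-mb this way is purely for illustration; it assumes only the two mandatory resource types are configured):

    Resource a = Resource.newInstance(1024, 1);   // memory in the default "Mi"
    Resource b = Resource.newInstance(0, 1);
    b.setResourceInformation("memory-mb",
        ResourceInformation.newInstance("memory-mb", "Gi", 1L));

    Resources.addTo(a, b);
    // 1 Gi is converted to 1024 Mi before the addition, so afterwards:
    // a.getMemorySize() == 2048 and a.getVirtualCores() == 2

The same convert-then-combine pattern repeats in subtractFrom(), multiplyAndAddTo(), fitsIn(), and the componentwise min/max below.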
@@ -166,8 +205,22 @@ public static Resource add(Resource lhs, Resource rhs) {
   }
 
   public static Resource subtractFrom(Resource lhs, Resource rhs) {
-    lhs.setMemorySize(lhs.getMemorySize() - rhs.getMemorySize());
-    lhs.setVirtualCores(lhs.getVirtualCores() - rhs.getVirtualCores());
+    int maxLength = ResourceUtils.getNumberOfKnownResourceTypes();
+    for (int i = 0; i < maxLength; i++) {
+      try {
+        ResourceInformation rhsValue = rhs.getResourceInformation(i);
+        ResourceInformation lhsValue = lhs.getResourceInformation(i);
+
+        long convertedRhs = (rhsValue.getUnits().equals(lhsValue.getUnits()))
+            ? rhsValue.getValue()
+            : UnitsConversionUtil.convert(rhsValue.getUnits(),
+                lhsValue.getUnits(), rhsValue.getValue());
+        lhs.setResourceValue(i, lhsValue.getValue() - convertedRhs);
+      } catch (ResourceNotFoundException ye) {
+        LOG.warn("Resource is missing: " + ye.getMessage());
+        continue;
+      }
+    }
     return lhs;
   }
 
@@ -198,8 +251,16 @@ public static Resource negate(Resource resource) {
   }
 
   public static Resource multiplyTo(Resource lhs, double by) {
-    lhs.setMemorySize((long)(lhs.getMemorySize() * by));
-    lhs.setVirtualCores((int)(lhs.getVirtualCores() * by));
+    int maxLength = ResourceUtils.getNumberOfKnownResourceTypes();
+    for (int i = 0; i < maxLength; i++) {
+      try {
+        ResourceInformation lhsValue = lhs.getResourceInformation(i);
+        lhs.setResourceValue(i, (long) (lhsValue.getValue() * by));
+      } catch (ResourceNotFoundException ye) {
+        LOG.warn("Resource is missing: " + ye.getMessage());
+        continue;
+      }
+    }
     return lhs;
   }
 
@@ -213,9 +274,24 @@ public static Resource multiply(Resource lhs, double by) {
    */
   public static Resource multiplyAndAddTo(
       Resource lhs, Resource rhs, double by) {
-    lhs.setMemorySize(lhs.getMemorySize() + (long)(rhs.getMemorySize() * by));
-    lhs.setVirtualCores(lhs.getVirtualCores()
-        + (int)(rhs.getVirtualCores() * by));
+    int maxLength = ResourceUtils.getNumberOfKnownResourceTypes();
+    for (int i = 0; i < maxLength; i++) {
+      try {
+        ResourceInformation rhsValue = rhs.getResourceInformation(i);
+        ResourceInformation lhsValue = lhs.getResourceInformation(i);
+
+        long convertedRhs = (long) (((rhsValue.getUnits()
+            .equals(lhsValue.getUnits()))
+                ? rhsValue.getValue()
+                : UnitsConversionUtil.convert(rhsValue.getUnits(),
+                    lhsValue.getUnits(), rhsValue.getValue()))
+            * by);
+        lhs.setResourceValue(i, lhsValue.getValue() + convertedRhs);
+      } catch (ResourceNotFoundException ye) {
+        LOG.warn("Resource is missing: " + ye.getMessage());
+        continue;
+      }
+    }
     return lhs;
   }
 
@@ -231,8 +307,16 @@ public static Resource multiplyAndNormalizeDown(
 
   public static Resource multiplyAndRoundDown(Resource lhs, double by) {
     Resource out = clone(lhs);
-    out.setMemorySize((long)(lhs.getMemorySize() * by));
-    out.setVirtualCores((int)(lhs.getVirtualCores() * by));
+    int maxLength = ResourceUtils.getNumberOfKnownResourceTypes();
+    for (int i = 0; i < maxLength; i++) {
+      try {
+        ResourceInformation lhsValue = lhs.getResourceInformation(i);
+        out.setResourceValue(i, (long) (lhsValue.getValue() * by));
+      } catch (ResourceNotFoundException ye) {
+        LOG.warn("Resource is missing: " + ye.getMessage());
+        continue;
+      }
+    }
     return out;
   }
 
@@ -332,8 +416,25 @@ public static Resource max(
   }
 
   public static boolean fitsIn(Resource smaller, Resource bigger) {
-    return smaller.getMemorySize() <= bigger.getMemorySize() &&
-        smaller.getVirtualCores() <= bigger.getVirtualCores();
+    int maxLength = ResourceUtils.getNumberOfKnownResourceTypes();
+    for (int i = 0; i < maxLength; i++) {
+      try {
+        ResourceInformation rhsValue = bigger.getResourceInformation(i);
+        ResourceInformation lhsValue = smaller.getResourceInformation(i);
+
+        long convertedRhs = (rhsValue.getUnits().equals(lhsValue.getUnits()))
+            ? rhsValue.getValue()
+            : UnitsConversionUtil.convert(rhsValue.getUnits(),
+                lhsValue.getUnits(), rhsValue.getValue());
+        if (lhsValue.getValue() > convertedRhs) {
+          return false;
+        }
+      } catch (ResourceNotFoundException ye) {
+        LOG.warn("Resource is missing: " + ye.getMessage());
+        continue;
+      }
+    }
+    return true;
   }
 
   public static boolean fitsIn(ResourceCalculator rc, Resource cluster,
@@ -342,13 +443,51 @@ public static boolean fitsIn(ResourceCalculator rc, Resource cluster,
   }
 
   public static Resource componentwiseMin(Resource lhs, Resource rhs) {
-    return createResource(Math.min(lhs.getMemorySize(), rhs.getMemorySize()),
-        Math.min(lhs.getVirtualCores(), rhs.getVirtualCores()));
+    Resource ret = createResource(0);
+    int maxLength = ResourceUtils.getNumberOfKnownResourceTypes();
+    for (int i = 0; i < maxLength; i++) {
+      try {
+        ResourceInformation rhsValue = rhs.getResourceInformation(i);
+        ResourceInformation lhsValue = lhs.getResourceInformation(i);
+
+        long convertedRhs = (rhsValue.getUnits().equals(lhsValue.getUnits()))
+            ? rhsValue.getValue()
+            : UnitsConversionUtil.convert(rhsValue.getUnits(),
+                lhsValue.getUnits(), rhsValue.getValue());
+        ResourceInformation outInfo = lhsValue.getValue() < convertedRhs
+            ? lhsValue
+            : rhsValue;
+        ret.setResourceInformation(i, outInfo);
+      } catch (ResourceNotFoundException ye) {
+        LOG.warn("Resource is missing: " + ye.getMessage());
+        continue;
+      }
+    }
+    return ret;
   }
 
   public static Resource componentwiseMax(Resource lhs, Resource rhs) {
-    return createResource(Math.max(lhs.getMemorySize(), rhs.getMemorySize()),
-        Math.max(lhs.getVirtualCores(), rhs.getVirtualCores()));
+    Resource ret = createResource(0);
+    int maxLength = ResourceUtils.getNumberOfKnownResourceTypes();
+    for (int i = 0; i < maxLength; i++) {
+      try {
+        ResourceInformation rhsValue = rhs.getResourceInformation(i);
+        ResourceInformation lhsValue = lhs.getResourceInformation(i);
+
+        long convertedRhs = (rhsValue.getUnits().equals(lhsValue.getUnits()))
+            ? rhsValue.getValue()
+            : UnitsConversionUtil.convert(rhsValue.getUnits(),
+                lhsValue.getUnits(), rhsValue.getValue());
+        ResourceInformation outInfo = lhsValue.getValue() > convertedRhs
+            ? lhsValue
+            : rhsValue;
+        ret.setResourceInformation(i, outInfo);
+      } catch (ResourceNotFoundException ye) {
+        LOG.warn("Resource is missing: " + ye.getMessage());
+        continue;
+      }
+    }
+    return ret;
   }
 
   public static boolean isAnyMajorResourceZero(ResourceCalculator rc,
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
index 3e5e5ca74d0..5392b397109 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
@@ -3455,6 +3455,45 @@
     /confstore
   </property>
 
+  <property>
+    <description>
+    When yarn.nodemanager.resource.gpu.allowed-gpu-devices=auto is specified,
+    the YARN NodeManager needs to run a GPU discovery binary (currently only
+    nvidia-smi is supported) to get GPU-related information.
+    When the value is empty (the default), the YARN NodeManager tries to
+    locate the discovery executable itself.
+    An example of the config value is: /usr/local/bin/nvidia-smi
+    </description>
+    <name>yarn.nodemanager.resource-plugins.gpu.path-to-discovery-executables</name>
+    <value></value>
+  </property>
+
+  <property>
+    <description>
+    Enable additional discovery/isolation of resources on the NodeManager,
+    specified as a comma-separated list. By default, this is empty.
+    Acceptable values: { "yarn-io/gpu" }.
+    </description>
+    <name>yarn.nodemanager.resource-plugins</name>
+    <value></value>
+  </property>
+
+  <property>
+    <description>
+    Specify the GPU devices which can be managed by the YARN NodeManager,
+    split by comma. The number of GPU devices will be reported to the RM to
+    make scheduling decisions.
+    Set to auto (the default) to let YARN automatically discover the GPU
+    resources from the system.
+    Manually specify GPU devices if automatic detection fails or if the
+    administrator only wants a subset of the GPU devices to be managed by
+    YARN. GPU devices are identified by their minor device numbers. A common
+    approach to get the minor device numbers of GPUs is to use "nvidia-smi -q"
+    and search for "Minor Number" in the output. An example of manual
+    specification is "0,1,2,4", which allows the YARN NodeManager to manage
+    the GPU devices with minor numbers 0/1/2/4.
+    </description>
+    <name>yarn.nodemanager.resource-plugins.gpu.allowed-gpu-devices</name>
+    <value>auto</value>
+  </property>
+
   <property>
     <description>The http address of the timeline reader web application.
     </description>
     <name>yarn.timeline-service.reader.webapp.address</name>
@@ -3479,4 +3518,13 @@
   </property>
 
+  <property>
+    <name>yarn.resource-types</name>
+    <value></value>
+    <description>
+    The resource types to be used for scheduling. Use resource-types.xml
+    to specify details about the individual resource types.
+    </description>
+  </property>
+
 </configuration>
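Putting the three new NodeManager properties together, a cluster that exposes GPUs would carry something like the following in yarn-site.xml. This is an illustrative sketch, not a set of defaults shipped by this patch; the device list and binary path are example values:

    <property>
      <name>yarn.nodemanager.resource-plugins</name>
      <value>yarn-io/gpu</value>
    </property>
    <property>
      <name>yarn.nodemanager.resource-plugins.gpu.allowed-gpu-devices</name>
      <value>0,1,2,4</value> <!-- or "auto" to discover devices via nvidia-smi -->
    </property>
    <property>
      <name>yarn.nodemanager.resource-plugins.gpu.path-to-discovery-executables</name>
      <value>/usr/local/bin/nvidia-smi</value>
    </property>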
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/BasePBImplRecordsTest.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/BasePBImplRecordsTest.java
index 82170b31342..86946518db3 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/BasePBImplRecordsTest.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/BasePBImplRecordsTest.java
@@ -37,6 +37,9 @@
   @SuppressWarnings("checkstyle:visibilitymodifier")
   protected static HashMap<Type, Object> typeValueCache =
       new HashMap<Type, Object>();
+  @SuppressWarnings("checkstyle:visibilitymodifier")
+  protected static HashMap<Class<?>, List<String>> excludedPropertiesMap =
+      new HashMap<>();
   private static Random rand = new Random();
   private static byte [] bytes = new byte[] {'1', '2', '3', '4'};
 
@@ -167,6 +170,10 @@
   private Map<String, GetSetPair> getGetSetPairs(Class<?> recordClass) throws Exception {
     Map<String, GetSetPair> ret = new HashMap<String, GetSetPair>();
+    List<String> excluded = null;
+    if (excludedPropertiesMap.containsKey(recordClass)) {
+      excluded = excludedPropertiesMap.get(recordClass);
+    }
     Method [] methods = recordClass.getDeclaredMethods();
     // get all get methods
     for (int i = 0; i < methods.length; i++) {
@@ -224,6 +231,11 @@
           (gsp.setMethod == null)) {
         LOG.info(String.format("Exclude potential property: %s\n", gsp.propertyName));
         itr.remove();
+      } else if (excluded != null && excluded.contains(gsp.propertyName)) {
+        LOG.info(String.format(
+            "Excluding potential property (present in exclusion list): %s\n",
+            gsp.propertyName));
+        itr.remove();
       } else {
         LOG.info(String.format("New property: %s type: %s", gsp.toString(), gsp.type));
         gsp.testValue = genTypeValue(gsp.type);
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/TestPBImplRecords.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/TestPBImplRecords.java
index 5998dd0bfd7..62a4fee0038 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/TestPBImplRecords.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/TestPBImplRecords.java
@@ -43,6 +43,8 @@
 import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.CancelDelegationTokenResponsePBImpl;
 import 
org.apache.hadoop.yarn.api.protocolrecords.impl.pb.FinishApplicationMasterRequestPBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.FinishApplicationMasterResponsePBImpl; +import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetAllResourceTypeInfoRequestPBImpl; +import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetAllResourceTypeInfoResponsePBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetApplicationAttemptReportRequestPBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetApplicationAttemptReportResponsePBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetApplicationAttemptsRequestPBImpl; @@ -141,9 +143,11 @@ import org.apache.hadoop.yarn.api.records.ReservationRequests; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.ResourceAllocationRequest; +import org.apache.hadoop.yarn.api.records.ResourceInformation; import org.apache.hadoop.yarn.api.records.ResourceBlacklistRequest; import org.apache.hadoop.yarn.api.records.ResourceOption; import org.apache.hadoop.yarn.api.records.ResourceRequest; +import org.apache.hadoop.yarn.api.records.ResourceTypeInfo; import org.apache.hadoop.yarn.api.records.ResourceUtilization; import org.apache.hadoop.yarn.api.records.SerializedException; import org.apache.hadoop.yarn.api.records.StrictPreemptionContract; @@ -182,12 +186,14 @@ import org.apache.hadoop.yarn.api.records.impl.pb.ResourceOptionPBImpl; import org.apache.hadoop.yarn.api.records.impl.pb.ResourcePBImpl; import org.apache.hadoop.yarn.api.records.impl.pb.ResourceRequestPBImpl; +import org.apache.hadoop.yarn.api.records.impl.pb.ResourceTypeInfoPBImpl; import org.apache.hadoop.yarn.api.records.impl.pb.SerializedExceptionPBImpl; import org.apache.hadoop.yarn.api.records.impl.pb.StrictPreemptionContractPBImpl; import org.apache.hadoop.yarn.api.records.impl.pb.TokenPBImpl; import org.apache.hadoop.yarn.api.records.impl.pb.URLPBImpl; import org.apache.hadoop.yarn.api.records.impl.pb.UpdateContainerRequestPBImpl; import org.apache.hadoop.yarn.api.records.impl.pb.YarnClusterMetricsPBImpl; +import org.apache.hadoop.yarn.proto.YarnProtos; import org.apache.hadoop.yarn.proto.YarnProtos.ApplicationAttemptIdProto; import org.apache.hadoop.yarn.proto.YarnProtos.ApplicationAttemptReportProto; import org.apache.hadoop.yarn.proto.YarnProtos.ApplicationIdProto; @@ -334,6 +340,7 @@ import org.junit.Test; import com.google.common.collect.ImmutableSet; +import java.util.Arrays; /** * Test class for YARN API protocol records. 
@@ -348,6 +355,8 @@ public static void setup() throws Exception {
     typeValueCache.put(SerializedException.class,
         SerializedException.newInstance(new IOException("exception for test")));
     generateByNewInstance(ExecutionTypeRequest.class);
+    typeValueCache.put(ResourceInformation.class, ResourceInformation
+        .newInstance("localhost.test/sample", 1L));
     generateByNewInstance(LogAggregationContext.class);
     generateByNewInstance(ApplicationId.class);
     generateByNewInstance(ApplicationAttemptId.class);
@@ -411,6 +420,7 @@ public static void setup() throws Exception {
     generateByNewInstance(ContainerResourceIncreaseRequest.class);
     generateByNewInstance(QueueConfigurations.class);
     generateByNewInstance(CollectorInfo.class);
+    generateByNewInstance(ResourceTypeInfo.class);
   }
 
   @Test
@@ -734,6 +744,8 @@ public void testApplicationReportPBImpl() throws Exception {
 
   @Test
   public void testApplicationResourceUsageReportPBImpl() throws Exception {
+    excludedPropertiesMap.put(ApplicationResourceUsageReportPBImpl.class,
+        Arrays.asList("PreemptedResourceSecondsMap", "ResourceSecondsMap"));
     validatePBImplRecord(ApplicationResourceUsageReportPBImpl.class,
         ApplicationResourceUsageReportProto.class);
   }
@@ -1156,4 +1168,22 @@ public void testExecutionTypeRequestPBImpl() throws Exception {
     validatePBImplRecord(ExecutionTypeRequestPBImpl.class,
         ExecutionTypeRequestProto.class);
   }
+
+  @Test
+  public void testResourceTypesInfoPBImpl() throws Exception {
+    validatePBImplRecord(ResourceTypeInfoPBImpl.class,
+        YarnProtos.ResourceTypeInfoProto.class);
+  }
+
+  @Test
+  public void testGetAllResourceTypesInfoRequestPBImpl() throws Exception {
+    validatePBImplRecord(GetAllResourceTypeInfoRequestPBImpl.class,
+        YarnServiceProtos.GetAllResourceTypeInfoRequestProto.class);
+  }
+
+  @Test
+  public void testGetAllResourceTypesInfoResponsePBImpl() throws Exception {
+    validatePBImplRecord(GetAllResourceTypeInfoResponsePBImpl.class,
+        YarnServiceProtos.GetAllResourceTypeInfoResponseProto.class);
+  }
 }
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/TestResourcePBImpl.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/TestResourcePBImpl.java
new file mode 100644
index 00000000000..569a7b74f8b
--- /dev/null
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/TestResourcePBImpl.java
@@ -0,0 +1,61 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.api;
+
+import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.api.records.ResourceInformation;
+import org.apache.hadoop.yarn.api.records.impl.pb.ResourcePBImpl;
+import org.apache.hadoop.yarn.proto.YarnProtos;
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * Test class to handle various proto related tests for resources.
+ */
+public class TestResourcePBImpl {
+  @Test
+  public void testEmptyResourcePBInit() throws Exception {
+    Resource res = new ResourcePBImpl();
+    // Assert to check it sets resource value and unit to default.
+    Assert.assertEquals(0, res.getMemorySize());
+    Assert.assertEquals(ResourceInformation.MEMORY_MB.getUnits(),
+        res.getResourceInformation(ResourceInformation.MEMORY_MB.getName())
+            .getUnits());
+    Assert.assertEquals(ResourceInformation.VCORES.getUnits(),
+        res.getResourceInformation(ResourceInformation.VCORES.getName())
+            .getUnits());
+  }
+
+  @Test
+  public void testResourcePBInitFromOldPB() throws Exception {
+    YarnProtos.ResourceProto proto =
+        YarnProtos.ResourceProto.newBuilder().setMemory(1024).setVirtualCores(3)
+            .build();
+    // Assert to check it sets resource value and unit to default.
+    Resource res = new ResourcePBImpl(proto);
+    Assert.assertEquals(1024, res.getMemorySize());
+    Assert.assertEquals(3, res.getVirtualCores());
+    Assert.assertEquals(ResourceInformation.MEMORY_MB.getUnits(),
+        res.getResourceInformation(ResourceInformation.MEMORY_MB.getName())
+            .getUnits());
+    Assert.assertEquals(ResourceInformation.VCORES.getUnits(),
+        res.getResourceInformation(ResourceInformation.VCORES.getName())
+            .getUnits());
+  }
+}
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/resource/TestResourceCalculator.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/resource/TestResourceCalculator.java
index b123b0520d4..5b4155cf845 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/resource/TestResourceCalculator.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/resource/TestResourceCalculator.java
@@ -21,15 +21,21 @@
 import java.util.Arrays;
 import java.util.Collection;
 
+import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.yarn.api.records.Resource;
-import org.junit.Assert;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.junit.Before;
 import org.junit.Test;
 import org.junit.runner.RunWith;
 import org.junit.runners.Parameterized;
 
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
 @RunWith(Parameterized.class)
 public class TestResourceCalculator {
-  private ResourceCalculator resourceCalculator;
+  private final ResourceCalculator resourceCalculator;
 
   @Parameterized.Parameters
   public static Collection<ResourceCalculator[]> getParameters() {
@@ -38,6 +44,20 @@ public static Collection<ResourceCalculator[]> getParameters() {
         { new DominantResourceCalculator() } });
   }
 
+  @Before
+  public void setupNoExtraResource() {
+    // This has to run before each test because we don't know when
+    // setupExtraResource() might be called.
+    ResourceUtils.resetResourceTypes(new Configuration());
+  }
+
+  private static void setupExtraResource() {
+    Configuration conf = new Configuration();
+
+    conf.set(YarnConfiguration.RESOURCE_TYPES, "test");
+    ResourceUtils.resetResourceTypes(conf);
+  }
+
   public TestResourceCalculator(ResourceCalculator rs) {
     this.resourceCalculator = rs;
   }
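setupExtraResource() above is the pattern the rest of this test class builds on: declare an additional type through yarn.resource-types, reset ResourceUtils, and then populate the extra dimension on individual Resource objects. Roughly (the "test" name is the one this file registers; error handling omitted):

    Configuration conf = new Configuration();
    conf.set(YarnConfiguration.RESOURCE_TYPES, "test");
    ResourceUtils.resetResourceTypes(conf);   // re-reads the known resource types

    Resource res = Resource.newInstance(4096, 4);
    res.setResourceValue("test", 2L);         // the third dimension now takes
                                              // part in compare() and fitsIn()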
@@ -47,32 +67,177 @@ public void testFitsIn() { Resource cluster = Resource.newInstance(1024, 1); if (resourceCalculator instanceof DefaultResourceCalculator) { - Assert.assertTrue(resourceCalculator.fitsIn(cluster, + assertTrue(resourceCalculator.fitsIn(cluster, Resource.newInstance(1, 2), Resource.newInstance(2, 1))); - Assert.assertTrue(resourceCalculator.fitsIn(cluster, + assertTrue(resourceCalculator.fitsIn(cluster, Resource.newInstance(1, 2), Resource.newInstance(2, 2))); - Assert.assertTrue(resourceCalculator.fitsIn(cluster, + assertTrue(resourceCalculator.fitsIn(cluster, Resource.newInstance(1, 2), Resource.newInstance(1, 2))); - Assert.assertTrue(resourceCalculator.fitsIn(cluster, + assertTrue(resourceCalculator.fitsIn(cluster, Resource.newInstance(1, 2), Resource.newInstance(1, 1))); - Assert.assertFalse(resourceCalculator.fitsIn(cluster, + assertFalse(resourceCalculator.fitsIn(cluster, Resource.newInstance(2, 1), Resource.newInstance(1, 2))); } else if (resourceCalculator instanceof DominantResourceCalculator) { - Assert.assertFalse(resourceCalculator.fitsIn(cluster, + assertFalse(resourceCalculator.fitsIn(cluster, Resource.newInstance(1, 2), Resource.newInstance(2, 1))); - Assert.assertTrue(resourceCalculator.fitsIn(cluster, + assertTrue(resourceCalculator.fitsIn(cluster, Resource.newInstance(1, 2), Resource.newInstance(2, 2))); - Assert.assertTrue(resourceCalculator.fitsIn(cluster, + assertTrue(resourceCalculator.fitsIn(cluster, Resource.newInstance(1, 2), Resource.newInstance(1, 2))); - Assert.assertFalse(resourceCalculator.fitsIn(cluster, + assertFalse(resourceCalculator.fitsIn(cluster, Resource.newInstance(1, 2), Resource.newInstance(1, 1))); - Assert.assertFalse(resourceCalculator.fitsIn(cluster, + assertFalse(resourceCalculator.fitsIn(cluster, Resource.newInstance(2, 1), Resource.newInstance(1, 2))); } } + private Resource newResource(long memory, int cpu) { + Resource res = Resource.newInstance(memory, cpu); + + return res; + } + + private Resource newResource(long memory, int cpu, int test) { + Resource res = newResource(memory, cpu); + + res.setResourceValue("test", test); + + return res; + } + + /** + * Test that the compare() method returns the expected result (0, -1, or 1). + * If the expected result is not 0, this method will also test the resources + * in the opposite order and check for the negative of the expected result. + * + * @param cluster the cluster resource + * @param res1 the LHS resource + * @param res2 the RHS resource + * @param expected the expected result + */ + private void assertComparison(Resource cluster, Resource res1, Resource res2, + int expected) { + int actual = resourceCalculator.compare(cluster, res1, res2); + + assertEquals(String.format("Resource comparison did not give the expected " + + "result for %s v/s %s", res1.toString(), res2.toString()), + expected, actual); + + if (expected != 0) { + // Try again with args in the opposite order and the negative of the + // expected result. + actual = resourceCalculator.compare(cluster, res2, res1); + assertEquals(String.format("Resource comparison did not give the " + + "expected result for %s v/s %s", res2.toString(), res1.toString()), + expected * -1, actual); + } + } + + @Test + public void testCompareWithOnlyMandatory() { + // This test is necessary because there are optimizations that are only + // triggered when only the mandatory resources are configured. 
+ + // Keep cluster resources even so that the numbers are easy to understand + Resource cluster = newResource(4, 4); + + assertComparison(cluster, newResource(1, 1), newResource(1, 1), 0); + assertComparison(cluster, newResource(0, 0), newResource(0, 0), 0); + assertComparison(cluster, newResource(2, 2), newResource(1, 1), 1); + assertComparison(cluster, newResource(2, 2), newResource(0, 0), 1); + + if (resourceCalculator instanceof DefaultResourceCalculator) { + testCompareDefaultWithOnlyMandatory(cluster); + } else if (resourceCalculator instanceof DominantResourceCalculator) { + testCompareDominantWithOnlyMandatory(cluster); + } + } + + private void testCompareDefaultWithOnlyMandatory(Resource cluster) { + assertComparison(cluster, newResource(1, 1), newResource(1, 1), 0); + assertComparison(cluster, newResource(1, 2), newResource(1, 1), 0); + assertComparison(cluster, newResource(1, 1), newResource(1, 0), 0); + assertComparison(cluster, newResource(2, 1), newResource(1, 1), 1); + assertComparison(cluster, newResource(2, 1), newResource(1, 2), 1); + assertComparison(cluster, newResource(2, 1), newResource(1, 0), 1); + } + + private void testCompareDominantWithOnlyMandatory(Resource cluster) { + assertComparison(cluster, newResource(2, 1), newResource(2, 1), 0); + assertComparison(cluster, newResource(2, 1), newResource(1, 2), 0); + assertComparison(cluster, newResource(2, 1), newResource(1, 1), 1); + assertComparison(cluster, newResource(2, 2), newResource(2, 1), 1); + assertComparison(cluster, newResource(2, 2), newResource(1, 2), 1); + assertComparison(cluster, newResource(3, 1), newResource(3, 0), 1); + } + + @Test + public void testCompare() { + // Test with 3 resources + setupExtraResource(); + + // Keep cluster resources even so that the numbers are easy to understand + Resource cluster = newResource(4L, 4, 4); + + assertComparison(cluster, newResource(1, 1, 1), newResource(1, 1, 1), 0); + assertComparison(cluster, newResource(0, 0, 0), newResource(0, 0, 0), 0); + assertComparison(cluster, newResource(2, 2, 2), newResource(1, 1, 1), 1); + assertComparison(cluster, newResource(2, 2, 2), newResource(0, 0, 0), 1); + + if (resourceCalculator instanceof DefaultResourceCalculator) { + testCompareDefault(cluster); + } else if (resourceCalculator instanceof DominantResourceCalculator) { + testCompareDominant(cluster); + } + } + + private void testCompareDefault(Resource cluster) { + assertComparison(cluster, newResource(1, 1, 2), newResource(1, 1, 1), 0); + assertComparison(cluster, newResource(1, 2, 1), newResource(1, 1, 1), 0); + assertComparison(cluster, newResource(1, 2, 2), newResource(1, 1, 1), 0); + assertComparison(cluster, newResource(1, 2, 2), newResource(1, 0, 0), 0); + assertComparison(cluster, newResource(2, 1, 1), newResource(1, 1, 1), 1); + assertComparison(cluster, newResource(2, 1, 1), newResource(1, 2, 1), 1); + assertComparison(cluster, newResource(2, 1, 1), newResource(1, 1, 2), 1); + assertComparison(cluster, newResource(2, 1, 1), newResource(1, 2, 2), 1); + assertComparison(cluster, newResource(2, 1, 1), newResource(1, 0, 0), 1); + } + + private void testCompareDominant(Resource cluster) { + assertComparison(cluster, newResource(2, 1, 1), newResource(2, 1, 1), 0); + assertComparison(cluster, newResource(2, 1, 1), newResource(1, 2, 1), 0); + assertComparison(cluster, newResource(2, 1, 1), newResource(1, 1, 2), 0); + assertComparison(cluster, newResource(2, 1, 0), newResource(0, 1, 2), 0); + assertComparison(cluster, newResource(2, 2, 1), newResource(1, 2, 2), 0); + 
assertComparison(cluster, newResource(2, 2, 1), newResource(2, 1, 2), 0); + assertComparison(cluster, newResource(2, 2, 1), newResource(2, 2, 1), 0); + assertComparison(cluster, newResource(2, 2, 0), newResource(2, 0, 2), 0); + assertComparison(cluster, newResource(3, 2, 1), newResource(3, 2, 1), 0); + assertComparison(cluster, newResource(3, 2, 1), newResource(3, 1, 2), 0); + assertComparison(cluster, newResource(3, 2, 1), newResource(1, 2, 3), 0); + assertComparison(cluster, newResource(3, 2, 1), newResource(1, 3, 2), 0); + assertComparison(cluster, newResource(3, 2, 1), newResource(2, 1, 3), 0); + assertComparison(cluster, newResource(3, 2, 1), newResource(2, 3, 1), 0); + assertComparison(cluster, newResource(2, 1, 1), newResource(1, 1, 1), 1); + assertComparison(cluster, newResource(2, 1, 1), newResource(1, 1, 0), 1); + assertComparison(cluster, newResource(2, 2, 1), newResource(2, 1, 1), 1); + assertComparison(cluster, newResource(2, 2, 1), newResource(1, 2, 1), 1); + assertComparison(cluster, newResource(2, 2, 1), newResource(1, 1, 2), 1); + assertComparison(cluster, newResource(2, 2, 1), newResource(0, 2, 2), 1); + assertComparison(cluster, newResource(2, 2, 2), newResource(2, 1, 1), 1); + assertComparison(cluster, newResource(2, 2, 2), newResource(1, 2, 1), 1); + assertComparison(cluster, newResource(2, 2, 2), newResource(1, 1, 2), 1); + assertComparison(cluster, newResource(2, 2, 2), newResource(2, 2, 1), 1); + assertComparison(cluster, newResource(2, 2, 2), newResource(2, 1, 2), 1); + assertComparison(cluster, newResource(2, 2, 2), newResource(1, 2, 2), 1); + assertComparison(cluster, newResource(3, 2, 1), newResource(2, 2, 2), 1); + assertComparison(cluster, newResource(3, 1, 1), newResource(2, 2, 2), 1); + assertComparison(cluster, newResource(3, 1, 1), newResource(3, 1, 0), 1); + assertComparison(cluster, newResource(3, 1, 1), newResource(3, 0, 0), 1); + } + @Test(timeout = 10000) - public void testResourceCalculatorCompareMethod() { + public void testCompareWithEmptyCluster() { Resource clusterResource = Resource.newInstance(0, 0); // For lhs == rhs @@ -126,27 +291,27 @@ private void assertResourcesOperations(Resource clusterResource, boolean greaterThan, boolean greaterThanOrEqual, Resource max, Resource min) { - Assert.assertEquals("Less Than operation is wrongly calculated.", lessThan, + assertEquals("Less Than operation is wrongly calculated.", lessThan, Resources.lessThan(resourceCalculator, clusterResource, lhs, rhs)); - Assert.assertEquals( + assertEquals( "Less Than Or Equal To operation is wrongly calculated.", lessThanOrEqual, Resources.lessThanOrEqual(resourceCalculator, clusterResource, lhs, rhs)); - Assert.assertEquals("Greater Than operation is wrongly calculated.", + assertEquals("Greater Than operation is wrongly calculated.", greaterThan, Resources.greaterThan(resourceCalculator, clusterResource, lhs, rhs)); - Assert.assertEquals( + assertEquals( "Greater Than Or Equal To operation is wrongly calculated.", greaterThanOrEqual, Resources.greaterThanOrEqual(resourceCalculator, clusterResource, lhs, rhs)); - Assert.assertEquals("Max(value) Operation wrongly calculated.", max, + assertEquals("Max(value) Operation wrongly calculated.", max, Resources.max(resourceCalculator, clusterResource, lhs, rhs)); - Assert.assertEquals("Min(value) operation is wrongly calculated.", min, + assertEquals("Min(value) operation is wrongly calculated.", min, Resources.min(resourceCalculator, clusterResource, lhs, rhs)); } @@ -164,13 +329,13 @@ public void testNormalize() { Resource 
result = Resources.normalize(resourceCalculator, ask, min, max, increment); - Assert.assertEquals(2 * 1024, result.getMemorySize()); + assertEquals(2 * 1024, result.getMemorySize()); } else if (resourceCalculator instanceof DominantResourceCalculator) { Resource result = Resources.normalize(resourceCalculator, ask, min, max, increment); - Assert.assertEquals(2 * 1024, result.getMemorySize()); - Assert.assertEquals(4, result.getVirtualCores()); + assertEquals(2 * 1024, result.getMemorySize()); + assertEquals(4, result.getVirtualCores()); } // if resources asked are less than minimum resource, then normalize it to @@ -183,13 +348,13 @@ public void testNormalize() { Resource result = Resources.normalize(resourceCalculator, ask, min, max, increment); - Assert.assertEquals(2 * 1024, result.getMemorySize()); + assertEquals(2 * 1024, result.getMemorySize()); } else if (resourceCalculator instanceof DominantResourceCalculator) { Resource result = Resources.normalize(resourceCalculator, ask, min, max, increment); - Assert.assertEquals(2 * 1024, result.getMemorySize()); - Assert.assertEquals(2, result.getVirtualCores()); + assertEquals(2 * 1024, result.getMemorySize()); + assertEquals(2, result.getVirtualCores()); } // if resources asked are larger than maximum resource, then normalize it to @@ -202,13 +367,13 @@ public void testNormalize() { Resource result = Resources.normalize(resourceCalculator, ask, min, max, increment); - Assert.assertEquals(8 * 1024, result.getMemorySize()); + assertEquals(8 * 1024, result.getMemorySize()); } else if (resourceCalculator instanceof DominantResourceCalculator) { Resource result = Resources.normalize(resourceCalculator, ask, min, max, increment); - Assert.assertEquals(8 * 1024, result.getMemorySize()); - Assert.assertEquals(8, result.getVirtualCores()); + assertEquals(8 * 1024, result.getMemorySize()); + assertEquals(8, result.getVirtualCores()); } // if increment is 0, use minimum resource as the increment resource. @@ -220,13 +385,13 @@ public void testNormalize() { Resource result = Resources.normalize(resourceCalculator, ask, min, max, increment); - Assert.assertEquals(2 * 1024, result.getMemorySize()); + assertEquals(2 * 1024, result.getMemorySize()); } else if (resourceCalculator instanceof DominantResourceCalculator) { Resource result = Resources.normalize(resourceCalculator, ask, min, max, increment); - Assert.assertEquals(2 * 1024, result.getMemorySize()); - Assert.assertEquals(2, result.getVirtualCores()); + assertEquals(2 * 1024, result.getMemorySize()); + assertEquals(2, result.getVirtualCores()); } } } \ No newline at end of file diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/resource/TestResourceUtils.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/resource/TestResourceUtils.java new file mode 100644 index 00000000000..b5117056b30 --- /dev/null +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/resource/TestResourceUtils.java @@ -0,0 +1,328 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.util.resource; + +import org.apache.commons.io.FileUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.yarn.api.protocolrecords.ResourceTypes; +import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.api.records.ResourceInformation; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import java.io.File; +import java.util.HashMap; +import java.util.Map; + +/** + * Test class to verify all resource utility methods. + */ +public class TestResourceUtils { + public static final String TEST_CONF_RESET_RESOURCE_TYPES = + "yarn.test.reset-resource-types"; + + static class ResourceFileInformation { + String filename; + int resourceCount; + Map resourceNameUnitsMap; + + public ResourceFileInformation(String name, int count) { + filename = name; + resourceCount = count; + resourceNameUnitsMap = new HashMap<>(); + } + } + + public static void addNewTypesToResources(String... resourceTypes) { + // Initialize resource map + Map riMap = new HashMap<>(); + + // Initialize mandatory resources + riMap.put(ResourceInformation.MEMORY_URI, ResourceInformation.MEMORY_MB); + riMap.put(ResourceInformation.VCORES_URI, ResourceInformation.VCORES); + + for (String newResource : resourceTypes) { + riMap.put(newResource, ResourceInformation + .newInstance(newResource, "", 0, ResourceTypes.COUNTABLE, 0, + Integer.MAX_VALUE)); + } + + ResourceUtils.initializeResourcesFromResourceInformationMap(riMap); + } + + @Before + public void setup() { + ResourceUtils.resetResourceTypes(); + } + + @After + public void teardown() { + Configuration conf = new YarnConfiguration(); + File source = new File( + conf.getClassLoader().getResource("resource-types-1.xml").getFile()); + File dest = new File(source.getParent(), "resource-types.xml"); + if (dest.exists()) { + dest.delete(); + } + } + + private void testMemoryAndVcores(Map res) { + String memory = ResourceInformation.MEMORY_MB.getName(); + String vcores = ResourceInformation.VCORES.getName(); + Assert.assertTrue("Resource 'memory' missing", res.containsKey(memory)); + Assert.assertEquals("'memory' units incorrect", + ResourceInformation.MEMORY_MB.getUnits(), res.get(memory).getUnits()); + Assert.assertEquals("'memory' types incorrect", + ResourceInformation.MEMORY_MB.getResourceType(), + res.get(memory).getResourceType()); + Assert.assertTrue("Resource 'vcores' missing", res.containsKey(vcores)); + Assert.assertEquals("'vcores' units incorrect", + ResourceInformation.VCORES.getUnits(), res.get(vcores).getUnits()); + Assert.assertEquals("'vcores' type incorrect", + ResourceInformation.VCORES.getResourceType(), + res.get(vcores).getResourceType()); + } + + @Test + public void testGetResourceTypes() throws Exception { + + Map res = ResourceUtils.getResourceTypes(); + Assert.assertEquals(2, res.size()); + testMemoryAndVcores(res); + } + + @Test + public void testGetResourceTypesConfigs() throws Exception { + + Configuration conf = new YarnConfiguration(); + + 
ResourceFileInformation testFile1 =
+        new ResourceFileInformation("resource-types-1.xml", 2);
+    ResourceFileInformation testFile2 =
+        new ResourceFileInformation("resource-types-2.xml", 3);
+    testFile2.resourceNameUnitsMap.put("resource1", "G");
+    ResourceFileInformation testFile3 =
+        new ResourceFileInformation("resource-types-3.xml", 3);
+    testFile3.resourceNameUnitsMap.put("resource2", "");
+    ResourceFileInformation testFile4 =
+        new ResourceFileInformation("resource-types-4.xml", 5);
+    testFile4.resourceNameUnitsMap.put("resource1", "G");
+    testFile4.resourceNameUnitsMap.put("resource2", "m");
+    testFile4.resourceNameUnitsMap.put("yarn.io/gpu", "");
+
+    ResourceFileInformation[] tests = {testFile1, testFile2, testFile3,
+        testFile4};
+    Map<String, ResourceInformation> res;
+    for (ResourceFileInformation testInformation : tests) {
+      ResourceUtils.resetResourceTypes();
+      File source = new File(
+          conf.getClassLoader().getResource(testInformation.filename)
+              .getFile());
+      File dest = new File(source.getParent(), "resource-types.xml");
+      FileUtils.copyFile(source, dest);
+      res = ResourceUtils.getResourceTypes();
+      testMemoryAndVcores(res);
+      Assert.assertEquals(testInformation.resourceCount, res.size());
+      for (Map.Entry<String, String> entry : testInformation.resourceNameUnitsMap
+          .entrySet()) {
+        String resourceName = entry.getKey();
+        Assert.assertTrue("Missing key " + resourceName,
+            res.containsKey(resourceName));
+        Assert.assertEquals(entry.getValue(), res.get(resourceName).getUnits());
+      }
+      dest.delete();
+    }
+  }
+
+  @Test
+  public void testGetResourceTypesConfigErrors() throws Exception {
+    Configuration conf = new YarnConfiguration();
+
+    String[] resourceFiles = {"resource-types-error-1.xml",
+        "resource-types-error-2.xml", "resource-types-error-3.xml",
+        "resource-types-error-4.xml"};
+    for (String resourceFile : resourceFiles) {
+      ResourceUtils.resetResourceTypes();
+      File dest = null;
+      try {
+        File source =
+            new File(conf.getClassLoader().getResource(resourceFile).getFile());
+        dest = new File(source.getParent(), "resource-types.xml");
+        FileUtils.copyFile(source, dest);
+        ResourceUtils.getResourceTypes();
+        Assert.fail("Expected error with file " + resourceFile);
+      } catch (NullPointerException ne) {
+        // An NPE means the test resource file itself was missing; fail loudly.
+        throw ne;
+      } catch (Exception e) {
+        // Expected path: the malformed resource-types.xml was rejected.
+        if (dest != null) {
+          dest.delete();
+        }
+      }
+    }
+  }
+
+  @Test
+  public void testInitializeResourcesMap() throws Exception {
+    String[] empty = {"", ""};
+    String[] res1 = {"resource1", "m"};
+    String[] res2 = {"resource2", "G"};
+    String[][] test1 = {empty};
+    String[][] test2 = {res1};
+    String[][] test3 = {res2};
+    String[][] test4 = {res1, res2};
+
+    String[][][] allTests = {test1, test2, test3, test4};
+
+    for (String[][] test : allTests) {
+
+      Configuration conf = new YarnConfiguration();
+      String resSt = "";
+      for (String[] resources : test) {
+        resSt += (resources[0] + ",");
+      }
+      resSt = resSt.substring(0, resSt.length() - 1);
+      conf.set(YarnConfiguration.RESOURCE_TYPES, resSt);
+      for (String[] resources : test) {
+        String name =
+            YarnConfiguration.RESOURCE_TYPES + "." + resources[0] + ".units";
+        conf.set(name, resources[1]);
+      }
+      Map<String, ResourceInformation> ret =
+          ResourceUtils.resetResourceTypes(conf);
+
+      // For test1 the expected map size is 2 and for test4 it is 4;
+      // for the other tests it is 3 (memory-mb, vcores, one custom type).
+      int len = 3;
+      if (test == test1) {
+        len = 2;
+      } else if (test == test4) {
+        len = 4;
+      }
+
+      Assert.assertEquals(len, ret.size());
+      for (String[] resources : test) {
+        if (resources[0].length() == 0) {
+          continue;
+        }
+        Assert.assertTrue(ret.containsKey(resources[0]));
+        ResourceInformation resInfo = ret.get(resources[0]);
+        Assert.assertEquals(resources[1], resInfo.getUnits());
+        Assert.assertEquals(ResourceTypes.COUNTABLE, resInfo.getResourceType());
+      }
+      // we must always have memory and vcores with their fixed units
+      Assert.assertTrue(ret.containsKey("memory-mb"));
+      ResourceInformation memInfo = ret.get("memory-mb");
+      Assert.assertEquals("Mi", memInfo.getUnits());
+      Assert.assertEquals(ResourceTypes.COUNTABLE, memInfo.getResourceType());
+      Assert.assertTrue(ret.containsKey("vcores"));
+      ResourceInformation vcoresInfo = ret.get("vcores");
+      Assert.assertEquals("", vcoresInfo.getUnits());
+      Assert
+          .assertEquals(ResourceTypes.COUNTABLE, vcoresInfo.getResourceType());
+    }
+  }
+
+  @Test
+  public void testInitializeResourcesMapErrors() throws Exception {
+
+    String[] mem1 = {"memory-mb", ""};
+    String[] vcores1 = {"vcores", "M"};
+
+    String[] mem2 = {"memory-mb", "m"};
+    String[] vcores2 = {"vcores", "G"};
+
+    String[] mem3 = {"memory", ""};
+
+    String[][] test1 = {mem1, vcores1};
+    String[][] test2 = {mem2, vcores2};
+    String[][] test3 = {mem3};
+
+    String[][][] allTests = {test1, test2, test3};
+
+    for (String[][] test : allTests) {
+
+      Configuration conf = new YarnConfiguration();
+      String resSt = "";
+      for (String[] resources : test) {
+        resSt += (resources[0] + ",");
+      }
+      resSt = resSt.substring(0, resSt.length() - 1);
+      conf.set(YarnConfiguration.RESOURCE_TYPES, resSt);
+      for (String[] resources : test) {
+        String name =
+            YarnConfiguration.RESOURCE_TYPES + "." 
+ resources[0] + ".units"; + conf.set(name, resources[1]); + } + try { + ResourceUtils.initializeResourcesMap(conf); + Assert.fail("resource map initialization should fail"); + } catch (Exception e) { + // do nothing + } + } + } + + @Test + public void testGetResourceInformation() throws Exception { + + Configuration conf = new YarnConfiguration(); + Map testRun = new HashMap<>(); + setupResourceTypes(conf, "resource-types-4.xml"); + // testRun.put("node-resources-1.xml", Resource.newInstance(1024, 1)); + Resource test3Resources = Resource.newInstance(1024, 1); + test3Resources.setResourceInformation("resource1", + ResourceInformation.newInstance("resource1", "Gi", 5L)); + test3Resources.setResourceInformation("resource2", + ResourceInformation.newInstance("resource2", "m", 2L)); + test3Resources.setResourceInformation("yarn.io/gpu", + ResourceInformation.newInstance("yarn.io/gpu", "", 1)); + testRun.put("node-resources-2.xml", test3Resources); + + for (Map.Entry entry : testRun.entrySet()) { + String resourceFile = entry.getKey(); + ResourceUtils.resetNodeResources(); + File dest; + File source = new File( + conf.getClassLoader().getResource(resourceFile).getFile()); + dest = new File(source.getParent(), "node-resources.xml"); + FileUtils.copyFile(source, dest); + Map actual = ResourceUtils + .getNodeResourceInformation(conf); + Assert.assertEquals(actual.size(), + entry.getValue().getResources().length); + for (ResourceInformation resInfo : entry.getValue().getResources()) { + Assert.assertEquals(resInfo, actual.get(resInfo.getName())); + } + dest.delete(); + } + } + + public static String setupResourceTypes(Configuration conf, String filename) + throws Exception { + File source = new File( + conf.getClassLoader().getResource(filename).getFile()); + File dest = new File(source.getParent(), "resource-types.xml"); + FileUtils.copyFile(source, dest); + ResourceUtils.getResourceTypes(); + return dest.getAbsolutePath(); + } +} diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/resource/TestResources.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/resource/TestResources.java index d79179ac0d9..a8404fbaee7 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/resource/TestResources.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/resource/TestResources.java @@ -18,35 +18,102 @@ package org.apache.hadoop.yarn.util.resource; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.api.records.ResourceInformation; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.junit.After; +import org.junit.Before; import org.junit.Test; +import java.io.File; + +import static org.apache.hadoop.yarn.util.resource.Resources.componentwiseMin; +import static org.apache.hadoop.yarn.util.resource.Resources.componentwiseMax; +import static org.apache.hadoop.yarn.util.resource.Resources.add; +import static org.apache.hadoop.yarn.util.resource.Resources.subtract; +import static org.apache.hadoop.yarn.util.resource.Resources.multiply; +import static org.apache.hadoop.yarn.util.resource.Resources.multiplyAndAddTo; +import static org.apache.hadoop.yarn.util.resource.Resources.multiplyAndRoundDown; +import static org.apache.hadoop.yarn.util.resource.Resources.fitsIn; import static org.junit.Assert.assertEquals; +import static 
org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; public class TestResources { - + + static class ExtendedResources extends Resources { + public static Resource unbounded() { + return new FixedValueResource("UNBOUNDED", Long.MAX_VALUE); + } + + public static Resource none() { + return new FixedValueResource("NONE", 0L); + } + } + + private static final String EXTRA_RESOURCE_TYPE = "resource2"; + private String resourceTypesFile; + + private void setupExtraResourceType() throws Exception { + Configuration conf = new YarnConfiguration(); + resourceTypesFile = + TestResourceUtils.setupResourceTypes(conf, "resource-types-3.xml"); + } + + private void unsetExtraResourceType() { + deleteResourceTypesFile(); + ResourceUtils.resetResourceTypes(); + } + + private void deleteResourceTypesFile() { + if (resourceTypesFile != null && !resourceTypesFile.isEmpty()) { + File resourceFile = new File(resourceTypesFile); + resourceFile.delete(); + } + } + + @Before + public void setup() throws Exception { + setupExtraResourceType(); + } + + @After + public void teardown() { + deleteResourceTypesFile(); + } + public Resource createResource(long memory, int vCores) { return Resource.newInstance(memory, vCores); } - @Test(timeout=10000) + public Resource createResource(long memory, int vCores, long resource2) { + Resource ret = Resource.newInstance(memory, vCores); + ret.setResourceInformation(EXTRA_RESOURCE_TYPE, + ResourceInformation.newInstance(EXTRA_RESOURCE_TYPE, resource2)); + return ret; + } + + @Test(timeout = 10000) public void testCompareToWithUnboundedResource() { - assertTrue(Resources.unbounded().compareTo( - createResource(Long.MAX_VALUE, Integer.MAX_VALUE)) == 0); - assertTrue(Resources.unbounded().compareTo( - createResource(Long.MAX_VALUE, 0)) > 0); - assertTrue(Resources.unbounded().compareTo( - createResource(0, Integer.MAX_VALUE)) > 0); + unsetExtraResourceType(); + Resource unboundedClone = Resources.clone(ExtendedResources.unbounded()); + assertTrue(unboundedClone + .compareTo(createResource(Long.MAX_VALUE, Integer.MAX_VALUE)) == 0); + assertTrue(unboundedClone.compareTo(createResource(Long.MAX_VALUE, 0)) > 0); + assertTrue( + unboundedClone.compareTo(createResource(0, Integer.MAX_VALUE)) > 0); } - @Test(timeout=10000) + @Test(timeout = 10000) public void testCompareToWithNoneResource() { assertTrue(Resources.none().compareTo(createResource(0, 0)) == 0); - assertTrue(Resources.none().compareTo( - createResource(1, 0)) < 0); - assertTrue(Resources.none().compareTo( - createResource(0, 1)) < 0); + assertTrue(Resources.none().compareTo(createResource(1, 0)) < 0); + assertTrue(Resources.none().compareTo(createResource(0, 1)) < 0); + assertTrue(Resources.none().compareTo(createResource(0, 0, 0)) == 0); + assertTrue(Resources.none().compareTo(createResource(1, 0, 0)) < 0); + assertTrue(Resources.none().compareTo(createResource(0, 1, 0)) < 0); + assertTrue(Resources.none().compareTo(createResource(0, 0, 1)) < 0); } @Test(timeout=10000) @@ -69,4 +136,131 @@ public void testMultipleRoundUp() { assertEquals(memoryErrorMsg, result.getMemorySize(), 0); assertEquals(vcoreErrorMsg, result.getVirtualCores(), 0); } + + @Test(timeout = 1000) + public void testFitsIn() { + assertTrue(fitsIn(createResource(1, 1), createResource(2, 2))); + assertTrue(fitsIn(createResource(2, 2), createResource(2, 2))); + assertFalse(fitsIn(createResource(2, 2), createResource(1, 1))); + assertFalse(fitsIn(createResource(1, 2), createResource(2, 1))); + assertFalse(fitsIn(createResource(2, 1), 
createResource(1, 2))); + assertTrue(fitsIn(createResource(1, 1, 1), createResource(2, 2, 2))); + assertTrue(fitsIn(createResource(1, 1, 0), createResource(2, 2, 0))); + assertTrue(fitsIn(createResource(1, 1, 1), createResource(2, 2, 2))); + } + + @Test(timeout = 1000) + public void testComponentwiseMin() { + assertEquals(createResource(1, 1), + componentwiseMin(createResource(1, 1), createResource(2, 2))); + assertEquals(createResource(1, 1), + componentwiseMin(createResource(2, 2), createResource(1, 1))); + assertEquals(createResource(1, 1), + componentwiseMin(createResource(1, 2), createResource(2, 1))); + assertEquals(createResource(1, 1, 1), + componentwiseMin(createResource(1, 1, 1), createResource(2, 2, 2))); + assertEquals(createResource(1, 1, 0), + componentwiseMin(createResource(2, 2, 2), createResource(1, 1))); + assertEquals(createResource(1, 1, 2), + componentwiseMin(createResource(1, 2, 2), createResource(2, 1, 3))); + } + + @Test + public void testComponentwiseMax() { + assertEquals(createResource(2, 2), + componentwiseMax(createResource(1, 1), createResource(2, 2))); + assertEquals(createResource(2, 2), + componentwiseMax(createResource(2, 2), createResource(1, 1))); + assertEquals(createResource(2, 2), + componentwiseMax(createResource(1, 2), createResource(2, 1))); + assertEquals(createResource(2, 2, 2), + componentwiseMax(createResource(1, 1, 1), createResource(2, 2, 2))); + assertEquals(createResource(2, 2, 2), + componentwiseMax(createResource(2, 2, 2), createResource(1, 1))); + assertEquals(createResource(2, 2, 3), + componentwiseMax(createResource(1, 2, 2), createResource(2, 1, 3))); + assertEquals(createResource(2, 2, 1), + componentwiseMax(createResource(2, 2, 0), createResource(2, 1, 1))); + } + + @Test + public void testAdd() { + assertEquals(createResource(2, 3), + add(createResource(1, 1), createResource(1, 2))); + assertEquals(createResource(3, 2), + add(createResource(1, 1), createResource(2, 1))); + assertEquals(createResource(2, 2, 0), + add(createResource(1, 1, 0), createResource(1, 1, 0))); + assertEquals(createResource(2, 2, 3), + add(createResource(1, 1, 1), createResource(1, 1, 2))); + } + + @Test + public void testSubtract() { + assertEquals(createResource(1, 0), + subtract(createResource(2, 1), createResource(1, 1))); + assertEquals(createResource(0, 1), + subtract(createResource(1, 2), createResource(1, 1))); + assertEquals(createResource(2, 2, 0), + subtract(createResource(3, 3, 0), createResource(1, 1, 0))); + assertEquals(createResource(1, 1, 2), + subtract(createResource(2, 2, 3), createResource(1, 1, 1))); + } + + @Test + public void testClone() { + assertEquals(createResource(1, 1), Resources.clone(createResource(1, 1))); + assertEquals(createResource(1, 1, 0), + Resources.clone(createResource(1, 1))); + assertEquals(createResource(1, 1), + Resources.clone(createResource(1, 1, 0))); + assertEquals(createResource(1, 1, 2), + Resources.clone(createResource(1, 1, 2))); + } + + @Test + public void testMultiply() { + assertEquals(createResource(4, 2), multiply(createResource(2, 1), 2)); + assertEquals(createResource(4, 2, 0), multiply(createResource(2, 1), 2)); + assertEquals(createResource(2, 4), multiply(createResource(1, 2), 2)); + assertEquals(createResource(2, 4, 0), multiply(createResource(1, 2), 2)); + assertEquals(createResource(6, 6, 0), multiply(createResource(3, 3, 0), 2)); + assertEquals(createResource(4, 4, 6), multiply(createResource(2, 2, 3), 2)); + } + + @Test + public void testMultiplyAndRoundDown() { + 
assertEquals(createResource(4, 1),
+        multiplyAndRoundDown(createResource(3, 1), 1.5));
+    assertEquals(createResource(4, 1, 0),
+        multiplyAndRoundDown(createResource(3, 1), 1.5));
+    assertEquals(createResource(1, 4),
+        multiplyAndRoundDown(createResource(1, 3), 1.5));
+    assertEquals(createResource(1, 4, 0),
+        multiplyAndRoundDown(createResource(1, 3), 1.5));
+    assertEquals(createResource(7, 7, 0),
+        multiplyAndRoundDown(createResource(3, 3, 0), 2.5));
+    assertEquals(createResource(2, 2, 7),
+        multiplyAndRoundDown(createResource(1, 1, 3), 2.5));
+  }
+
+  @Test
+  public void testMultiplyAndAddTo() throws Exception {
+    unsetExtraResourceType();
+    setupExtraResourceType();
+    assertEquals(createResource(6, 4),
+        multiplyAndAddTo(createResource(3, 1), createResource(2, 2), 1.5));
+    assertEquals(createResource(6, 4, 0),
+        multiplyAndAddTo(createResource(3, 1), createResource(2, 2), 1.5));
+    assertEquals(createResource(4, 7),
+        multiplyAndAddTo(createResource(1, 1), createResource(2, 4), 1.5));
+    assertEquals(createResource(4, 7, 0),
+        multiplyAndAddTo(createResource(1, 1), createResource(2, 4), 1.5));
+    assertEquals(createResource(6, 4, 0),
+        multiplyAndAddTo(createResource(3, 1, 0), createResource(2, 2, 0),
+            1.5));
+    assertEquals(createResource(6, 4, 6),
+        multiplyAndAddTo(createResource(3, 1, 2), createResource(2, 2, 3),
+            1.5));
+  }
 }
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/resources/resource-types/node-resources-1.xml hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/resources/resource-types/node-resources-1.xml
new file mode 100644
index 00000000000..f00573e3077
--- /dev/null
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/resources/resource-types/node-resources-1.xml
@@ -0,0 +1,29 @@
+<?xml version="1.0"?>
+<!--
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. See accompanying LICENSE file.
+-->
+<configuration>
+
+ <property>
+   <name>yarn.nodemanager.resource.memory-mb</name>
+   <value>1024</value>
+ </property>
+
+ <property>
+   <name>yarn.nodemanager.resource.vcores</name>
+   <value>1</value>
+ </property>
+
+</configuration>
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/resources/resource-types/node-resources-2.xml hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/resources/resource-types/node-resources-2.xml
new file mode 100644
index 00000000000..382d5dd2ca2
--- /dev/null
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/resources/resource-types/node-resources-2.xml
@@ -0,0 +1,44 @@
+<?xml version="1.0"?>
+<!--
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. See accompanying LICENSE file.
+-->
+<configuration>
+
+ <property>
+   <name>yarn.nodemanager.resource-type.memory-mb</name>
+   <value>1024Mi</value>
+ </property>
+
+ <property>
+   <name>yarn.nodemanager.resource-type.vcores</name>
+   <value>1</value>
+ </property>
+
+ <property>
+   <name>yarn.nodemanager.resource-type.resource1</name>
+   <value>5Gi</value>
+ </property>
+
+ <property>
+   <name>yarn.nodemanager.resource-type.resource2</name>
+   <value>2m</value>
+ </property>
+
+ <property>
+   <name>yarn.nodemanager.resource-type.yarn.io/gpu</name>
+   <value>1</value>
+ </property>
+
+</configuration>
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/resources/resource-types/resource-types-1.xml hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/resources/resource-types/resource-types-1.xml
new file mode 100644
index 00000000000..3ec106dfbb2
--- /dev/null
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/resources/resource-types/resource-types-1.xml
@@ -0,0 +1,18 @@
+<?xml version="1.0"?>
+<!--
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. See accompanying LICENSE file.
+-->
+<configuration>
+</configuration>
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/resources/resource-types/resource-types-2.xml hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/resources/resource-types/resource-types-2.xml
new file mode 100644
index 00000000000..6e5885ed7d7
--- /dev/null
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/resources/resource-types/resource-types-2.xml
@@ -0,0 +1,29 @@
+<?xml version="1.0"?>
+<!--
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. See accompanying LICENSE file.
+-->
+<configuration>
+
+ <property>
+   <name>yarn.resource-types</name>
+   <value>resource1</value>
+ </property>
+
+ <property>
+   <name>yarn.resource-types.resource1.units</name>
+   <value>G</value>
+ </property>
+
+</configuration>
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/resources/resource-types/resource-types-3.xml hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/resources/resource-types/resource-types-3.xml
new file mode 100644
index 00000000000..8fd6fefa8f1
--- /dev/null
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/resources/resource-types/resource-types-3.xml
@@ -0,0 +1,24 @@
+<?xml version="1.0"?>
+<!--
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. See accompanying LICENSE file.
+-->
+<configuration>
+
+ <property>
+   <name>yarn.resource-types</name>
+   <value>resource2</value>
+ </property>
+
+</configuration>
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/resources/resource-types/resource-types-4.xml hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/resources/resource-types/resource-types-4.xml
new file mode 100644
index 00000000000..ea8d2bdb8f2
--- /dev/null
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/resources/resource-types/resource-types-4.xml
@@ -0,0 +1,39 @@
+<?xml version="1.0"?>
+<!--
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. See accompanying LICENSE file.
+-->
+<configuration>
+
+ <property>
+   <name>yarn.resource-types</name>
+   <value>resource1,resource2,yarn.io/gpu</value>
+ </property>
+
+ <property>
+   <name>yarn.resource-types.resource1.units</name>
+   <value>G</value>
+ </property>
+
+ <property>
+   <name>yarn.resource-types.resource2.units</name>
+   <value>m</value>
+ </property>
+
+ <property>
+   <name>yarn.resource-types.yarn.io/gpu.units</name>
+   <value></value>
+ </property>
+
+</configuration>
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/resources/resource-types/resource-types-error-1.xml hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/resources/resource-types/resource-types-error-1.xml
new file mode 100644
index 00000000000..d1942f2c97f
--- /dev/null
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/resources/resource-types/resource-types-error-1.xml
@@ -0,0 +1,29 @@
+<?xml version="1.0"?>
+<!--
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. See accompanying LICENSE file.
+-->
+<configuration>
+
+ <property>
+   <name>yarn.resource-types</name>
+   <value>memory-mb,resource1</value>
+ </property>
+
+ <property>
+   <name>yarn.resource-types.resource1.calculator-units</name>
+   <value>G</value>
+ </property>
+
+</configuration>
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/resources/resource-types/resource-types-error-2.xml hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/resources/resource-types/resource-types-error-2.xml
new file mode 100644
index 00000000000..fa43b6c14ef
--- /dev/null
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/resources/resource-types/resource-types-error-2.xml
@@ -0,0 +1,33 @@
+<?xml version="1.0"?>
+<!--
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. See accompanying LICENSE file.
+-->
+<configuration>
+
+ <property>
+   <name>yarn.resource-types</name>
+   <value>vcores,resource1</value>
+ </property>
+
+ <property>
+   <name>yarn.resource-types.resource1.calculator-units</name>
+   <value>G</value>
+ </property>
+
+ <property>
+   <name>yarn.resource-types.vcores.units</name>
+   <value>Az</value>
+ </property>
+
+</configuration>
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/resources/resource-types/resource-types-error-3.xml hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/resources/resource-types/resource-types-error-3.xml
new file mode 100644
index 00000000000..539d657692e
--- /dev/null
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/resources/resource-types/resource-types-error-3.xml
@@ -0,0 +1,29 @@
+<?xml version="1.0"?>
+<!--
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. See accompanying LICENSE file.
+-->
+<configuration>
+
+ <property>
+   <name>yarn.resource-types</name>
+   <value>resource1,resource1</value>
+ </property>
+
+ <property>
+   <name>yarn.resource-types.resource1.calculator-units</name>
+   <value>A</value>
+ </property>
+
+</configuration>
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/resources/resource-types/resource-types-error-4.xml hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/resources/resource-types/resource-types-error-4.xml
new file mode 100644
index 00000000000..c8eb7662097
--- /dev/null
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/resources/resource-types/resource-types-error-4.xml
@@ -0,0 +1,24 @@
+<?xml version="1.0"?>
+<!--
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. See accompanying LICENSE file.
+-->
+<configuration>
+
+ <property>
+   <name>yarn.resource-types</name>
+   <value>memory,resource1</value>
+ </property>
+
+</configuration>
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/main/java/org/apache/hadoop/yarn/server/applicationhistoryservice/ApplicationHistoryManagerOnTimelineStore.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/main/java/org/apache/hadoop/yarn/server/applicationhistoryservice/ApplicationHistoryManagerOnTimelineStore.java
index ca78f063c65..a67ada4e33f 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/main/java/org/apache/hadoop/yarn/server/applicationhistoryservice/ApplicationHistoryManagerOnTimelineStore.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/main/java/org/apache/hadoop/yarn/server/applicationhistoryservice/ApplicationHistoryManagerOnTimelineStore.java
@@ -46,6 +46,7 @@
 import org.apache.hadoop.yarn.api.records.NodeId;
 import org.apache.hadoop.yarn.api.records.Priority;
 import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.api.records.ResourceInformation;
 import org.apache.hadoop.yarn.api.records.YarnApplicationAttemptState;
 import org.apache.hadoop.yarn.api.records.YarnApplicationState;
 import org.apache.hadoop.yarn.api.records.timeline.TimelineEntities;
@@ -342,9 +343,20 @@ private static ApplicationReportExt convertToApplicationReport(
           ApplicationMetricsConstants.APP_MEM_PREEMPT_METRICS);
       long preemptedVcoreSeconds = parseLong(entityInfo,
           ApplicationMetricsConstants.APP_CPU_PREEMPT_METRICS);
-      appResources = ApplicationResourceUsageReport.newInstance(0, 0, null,
-          null, null, memorySeconds, vcoreSeconds, 0, 0,
-          preemptedMemorySeconds, preemptedVcoreSeconds);
+      Map<String, Long> resourceSecondsMap = new HashMap<>();
+      Map<String, Long> preemptedResourceSecondsMap = new HashMap<>();
+      resourceSecondsMap
+          .put(ResourceInformation.MEMORY_MB.getName(), memorySeconds);
+      resourceSecondsMap
+          .put(ResourceInformation.VCORES.getName(), vcoreSeconds);
+      preemptedResourceSecondsMap.put(ResourceInformation.MEMORY_MB.getName(),
+          preemptedMemorySeconds);
+      preemptedResourceSecondsMap
+          .put(ResourceInformation.VCORES.getName(), preemptedVcoreSeconds);
+
+      appResources = ApplicationResourceUsageReport
+          .newInstance(0, 0, null, null, null, resourceSecondsMap, 0, 0,
+              preemptedResourceSecondsMap);
     }

     if (entityInfo.containsKey(ApplicationMetricsConstants.APP_TAGS_INFO)) {
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/utils/BuilderUtils.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/utils/BuilderUtils.java
index 27ee855965c..0f0d8b1b729 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/utils/BuilderUtils.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/utils/BuilderUtils.java
@@ -65,8 +65,6 @@
 import org.apache.hadoop.yarn.api.records.Token;
 import org.apache.hadoop.yarn.api.records.URL;
 import org.apache.hadoop.yarn.api.records.YarnApplicationState;
-import org.apache.hadoop.yarn.conf.YarnConfiguration;
-import org.apache.hadoop.yarn.exceptions.YarnException;
 import org.apache.hadoop.yarn.factories.RecordFactory;
 import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
 import org.apache.hadoop.yarn.nodelabels.CommonNodeLabelsManager;
@@ -436,12 +434,12 @@ public static ApplicationSubmissionContext newApplicationSubmissionContext(
       queue, priority, amContainer, isUnmanagedAM, cancelTokensWhenComplete,
       maxAppAttempts, resource, null);
   }
-  
+
   public static ApplicationResourceUsageReport newApplicationResourceUsageReport(
       int numUsedContainers, int numReservedContainers, Resource usedResources,
-      Resource reservedResources, Resource neededResources, long memorySeconds,
-      long vcoreSeconds, long preemptedMemorySeconds,
-      long preemptedVcoreSeconds) {
+      Resource reservedResources, Resource neededResources,
+      Map<String, Long> resourceSecondsMap,
+      Map<String, Long> preemptedResourceSecondsMap) {
     ApplicationResourceUsageReport report =
         recordFactory.newRecordInstance(ApplicationResourceUsageReport.class);
     report.setNumUsedContainers(numUsedContainers);
@@ -449,10 +447,8 @@ public static ApplicationResourceUsageReport newApplicationResourceUsageReport(
     report.setUsedResources(usedResources);
     report.setReservedResources(reservedResources);
     report.setNeededResources(neededResources);
-    report.setMemorySeconds(memorySeconds);
-    report.setVcoreSeconds(vcoreSeconds);
-    report.setPreemptedMemorySeconds(preemptedMemorySeconds);
-    report.setPreemptedVcoreSeconds(preemptedVcoreSeconds);
+    report.setResourceSecondsMap(resourceSecondsMap);
+    report.setPreemptedResourceSecondsMap(preemptedResourceSecondsMap);
     return report;
   }
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/yarn/server/MockResourceManagerFacade.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/yarn/server/MockResourceManagerFacade.java
index df75f81e54a..39a10922607 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/yarn/server/MockResourceManagerFacade.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/yarn/server/MockResourceManagerFacade.java
@@ -41,6 +41,8 @@
 import org.apache.hadoop.yarn.api.protocolrecords.FailApplicationAttemptResponse;
 import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterRequest;
 import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterResponse;
+import org.apache.hadoop.yarn.api.protocolrecords.GetAllResourceTypeInfoRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.GetAllResourceTypeInfoResponse;
 import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationAttemptReportRequest;
 import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationAttemptReportResponse;
 import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationAttemptsRequest;
@@ -896,4 +898,10 @@ public RefreshClusterMaxPriorityResponse refreshClusterMaxPriority(
     return new String[0];
   }
+
+  @Override
+  public GetAllResourceTypeInfoResponse getResourceTypeInfo(
+      GetAllResourceTypeInfoRequest request) throws YarnException, IOException {
+    return null;
+  }
 }
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/CMakeLists.txt hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/CMakeLists.txt
index 0b1c3e9d805..e9f8aff2f46 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/CMakeLists.txt
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/CMakeLists.txt
@@ -101,9 +101,11 @@ add_library(container
     main/native/container-executor/impl/container-executor.c
     main/native/container-executor/impl/get_executable.c
     main/native/container-executor/impl/utils/string-utils.c
+    main/native/container-executor/impl/utils/docker-util.c
main/native/container-executor/impl/utils/path-utils.c + main/native/container-executor/impl/modules/cgroups/cgroups-operations.c main/native/container-executor/impl/modules/common/module-configs.c - main/native/container-executor/impl/utils/docker-util.c + main/native/container-executor/impl/modules/gpu/gpu-module.c ) add_executable(container-executor @@ -135,6 +137,8 @@ add_executable(cetest main/native/container-executor/test/utils/test-string-utils.cc main/native/container-executor/test/utils/test-path-utils.cc main/native/container-executor/test/test_util.cc - main/native/container-executor/test/utils/test_docker_util.cc) + main/native/container-executor/test/utils/test_docker_util.cc + main/native/container-executor/test/modules/cgroups/test-cgroups-module.cc + main/native/container-executor/test/modules/gpu/test-gpu-module.cc) target_link_libraries(cetest gtest container) output_directory(cetest test) diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerExecutor.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerExecutor.java index 1851a1d8f5f..5f13bb445af 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerExecutor.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerExecutor.java @@ -112,9 +112,10 @@ public Configuration getConf() { * Run the executor initialization steps. * Verify that the necessary configs and permissions are in place. * + * @param nmContext Context of NM * @throws IOException if initialization fails */ - public abstract void init() throws IOException; + public abstract void init(Context nmContext) throws IOException; /** * This function localizes the JAR file on-demand. 
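
Note on the ContainerExecutor change just above: init() now takes the NodeManager Context, which is how the resource plugin machinery introduced by this patch becomes visible to executors at startup. A minimal sketch of a custom executor consuming the new parameter follows; the AuditingContainerExecutor class is hypothetical and only illustrates the changed contract, while Context#getResourcePluginManager and ResourcePluginManager#getNameToPlugins are the accessors added elsewhere in this patch:

    import java.io.IOException;

    import org.apache.hadoop.yarn.server.nodemanager.Context;
    import org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor;
    import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.ResourcePluginManager;
    import org.slf4j.Logger;
    import org.slf4j.LoggerFactory;

    // Hypothetical subclass showing the new init(Context) contract.
    public class AuditingContainerExecutor extends DefaultContainerExecutor {
      private static final Logger LOG =
          LoggerFactory.getLogger(AuditingContainerExecutor.class);

      @Override
      public void init(Context nmContext) throws IOException {
        super.init(nmContext);
        // The context now exposes the ResourcePluginManager, so an
        // executor can see which resource plugins (e.g. GPU) are loaded.
        ResourcePluginManager rpm = nmContext.getResourcePluginManager();
        if (rpm != null && rpm.getNameToPlugins() != null) {
          LOG.info("Resource plugins visible to the executor: "
              + rpm.getNameToPlugins().keySet());
        }
      }
    }
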
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/Context.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/Context.java index 33cefea91f5..7e160346428 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/Context.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/Context.java @@ -34,6 +34,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.ResourcePluginManager; import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService; import org.apache.hadoop.yarn.server.scheduler.OpportunisticContainerAllocator; import org.apache.hadoop.yarn.server.nodemanager.security.NMContainerTokenSecretManager; @@ -122,4 +123,6 @@ void setNMTimelinePublisher(NMTimelinePublisher nmMetricsPublisher); NMTimelinePublisher getNMTimelinePublisher(); + + ResourcePluginManager getResourcePluginManager(); } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DefaultContainerExecutor.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DefaultContainerExecutor.java index b54b7f59664..e659c3e9897 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DefaultContainerExecutor.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DefaultContainerExecutor.java @@ -134,7 +134,7 @@ protected void setScriptExecutable(Path script, String owner) } @Override - public void init() throws IOException { + public void init(Context nmContext) throws IOException { // nothing to do or verify here } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DockerContainerExecutor.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DockerContainerExecutor.java index a044cb66a5c..6c2eb967ab6 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DockerContainerExecutor.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DockerContainerExecutor.java @@ -117,7 +117,7 @@ protected void copyFile(Path src, Path dst, String owner) throws IOException { } @Override - public void init() throws IOException { + public void init(Context nmContext) throws IOException { String auth = getConf().get(CommonConfigurationKeys.HADOOP_SECURITY_AUTHENTICATION); if (auth != null && !auth.equals("simple")) { diff --git 
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java index c33d4be59a8..a1ec8206cb0 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java @@ -20,6 +20,7 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Optional; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandlerChain; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; @@ -281,7 +282,7 @@ protected PrivilegedOperationExecutor getPrivilegedOperationExecutor() { } @Override - public void init() throws IOException { + public void init(Context nmContext) throws IOException { Configuration conf = super.getConf(); // Send command to executor which will just start up, @@ -305,7 +306,7 @@ public void init() throws IOException { try { resourceHandlerChain = ResourceHandlerModule - .getConfiguredResourceHandlerChain(conf); + .getConfiguredResourceHandlerChain(conf, nmContext); if (LOG.isDebugEnabled()) { LOG.debug("Resource handler chain enabled = " + (resourceHandlerChain != null)); @@ -845,4 +846,9 @@ public void mountCgroups(List cgroupKVs, String hierarchy) e); } } + + @VisibleForTesting + public ResourceHandler getResourceHandler() { + return resourceHandlerChain; + } } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java index fcb5474e51f..536ac3a05e3 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java @@ -18,23 +18,7 @@ package org.apache.hadoop.yarn.server.nodemanager; -import java.io.IOException; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ConcurrentLinkedQueue; -import java.util.concurrent.ConcurrentMap; -import java.util.concurrent.ConcurrentSkipListMap; -import java.util.concurrent.atomic.AtomicBoolean; - -import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerEvent; -import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerImpl; -import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerState; -import org.apache.hadoop.yarn.state.MultiStateTransitionListener; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - +import com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.conf.Configuration; import 
org.apache.hadoop.fs.FileSystem; @@ -65,12 +49,16 @@ import org.apache.hadoop.yarn.server.api.protocolrecords.LogAggregationReport; import org.apache.hadoop.yarn.server.api.records.AppCollectorData; import org.apache.hadoop.yarn.server.api.records.NodeHealthStatus; -import org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManager; import org.apache.hadoop.yarn.server.nodemanager.collectormanager.NMCollectorService; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManager; import org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl; import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application; import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationState; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerEvent; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerImpl; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerState; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.ResourcePluginManager; import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics; import org.apache.hadoop.yarn.server.nodemanager.nodelabels.ConfigurationNodeLabelsProvider; import org.apache.hadoop.yarn.server.nodemanager.nodelabels.NodeLabelsProvider; @@ -78,14 +66,25 @@ import org.apache.hadoop.yarn.server.nodemanager.recovery.NMLeveldbStateStoreService; import org.apache.hadoop.yarn.server.nodemanager.recovery.NMNullStateStoreService; import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService; -import org.apache.hadoop.yarn.server.scheduler.OpportunisticContainerAllocator; import org.apache.hadoop.yarn.server.nodemanager.security.NMContainerTokenSecretManager; import org.apache.hadoop.yarn.server.nodemanager.security.NMTokenSecretManagerInNM; import org.apache.hadoop.yarn.server.nodemanager.timelineservice.NMTimelinePublisher; import org.apache.hadoop.yarn.server.nodemanager.webapp.WebServer; +import org.apache.hadoop.yarn.server.scheduler.OpportunisticContainerAllocator; import org.apache.hadoop.yarn.server.security.ApplicationACLsManager; +import org.apache.hadoop.yarn.state.MultiStateTransitionListener; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; -import com.google.common.annotations.VisibleForTesting; +import java.io.IOException; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentLinkedQueue; +import java.util.concurrent.ConcurrentMap; +import java.util.concurrent.ConcurrentSkipListMap; +import java.util.concurrent.atomic.AtomicBoolean; public class NodeManager extends CompositeService implements EventHandler { @@ -332,6 +331,18 @@ public static NodeHealthScriptRunner getNodeHealthScriptRunner(Configuration con nmCheckintervalTime, scriptTimeout, scriptArgs); } + @VisibleForTesting + protected ResourcePluginManager createResourcePluginManager() { + return new ResourcePluginManager(); + } + + @VisibleForTesting + protected ContainerExecutor createContainerExecutor(Configuration conf) { + return ReflectionUtils.newInstance( + conf.getClass(YarnConfiguration.NM_CONTAINER_EXECUTOR, + DefaultContainerExecutor.class, ContainerExecutor.class), conf); + } + @Override protected void serviceInit(Configuration 
conf) throws Exception { @@ -360,11 +371,22 @@ protected void serviceInit(Configuration conf) throws Exception { this.aclsManager = new ApplicationACLsManager(conf); - ContainerExecutor exec = ReflectionUtils.newInstance( - conf.getClass(YarnConfiguration.NM_CONTAINER_EXECUTOR, - DefaultContainerExecutor.class, ContainerExecutor.class), conf); + this.dirsHandler = new LocalDirsHandlerService(metrics); + + boolean isDistSchedulingEnabled = + conf.getBoolean(YarnConfiguration.DIST_SCHEDULING_ENABLED, + YarnConfiguration.DEFAULT_DIST_SCHEDULING_ENABLED); + + this.context = createNMContext(containerTokenSecretManager, + nmTokenSecretManager, nmStore, isDistSchedulingEnabled, conf); + + ResourcePluginManager pluginManager = createResourcePluginManager(); + pluginManager.initialize(context); + ((NMContext)context).setResourcePluginManager(pluginManager); + + ContainerExecutor exec = createContainerExecutor(conf); try { - exec.init(); + exec.init(context); } catch (IOException e) { throw new YarnRuntimeException("Failed to initialize container executor", e); } @@ -374,19 +396,11 @@ protected void serviceInit(Configuration conf) throws Exception { // NodeManager level dispatcher this.dispatcher = createNMDispatcher(); - dirsHandler = new LocalDirsHandlerService(metrics); nodeHealthChecker = new NodeHealthCheckerService( getNodeHealthScriptRunner(conf), dirsHandler); addService(nodeHealthChecker); - boolean isDistSchedulingEnabled = - conf.getBoolean(YarnConfiguration.DIST_SCHEDULING_ENABLED, - YarnConfiguration.DEFAULT_DIST_SCHEDULING_ENABLED); - - this.context = createNMContext(containerTokenSecretManager, - nmTokenSecretManager, nmStore, isDistSchedulingEnabled, conf); - ((NMContext)context).setContainerExecutor(exec); @@ -460,6 +474,12 @@ protected void serviceStop() throws Exception { try { super.serviceStop(); DefaultMetricsSystem.shutdown(); + + // Cleanup ResourcePluginManager + ResourcePluginManager rpm = context.getResourcePluginManager(); + if (rpm != null) { + rpm.cleanup(); + } } finally { // YARN-3641: NM's services stop get failed shouldn't block the // release of NMLevelDBStore. 
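
The serviceInit() reordering above guarantees that the NMContext, and the ResourcePluginManager stored in it, exist before the executor's init(Context) call; the matching serviceStop() hunk cleans the plugin manager up on shutdown. The two new @VisibleForTesting factory methods also make this wiring stubbable. A sketch of how a test might use them, under the assumption that a plain ResourcePluginManager instance (or a Mockito mock in a real test) is an acceptable stand-in:

    // Illustrative test wiring built only on the hooks added above.
    final ResourcePluginManager stubRpm = new ResourcePluginManager();
    NodeManager nm = new NodeManager() {
      @Override
      protected ResourcePluginManager createResourcePluginManager() {
        return stubRpm; // injected instead of the default instance
      }

      @Override
      protected ContainerExecutor createContainerExecutor(Configuration c) {
        // DefaultContainerExecutor avoids the LinuxContainerExecutor
        // binary requirements in a unit-test environment.
        DefaultContainerExecutor exec = new DefaultContainerExecutor();
        exec.setConf(c);
        return exec;
      }
    };
    nm.init(new YarnConfiguration()); // serviceInit runs with the stubs
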
@@ -607,6 +627,8 @@ protected void reregisterCollectors() { private NMTimelinePublisher nmTimelinePublisher; + private ResourcePluginManager resourcePluginManager; + public NMContext(NMContainerTokenSecretManager containerTokenSecretManager, NMTokenSecretManagerInNM nmTokenSecretManager, LocalDirsHandlerService dirsHandler, ApplicationACLsManager aclsManager, @@ -807,6 +829,15 @@ public void setNMTimelinePublisher(NMTimelinePublisher nmMetricsPublisher) { public NMTimelinePublisher getNMTimelinePublisher() { return nmTimelinePublisher; } + + public ResourcePluginManager getResourcePluginManager() { + return resourcePluginManager; + } + + public void setResourcePluginManager( + ResourcePluginManager resourcePluginManager) { + this.resourcePluginManager = resourcePluginManager; + } } /** diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java index b4356cb6abf..d776bdf3676 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java @@ -33,6 +33,9 @@ import java.util.Random; import java.util.Set; import java.util.concurrent.ConcurrentLinkedQueue; + +import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.ResourcePlugin; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.ResourcePluginManager; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -174,27 +177,29 @@ public NodeStatusUpdaterImpl(Context context, Dispatcher dispatcher, @Override protected void serviceInit(Configuration conf) throws Exception { - int memoryMb = NodeManagerHardwareUtils.getContainerMemoryMB(conf); + this.totalResource = NodeManagerHardwareUtils.getNodeResources(conf); + long memoryMb = totalResource.getMemorySize(); float vMemToPMem = conf.getFloat( - YarnConfiguration.NM_VMEM_PMEM_RATIO, - YarnConfiguration.DEFAULT_NM_VMEM_PMEM_RATIO); - int virtualMemoryMb = (int)Math.ceil(memoryMb * vMemToPMem); - - int virtualCores = NodeManagerHardwareUtils.getVCores(conf); - LOG.info("Nodemanager resources: memory set to " + memoryMb + "MB."); - LOG.info("Nodemanager resources: vcores set to " + virtualCores + "."); - - this.totalResource = Resource.newInstance(memoryMb, virtualCores); + YarnConfiguration.NM_VMEM_PMEM_RATIO, + YarnConfiguration.DEFAULT_NM_VMEM_PMEM_RATIO); + long virtualMemoryMb = (long)Math.ceil(memoryMb * vMemToPMem); + int virtualCores = totalResource.getVirtualCores(); + + // Update configured resources via plugins. 
+ updateConfiguredResourcesViaPlugins(totalResource); + + LOG.info("Nodemanager resources is set to: " + totalResource); + metrics.addResource(totalResource); // Get actual node physical resources - int physicalMemoryMb = memoryMb; + long physicalMemoryMb = memoryMb; int physicalCores = virtualCores; ResourceCalculatorPlugin rcp = ResourceCalculatorPlugin.getNodeResourceMonitorPlugin(conf); if (rcp != null) { - physicalMemoryMb = (int) (rcp.getPhysicalMemorySize() / (1024 * 1024)); + physicalMemoryMb = rcp.getPhysicalMemorySize() / (1024 * 1024); physicalCores = rcp.getNumProcessors(); } this.physicalResource = @@ -341,12 +346,27 @@ protected ResourceTracker getRMClient() throws IOException { return ServerRMProxy.createRMProxy(conf, ResourceTracker.class); } + private void updateConfiguredResourcesViaPlugins( + Resource configuredResource) throws YarnException { + ResourcePluginManager pluginManager = context.getResourcePluginManager(); + if (pluginManager != null && pluginManager.getNameToPlugins() != null) { + // Update configured resource + for (ResourcePlugin resourcePlugin : pluginManager.getNameToPlugins() + .values()) { + if (resourcePlugin.getNodeResourceHandlerInstance() != null) { + resourcePlugin.getNodeResourceHandlerInstance() + .updateConfiguredResource(configuredResource); + } + } + } + } + @VisibleForTesting protected void registerWithRM() throws YarnException, IOException { RegisterNodeManagerResponse regNMResponse; Set nodeLabels = nodeLabelsHandler.getNodeLabelsForRegistration(); - + // Synchronize NM-RM registration with // ContainerManagerImpl#increaseContainersResource and // ContainerManagerImpl#startContainers to avoid race condition @@ -357,6 +377,7 @@ protected void registerWithRM() RegisterNodeManagerRequest.newInstance(nodeId, httpPort, totalResource, nodeManagerVersionId, containerReports, getRunningApplications(), nodeLabels, physicalResource); + if (containerReports != null) { LOG.info("Registering with RM using containers :" + containerReports); } @@ -405,7 +426,7 @@ protected void registerWithRM() if (masterKey != null) { this.context.getContainerTokenSecretManager().setMasterKey(masterKey); } - + masterKey = regNMResponse.getNMTokenMasterKey(); if (masterKey != null) { this.context.getNMTokenSecretManager().setMasterKey(masterKey); @@ -732,7 +753,7 @@ public void removeVeryOldStoppedContainersFromCache() { } } } - + @Override public long getRMIdentifier() { return this.rmIdentifier; diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/Container.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/Container.java index b9d1e31c7e2..b5e3aa18a5b 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/Container.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/Container.java @@ -98,4 +98,11 @@ void sendPauseEvent(String description); Priority getPriority(); + + /** + * Get assigned resource mappings to the container. 
+ * + * @return Resource Mappings of the container + */ + ResourceMappings getResourceMappings(); } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java index 4675716ddea..e6c7bcee79c 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java @@ -188,6 +188,7 @@ private ReInitializationContext createContextForRollback() { private boolean recoveredAsKilled = false; private Context context; private ResourceSet resourceSet; + private ResourceMappings resourceMappings; public ContainerImpl(Configuration conf, Dispatcher dispatcher, ContainerLaunchContext launchContext, Credentials creds, @@ -245,6 +246,7 @@ public ContainerImpl(Configuration conf, Dispatcher dispatcher, stateMachine = stateMachineFactory.make(this, ContainerState.NEW, context.getContainerStateTransitionListener()); this.resourceSet = new ResourceSet(); + this.resourceMappings = new ResourceMappings(); } private static ContainerRetryContext configureRetryContext( @@ -285,6 +287,7 @@ public ContainerImpl(Configuration conf, Dispatcher dispatcher, this.remainingRetryAttempts = rcs.getRemainingRetryAttempts(); this.workDir = rcs.getWorkDir(); this.logDir = rcs.getLogDir(); + this.resourceMappings = rcs.getResourceMappings(); } private static final ContainerDiagnosticsUpdateTransition UPDATE_DIAGNOSTICS_TRANSITION = @@ -2172,4 +2175,14 @@ public boolean isRecovering() { public Priority getPriority() { return containerTokenIdentifier.getPriority(); } + + /** + * Get assigned resource mappings to the container. + * + * @return Resource Mappings of the container + */ + @Override + public ResourceMappings getResourceMappings() { + return resourceMappings; + } } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ResourceMappings.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ResourceMappings.java new file mode 100644 index 00000000000..d673341b01c --- /dev/null +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ResourceMappings.java @@ -0,0 +1,124 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager.containermanager.container;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.ObjectInputStream;
+import java.io.ObjectOutputStream;
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.commons.io.IOUtils;
+
+/**
+ * This class stores the resources assigned to a single container, keyed by
+ * resource type.
+ *
+ * Assigned resources are kept as a list of Serializable values, for example:
+ * "numa": ["numa0"]
+ * "gpu": ["0", "1", "2", "3"]
+ * "fpga": ["1", "3"]
+ *
+ * This will be used for NM restart container recovery.
+ */
+public class ResourceMappings {
+
+  private Map<String, AssignedResources> assignedResourcesMap =
+      new HashMap<>();
+
+  /**
+   * Get the resources assigned for a given resource type.
+   * @param resourceType the resource type
+   * @return list of assigned resources, empty if nothing is assigned
+   */
+  public List<Serializable> getAssignedResources(String resourceType) {
+    AssignedResources ar = assignedResourcesMap.get(resourceType);
+    if (null == ar) {
+      return Collections.emptyList();
+    }
+    return ar.getAssignedResources();
+  }
+
+  /**
+   * Adds the resources for a given resource type.
+   *
+   * @param resourceType Resource Type
+   * @param assigned Assigned resources to add
+   */
+  public void addAssignedResources(String resourceType,
+      AssignedResources assigned) {
+    assignedResourcesMap.put(resourceType, assigned);
+  }
+
+  /**
+   * Stores resources assigned to a container for a given resource type.
+   */
+  public static class AssignedResources implements Serializable {
+    private static final long serialVersionUID = -1059491941955757926L;
+    private List<Serializable> resources = Collections.emptyList();
+
+    public List<Serializable> getAssignedResources() {
+      return Collections.unmodifiableList(resources);
+    }
+
+    public void updateAssignedResources(List<Serializable> list) {
+      this.resources = new ArrayList<>(list);
+    }
+
+    @SuppressWarnings("unchecked")
+    public static AssignedResources fromBytes(byte[] bytes)
+        throws IOException {
+      ObjectInputStream ois = null;
+      List<Serializable> resources;
+      try {
+        ByteArrayInputStream bis = new ByteArrayInputStream(bytes);
+        ois = new ObjectInputStream(bis);
+        resources = (List<Serializable>) ois.readObject();
+      } catch (ClassNotFoundException e) {
+        throw new IOException(e);
+      } finally {
+        IOUtils.closeQuietly(ois);
+      }
+      AssignedResources ar = new AssignedResources();
+      ar.updateAssignedResources(resources);
+      return ar;
+    }
+
+    public byte[] toBytes() throws IOException {
+      ObjectOutputStream oos = null;
+      byte[] bytes;
+      try {
+        ByteArrayOutputStream bos = new ByteArrayOutputStream();
+        oos = new ObjectOutputStream(bos);
+        oos.writeObject(resources);
+        bytes = bos.toByteArray();
+      } finally {
+        IOUtils.closeQuietly(oos);
+      }
+      return bytes;
+    }
+  }
+}
\ No newline at end of file
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/privileged/PrivilegedOperation.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/privileged/PrivilegedOperation.java
index 8402a16339d..db0b2251578 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/privileged/PrivilegedOperation.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/privileged/PrivilegedOperation.java
@@ -51,6 +51,7 @@
   TC_READ_STATS("--tc-read-stats"),
   ADD_PID_TO_CGROUP(""), //no CLI switch supported yet.
   RUN_DOCKER_CMD("--run-docker"),
+  GPU("--module-gpu"),
   LIST_AS_USER(""); //no CLI switch supported yet.
private final String option; diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/ResourceHandlerChain.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/ResourceHandlerChain.java index 955d2169fec..72bf30ce871 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/ResourceHandlerChain.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/ResourceHandlerChain.java @@ -20,6 +20,7 @@ package org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources; +import com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; @@ -135,7 +136,8 @@ public ResourceHandlerChain(List resourceHandlers) { return allOperations; } - List getResourceHandlerList() { + @VisibleForTesting + public List getResourceHandlerList() { return Collections.unmodifiableList(resourceHandlers); } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/ResourceHandlerModule.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/ResourceHandlerModule.java index 3c61cd4b5be..ce850ab3b7c 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/ResourceHandlerModule.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/ResourceHandlerModule.java @@ -21,25 +21,28 @@ package org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources; import com.google.common.annotations.VisibleForTesting; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.server.nodemanager.Context; import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationExecutor; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.ResourcePlugin; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.ResourcePluginManager; import org.apache.hadoop.yarn.server.nodemanager.util.CgroupsLCEResourcesHandler; import org.apache.hadoop.yarn.server.nodemanager.util.DefaultLCEResourcesHandler; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.io.File; import java.io.IOException; -import java.util.Set; -import java.util.HashSet; -import java.util.Map; -import java.util.HashMap; -import java.util.Arrays; import java.util.ArrayList; +import 
java.util.Arrays; +import java.util.HashMap; +import java.util.HashSet; import java.util.List; +import java.util.Map; +import java.util.Set; /** * Provides mechanisms to get various resource handlers - cpu, memory, network, @@ -206,22 +209,41 @@ private static void addHandlerIfNotNull(List handlerList, } private static void initializeConfiguredResourceHandlerChain( - Configuration conf) throws ResourceHandlerException { + Configuration conf, Context nmContext) + throws ResourceHandlerException { ArrayList handlerList = new ArrayList<>(); addHandlerIfNotNull(handlerList, getOutboundBandwidthResourceHandler(conf)); addHandlerIfNotNull(handlerList, getDiskResourceHandler(conf)); addHandlerIfNotNull(handlerList, getMemoryResourceHandler(conf)); addHandlerIfNotNull(handlerList, getCGroupsCpuResourceHandler(conf)); + addHandlersFromConfiguredResourcePlugins(handlerList, conf, nmContext); resourceHandlerChain = new ResourceHandlerChain(handlerList); } + private static void addHandlersFromConfiguredResourcePlugins( + List handlerList, Configuration conf, + Context nmContext) throws ResourceHandlerException { + ResourcePluginManager pluginManager = nmContext.getResourcePluginManager(); + if (pluginManager != null) { + Map pluginMap = pluginManager.getNameToPlugins(); + if (pluginMap != null) { + for (ResourcePlugin plugin : pluginMap.values()) { + addHandlerIfNotNull(handlerList, plugin + .createResourceHandler(nmContext, + getInitializedCGroupsHandler(conf), + PrivilegedOperationExecutor.getInstance(conf))); + } + } + } + } + public static ResourceHandlerChain getConfiguredResourceHandlerChain( - Configuration conf) throws ResourceHandlerException { + Configuration conf, Context nmContext) throws ResourceHandlerException { if (resourceHandlerChain == null) { synchronized (ResourceHandlerModule.class) { if (resourceHandlerChain == null) { - initializeConfiguredResourceHandlerChain(conf); + initializeConfiguredResourceHandlerChain(conf, nmContext); } } } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/gpu/GpuResourceAllocator.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/gpu/GpuResourceAllocator.java new file mode 100644 index 00000000000..493aa7b7578 --- /dev/null +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/gpu/GpuResourceAllocator.java @@ -0,0 +1,245 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.gpu; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Sets; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.util.StringUtils; +import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.exceptions.ResourceNotFoundException; +import org.apache.hadoop.yarn.server.nodemanager.Context; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandlerException; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu.AssignedGpuDevice; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu.GpuDevice; + +import java.io.IOException; +import java.io.Serializable; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.TreeMap; +import java.util.TreeSet; + +import static org.apache.hadoop.yarn.api.records.ResourceInformation.GPU_URI; + +/** + * Allocate GPU resources according to requirements + */ +public class GpuResourceAllocator { + final static Log LOG = LogFactory.getLog(GpuResourceAllocator.class); + + private Set allowedGpuDevices = new TreeSet<>(); + private Map usedDevices = new TreeMap<>(); + private Context nmContext; + + public GpuResourceAllocator(Context ctx) { + this.nmContext = ctx; + } + + /** + * Contains allowed and denied devices + * Denied devices will be useful for cgroups devices module to do blacklisting + */ + static class GpuAllocation { + private Set allowed = Collections.emptySet(); + private Set denied = Collections.emptySet(); + + GpuAllocation(Set allowed, Set denied) { + if (allowed != null) { + this.allowed = ImmutableSet.copyOf(allowed); + } + if (denied != null) { + this.denied = ImmutableSet.copyOf(denied); + } + } + + public Set getAllowedGPUs() { + return allowed; + } + + public Set getDeniedGPUs() { + return denied; + } + } + + /** + * Add GPU to allowed list + * @param gpuDevice gpu device + */ + public synchronized void addGpu(GpuDevice gpuDevice) { + allowedGpuDevices.add(gpuDevice); + } + + private String getResourceHandlerExceptionMessage(int numRequestedGpuDevices, + ContainerId containerId) { + return "Failed to find enough GPUs, requestor=" + containerId + + ", #RequestedGPUs=" + numRequestedGpuDevices + ", #availableGpus=" + + getAvailableGpus(); + } + + @VisibleForTesting + public synchronized int getAvailableGpus() { + return allowedGpuDevices.size() - usedDevices.size(); + } + + public synchronized void recoverAssignedGpus(ContainerId containerId) + throws ResourceHandlerException { + Container c = nmContext.getContainers().get(containerId); + if (null == c) { + throw new ResourceHandlerException( + "This shouldn't happen, cannot find container with id=" + + containerId); + } + + for (Serializable gpuDeviceSerializable : c.getResourceMappings() + .getAssignedResources(GPU_URI)) { + if (!(gpuDeviceSerializable instanceof GpuDevice)) { + throw new ResourceHandlerException( + "Trying to recover device id, however it" + + " is not GpuDevice, this 
shouldn't happen"); + } + + GpuDevice gpuDevice = (GpuDevice) gpuDeviceSerializable; + + // Make sure it is in allowed GPU device. + if (!allowedGpuDevices.contains(gpuDevice)) { + throw new ResourceHandlerException( + "Try to recover device = " + gpuDevice + + " however it is not in allowed device list:" + StringUtils + .join(",", allowedGpuDevices)); + } + + // Make sure it is not occupied by anybody else + if (usedDevices.containsKey(gpuDevice)) { + throw new ResourceHandlerException( + "Try to recover device id = " + gpuDevice + + " however it is already assigned to container=" + usedDevices + .get(gpuDevice) + ", please double check what happened."); + } + + usedDevices.put(gpuDevice, containerId); + } + } + + /** + * Get number of requested GPUs from resource. + * @param requestedResource requested resource + * @return #gpus. + */ + public static int getRequestedGpus(Resource requestedResource) { + try { + return Long.valueOf(requestedResource.getResourceValue( + GPU_URI)).intValue(); + } catch (ResourceNotFoundException e) { + return 0; + } + } + + /** + * Assign GPU to requestor + * @param container container to allocate + * @return allocation results. + * @throws ResourceHandlerException When failed to assign GPUs. + */ + public synchronized GpuAllocation assignGpus(Container container) + throws ResourceHandlerException { + Resource requestedResource = container.getResource(); + ContainerId containerId = container.getContainerId(); + int numRequestedGpuDevices = getRequestedGpus(requestedResource); + // Assign Gpus to container if requested some. + if (numRequestedGpuDevices > 0) { + if (numRequestedGpuDevices > getAvailableGpus()) { + throw new ResourceHandlerException( + getResourceHandlerExceptionMessage(numRequestedGpuDevices, + containerId)); + } + + Set assignedGpus = new TreeSet<>(); + + for (GpuDevice gpu : allowedGpuDevices) { + if (!usedDevices.containsKey(gpu)) { + usedDevices.put(gpu, containerId); + assignedGpus.add(gpu); + if (assignedGpus.size() == numRequestedGpuDevices) { + break; + } + } + } + + // Record in state store if we allocated anything + if (!assignedGpus.isEmpty()) { + try { + // Update state store. 
+ nmContext.getNMStateStore().storeAssignedResources(container, GPU_URI, + new ArrayList(assignedGpus)); + } catch (IOException e) { + cleanupAssignGpus(containerId); + throw new ResourceHandlerException(e); + } + } + + return new GpuAllocation(assignedGpus, + Sets.difference(allowedGpuDevices, assignedGpus)); + } + return new GpuAllocation(null, allowedGpuDevices); + } + + /** + * Clean up all Gpus assigned to containerId + * @param containerId containerId + */ + public synchronized void cleanupAssignGpus(ContainerId containerId) { + Iterator> iter = + usedDevices.entrySet().iterator(); + while (iter.hasNext()) { + if (iter.next().getValue().equals(containerId)) { + iter.remove(); + } + } + } + + @VisibleForTesting + public synchronized Map getDeviceAllocationMappingCopy() { + return new HashMap<>(usedDevices); + } + + public synchronized List getAllowedGpusCopy() { + return new ArrayList<>(allowedGpuDevices); + } + + public synchronized List getAssignedGpusCopy() { + List assigns = new ArrayList<>(); + for (Map.Entry entry : usedDevices.entrySet()) { + assigns.add(new AssignedGpuDevice(entry.getKey().getIndex(), + entry.getKey().getMinorNumber(), entry.getValue())); + } + return assigns; + } +} diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/gpu/GpuResourceHandlerImpl.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/gpu/GpuResourceHandlerImpl.java new file mode 100644 index 00000000000..500382162fb --- /dev/null +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/gpu/GpuResourceHandlerImpl.java @@ -0,0 +1,160 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/gpu/GpuResourceHandlerImpl.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/gpu/GpuResourceHandlerImpl.java
new file mode 100644
index 00000000000..500382162fb
--- /dev/null
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/gpu/GpuResourceHandlerImpl.java
@@ -0,0 +1,160 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.gpu;
+
+import com.google.common.annotations.VisibleForTesting;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.util.StringUtils;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.exceptions.YarnException;
+import org.apache.hadoop.yarn.server.nodemanager.Context;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperation;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationException;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationExecutor;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.CGroupsHandler;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandler;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandlerException;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu.GpuDevice;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu.GpuDiscoverer;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+public class GpuResourceHandlerImpl implements ResourceHandler {
+  final static Log LOG = LogFactory
+      .getLog(GpuResourceHandlerImpl.class);
+
+  // These options will be passed to container-executor on the command line.
+  public static final String EXCLUDED_GPUS_CLI_OPTION = "--excluded_gpus";
+  public static final String CONTAINER_ID_CLI_OPTION = "--container_id";
+
+  private GpuResourceAllocator gpuAllocator;
+  private CGroupsHandler cGroupsHandler;
+  private PrivilegedOperationExecutor privilegedOperationExecutor;
+
+  public GpuResourceHandlerImpl(Context nmContext,
+      CGroupsHandler cGroupsHandler,
+      PrivilegedOperationExecutor privilegedOperationExecutor) {
+    this.cGroupsHandler = cGroupsHandler;
+    this.privilegedOperationExecutor = privilegedOperationExecutor;
+    gpuAllocator = new GpuResourceAllocator(nmContext);
+  }
+
+  @Override
+  public List<PrivilegedOperation> bootstrap(Configuration configuration)
+      throws ResourceHandlerException {
+    List<GpuDevice> usableGpus;
+    try {
+      usableGpus = GpuDiscoverer.getInstance()
+          .getGpusUsableByYarn();
+      if (usableGpus == null || usableGpus.isEmpty()) {
+        String message = "GPU is enabled on the NodeManager, but couldn't find "
+            + "any usable GPU devices, please double check the configuration.";
+        LOG.error(message);
+        throw new ResourceHandlerException(message);
+      }
+    } catch (YarnException e) {
+      LOG.error("Exception when trying to get usable GPU devices", e);
+      throw new ResourceHandlerException(e);
+    }
+
+    for (GpuDevice gpu : usableGpus) {
+      gpuAllocator.addGpu(gpu);
+    }
+
+    // And initialize cgroups
+    this.cGroupsHandler.initializeCGroupController(
+        CGroupsHandler.CGroupController.DEVICES);
+
+    return null;
+  }
+
+  @Override
+  public synchronized List<PrivilegedOperation> preStart(Container container)
+      throws ResourceHandlerException {
+    String containerIdStr = container.getContainerId().toString();
+
+    // Assign GPUs to the container if any were requested.
+    GpuResourceAllocator.GpuAllocation allocation = gpuAllocator.assignGpus(
+        container);
+
+    // Create device cgroups for the container
+    cGroupsHandler.createCGroup(CGroupsHandler.CGroupController.DEVICES,
+        containerIdStr);
+    try {
+      // Execute container-executor to set up GPU isolation before launching
+      // the container
+      PrivilegedOperation privilegedOperation = new PrivilegedOperation(
+          PrivilegedOperation.OperationType.GPU, Arrays
+          .asList(CONTAINER_ID_CLI_OPTION, containerIdStr));
+      if (!allocation.getDeniedGPUs().isEmpty()) {
+        List<Integer> minorNumbers = new ArrayList<>();
+        for (GpuDevice deniedGpu : allocation.getDeniedGPUs()) {
+          minorNumbers.add(deniedGpu.getMinorNumber());
+        }
+        privilegedOperation.appendArgs(Arrays.asList(EXCLUDED_GPUS_CLI_OPTION,
+            StringUtils.join(",", minorNumbers)));
+      }
+      privilegedOperationExecutor.executePrivilegedOperation(
+          privilegedOperation, true);
+    } catch (PrivilegedOperationException e) {
+      cGroupsHandler.deleteCGroup(CGroupsHandler.CGroupController.DEVICES,
+          containerIdStr);
+      LOG.warn("Could not update cgroup for container", e);
+      throw new ResourceHandlerException(e);
+    }
+
+    List<PrivilegedOperation> ret = new ArrayList<>();
+    ret.add(new PrivilegedOperation(
+        PrivilegedOperation.OperationType.ADD_PID_TO_CGROUP,
+        PrivilegedOperation.CGROUP_ARG_PREFIX
+            + cGroupsHandler.getPathForCGroupTasks(
+            CGroupsHandler.CGroupController.DEVICES, containerIdStr)));
+
+    return ret;
+  }
+
+  public GpuResourceAllocator getGpuAllocator() {
+    return gpuAllocator;
+  }
+
+  @Override
+  public List<PrivilegedOperation> reacquireContainer(ContainerId containerId)
+      throws ResourceHandlerException {
+    gpuAllocator.recoverAssignedGpus(containerId);
+    return null;
+  }
+
+  @Override
+  public synchronized List<PrivilegedOperation> postComplete(
+      ContainerId containerId) throws ResourceHandlerException {
+    gpuAllocator.cleanupAssignGpus(containerId);
+    cGroupsHandler.deleteCGroup(CGroupsHandler.CGroupController.DEVICES,
+        containerId.toString());
+    return null;
+  }
+
+  @Override
+  public List<PrivilegedOperation> teardown() throws ResourceHandlerException {
+    return null;
+  }
+}
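An illustrative sketch of the argument assembly in preStart() above. The option names (--container_id, --excluded_gpus) come from the patch; the helper class and method here are hypothetical:

```java
// Denied GPUs are passed to container-executor as a comma-separated list of
// device minor numbers; the option is omitted when nothing is denied.
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;

class GpuCliArgsSketch {
  static List<String> buildArgs(String containerId, List<Integer> deniedMinors) {
    List<String> args = new ArrayList<>(
        Arrays.asList("--container_id", containerId));
    if (!deniedMinors.isEmpty()) {
      args.add("--excluded_gpus");
      args.add(deniedMinors.stream().map(String::valueOf)
          .collect(Collectors.joining(",")));
    }
    return args;
  }

  public static void main(String[] args) {
    System.out.println(buildArgs("container_1510000000000_0001_01_000002",
        Arrays.asList(1, 3)));
    // [--container_id, container_1510000000000_0001_01_000002, --excluded_gpus, 1,3]
  }
}
```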
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/NodeResourceUpdaterPlugin.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/NodeResourceUpdaterPlugin.java
new file mode 100644
index 00000000000..88f77ed12ed
--- /dev/null
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/NodeResourceUpdaterPlugin.java
@@ -0,0 +1,52 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin;
+
+import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.exceptions.YarnException;
+
+/**
+ * Plugins to handle resources on a node. This will be used by
+ * {@link org.apache.hadoop.yarn.server.nodemanager.NodeStatusUpdater}.
+ */
+public abstract class NodeResourceUpdaterPlugin {
+  /**
+   * Update the configured resource for the given component.
+   * @param res resource passed in by an external module (such as
+   *        {@link org.apache.hadoop.yarn.server.nodemanager.NodeStatusUpdater}).
+   * @throws YarnException when any issue happens.
+   */
+  public abstract void updateConfiguredResource(Resource res)
+      throws YarnException;
+
+  /**
+   * This method will be called when the node's resource is loaded from
+   * dynamic-resources.xml in the ResourceManager.
+   *
+   * @param newResource newResource reported by the RM
+   * @throws YarnException when there is any mismatch between NM and RM
+   */
+  public void handleUpdatedResourceFromRM(Resource newResource) throws
+      YarnException {
+    // By default do nothing; subclasses should override this method when
+    // special handling is required upon a new resource reported by the RM.
+  }
+
+  // TODO: add implementation to update node attribute once YARN-3409 merged.
+}
\ No newline at end of file
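A minimal sketch of a concrete updater, assuming a hypothetical "example.com/widget" resource type counted by a local probe. Only updateConfiguredResource() is mandatory; handleUpdatedResourceFromRM() keeps its no-op default:

```java
// Hypothetical subclass; the resource name and probe are illustrative only.
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.exceptions.YarnException;

public class WidgetNodeResourceUpdater extends NodeResourceUpdaterPlugin {
  @Override
  public void updateConfiguredResource(Resource res) throws YarnException {
    long widgets = probeWidgetCount(); // hypothetical device probe
    res.setResourceValue("example.com/widget", widgets);
  }

  private long probeWidgetCount() {
    return 2; // stand-in value for illustration
  }
}
```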
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/ResourcePlugin.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/ResourcePlugin.java
new file mode 100644
index 00000000000..78167c4ef33
--- /dev/null
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/ResourcePlugin.java
@@ -0,0 +1,94 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin;
+
+import org.apache.hadoop.yarn.exceptions.YarnException;
+import org.apache.hadoop.yarn.server.nodemanager.Context;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationExecutor;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.CGroupsHandler;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandler;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandlerChain;
+import org.apache.hadoop.yarn.server.nodemanager.webapp.dao.NMResourceInfo;
+
+/**
+ * {@link ResourcePlugin} is an interface that makes it easier for the
+ * NodeManager to support discovery, management, and isolation of new
+ * resource types.
+ *
+ * <p>
+ * It has two major parts: {@link ResourcePlugin#createResourceHandler(Context,
+ * CGroupsHandler, PrivilegedOperationExecutor)} and
+ * {@link ResourcePlugin#getNodeResourceHandlerInstance()}; see the javadocs
+ * below for more details.
+ * </p>
+ */
+public interface ResourcePlugin {
+  /**
+   * Initialize the plugin; this will be invoked during NM startup.
+   * @param context NM Context
+   * @throws YarnException when any issue occurs
+   */
+  void initialize(Context context) throws YarnException;
+
+  /**
+   * The plugin needs to return a {@link ResourceHandler} when special
+   * isolation is required for the resource type. This will be added to the
+   * {@link ResourceHandlerChain} during NodeManager startup. When no special
+   * isolation is needed, return null.
+   *
+   * @param nmContext NodeManager context.
+   * @param cGroupsHandler CGroupsHandler
+   * @param privilegedOperationExecutor Privileged Operation Executor.
+   * @return ResourceHandler
+   */
+  ResourceHandler createResourceHandler(Context nmContext,
+      CGroupsHandler cGroupsHandler,
+      PrivilegedOperationExecutor privilegedOperationExecutor);
+
+  /**
+   * The plugin needs to return a {@link NodeResourceUpdaterPlugin} when a
+   * discovery mechanism is required for the resource type. For example, if we
+   * want to set the resource value during NM registration or send updates
+   * during the NM-RM heartbeat, we can implement a
+   * {@link NodeResourceUpdaterPlugin} and update fields of
+   * {@link org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatRequest}
+   * or {@link org.apache.hadoop.yarn.server.api.protocolrecords.RegisterNodeManagerRequest}.
+   *
+   * This will be invoked during every node status update and node
+   * registration, so please avoid creating a new instance every time.
+   *
+   * @return NodeResourceUpdaterPlugin, could be null when no discovery is needed.
+   */
+  NodeResourceUpdaterPlugin getNodeResourceHandlerInstance();
+
+  /**
+   * Do cleanup of the plugin; this will be invoked when
+   * {@link org.apache.hadoop.yarn.server.nodemanager.NodeManager} stops.
+   * @throws YarnException if any issue occurs
+   */
+  void cleanup() throws YarnException;
+
+  /**
+   * Get resource information from this plugin.
+   *
+   * @return NMResourceInfo, an example is
+   * {@link org.apache.hadoop.yarn.server.nodemanager.webapp.dao.gpu.GpuDeviceInformation}
+   *
+   * @throws YarnException when any issue occurs
+   */
+  NMResourceInfo getNMResourceInfo() throws YarnException;
+}
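A skeleton implementation to make the contract above concrete, for a hypothetical resource that needs neither cgroup isolation nor a web DAO; returning null from createResourceHandler() and getNodeResourceHandlerInstance() is explicitly allowed by the javadocs above:

```java
// Hypothetical no-op plugin; a sketch under the stated assumptions, not a
// plugin shipped by this patch.
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.server.nodemanager.Context;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationExecutor;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.CGroupsHandler;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandler;
import org.apache.hadoop.yarn.server.nodemanager.webapp.dao.NMResourceInfo;

public class NoopResourcePlugin implements ResourcePlugin {
  @Override
  public void initialize(Context context) throws YarnException {
    // Discover / validate the resource here during NM startup.
  }

  @Override
  public ResourceHandler createResourceHandler(Context nmContext,
      CGroupsHandler cGroupsHandler,
      PrivilegedOperationExecutor privilegedOperationExecutor) {
    return null; // no special isolation needed
  }

  @Override
  public NodeResourceUpdaterPlugin getNodeResourceHandlerInstance() {
    return null; // no discovery/update mechanism needed
  }

  @Override
  public void cleanup() throws YarnException {
    // Release anything held by the plugin when the NM stops.
  }

  @Override
  public NMResourceInfo getNMResourceInfo() throws YarnException {
    return new NMResourceInfo(); // empty payload for the web endpoint
  }
}
```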
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/ResourcePluginManager.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/ResourcePluginManager.java
new file mode 100644
index 00000000000..73d6038afb1
--- /dev/null
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/ResourcePluginManager.java
@@ -0,0 +1,106 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin;
+
+import com.google.common.collect.ImmutableSet;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.exceptions.YarnException;
+import org.apache.hadoop.yarn.server.nodemanager.Context;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu.GpuResourcePlugin;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Set;
+
+import static org.apache.hadoop.yarn.api.records.ResourceInformation.GPU_URI;
+
+/**
+ * Manages {@link ResourcePlugin} configured on this NodeManager.
+ */
+public class ResourcePluginManager {
+  private static final Logger LOG =
+      LoggerFactory.getLogger(ResourcePluginManager.class);
+  private static final Set<String> SUPPORTED_RESOURCE_PLUGINS = ImmutableSet.of(
+      GPU_URI);
+
+  private Map<String, ResourcePlugin> configuredPlugins =
+      Collections.emptyMap();
+
+  public synchronized void initialize(Context context)
+      throws YarnException {
+    Configuration conf = context.getConf();
+    String[] plugins = conf.getStrings(YarnConfiguration.NM_RESOURCE_PLUGINS);
+
+    if (plugins != null) {
+      Map<String, ResourcePlugin> pluginMap = new HashMap<>();
+
+      // Initialize each plugin
+      for (String resourceName : plugins) {
+        resourceName = resourceName.trim();
+        if (!SUPPORTED_RESOURCE_PLUGINS.contains(resourceName)) {
+          String msg =
+              "Trying to initialize resource plugin with name=" + resourceName
+                  + ", it is not supported, list of supported plugins:"
+                  + StringUtils.join(",",
+                  SUPPORTED_RESOURCE_PLUGINS);
+          LOG.error(msg);
+          throw new YarnException(msg);
+        }
+
+        if (pluginMap.containsKey(resourceName)) {
+          // Duplicate item, ignore ...
+          continue;
+        }
+
+        ResourcePlugin plugin = null;
+        if (resourceName.equals(GPU_URI)) {
+          plugin = new GpuResourcePlugin();
+        }
+
+        if (plugin == null) {
+          throw new YarnException(
+              "This shouldn't happen, plugin=" + resourceName
+                  + " should be loaded and initialized");
+        }
+        plugin.initialize(context);
+        pluginMap.put(resourceName, plugin);
+      }
+
+      configuredPlugins = Collections.unmodifiableMap(pluginMap);
+    }
+  }
+
+  public synchronized void cleanup() throws YarnException {
+    for (ResourcePlugin plugin : configuredPlugins.values()) {
+      plugin.cleanup();
+    }
+  }
+
+  /**
+   * Get resource name (such as gpu/fpga) to plugin references.
+   * @return read-only map of resource name to plugins.
+   */
+  public synchronized Map<String, ResourcePlugin> getNameToPlugins() {
+    return configuredPlugins;
+  }
+}
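A standalone rendering of the validation rule in initialize() above: any configured plugin name outside the supported set is rejected up front. Class and exception type here are illustrative; the patch throws YarnException and currently supports only the GPU plugin:

```java
// Sketch of the supported-plugin check; "yarn.io/gpu" is the only entry in
// this patch, fpga and others would extend the set later.
import java.util.Arrays;
import java.util.LinkedHashSet;
import java.util.Set;

class PluginNameValidationSketch {
  private static final Set<String> SUPPORTED =
      new LinkedHashSet<>(Arrays.asList("yarn.io/gpu"));

  static void validate(String[] configured) {
    for (String name : configured) {
      if (!SUPPORTED.contains(name.trim())) {
        throw new IllegalArgumentException(
            "Trying to initialize resource plugin with name=" + name
                + ", it is not supported. Supported: " + SUPPORTED);
      }
    }
  }

  public static void main(String[] args) {
    validate(new String[] {"yarn.io/gpu"});      // ok
    try {
      validate(new String[] {"yarn.io/fpga"});   // rejected in this patch
    } catch (IllegalArgumentException e) {
      System.out.println(e.getMessage());
    }
  }
}
```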
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/AssignedGpuDevice.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/AssignedGpuDevice.java
new file mode 100644
index 00000000000..df4b905d185
--- /dev/null
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/AssignedGpuDevice.java
@@ -0,0 +1,88 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu;
+
+import javax.xml.bind.annotation.XmlAccessType;
+import javax.xml.bind.annotation.XmlAccessorType;
+import javax.xml.bind.annotation.XmlRootElement;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+
+/**
+ * In addition to {@link GpuDevice}, this includes the container id and, when
+ * available, more runtime information about which container is using the GPU
+ * device.
+ */
+@XmlRootElement
+@XmlAccessorType(XmlAccessType.FIELD)
+public class AssignedGpuDevice extends GpuDevice {
+  private static final long serialVersionUID = -12983712986315L;
+
+  String containerId;
+
+  public AssignedGpuDevice() {
+
+  }
+
+  public AssignedGpuDevice(int index, int minorNumber,
+      ContainerId containerId) {
+    super(index, minorNumber);
+    this.containerId = containerId.toString();
+  }
+
+  public String getContainerId() {
+    return containerId;
+  }
+
+  public void setContainerId(String containerId) {
+    this.containerId = containerId;
+  }
+
+  @Override
+  public boolean equals(Object obj) {
+    if (obj == null || !(obj instanceof AssignedGpuDevice)) {
+      return false;
+    }
+    AssignedGpuDevice other = (AssignedGpuDevice) obj;
+    return index == other.index && minorNumber == other.minorNumber
+        && containerId.equals(other.containerId);
+  }
+
+  @Override
+  public int compareTo(Object obj) {
+    if (obj == null || (!(obj instanceof AssignedGpuDevice))) {
+      return -1;
+    }
+
+    AssignedGpuDevice other = (AssignedGpuDevice) obj;
+
+    int result = Integer.compare(index, other.index);
+    if (0 != result) {
+      return result;
+    }
+    result = Integer.compare(minorNumber, other.minorNumber);
+    if (0 != result) {
+      return result;
+    }
+    return containerId.compareTo(other.containerId);
+  }
+
+  @Override
+  public int hashCode() {
+    final int prime = 47;
+    return prime * (prime * index + minorNumber) + containerId.hashCode();
+  }
+}
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuDevice.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuDevice.java
new file mode 100644
index 00000000000..6f084e635b4
--- /dev/null
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuDevice.java
@@ -0,0 +1,88 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu;
+
+import java.io.Serializable;
+import javax.xml.bind.annotation.XmlAccessType;
+import javax.xml.bind.annotation.XmlAccessorType;
+import javax.xml.bind.annotation.XmlRootElement;
+
+
+/**
+ * This class is used to represent a GPU device during allocation.
+ */ +@XmlRootElement +@XmlAccessorType(XmlAccessType.FIELD) +public class GpuDevice implements Serializable, Comparable { + protected int index; + protected int minorNumber; + private static final long serialVersionUID = -6812314470754667710L; + + public GpuDevice() { + + } + + public GpuDevice(int index, int minorNumber) { + this.index = index; + this.minorNumber = minorNumber; + } + + public int getIndex() { + return index; + } + + public int getMinorNumber() { + return minorNumber; + } + + @Override + public boolean equals(Object obj) { + if (obj == null || !(obj instanceof GpuDevice)) { + return false; + } + GpuDevice other = (GpuDevice) obj; + return index == other.index && minorNumber == other.minorNumber; + } + + @Override + public int compareTo(Object obj) { + if (obj == null || (!(obj instanceof GpuDevice))) { + return -1; + } + + GpuDevice other = (GpuDevice) obj; + + int result = Integer.compare(index, other.index); + if (0 != result) { + return result; + } + return Integer.compare(minorNumber, other.minorNumber); + } + + @Override + public int hashCode() { + final int prime = 47; + return prime * index + minorNumber; + } + + @Override + public String toString() { + return "(index=" + index + ",minor_number=" + minorNumber + ")"; + } +} diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuDiscoverer.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuDiscoverer.java new file mode 100644 index 00000000000..6e3cf1315ce --- /dev/null +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuDiscoverer.java @@ -0,0 +1,264 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu;
+
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.ImmutableSet;
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.util.Shell;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.exceptions.YarnException;
+import org.apache.hadoop.yarn.server.nodemanager.webapp.dao.gpu.GpuDeviceInformation;
+import org.apache.hadoop.yarn.server.nodemanager.webapp.dao.gpu.GpuDeviceInformationParser;
+import org.apache.hadoop.yarn.server.nodemanager.webapp.dao.gpu.PerGpuDeviceInformation;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+@InterfaceAudience.Private
+@InterfaceStability.Unstable
+public class GpuDiscoverer {
+  public static final Logger LOG = LoggerFactory.getLogger(
+      GpuDiscoverer.class);
+  @VisibleForTesting
+  protected static final String DEFAULT_BINARY_NAME = "nvidia-smi";
+
+  // When the executable path is not set, search these default dirs:
+  // /usr/bin, /bin, and /usr/local/nvidia/bin (the latter is used when
+  // launched by nvidia-docker).
+  private static final Set<String> DEFAULT_BINARY_SEARCH_DIRS = ImmutableSet.of(
+      "/usr/bin", "/bin", "/usr/local/nvidia/bin");
+
+  // The command should not run for more than 10 seconds.
+  private static final int MAX_EXEC_TIMEOUT_MS = 10 * 1000;
+  private static final int MAX_REPEATED_ERROR_ALLOWED = 10;
+  private static GpuDiscoverer instance;
+
+  static {
+    instance = new GpuDiscoverer();
+  }
+
+  private Configuration conf = null;
+  private String pathOfGpuBinary = null;
+  private Map<String, String> environment = new HashMap<>();
+  private GpuDeviceInformationParser parser = new GpuDeviceInformationParser();
+
+  private int numOfErrorExecutionSinceLastSucceed = 0;
+  GpuDeviceInformation lastDiscoveredGpuInformation = null;
+
+  private void validateConfOrThrowException() throws YarnException {
+    if (conf == null) {
+      throw new YarnException("Please initialize (call initialize) before using "
+          + GpuDiscoverer.class.getSimpleName());
+    }
+  }
+
+  /**
+   * Get GPU device information from the system.
+   * This needs to be called after initialize().
+   *
+   * Please note that this only works on *NIX platforms, so external callers
+   * need to make sure of this.
+   *
+   * @return GpuDeviceInformation
+   * @throws YarnException when any error happens
+   */
+  public synchronized GpuDeviceInformation getGpuDeviceInformation()
+      throws YarnException {
+    validateConfOrThrowException();
+
+    if (null == pathOfGpuBinary) {
+      throw new YarnException(
+          "Failed to find GPU discovery executable, please double check "
+              + YarnConfiguration.NM_GPU_PATH_TO_EXEC + " setting.");
+    }
+
+    if (numOfErrorExecutionSinceLastSucceed == MAX_REPEATED_ERROR_ALLOWED) {
+      String msg =
+          "Failed to execute GPU device information detection script for "
+              + MAX_REPEATED_ERROR_ALLOWED
+              + " times, skipping subsequent executions.";
+      LOG.error(msg);
+      throw new YarnException(msg);
+    }
+
+    String output;
+    try {
+      output = Shell.execCommand(environment,
+          new String[] { pathOfGpuBinary, "-x", "-q" }, MAX_EXEC_TIMEOUT_MS);
+      GpuDeviceInformation info = parser.parseXml(output);
+      numOfErrorExecutionSinceLastSucceed = 0;
+      lastDiscoveredGpuInformation = info;
+      return info;
+    } catch (IOException e) {
+      numOfErrorExecutionSinceLastSucceed++;
+      String msg =
+          "Failed to execute " + pathOfGpuBinary + ", exception message: " + e
+              .getMessage() + ", continue ...";
+      if (LOG.isDebugEnabled()) {
+        LOG.debug(msg);
+      }
+      throw new YarnException(e);
+    } catch (YarnException e) {
+      numOfErrorExecutionSinceLastSucceed++;
+      String msg = "Failed to parse xml output: " + e.getMessage();
+      if (LOG.isDebugEnabled()) {
+        LOG.debug(msg, e);
+      }
+      throw e;
+    }
+  }
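What the discovery call above boils down to, as a standalone sketch: run "nvidia-smi -x -q" with a timeout and hand the XML it prints to the parser. The binary path below is illustrative; the patch resolves it from configuration or the default search dirs:

```java
// Hedged sketch; assumes nvidia-smi is at /usr/bin and the same 10s cap as
// MAX_EXEC_TIMEOUT_MS. Shell.execCommand(env, cmd, timeout) returns stdout.
import java.util.HashMap;
import org.apache.hadoop.util.Shell;
import org.apache.hadoop.yarn.server.nodemanager.webapp.dao.gpu.GpuDeviceInformation;
import org.apache.hadoop.yarn.server.nodemanager.webapp.dao.gpu.GpuDeviceInformationParser;

class GpuProbeSketch {
  public static void main(String[] args) throws Exception {
    String xml = Shell.execCommand(new HashMap<String, String>(),
        new String[] {"/usr/bin/nvidia-smi", "-x", "-q"},
        10 * 1000L);
    GpuDeviceInformation info = new GpuDeviceInformationParser().parseXml(xml);
    System.out.println(info);
  }
}
```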
+
+  /**
+   * Get the list of GPU devices usable by YARN.
+   *
+   * @return list of GPU devices
+   * @throws YarnException when any issue happens
+   */
+  public synchronized List<GpuDevice> getGpusUsableByYarn()
+      throws YarnException {
+    validateConfOrThrowException();
+
+    String allowedDevicesStr = conf.get(
+        YarnConfiguration.NM_GPU_ALLOWED_DEVICES,
+        YarnConfiguration.AUTOMATICALLY_DISCOVER_GPU_DEVICES);
+
+    List<GpuDevice> gpuDevices = new ArrayList<>();
+
+    if (allowedDevicesStr.equals(
+        YarnConfiguration.AUTOMATICALLY_DISCOVER_GPU_DEVICES)) {
+      // Get GPU device information from the system.
+      if (null == lastDiscoveredGpuInformation) {
+        String msg = YarnConfiguration.NM_GPU_ALLOWED_DEVICES + " is set to "
+            + YarnConfiguration.AUTOMATICALLY_DISCOVER_GPU_DEVICES
+            + ", however automatically discovering "
+            + "GPU information failed. Please check the NodeManager log for"
+            + " more details. As an alternative, the admin can specify "
+            + YarnConfiguration.NM_GPU_ALLOWED_DEVICES
+            + " manually to enable GPU isolation.";
+        LOG.error(msg);
+        throw new YarnException(msg);
+      }
+
+      if (lastDiscoveredGpuInformation.getGpus() != null) {
+        for (int i = 0; i < lastDiscoveredGpuInformation.getGpus().size();
+            i++) {
+          List<PerGpuDeviceInformation> gpuInfos =
+              lastDiscoveredGpuInformation.getGpus();
+          gpuDevices.add(new GpuDevice(i, gpuInfos.get(i).getMinorNumber()));
+        }
+      }
+    } else {
+      for (String s : allowedDevicesStr.split(",")) {
+        if (s.trim().length() > 0) {
+          String[] kv = s.trim().split(":");
+          if (kv.length != 2) {
+            throw new YarnException(
+                "Illegal format, expected index:minor_number but got: " + s);
+          }
+
+          gpuDevices.add(
+              new GpuDevice(Integer.parseInt(kv[0]), Integer.parseInt(kv[1])));
+        }
+      }
+      LOG.info("Allowed GPU devices:" + gpuDevices);
+    }
+
+    return gpuDevices;
+  }
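The manual device list accepted by getGpusUsableByYarn() above is a comma-separated set of index:minor_number pairs, for example "0:0,1:1,2:3". A standalone rendering of that parse loop (class name illustrative; the patch builds GpuDevice objects and throws YarnException instead):

```java
// Parses "0:0,1:1,2:3" into (index, minor) pairs, mirroring the loop above.
import java.util.ArrayList;
import java.util.List;

class AllowedGpusParseSketch {
  static List<int[]> parse(String allowed) {
    List<int[]> devices = new ArrayList<>();
    for (String s : allowed.split(",")) {
      if (s.trim().isEmpty()) {
        continue;
      }
      String[] kv = s.trim().split(":");
      if (kv.length != 2) {
        throw new IllegalArgumentException(
            "Illegal format, expected index:minor_number but got: " + s);
      }
      devices.add(new int[] {Integer.parseInt(kv[0]), Integer.parseInt(kv[1])});
    }
    return devices;
  }

  public static void main(String[] args) {
    System.out.println(parse("0:0,1:1,2:3").size()); // 3
  }
}
```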
Now use " + "default binary:" + DEFAULT_BINARY_NAME); + } + } else{ + // If path specified by user is a directory, use + if (binaryPath.isDirectory()) { + binaryPath = new File(binaryPath, DEFAULT_BINARY_NAME); + LOG.warn("Specified path is a directory, use " + DEFAULT_BINARY_NAME + + " under the directory, updated path-to-executable:" + binaryPath + .getAbsolutePath()); + } + // Validated + pathOfGpuBinary = binaryPath.getAbsolutePath(); + } + + // Try to discover GPU information once and print + try { + LOG.info("Trying to discover GPU information ..."); + GpuDeviceInformation info = getGpuDeviceInformation(); + LOG.info(info.toString()); + } catch (YarnException e) { + String msg = + "Failed to discover GPU information from system, exception message:" + + e.getMessage() + " continue..."; + LOG.warn(msg); + } + } + + @VisibleForTesting + protected Map getEnvironmentToRunCommand() { + return environment; + } + + @VisibleForTesting + protected String getPathOfGpuBinary() { + return pathOfGpuBinary; + } + + public static GpuDiscoverer getInstance() { + return instance; + } +} diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuNodeResourceUpdateHandler.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuNodeResourceUpdateHandler.java new file mode 100644 index 00000000000..796eb25b431 --- /dev/null +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuNodeResourceUpdateHandler.java @@ -0,0 +1,68 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuNodeResourceUpdateHandler.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuNodeResourceUpdateHandler.java
new file mode 100644
index 00000000000..796eb25b431
--- /dev/null
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuNodeResourceUpdateHandler.java
@@ -0,0 +1,68 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu;
+
+import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.api.records.ResourceInformation;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.exceptions.YarnException;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.NodeResourceUpdaterPlugin;
+import org.apache.hadoop.yarn.util.resource.ResourceUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.List;
+import java.util.Map;
+
+import static org.apache.hadoop.yarn.api.records.ResourceInformation.GPU_URI;
+
+public class GpuNodeResourceUpdateHandler extends NodeResourceUpdaterPlugin {
+  private static final Logger LOG =
+      LoggerFactory.getLogger(GpuNodeResourceUpdateHandler.class);
+
+  @Override
+  public void updateConfiguredResource(Resource res) throws YarnException {
+    LOG.info("Initializing configured GPU resources for the NodeManager.");
+
+    List<GpuDevice> usableGpus =
+        GpuDiscoverer.getInstance().getGpusUsableByYarn();
+    if (null == usableGpus || usableGpus.isEmpty()) {
+      String message = "GPU is enabled, but couldn't find any usable GPUs on the "
+          + "NodeManager.";
+      LOG.error(message);
+      // No GPU can be used by YARN.
+      throw new YarnException(message);
+    }
+
+    long nUsableGpus = usableGpus.size();
+
+    Map<String, ResourceInformation> configuredResourceTypes =
+        ResourceUtils.getResourceTypes();
+    if (!configuredResourceTypes.containsKey(GPU_URI)) {
+      throw new YarnException("Found " + nUsableGpus + " usable GPUs, however "
+          + GPU_URI
+          + " resource-type is not configured inside"
+          + " resource-types.xml. Please configure it to enable the GPU"
+          + " feature, or remove " + GPU_URI + " from "
+          + YarnConfiguration.NM_RESOURCE_PLUGINS);
+    }
+
+    res.setResourceValue(GPU_URI, nUsableGpus);
+  }
+}
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuResourcePlugin.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuResourcePlugin.java
new file mode 100644
index 00000000000..d294503704e
--- /dev/null
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuResourcePlugin.java
@@ -0,0 +1,83 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +package org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu; + +import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.exceptions.YarnException; +import org.apache.hadoop.yarn.server.nodemanager.Context; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationExecutor; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.CGroupsHandler; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandler; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.gpu.GpuResourceAllocator; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.gpu.GpuResourceHandlerImpl; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.NodeResourceUpdaterPlugin; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.ResourcePlugin; +import org.apache.hadoop.yarn.server.nodemanager.webapp.dao.NMResourceInfo; +import org.apache.hadoop.yarn.server.nodemanager.webapp.dao.gpu.GpuDeviceInformation; +import org.apache.hadoop.yarn.server.nodemanager.webapp.dao.gpu.NMGpuResourceInfo; + +import java.util.List; +import java.util.Map; + +public class GpuResourcePlugin implements ResourcePlugin { + private GpuResourceHandlerImpl gpuResourceHandler = null; + private GpuNodeResourceUpdateHandler resourceDiscoverHandler = null; + + @Override + public synchronized void initialize(Context context) throws YarnException { + resourceDiscoverHandler = new GpuNodeResourceUpdateHandler(); + GpuDiscoverer.getInstance().initialize(context.getConf()); + } + + @Override + public synchronized ResourceHandler createResourceHandler( + Context context, CGroupsHandler cGroupsHandler, + PrivilegedOperationExecutor privilegedOperationExecutor) { + if (gpuResourceHandler == null) { + gpuResourceHandler = new GpuResourceHandlerImpl(context, cGroupsHandler, + privilegedOperationExecutor); + } + + return gpuResourceHandler; + } + + @Override + public synchronized NodeResourceUpdaterPlugin getNodeResourceHandlerInstance() { + return resourceDiscoverHandler; + } + + @Override + public void cleanup() throws YarnException { + // Do nothing. 
+ } + + @Override + public NMResourceInfo getNMResourceInfo() throws YarnException { + GpuDeviceInformation gpuDeviceInformation = + GpuDiscoverer.getInstance().getGpuDeviceInformation(); + GpuResourceAllocator gpuResourceAllocator = + gpuResourceHandler.getGpuAllocator(); + List totalGpus = gpuResourceAllocator.getAllowedGpusCopy(); + List assignedGpuDevices = + gpuResourceAllocator.getAssignedGpusCopy(); + + return new NMGpuResourceInfo(gpuDeviceInformation, totalGpus, + assignedGpuDevices); + } +} diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMLeveldbStateStoreService.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMLeveldbStateStoreService.java index 129fa8f34c1..0cbf0782f08 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMLeveldbStateStoreService.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMLeveldbStateStoreService.java @@ -18,40 +18,25 @@ package org.apache.hadoop.yarn.server.nodemanager.recovery; -import static org.fusesource.leveldbjni.JniDBFactory.asString; -import static org.fusesource.leveldbjni.JniDBFactory.bytes; - -import org.slf4j.Logger; -import org.apache.hadoop.yarn.api.records.Token; -import org.apache.hadoop.yarn.security.ContainerTokenIdentifier; -import org.slf4j.LoggerFactory; - -import java.io.File; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import java.util.Timer; -import java.util.TimerTask; -import java.util.Set; - +import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.ArrayListMultimap; +import com.google.common.collect.ListMultimap; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.util.Time; import org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.StartContainerRequestPBImpl; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.Token; import org.apache.hadoop.yarn.api.records.impl.pb.ResourcePBImpl; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.proto.YarnProtos.LocalResourceProto; +import org.apache.hadoop.yarn.proto.YarnSecurityTokenProtos.ContainerTokenIdentifierProto; import org.apache.hadoop.yarn.proto.YarnServerCommonProtos.MasterKeyProto; import org.apache.hadoop.yarn.proto.YarnServerCommonProtos.VersionProto; import org.apache.hadoop.yarn.proto.YarnServerNodemanagerRecoveryProtos.ContainerManagerApplicationProto; @@ -59,9 +44,11 @@ import org.apache.hadoop.yarn.proto.YarnServerNodemanagerRecoveryProtos.LocalizedResourceProto; import org.apache.hadoop.yarn.proto.YarnServerNodemanagerRecoveryProtos.LogDeleterProto; import 
org.apache.hadoop.yarn.proto.YarnServiceProtos.StartContainerRequestProto; -import org.apache.hadoop.yarn.proto.YarnSecurityTokenProtos.ContainerTokenIdentifierProto; +import org.apache.hadoop.yarn.security.ContainerTokenIdentifier; import org.apache.hadoop.yarn.server.api.records.MasterKey; import org.apache.hadoop.yarn.server.api.records.impl.pb.MasterKeyPBImpl; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ResourceMappings; import org.apache.hadoop.yarn.server.records.Version; import org.apache.hadoop.yarn.server.records.impl.pb.VersionPBImpl; import org.apache.hadoop.yarn.server.utils.BuilderUtils; @@ -73,10 +60,26 @@ import org.iq80.leveldb.DBException; import org.iq80.leveldb.Options; import org.iq80.leveldb.WriteBatch; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.File; +import java.io.IOException; +import java.io.Serializable; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Set; +import java.util.Timer; +import java.util.TimerTask; + +import static org.fusesource.leveldbjni.JniDBFactory.asString; +import static org.fusesource.leveldbjni.JniDBFactory.bytes; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.collect.ArrayListMultimap; -import com.google.common.collect.ListMultimap; public class NMLeveldbStateStoreService extends NMStateStoreService { @@ -148,6 +151,9 @@ private static final String AMRMPROXY_KEY_PREFIX = "AMRMProxy/"; + private static final String CONTAINER_ASSIGNED_RESOURCES_KEY_SUFFIX = + "/assignedResources_"; + private static final byte[] EMPTY_VALUE = new byte[0]; private DB db; @@ -309,6 +315,13 @@ private RecoveredContainerState loadContainerState(ContainerId containerId, rcs.setWorkDir(asString(entry.getValue())); } else if (suffix.equals(CONTAINER_LOG_DIR_KEY_SUFFIX)) { rcs.setLogDir(asString(entry.getValue())); + } else if (suffix.startsWith(CONTAINER_ASSIGNED_RESOURCES_KEY_SUFFIX)) { + String resourceType = suffix.substring( + CONTAINER_ASSIGNED_RESOURCES_KEY_SUFFIX.length()); + ResourceMappings.AssignedResources assignedResources = + ResourceMappings.AssignedResources.fromBytes(entry.getValue()); + rcs.getResourceMappings().addAssignedResources(resourceType, + assignedResources); } else { LOG.warn("the container " + containerId + " will be killed because of the unknown key " + key @@ -1166,6 +1179,41 @@ public void removeLogDeleter(ApplicationId appId) throws IOException { } } + @Override + public void storeAssignedResources(Container container, + String resourceType, List assignedResources) + throws IOException { + if (LOG.isDebugEnabled()) { + LOG.debug( + "storeAssignedResources: containerId=" + container.getContainerId() + + ", assignedResources=" + StringUtils + .join(",", assignedResources)); + + } + + String keyResChng = CONTAINERS_KEY_PREFIX + container.getContainerId().toString() + + CONTAINER_ASSIGNED_RESOURCES_KEY_SUFFIX + resourceType; + try { + WriteBatch batch = db.createWriteBatch(); + try { + ResourceMappings.AssignedResources res = + new ResourceMappings.AssignedResources(); + res.updateAssignedResources(assignedResources); + + // New value will overwrite old values for the same key + batch.put(bytes(keyResChng), res.toBytes()); + db.write(batch); + } finally { + batch.close(); + } + } catch (DBException e) { 
+ throw new IOException(e); + } + + // update container resource mapping. + updateContainerResourceMapping(container, resourceType, assignedResources); + } + @SuppressWarnings("deprecation") private void cleanupDeprecatedFinishedApps() { try { diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMNullStateStoreService.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMNullStateStoreService.java index aaf6fb2cdbc..95ec61ae1a3 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMNullStateStoreService.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMNullStateStoreService.java @@ -19,6 +19,7 @@ package org.apache.hadoop.yarn.server.nodemanager.recovery; import java.io.IOException; +import java.io.Serializable; import java.util.List; import org.apache.hadoop.conf.Configuration; @@ -34,6 +35,7 @@ import org.apache.hadoop.yarn.proto.YarnServerNodemanagerRecoveryProtos.LogDeleterProto; import org.apache.hadoop.yarn.security.ContainerTokenIdentifier; import org.apache.hadoop.yarn.server.api.records.MasterKey; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; // The state store to use when state isn't being stored public class NMNullStateStoreService extends NMStateStoreService { @@ -266,6 +268,13 @@ public void removeAMRMProxyAppContext(ApplicationAttemptId attempt) throws IOException { } + @Override + public void storeAssignedResources(Container container, + String resourceType, List assignedResources) + throws IOException { + updateContainerResourceMapping(container, resourceType, assignedResources); + } + @Override protected void initStorage(Configuration conf) throws IOException { } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMStateStoreService.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMStateStoreService.java index 1cdbd277ff3..350f2423834 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMStateStoreService.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMStateStoreService.java @@ -19,6 +19,7 @@ package org.apache.hadoop.yarn.server.nodemanager.recovery; import java.io.IOException; +import java.io.Serializable; import java.util.ArrayList; import java.util.HashMap; import java.util.List; @@ -43,6 +44,8 @@ import org.apache.hadoop.yarn.proto.YarnServerNodemanagerRecoveryProtos.LogDeleterProto; import org.apache.hadoop.yarn.security.ContainerTokenIdentifier; import org.apache.hadoop.yarn.server.api.records.MasterKey; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ResourceMappings; @Private @Unstable @@ -90,6 +93,7 @@ public NMStateStoreService(String name) { private RecoveredContainerType 
recoveryType = RecoveredContainerType.RECOVER; private long startTime; + private ResourceMappings resMappings = new ResourceMappings(); public RecoveredContainerStatus getStatus() { return status; @@ -174,6 +178,14 @@ public RecoveredContainerType getRecoveryType() { public void setRecoveryType(RecoveredContainerType recoveryType) { this.recoveryType = recoveryType; } + + public ResourceMappings getResourceMappings() { + return resMappings; + } + + public void setResourceMappings(ResourceMappings mappings) { + this.resMappings = mappings; + } } public static class LocalResourceTrackerState { @@ -718,9 +730,31 @@ public abstract void removeAMRMProxyAppContextEntry( public abstract void removeAMRMProxyAppContext(ApplicationAttemptId attempt) throws IOException; + /** + * Store the assigned resources to a container. + * + * @param container NMContainer + * @param resourceType Resource Type + * @param assignedResources Assigned resources + * @throws IOException if fails + */ + public abstract void storeAssignedResources(Container container, + String resourceType, List assignedResources) + throws IOException; + protected abstract void initStorage(Configuration conf) throws IOException; protected abstract void startStorage() throws IOException; protected abstract void closeStorage() throws IOException; + + protected void updateContainerResourceMapping(Container container, + String resourceType, List assignedResources) { + // Update Container#getResourceMapping. + ResourceMappings.AssignedResources newAssigned = + new ResourceMappings.AssignedResources(); + newAssigned.updateAssignedResources(assignedResources); + container.getResourceMappings().addAssignedResources(resourceType, + newAssigned); + } } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/util/NodeManagerHardwareUtils.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/util/NodeManagerHardwareUtils.java index 32f73c85a0c..6fe5bbe73fd 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/util/NodeManagerHardwareUtils.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/util/NodeManagerHardwareUtils.java @@ -21,10 +21,16 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.api.records.ResourceInformation; import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.exceptions.YarnRuntimeException; import org.apache.hadoop.yarn.util.ResourceCalculatorPlugin; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.hadoop.yarn.util.resource.ResourceUtils; + +import java.util.Map; /** * Helper class to determine hardware related characteristics such as the @@ -240,8 +246,8 @@ private static int getVCoresInternal(ResourceCalculatorPlugin plugin, return cores; } - private static int getConfiguredMemoryMB(Configuration conf) { - int memoryMb = conf.getInt(YarnConfiguration.NM_PMEM_MB, + private static long getConfiguredMemoryMB(Configuration conf) { + long memoryMb = conf.getLong(YarnConfiguration.NM_PMEM_MB, 
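An editorial aside on the state-store layout used by storeAssignedResources() further up: assuming CONTAINERS_KEY_PREFIX is "ContainerManager/containers/" (its definition is not shown in this hunk), the per-resource-type key is built as follows; a later write with the same key overwrites the previous assignment list.

```java
// Sketch of the leveldb key scheme under the stated assumption; the container
// id and resource type are illustrative examples.
class AssignedResourcesKeySketch {
  public static void main(String[] args) {
    String containersKeyPrefix = "ContainerManager/containers/"; // assumed value
    String key = containersKeyPrefix
        + "container_1510000000000_0001_01_000002"  // illustrative container id
        + "/assignedResources_" + "yarn.io/gpu";    // suffix + resource type
    System.out.println(key);
  }
}
```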
YarnConfiguration.DEFAULT_NM_PMEM_MB); if (memoryMb == -1) { memoryMb = YarnConfiguration.DEFAULT_NM_PMEM_MB; @@ -264,7 +270,7 @@ private static int getConfiguredMemoryMB(Configuration conf) { * - the configuration for the NodeManager * @return the amount of memory that will be used for YARN containers in MB. */ - public static int getContainerMemoryMB(Configuration conf) { + public static long getContainerMemoryMB(Configuration conf) { if (!isHardwareDetectionEnabled(conf)) { return getConfiguredMemoryMB(conf); } @@ -293,7 +299,7 @@ public static int getContainerMemoryMB(Configuration conf) { * - the configuration for the NodeManager * @return the amount of memory that will be used for YARN containers in MB. */ - public static int getContainerMemoryMB(ResourceCalculatorPlugin plugin, + public static long getContainerMemoryMB(ResourceCalculatorPlugin plugin, Configuration conf) { if (!isHardwareDetectionEnabled(conf) || plugin == null) { return getConfiguredMemoryMB(conf); @@ -301,26 +307,24 @@ public static int getContainerMemoryMB(ResourceCalculatorPlugin plugin, return getContainerMemoryMBInternal(plugin, conf); } - private static int getContainerMemoryMBInternal(ResourceCalculatorPlugin plugin, + private static long getContainerMemoryMBInternal(ResourceCalculatorPlugin plugin, Configuration conf) { - int memoryMb = conf.getInt(YarnConfiguration.NM_PMEM_MB, -1); + long memoryMb = conf.getInt(YarnConfiguration.NM_PMEM_MB, -1); if (memoryMb == -1) { - int physicalMemoryMB = - (int) (plugin.getPhysicalMemorySize() / (1024 * 1024)); - int hadoopHeapSizeMB = - (int) (Runtime.getRuntime().maxMemory() / (1024 * 1024)); - int containerPhysicalMemoryMB = - (int) (0.8f * (physicalMemoryMB - (2 * hadoopHeapSizeMB))); - int reservedMemoryMB = - conf.getInt(YarnConfiguration.NM_SYSTEM_RESERVED_PMEM_MB, -1); + long physicalMemoryMB = (plugin.getPhysicalMemorySize() / (1024 * 1024)); + long hadoopHeapSizeMB = (Runtime.getRuntime().maxMemory() + / (1024 * 1024)); + long containerPhysicalMemoryMB = (long) (0.8f + * (physicalMemoryMB - (2 * hadoopHeapSizeMB))); + long reservedMemoryMB = conf + .getInt(YarnConfiguration.NM_SYSTEM_RESERVED_PMEM_MB, -1); if (reservedMemoryMB != -1) { containerPhysicalMemoryMB = physicalMemoryMB - reservedMemoryMB; } - if(containerPhysicalMemoryMB <= 0) { + if (containerPhysicalMemoryMB <= 0) { LOG.error("Calculated memory for YARN containers is too low." + " Node memory is " + physicalMemoryMB - + " MB, system reserved memory is " - + reservedMemoryMB + " MB."); + + " MB, system reserved memory is " + reservedMemoryMB + " MB."); } containerPhysicalMemoryMB = Math.max(containerPhysicalMemoryMB, 0); memoryMb = containerPhysicalMemoryMB; @@ -332,4 +336,50 @@ private static int getContainerMemoryMBInternal(ResourceCalculatorPlugin plugin, } return memoryMb; } + + /** + * Get the resources for the node. 
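A quick worked example of the auto-detection path in getContainerMemoryMBInternal() above, for a 64 GB node with a 1 GB NodeManager heap and no system-reserved memory configured:

```java
// Worked example of the 0.8f * (physical - 2 * heap) heuristic above.
class ContainerMemorySketch {
  public static void main(String[] args) {
    long physicalMemoryMB = 64 * 1024;   // 65536 MB on the node
    long hadoopHeapSizeMB = 1024;        // NM JVM max heap
    long containerPhysicalMemoryMB =
        (long) (0.8f * (physicalMemoryMB - (2 * hadoopHeapSizeMB)));
    System.out.println(containerPhysicalMemoryMB); // 50790
  }
}
```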
+   * @param configuration the configuration for the node
+   * @return the resources for the node
+   */
+  public static Resource getNodeResources(Configuration configuration) {
+    Configuration conf = new Configuration(configuration);
+    String memory = ResourceInformation.MEMORY_MB.getName();
+    String vcores = ResourceInformation.VCORES.getName();
+
+    Resource ret = Resource.newInstance(0, 0);
+    Map<String, ResourceInformation> resourceInformation =
+        ResourceUtils.getNodeResourceInformation(conf);
+    for (Map.Entry<String, ResourceInformation> entry : resourceInformation
+        .entrySet()) {
+      ret.setResourceInformation(entry.getKey(), entry.getValue());
+      LOG.debug("Setting key " + entry.getKey() + " to " + entry.getValue());
+    }
+    if (resourceInformation.containsKey(memory)) {
+      Long value = resourceInformation.get(memory).getValue();
+      if (value > Integer.MAX_VALUE) {
+        throw new YarnRuntimeException("Value '" + value
+            + "' for resource memory is more than the maximum for an integer.");
+      }
+      ResourceInformation memResInfo = resourceInformation.get(memory);
+      if (memResInfo.getValue() == 0) {
+        ret.setMemorySize(getContainerMemoryMB(conf));
+        LOG.debug("Set memory to " + ret.getMemorySize());
+      }
+    }
+    if (resourceInformation.containsKey(vcores)) {
+      Long value = resourceInformation.get(vcores).getValue();
+      if (value > Integer.MAX_VALUE) {
+        throw new YarnRuntimeException("Value '" + value
+            + "' for resource vcores is more than the maximum for an integer.");
+      }
+      ResourceInformation vcoresResInfo = resourceInformation.get(vcores);
+      if (vcoresResInfo.getValue() == 0) {
+        ret.setVirtualCores(getVCores(conf));
+        LOG.debug("Set vcores to " + ret.getVirtualCores());
+      }
+    }
+    LOG.debug("Node resource information map is " + ret);
+    return ret;
+  }
 }
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/NMWebServices.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/NMWebServices.java
index 60905d7bf45..77020044a17 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/NMWebServices.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/NMWebServices.java
@@ -27,6 +27,10 @@ import java.util.List;
 import java.util.Map.Entry;
 import java.util.Set;
+
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.ResourcePlugin;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.ResourcePluginManager;
+import org.apache.hadoop.yarn.server.nodemanager.webapp.dao.NMResourceInfo;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -488,6 +492,27 @@ public void write(OutputStream os) throws IOException,
     }
   }
 
+  @GET
+  @Path("/resources/{resourcename}")
+  @Produces({ MediaType.APPLICATION_JSON, MediaType.APPLICATION_XML })
+  public Object getNMResourceInfo(
+      @PathParam("resourcename")
+      String resourceName) throws YarnException {
+    init();
+    ResourcePluginManager rpm = this.nmContext.getResourcePluginManager();
+    if (rpm != null && rpm.getNameToPlugins() != null) {
+      ResourcePlugin plugin = rpm.getNameToPlugins().get(resourceName);
+      if (plugin != null) {
+        NMResourceInfo nmResourceInfo = plugin.getNMResourceInfo();
+        if (nmResourceInfo != null) {
+          return nmResourceInfo;
+        }
+      }
+    }
+
+    return new NMResourceInfo();
+  }
+
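+  // Example (assuming the GPU plugin is registered under the resource name
+  // "yarn.io/gpu" and the default NM web port 8042):
+  //   GET http://<nm-host>:8042/ws/v1/node/resources/yarn.io%2Fgpu
+  // returns the plugin's NMGpuResourceInfo; unknown resource names fall
+  // through to the empty NMResourceInfo above.
+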
   private long parseLongParam(String bytes) {
     if (bytes == null || bytes.isEmpty()) {
       return Long.MAX_VALUE;
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/NMResourceInfo.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/NMResourceInfo.java
new file mode 100644
index 00000000000..18ce8ea7a68
--- /dev/null
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/NMResourceInfo.java
@@ -0,0 +1,28 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager.webapp.dao;
+
+import javax.xml.bind.annotation.XmlAccessType;
+import javax.xml.bind.annotation.XmlAccessorType;
+import javax.xml.bind.annotation.XmlRootElement;
+
+@XmlRootElement
+@XmlAccessorType(XmlAccessType.FIELD)
+public class NMResourceInfo {
+}
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/gpu/GpuDeviceInformation.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/gpu/GpuDeviceInformation.java
new file mode 100644
index 00000000000..837d5cc99cd
--- /dev/null
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/gpu/GpuDeviceInformation.java
@@ -0,0 +1,72 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager.webapp.dao.gpu;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+
+import javax.xml.bind.annotation.XmlRootElement;
+import java.util.List;
+
+/**
+ * All GPU Device Information in the system, fetched from nvidia-smi.
+ */
+@InterfaceAudience.Private
+@InterfaceStability.Unstable
+@XmlRootElement(name = "nvidia_smi_log")
+public class GpuDeviceInformation {
+  List<PerGpuDeviceInformation> gpus;
+
+  String driverVersion = "N/A";
+
+  // More fields like topology information could be added when needed.
+  // ...
+
+  @javax.xml.bind.annotation.XmlElement(name = "gpu")
+  public List<PerGpuDeviceInformation> getGpus() {
+    return gpus;
+  }
+
+  public void setGpus(List<PerGpuDeviceInformation> gpus) {
+    this.gpus = gpus;
+  }
+
+  @javax.xml.bind.annotation.XmlElement(name = "driver_version")
+  public String getDriverVersion() {
+    return driverVersion;
+  }
+
+  public void setDriverVersion(String driverVersion) {
+    this.driverVersion = driverVersion;
+  }
+
+  @Override
+  public String toString() {
+    StringBuilder sb = new StringBuilder();
+    sb.append("=== Gpus in the system ===\n").append("\tDriver Version:").append(
+        getDriverVersion()).append("\n");
+
+    if (gpus != null) {
+      for (PerGpuDeviceInformation gpu : gpus) {
+        sb.append("\t").append(gpu.toString()).append("\n");
+      }
+    }
+    return sb.toString();
+  }
+}
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/gpu/GpuDeviceInformationParser.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/gpu/GpuDeviceInformationParser.java
new file mode 100644
index 00000000000..1bd92f63a88
--- /dev/null
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/gpu/GpuDeviceInformationParser.java
@@ -0,0 +1,87 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager.webapp.dao.gpu;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.yarn.exceptions.YarnException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+import org.xml.sax.XMLReader;
+
+import javax.xml.bind.JAXBContext;
+import javax.xml.bind.JAXBException;
+import javax.xml.bind.Unmarshaller;
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.parsers.SAXParserFactory;
+import javax.xml.transform.sax.SAXSource;
+import java.io.StringReader;
+
+/**
+ * Parse XML and get GPU device information.
+ */
+@InterfaceAudience.Private
+@InterfaceStability.Unstable
+public class GpuDeviceInformationParser {
+  private static final Logger LOG = LoggerFactory.getLogger(
+      GpuDeviceInformationParser.class);
+
+  private Unmarshaller unmarshaller = null;
+  private XMLReader xmlReader = null;
+
+  private void init()
+      throws SAXException, ParserConfigurationException, JAXBException {
+    SAXParserFactory spf = SAXParserFactory.newInstance();
+    // Disable external-dtd since by default nvidia-smi output contains
+    // <!DOCTYPE nvidia_smi_log> in header
+    spf.setFeature(
+        "http://apache.org/xml/features/nonvalidating/load-external-dtd",
+        false);
+    spf.setFeature("http://xml.org/sax/features/validation", false);
+
+    JAXBContext jaxbContext = JAXBContext.newInstance(
+        GpuDeviceInformation.class);
+
+    this.xmlReader = spf.newSAXParser().getXMLReader();
+    this.unmarshaller = jaxbContext.createUnmarshaller();
+  }
+
+  public synchronized GpuDeviceInformation parseXml(String xmlContent)
+      throws YarnException {
+    if (unmarshaller == null) {
+      try {
+        init();
+      } catch (SAXException | ParserConfigurationException | JAXBException e) {
+        LOG.error("Exception while initializing parser", e);
+        throw new YarnException(e);
+      }
+    }
+
+    InputSource inputSource = new InputSource(new StringReader(xmlContent));
+    SAXSource source = new SAXSource(xmlReader, inputSource);
+    try {
+      return (GpuDeviceInformation) unmarshaller.unmarshal(source);
+    } catch (JAXBException e) {
+      LOG.error("Exception while parsing xml", e);
+      throw new YarnException(e);
+    }
+  }
+}
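A minimal, non-normative sketch of how the parser above can be driven; the XML literal is a hand-trimmed stand-in for `nvidia-smi -q -x` output, reduced to the elements mapped by the DAO classes in this patch (product name, memory values and percentages are invented for illustration):

import org.apache.hadoop.yarn.server.nodemanager.webapp.dao.gpu.GpuDeviceInformation;
import org.apache.hadoop.yarn.server.nodemanager.webapp.dao.gpu.GpuDeviceInformationParser;

public class GpuParserSketch {
  public static void main(String[] args) throws Exception {
    String xml =
        "<nvidia_smi_log>"
            + "<driver_version>390.30</driver_version>"
            + "<gpu><product_name>Tesla P100-PCIE-12GB</product_name>"
            + "<minor_number>0</minor_number>"
            + "<fb_memory_usage><used>725 MiB</used><free>11445 MiB</free></fb_memory_usage>"
            + "<utilization><gpu_util>17.2 %</gpu_util></utilization>"
            + "<temperature><gpu_temp>34 C</gpu_temp></temperature></gpu>"
            + "</nvidia_smi_log>";
    // The XmlAdapters strip units: "725 MiB" -> 725L, "17.2 %" -> 17.2f,
    // "34 C" -> 34.0f, so the DAO fields hold plain numbers.
    GpuDeviceInformation info = new GpuDeviceInformationParser().parseXml(xml);
    System.out.println(info);
  }
}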
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/gpu/NMGpuResourceInfo.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/gpu/NMGpuResourceInfo.java
new file mode 100644
index 00000000000..e5855374314
--- /dev/null
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/gpu/NMGpuResourceInfo.java
@@ -0,0 +1,80 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager.webapp.dao.gpu;
+
+import javax.xml.bind.annotation.XmlAccessType;
+import javax.xml.bind.annotation.XmlAccessorType;
+import javax.xml.bind.annotation.XmlRootElement;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu.AssignedGpuDevice;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu.GpuDevice;
+import org.apache.hadoop.yarn.server.nodemanager.webapp.dao.NMResourceInfo;
+
+import java.util.List;
+
+/**
+ * GPU device information returned to the client when
+ * {@link org.apache.hadoop.yarn.server.nodemanager.webapp.NMWebServices#getNMResourceInfo(String)}
+ * is invoked.
+ */
+@XmlRootElement
+@XmlAccessorType(XmlAccessType.FIELD)
+public class NMGpuResourceInfo extends NMResourceInfo {
+  GpuDeviceInformation gpuDeviceInformation;
+
+  List<GpuDevice> totalGpuDevices;
+  List<AssignedGpuDevice> assignedGpuDevices;
+
+  public NMGpuResourceInfo() {
+
+  }
+
+  public NMGpuResourceInfo(GpuDeviceInformation gpuDeviceInformation,
+      List<GpuDevice> totalGpuDevices,
+      List<AssignedGpuDevice> assignedGpuDevices) {
+    this.gpuDeviceInformation = gpuDeviceInformation;
+    this.totalGpuDevices = totalGpuDevices;
+    this.assignedGpuDevices = assignedGpuDevices;
+  }
+
+  public GpuDeviceInformation getGpuDeviceInformation() {
+    return gpuDeviceInformation;
+  }
+
+  public void setGpuDeviceInformation(
+      GpuDeviceInformation gpuDeviceInformation) {
+    this.gpuDeviceInformation = gpuDeviceInformation;
+  }
+
+  public List<GpuDevice> getTotalGpuDevices() {
+    return totalGpuDevices;
+  }
+
+  public void setTotalGpuDevices(List<GpuDevice> totalGpuDevices) {
+    this.totalGpuDevices = totalGpuDevices;
+  }
+
+  public List<AssignedGpuDevice> getAssignedGpuDevices() {
+    return assignedGpuDevices;
+  }
+
+  public void setAssignedGpuDevices(
+      List<AssignedGpuDevice> assignedGpuDevices) {
+    this.assignedGpuDevices = assignedGpuDevices;
+  }
+}
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/gpu/PerGpuDeviceInformation.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/gpu/PerGpuDeviceInformation.java
new file mode 100644
index 00000000000..25c2e3a1f1d
--- /dev/null
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/gpu/PerGpuDeviceInformation.java
@@ -0,0 +1,165 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager.webapp.dao.gpu;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+
+import javax.xml.bind.annotation.XmlElement;
+import javax.xml.bind.annotation.XmlRootElement;
+import javax.xml.bind.annotation.adapters.XmlAdapter;
+
+/**
+ * Capture single GPU device information such as memory size, temperature,
+ * utilization.
+ */
+@InterfaceAudience.Private
+@InterfaceStability.Unstable
+@XmlRootElement(name = "gpu")
+public class PerGpuDeviceInformation {
+
+  private String productName = "N/A";
+  private String uuid = "N/A";
+  private int minorNumber = -1;
+
+  private PerGpuUtilizations gpuUtilizations;
+  private PerGpuMemoryUsage gpuMemoryUsage;
+  private PerGpuTemperature temperature;
+
+  /**
+   * Convert formats like "34 C", "75.6 %" to float.
+   */
+  @InterfaceAudience.Private
+  @InterfaceStability.Unstable
+  static class StrToFloatBeforeSpaceAdapter extends
+      XmlAdapter<String, Float> {
+    @Override
+    public String marshal(Float v) throws Exception {
+      if (v == null) {
+        return "";
+      }
+      return String.valueOf(v);
+    }
+
+    @Override
+    public Float unmarshal(String v) throws Exception {
+      if (v == null) {
+        return -1f;
+      }
+
+      return Float.valueOf(v.split(" ")[0]);
+    }
+  }
+
+  /**
+   * Convert formats like "725 MiB" to long.
+   */
+  @InterfaceAudience.Private
+  @InterfaceStability.Unstable
+  static class StrToMemAdapter extends XmlAdapter<String, Long> {
+    @Override
+    public String marshal(Long v) throws Exception {
+      if (v == null) {
+        return "";
+      }
+      return String.valueOf(v) + " MiB";
+    }
+
+    @Override
+    public Long unmarshal(String v) throws Exception {
+      if (v == null) {
+        return -1L;
+      }
+      return Long.valueOf(v.split(" ")[0]);
+    }
+  }
+
+  @XmlElement(name = "temperature")
+  public PerGpuTemperature getTemperature() {
+    return temperature;
+  }
+
+  public void setTemperature(PerGpuTemperature temperature) {
+    this.temperature = temperature;
+  }
+
+  @XmlElement(name = "uuid")
+  public String getUuid() {
+    return uuid;
+  }
+
+  public void setUuid(String uuid) {
+    this.uuid = uuid;
+  }
+
+  @XmlElement(name = "product_name")
+  public String getProductName() {
+    return productName;
+  }
+
+  public void setProductName(String productName) {
+    this.productName = productName;
+  }
+
+  @XmlElement(name = "minor_number")
+  public int getMinorNumber() {
+    return minorNumber;
+  }
+
+  public void setMinorNumber(int minorNumber) {
+    this.minorNumber = minorNumber;
+  }
+
+  @XmlElement(name = "utilization")
+  public PerGpuUtilizations getGpuUtilizations() {
+    return gpuUtilizations;
+  }
+
+  public void setGpuUtilizations(PerGpuUtilizations utilizations) {
+    this.gpuUtilizations = utilizations;
+  }
+
+  @XmlElement(name = "fb_memory_usage")
+  public PerGpuMemoryUsage getGpuMemoryUsage() {
+    return gpuMemoryUsage;
+  }
+
+  public void setGpuMemoryUsage(PerGpuMemoryUsage gpuMemoryUsage) {
+    this.gpuMemoryUsage = gpuMemoryUsage;
+  }
+
+
+  @Override
+  public String toString() {
+    StringBuilder sb = new StringBuilder();
+    sb.append("ProductName=").append(productName).append(", MinorNumber=")
+        .append(minorNumber);
+
+    if (getGpuMemoryUsage() != null) {
+      sb.append(", TotalMemory=").append(
+          getGpuMemoryUsage().getTotalMemoryMiB()).append("MiB");
+    }
+
+    if (getGpuUtilizations() != null) {
+      sb.append(", Utilization=").append(
+          getGpuUtilizations().getOverallGpuUtilization()).append("%");
+    }
+    return sb.toString();
+  }
+}
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/gpu/PerGpuMemoryUsage.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/gpu/PerGpuMemoryUsage.java
new file mode 100644
index 00000000000..afc1a9679b7
--- /dev/null
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/gpu/PerGpuMemoryUsage.java
@@ -0,0 +1,58 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager.webapp.dao.gpu;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+
+import javax.xml.bind.annotation.XmlElement;
+import javax.xml.bind.annotation.XmlRootElement;
+import javax.xml.bind.annotation.adapters.XmlJavaTypeAdapter;
+
+@InterfaceAudience.Private
+@InterfaceStability.Unstable
+@XmlRootElement(name = "fb_memory_usage")
+public class PerGpuMemoryUsage {
+  long usedMemoryMiB = -1L;
+  long availMemoryMiB = -1L;
+
+  @XmlJavaTypeAdapter(PerGpuDeviceInformation.StrToMemAdapter.class)
+  @XmlElement(name = "used")
+  public Long getUsedMemoryMiB() {
+    return usedMemoryMiB;
+  }
+
+  public void setUsedMemoryMiB(Long usedMemoryMiB) {
+    this.usedMemoryMiB = usedMemoryMiB;
+  }
+
+  @XmlJavaTypeAdapter(PerGpuDeviceInformation.StrToMemAdapter.class)
+  @XmlElement(name = "free")
+  public Long getAvailMemoryMiB() {
+    return availMemoryMiB;
+  }
+
+  public void setAvailMemoryMiB(Long availMemoryMiB) {
+    this.availMemoryMiB = availMemoryMiB;
+  }
+
+  public long getTotalMemoryMiB() {
+    return usedMemoryMiB + availMemoryMiB;
+  }
+}
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/gpu/PerGpuTemperature.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/gpu/PerGpuTemperature.java
new file mode 100644
index 00000000000..ccd60cbf5e5
--- /dev/null
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/gpu/PerGpuTemperature.java
@@ -0,0 +1,80 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager.webapp.dao.gpu;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+
+import javax.xml.bind.annotation.XmlElement;
+import javax.xml.bind.annotation.XmlRootElement;
+import javax.xml.bind.annotation.adapters.XmlJavaTypeAdapter;
+
+/**
+ * Temperature of GPU
+ */
+@InterfaceAudience.Private
+@InterfaceStability.Unstable
+@XmlRootElement(name = "temperature")
+public class PerGpuTemperature {
+  private float currentGpuTemp = Float.MIN_VALUE;
+  private float maxGpuTemp = Float.MIN_VALUE;
+  private float slowThresholdGpuTemp = Float.MIN_VALUE;
+
+  /**
+   * Get current celsius GPU temperature
+   * @return temperature
+   */
+  @XmlJavaTypeAdapter(PerGpuDeviceInformation.StrToFloatBeforeSpaceAdapter.class)
+  @XmlElement(name = "gpu_temp")
+  public Float getCurrentGpuTemp() {
+    return currentGpuTemp;
+  }
+
+  public void setCurrentGpuTemp(Float currentGpuTemp) {
+    this.currentGpuTemp = currentGpuTemp;
+  }
+
+  /**
+   * Get max possible celsius GPU temperature
+   * @return temperature
+   */
+  @XmlJavaTypeAdapter(PerGpuDeviceInformation.StrToFloatBeforeSpaceAdapter.class)
+  @XmlElement(name = "gpu_temp_max_threshold")
+  public Float getMaxGpuTemp() {
+    return maxGpuTemp;
+  }
+
+  public void setMaxGpuTemp(Float maxGpuTemp) {
+    this.maxGpuTemp = maxGpuTemp;
+  }
+
+  /**
+   * Get the celsius GPU temperature which could make the GPU run slower
+   * @return temperature
+   */
+  @XmlJavaTypeAdapter(PerGpuDeviceInformation.StrToFloatBeforeSpaceAdapter.class)
+  @XmlElement(name = "gpu_temp_slow_threshold")
+  public Float getSlowThresholdGpuTemp() {
+    return slowThresholdGpuTemp;
+  }
+
+  public void setSlowThresholdGpuTemp(Float slowThresholdGpuTemp) {
+    this.slowThresholdGpuTemp = slowThresholdGpuTemp;
+  }
+}
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/gpu/PerGpuUtilizations.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/gpu/PerGpuUtilizations.java
new file mode 100644
index 00000000000..4ef218ba7ea
--- /dev/null
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/gpu/PerGpuUtilizations.java
@@ -0,0 +1,50 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager.webapp.dao.gpu;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+
+import javax.xml.bind.annotation.XmlElement;
+import javax.xml.bind.annotation.XmlRootElement;
+import javax.xml.bind.annotation.adapters.XmlJavaTypeAdapter;
+
+/**
+ * GPU utilizations
+ */
+@InterfaceAudience.Private
+@InterfaceStability.Unstable
+@XmlRootElement(name = "utilization")
+public class PerGpuUtilizations {
+  private float overallGpuUtilization;
+
+  /**
+   * Overall percent GPU utilization
+   * @return utilization
+   */
+  @XmlJavaTypeAdapter(PerGpuDeviceInformation.StrToFloatBeforeSpaceAdapter.class)
+  @XmlElement(name = "gpu_util")
+  public Float getOverallGpuUtilization() {
+    return overallGpuUtilization;
+  }
+
+  public void setOverallGpuUtilization(Float overallGpuUtilization) {
+    this.overallGpuUtilization = overallGpuUtilization;
+  }
+}
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.h hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.h
index 956b38c7276..a78b077d9b2 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.h
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.h
@@ -285,3 +285,5 @@ int execute_regex_match(const char *regex_str, const char *input);
  * Return 0 on success.
  */
 int validate_docker_image_name(const char *image_name);
+
+struct configuration* get_cfg();
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/main.c hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/main.c
index 930dabe5029..9cf34a0c4f4 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/main.c
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/main.c
@@ -22,6 +22,8 @@
 #include "util.h"
 #include "get_executable.h"
 #include "utils/string-utils.h"
+#include "modules/gpu/gpu-module.h"
+#include "modules/cgroups/cgroups-operations.h"
 #include
 #include
@@ -241,6 +243,14 @@ static int validate_arguments(int argc, char **argv , int *operation) {
     return INVALID_ARGUMENT_NUMBER;
   }
 
+  /*
+   * Check if it is a known module, if yes, redirect to module
+   */
+  if (strcmp("--module-gpu", argv[1]) == 0) {
+    return handle_gpu_request(&update_cgroups_parameters, "gpu", argc - 1,
+           &argv[1]);
+  }
+
   if (strcmp("--checksetup", argv[1]) == 0) {
     *operation = CHECK_SETUP;
     return 0;
@@ -325,6 +335,7 @@ static int validate_arguments(int argc, char **argv , int *operation) {
       return FEATURE_DISABLED;
     }
   }
+
   /* Now we have to validate 'run as user' operations that don't use
      a 'long option' - we should fix this at some point.
      The validation/argument parsing here is extensive enough that it is
      done in a separate function */
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/modules/cgroups/cgroups-operations.c hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/modules/cgroups/cgroups-operations.c
new file mode 100644
index 00000000000..b23410928bf
--- /dev/null
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/modules/cgroups/cgroups-operations.c
@@ -0,0 +1,161 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "configuration.h"
+#include "container-executor.h"
+#include "utils/string-utils.h"
+#include "utils/path-utils.h"
+#include "modules/common/module-configs.h"
+#include "modules/common/constants.h"
+#include "modules/cgroups/cgroups-operations.h"
+#include "util.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+
+#define MAX_PATH_LEN 4096
+
+static const struct section* cgroup_cfg_section = NULL;
+
+void reload_cgroups_configuration() {
+  cgroup_cfg_section = get_configuration_section(CGROUPS_SECTION_NAME, get_cfg());
+}
+
+char* get_cgroups_path_to_write(
+    const char* hierarchy_name,
+    const char* param_name,
+    const char* group_id) {
+  int failed = 0;
+  char* buffer = NULL;
+  const char* cgroups_root = get_section_value(CGROUPS_ROOT_KEY,
+     cgroup_cfg_section);
+  const char* yarn_hierarchy_name = get_section_value(
+     CGROUPS_YARN_HIERARCHY_KEY, cgroup_cfg_section);
+
+  // Make sure it is defined.
+  if (!cgroups_root || cgroups_root[0] == 0) {
+    fprintf(ERRORFILE, "%s is not defined in container-executor.cfg\n",
+      CGROUPS_ROOT_KEY);
+    failed = 1;
+    goto cleanup;
+  }
+
+  // Make sure it is defined.
+  if (!yarn_hierarchy_name || yarn_hierarchy_name[0] == 0) {
+    fprintf(ERRORFILE, "%s is not defined in container-executor.cfg\n",
+      CGROUPS_YARN_HIERARCHY_KEY);
+    failed = 1;
+    goto cleanup;
+  }
+
+  buffer = malloc(MAX_PATH_LEN + 1);
+  if (!buffer) {
+    fprintf(ERRORFILE, "Failed to allocate memory for output path.\n");
+    failed = 1;
+    goto cleanup;
+  }
+
+  // Make a path.
+  // CGroups path should not be too long.
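+  // Example (matching the expectation in test-cgroups-module.cc): with
+  // root=/sys/fs/cgroups and yarn-hierarchy=yarn configured, the tuple
+  // ("devices", "deny", "container_1") resolves to
+  // /sys/fs/cgroups/devices/yarn/container_1/devices.deny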
+  if (snprintf(buffer, MAX_PATH_LEN, "%s/%s/%s/%s/%s.%s",
+        cgroups_root, hierarchy_name, yarn_hierarchy_name,
+        group_id, hierarchy_name, param_name) < 0) {
+    fprintf(ERRORFILE, "Failed to print output path.\n");
+    failed = 1;
+    goto cleanup;
+  }
+
+cleanup:
+  if (failed) {
+    if (buffer) {
+      free(buffer);
+    }
+    return NULL;
+  }
+  return buffer;
+}
+
+int update_cgroups_parameters(
+   const char* hierarchy_name,
+   const char* param_name,
+   const char* group_id,
+   const char* value) {
+#ifndef __linux
+  fprintf(ERRORFILE, "Failed to update cgroups parameters, not supported\n");
+  return -1;
+#endif
+  int failure = 0;
+
+  if (!cgroup_cfg_section) {
+    reload_cgroups_configuration();
+  }
+
+  char* full_path = get_cgroups_path_to_write(hierarchy_name, param_name,
+    group_id);
+
+  if (!full_path) {
+    fprintf(ERRORFILE,
+      "Failed to get cgroups path to write, it should be a configuration issue\n");
+    failure = 1;
+    goto cleanup;
+  }
+
+  if (!verify_path_safety(full_path)) {
+    failure = 1;
+    goto cleanup;
+  }
+
+  // Make sure file exists
+  struct stat sb;
+  if (stat(full_path, &sb) != 0) {
+    fprintf(ERRORFILE, "CGroups: Could not find file to write, %s\n", full_path);
+    failure = 1;
+    goto cleanup;
+  }
+
+  fprintf(ERRORFILE, "CGroups: Updating cgroups, path=%s, value=%s\n",
+    full_path, value);
+
+  // Write values to file
+  FILE *f;
+  f = fopen(full_path, "a");
+  if (!f) {
+    fprintf(ERRORFILE, "CGroups: Failed to open cgroups file, %s\n", full_path);
+    failure = 1;
+    goto cleanup;
+  }
+  if (fprintf(f, "%s", value) < 0) {
+    fprintf(ERRORFILE, "CGroups: Failed to write cgroups file, %s\n", full_path);
+    fclose(f);
+    failure = 1;
+    goto cleanup;
+  }
+  if (fclose(f) != 0) {
+    fprintf(ERRORFILE, "CGroups: Failed to close cgroups file, %s\n", full_path);
+    failure = 1;
+    goto cleanup;
+  }
+
+cleanup:
+  if (full_path) {
+    free(full_path);
+  }
+  return -failure;
+}
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/modules/cgroups/cgroups-operations.h hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/modules/cgroups/cgroups-operations.h
new file mode 100644
index 00000000000..cf80bcf6059
--- /dev/null
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/modules/cgroups/cgroups-operations.h
@@ -0,0 +1,55 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _CGROUPS_OPERATIONS_H_
+#define _CGROUPS_OPERATIONS_H_
+
+#define CGROUPS_SECTION_NAME "cgroups"
+#define CGROUPS_ROOT_KEY "root"
+#define CGROUPS_YARN_HIERARCHY_KEY "yarn-hierarchy"
+
+/**
+ * Handle CGroups parameter update requests:
+ * - hierarchy_name: e.g. devices / cpu,cpuacct
+ * - param_name: e.g. deny
+ * - group_id: e.g. container_x_y
+ * - value: e.g. "a *:* rwm"
+ *
+ * return 0 if succeeded
+ */
+int update_cgroups_parameters(
+   const char* hierarchy_name,
+   const char* param_name,
+   const char* group_id,
+   const char* value);
+
+ /**
+  * Get CGroups path to update. Visible for testing.
+  * Returns the path, or NULL on failure.
+  */
+ char* get_cgroups_path_to_write(
+    const char* hierarchy_name,
+    const char* param_name,
+    const char* group_id);
+
+ /**
+  * Reload config from filesystem, visible for testing.
+  */
+ void reload_cgroups_configuration();
+
+#endif
\ No newline at end of file
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/modules/gpu/gpu-module.c hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/modules/gpu/gpu-module.c
new file mode 100644
index 00000000000..1a1b164f2ba
--- /dev/null
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/modules/gpu/gpu-module.c
@@ -0,0 +1,229 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "configuration.h"
+#include "container-executor.h"
+#include "utils/string-utils.h"
+#include "modules/gpu/gpu-module.h"
+#include "modules/cgroups/cgroups-operations.h"
+#include "modules/common/module-configs.h"
+#include "modules/common/constants.h"
+#include "util.h"
+
+#include <errno.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define EXCLUDED_GPUS_OPTION "excluded_gpus"
+#define CONTAINER_ID_OPTION "container_id"
+#define DEFAULT_NVIDIA_MAJOR_NUMBER 195
+#define MAX_CONTAINER_ID_LEN 128
+
+static const struct section* cfg_section;
+
+static int internal_handle_gpu_request(
+    update_cgroups_parameters_func update_cgroups_parameters_func_p,
+    size_t n_minor_devices_to_block, int minor_devices[],
+    const char* container_id) {
+  char* allowed_minor_numbers_str = NULL;
+  int* allowed_minor_numbers = NULL;
+  size_t n_allowed_minor_numbers = 0;
+  int return_code = 0;
+
+  if (n_minor_devices_to_block == 0) {
+    // no device to block, just return;
+    return 0;
+  }
+
+  // Get major device number from cfg, if not set, major number of (Nvidia)
+  // will be the default value.
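+  // Illustrative container-executor.cfg section read by this module (key
+  // names come from gpu-module.h; the values here are examples only):
+  //   [gpu]
+  //   module.enabled=true
+  //   gpu.major-device-number=195
+  //   gpu.allowed-device-minor-numbers=0,1,2,3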
+  int major_device_number;
+  char* major_number_str = get_section_value(GPU_MAJOR_NUMBER_CONFIG_KEY,
+     cfg_section);
+  if (!major_number_str || 0 == major_number_str[0]) {
+    // Default major number of Nvidia devices
+    major_device_number = DEFAULT_NVIDIA_MAJOR_NUMBER;
+  } else {
+    major_device_number = strtol(major_number_str, NULL, 0);
+  }
+
+  // Get allowed minor device numbers from cfg; if not set, it means all minor
+  // devices can be used by YARN
+  allowed_minor_numbers_str = get_section_value(
+      GPU_ALLOWED_DEVICES_MINOR_NUMBERS,
+      cfg_section);
+  if (!allowed_minor_numbers_str || 0 == allowed_minor_numbers_str[0]) {
+    allowed_minor_numbers = NULL;
+  } else {
+    int rc = get_numbers_split_by_comma(allowed_minor_numbers_str,
+                                        &allowed_minor_numbers,
+                                        &n_allowed_minor_numbers);
+    if (0 != rc) {
+      fprintf(ERRORFILE,
+          "Failed to get allowed minor device numbers from cfg, value=%s\n",
+          allowed_minor_numbers_str);
+      return_code = -1;
+      goto cleanup;
+    }
+
+    // Make sure we're only trying to block devices allowed in config
+    for (int i = 0; i < n_minor_devices_to_block; i++) {
+      int found = 0;
+      for (int j = 0; j < n_allowed_minor_numbers; j++) {
+        if (minor_devices[i] == allowed_minor_numbers[j]) {
+          found = 1;
+          break;
+        }
+      }
+
+      if (!found) {
+        fprintf(ERRORFILE,
+          "Trying to blacklist device with minor-number=%d which is not on allowed list\n",
+          minor_devices[i]);
+        return_code = -1;
+        goto cleanup;
+      }
+    }
+  }
+
+  // Use cgroup helpers to blacklist devices
+  for (int i = 0; i < n_minor_devices_to_block; i++) {
+    char param_value[128];
+    memset(param_value, 0, sizeof(param_value));
+    snprintf(param_value, sizeof(param_value), "c %d:%d rwm",
+             major_device_number, minor_devices[i]);
+
+    int rc = update_cgroups_parameters_func_p("devices", "deny",
+      container_id, param_value);
+
+    if (0 != rc) {
+      fprintf(ERRORFILE, "CGroups: Failed to update cgroups\n");
+      return_code = -1;
+      goto cleanup;
+    }
+  }
+
+cleanup:
+  if (major_number_str) {
+    free(major_number_str);
+  }
+  if (allowed_minor_numbers) {
+    free(allowed_minor_numbers);
+  }
+  if (allowed_minor_numbers_str) {
+    free(allowed_minor_numbers_str);
+  }
+
+  return return_code;
+}
+
+void reload_gpu_configuration() {
+  cfg_section = get_configuration_section(GPU_MODULE_SECTION_NAME, get_cfg());
+}
+
+/*
+ * Format of GPU request commandline:
+ *
+ * c-e gpu --excluded_gpus 0,1,3 --container_id container_x_y
+ */
+int handle_gpu_request(update_cgroups_parameters_func func,
+    const char* module_name, int module_argc, char** module_argv) {
+  if (!cfg_section) {
+    reload_gpu_configuration();
+  }
+
+  if (!module_enabled(cfg_section, GPU_MODULE_SECTION_NAME)) {
+    fprintf(ERRORFILE,
+      "Please make sure gpu module is enabled before using it.\n");
+    return -1;
+  }
+
+  static struct option long_options[] = {
+    {EXCLUDED_GPUS_OPTION, required_argument, 0, 'e' },
+    {CONTAINER_ID_OPTION, required_argument, 0, 'c' },
+    {0, 0, 0, 0}
+  };
+
+  int rc = 0;
+  int c = 0;
+  int option_index = 0;
+
+  int* minor_devices = NULL;
+  char container_id[MAX_CONTAINER_ID_LEN];
+  memset(container_id, 0, sizeof(container_id));
+  size_t n_minor_devices_to_block = 0;
+  int failed = 0;
+
+  optind = 1;
+  while((c = getopt_long(module_argc, module_argv, "e:c:",
+                         long_options, &option_index)) != -1) {
+    switch(c) {
+    case 'e':
+      rc = get_numbers_split_by_comma(optarg, &minor_devices,
+        &n_minor_devices_to_block);
+      if (0 != rc) {
+        fprintf(ERRORFILE,
+          "Failed to get minor devices number from command line, value=%s\n",
+          optarg);
+        failed = 1;
+        goto cleanup;
+      }
+      break;
+    case 'c':
+      if (!validate_container_id(optarg)) {
+        fprintf(ERRORFILE,
+          "Specified container_id=%s is invalid\n", optarg);
+        failed = 1;
+        goto cleanup;
+      }
+      strncpy(container_id, optarg, MAX_CONTAINER_ID_LEN - 1);
+      break;
+    default:
+      fprintf(ERRORFILE,
+        "Unknown option in gpu command character %d %c, optionindex = %d\n",
+        c, c, optind);
+      failed = 1;
+      goto cleanup;
+    }
+  }
+
+  if (0 == container_id[0]) {
+    fprintf(ERRORFILE,
+      "[%s] --container_id must be specified.\n", __func__);
+    failed = 1;
+    goto cleanup;
+  }
+
+  if (!minor_devices) {
+    // Minor devices is null, skip following call.
+    fprintf(ERRORFILE, "--excluded_gpus is not specified, skip cgroups call.\n");
+    goto cleanup;
+  }
+
+  failed = internal_handle_gpu_request(func, n_minor_devices_to_block,
+           minor_devices,
+           container_id);
+
+cleanup:
+  if (minor_devices) {
+    free(minor_devices);
+  }
+  return failed;
+}
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/modules/gpu/gpu-module.h hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/modules/gpu/gpu-module.h
new file mode 100644
index 00000000000..59d4c7e9cb1
--- /dev/null
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/modules/gpu/gpu-module.h
@@ -0,0 +1,45 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifdef __FreeBSD__
+#define _WITH_GETLINE
+#endif
+
+#ifndef _MODULES_GPU_GPU_MODULE_H_
+#define _MODULES_GPU_GPU_MODULE_H_
+
+#define GPU_MAJOR_NUMBER_CONFIG_KEY "gpu.major-device-number"
+#define GPU_ALLOWED_DEVICES_MINOR_NUMBERS "gpu.allowed-device-minor-numbers"
+#define GPU_MODULE_SECTION_NAME "gpu"
+
+// For unit test stubbing
+typedef int (*update_cgroups_parameters_func)(const char*, const char*,
+   const char*, const char*);
+
+/**
+ * Handle gpu requests
+ */
+int handle_gpu_request(update_cgroups_parameters_func func,
+   const char* module_name, int module_argc, char** module_argv);
+
+/**
+ * Reload config from filesystem, visible for testing.
+ */
+void reload_gpu_configuration();
+
+#endif
\ No newline at end of file
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/modules/cgroups/test-cgroups-module.cc hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/modules/cgroups/test-cgroups-module.cc
new file mode 100644
index 00000000000..8ffbe884a64
--- /dev/null
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/modules/cgroups/test-cgroups-module.cc
@@ -0,0 +1,121 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+extern "C" {
+#include "configuration.h"
+#include "container-executor.h"
+#include "modules/cgroups/cgroups-operations.h"
+#include "test/test-container-executor-common.h"
+#include "util.h"
+}
+
+namespace ContainerExecutor {
+
+class TestCGroupsModule : public ::testing::Test {
+protected:
+  virtual void SetUp() {
+    if (mkdirs(TEST_ROOT, 0755) != 0) {
+      fprintf(ERRORFILE, "Failed to mkdir TEST_ROOT: %s\n", TEST_ROOT);
+      exit(1);
+    }
+    LOGFILE = stdout;
+    ERRORFILE = stderr;
+  }
+
+  virtual void TearDown() {}
+};
+
+TEST_F(TestCGroupsModule, test_cgroups_get_path_without_define_root) {
+  // Write config file.
+  const char *filename = TEST_ROOT "/test_cgroups_get_path_without_root.cfg";
+  FILE *file = fopen(filename, "w");
+  if (file == NULL) {
+    printf("FAIL: Could not open configuration file: %s\n", filename);
+    exit(1);
+  }
+  fprintf(file, "[cgroups]\n");
+  fprintf(file, "yarn-hierarchy=yarn\n");
+  fclose(file);
+
+  // Read config file
+  read_executor_config(filename);
+  reload_cgroups_configuration();
+
+  char* path = get_cgroups_path_to_write("devices", "deny", "container_1");
+
+  ASSERT_TRUE(NULL == path) << "Should fail.\n";
+}
+
+TEST_F(TestCGroupsModule, test_cgroups_get_path_without_define_yarn_hierarchy) {
+  // Write config file.
+  const char *filename = TEST_ROOT "/test_cgroups_get_path_without_root.cfg";
+  FILE *file = fopen(filename, "w");
+
+  ASSERT_TRUE(file) << "FAIL: Could not open configuration file: " << filename
+                    << "\n";
+  fprintf(file, "[cgroups]\n");
+  fprintf(file, "root=/sys/fs/cgroups\n");
+  fclose(file);
+
+  // Read config file
+  read_executor_config(filename);
+  reload_cgroups_configuration();
+  char* path = get_cgroups_path_to_write("devices", "deny", "container_1");
+
+  ASSERT_TRUE(NULL == path) << "Should fail.\n";
+}
+
+TEST_F(TestCGroupsModule, test_cgroups_get_path_succeeded) {
+  // Write config file.
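+  // The values below carry a trailing space; the expected path asserted at
+  // the end of this test implies the configuration reader trims surrounding
+  // whitespace.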
+  const char *filename = TEST_ROOT "/test_cgroups_get_path.cfg";
+  FILE *file = fopen(filename, "w");
+
+  ASSERT_TRUE(file) << "FAIL: Could not open configuration file\n";
+  fprintf(file, "[cgroups]\n");
+  fprintf(file, "root=/sys/fs/cgroups \n");
+  fprintf(file, "yarn-hierarchy=yarn \n");
+  fclose(file);
+
+  // Read config file
+  read_executor_config(filename);
+  reload_cgroups_configuration();
+
+  char* path = get_cgroups_path_to_write("devices", "deny", "container_1");
+  ASSERT_TRUE(NULL != path) << "Should succeed.\n";
+
+  const char *EXPECTED =
+      "/sys/fs/cgroups/devices/yarn/container_1/devices.deny";
+
+  ASSERT_STREQ(EXPECTED, path)
+      << "Returned cgroup-path-to-write is not expected\n";
+}
+} // namespace ContainerExecutor
\ No newline at end of file
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/modules/gpu/test-gpu-module.cc hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/modules/gpu/test-gpu-module.cc
new file mode 100644
index 00000000000..b3d93dcecf3
--- /dev/null
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/modules/gpu/test-gpu-module.cc
@@ -0,0 +1,216 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+extern "C" {
+#include "configuration.h"
+#include "container-executor.h"
+#include "modules/cgroups/cgroups-operations.h"
+#include "modules/gpu/gpu-module.h"
+#include "test/test-container-executor-common.h"
+#include "util.h"
+}
+
+namespace ContainerExecutor {
+
+class TestGpuModule : public ::testing::Test {
+protected:
+  virtual void SetUp() {
+    if (mkdirs(TEST_ROOT, 0755) != 0) {
+      fprintf(ERRORFILE, "Failed to mkdir TEST_ROOT: %s\n", TEST_ROOT);
+      exit(1);
+    }
+    LOGFILE = stdout;
+    ERRORFILE = stderr;
+  }
+
+  virtual void TearDown() {
+
+  }
+};
+
+static std::vector<char*> cgroups_parameters_invoked;
+
+static int mock_update_cgroups_parameters(
+   const char* controller_name,
+   const char* param_name,
+   const char* group_id,
+   const char* value) {
+  char* buf = (char*) malloc(128);
+  strcpy(buf, controller_name);
+  cgroups_parameters_invoked.push_back(buf);
+
+  buf = (char*) malloc(128);
+  strcpy(buf, param_name);
+  cgroups_parameters_invoked.push_back(buf);
+
+  buf = (char*) malloc(128);
+  strcpy(buf, group_id);
+  cgroups_parameters_invoked.push_back(buf);
+
+  buf = (char*) malloc(128);
+  strcpy(buf, value);
+  cgroups_parameters_invoked.push_back(buf);
+  return 0;
+}
+
+static void verify_param_updated_to_cgroups(
+    int argc, const char** argv) {
+  ASSERT_EQ(argc, cgroups_parameters_invoked.size());
+
+  int offset = 0;
+  while (offset < argc) {
+    ASSERT_STREQ(argv[offset], cgroups_parameters_invoked[offset]);
+    offset++;
+  }
+}
+
+static void write_and_load_gpu_module_to_cfg(const char* cfg_filepath, int enabled) {
+  FILE *file = fopen(cfg_filepath, "w");
+  if (file == NULL) {
+    printf("FAIL: Could not open configuration file: %s\n", cfg_filepath);
+    exit(1);
+  }
+  fprintf(file, "[gpu]\n");
+  if (enabled) {
+    fprintf(file, "module.enabled=true\n");
+  } else {
+    fprintf(file, "module.enabled=false\n");
+  }
+  fclose(file);
+
+  // Read config file
+  read_executor_config(cfg_filepath);
+  reload_gpu_configuration();
+}
+
+static void test_gpu_module_enabled_disabled(int enabled) {
+  // Write config file.
+  const char *filename = TEST_ROOT "/test_cgroups_module_enabled_disabled.cfg";
+  write_and_load_gpu_module_to_cfg(filename, enabled);
+
+  char* argv[] = { (char*) "--module-gpu", (char*) "--excluded_gpus", (char*) "0,1",
+                   (char*) "--container_id",
+                   (char*) "container_1498064906505_0001_01_000001" };
+
+  int rc = handle_gpu_request(&mock_update_cgroups_parameters,
+             "gpu", 5, argv);
+
+  int EXPECTED_RC;
+  if (enabled) {
+    EXPECTED_RC = 0;
+  } else {
+    EXPECTED_RC = -1;
+  }
+  ASSERT_EQ(EXPECTED_RC, rc);
+}
+
+TEST_F(TestGpuModule, test_verify_gpu_module_calls_cgroup_parameter) {
+  // Write config file.
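+  // The mock above records each (controller, param, group_id, value) tuple,
+  // so the assertions below can check the exact cgroups writes, e.g.
+  // "devices deny <container_id> c 195:0 rwm" for excluded minor number 0.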
+ const char *filename = TEST_ROOT "/test_verify_gpu_module_calls_cgroup_parameter.cfg"; + write_and_load_gpu_module_to_cfg(filename, 1); + + char* container_id = (char*) "container_1498064906505_0001_01_000001"; + char* argv[] = { (char*) "--module-gpu", (char*) "--excluded_gpus", (char*) "0,1", + (char*) "--container_id", + container_id }; + + /* Test case 1: block 2 devices */ + cgroups_parameters_invoked.clear(); + int rc = handle_gpu_request(&mock_update_cgroups_parameters, + "gpu", 5, argv); + ASSERT_EQ(0, rc) << "Should success.\n"; + + // Verify cgroups parameters + const char* expected_cgroups_argv[] = { "devices", "deny", container_id, "c 195:0 rwm", + "devices", "deny", container_id, "c 195:1 rwm"}; + verify_param_updated_to_cgroups(8, expected_cgroups_argv); + + /* Test case 2: block 0 devices */ + cgroups_parameters_invoked.clear(); + char* argv_1[] = { (char*) "--module-gpu", (char*) "--container_id", container_id }; + rc = handle_gpu_request(&mock_update_cgroups_parameters, + "gpu", 3, argv_1); + ASSERT_EQ(0, rc) << "Should success.\n"; + + // Verify cgroups parameters + verify_param_updated_to_cgroups(0, NULL); + + /* Test case 3: block 2 non-sequential devices */ + cgroups_parameters_invoked.clear(); + char* argv_2[] = { (char*) "--module-gpu", (char*) "--excluded_gpus", (char*) "1,3", + (char*) "--container_id", container_id }; + rc = handle_gpu_request(&mock_update_cgroups_parameters, + "gpu", 5, argv_2); + ASSERT_EQ(0, rc) << "Should success.\n"; + + // Verify cgroups parameters + const char* expected_cgroups_argv_2[] = { "devices", "deny", container_id, "c 195:1 rwm", + "devices", "deny", container_id, "c 195:3 rwm"}; + verify_param_updated_to_cgroups(8, expected_cgroups_argv_2); +} + +TEST_F(TestGpuModule, test_illegal_cli_parameters) { + // Write config file. 
+TEST_F(TestGpuModule, test_illegal_cli_parameters) {
+  // Write config file.
+  const char *filename = TEST_ROOT "/test_illegal_cli_parameters.cfg";
+  write_and_load_gpu_module_to_cfg(filename, 1);
+
+  // Illegal container id - 1: not a container id at all
+  char* argv[] = { (char*) "--module-gpu", (char*) "--excluded_gpus", (char*) "0,1",
+                   (char*) "--container_id", (char*) "xxxx" };
+  int rc = handle_gpu_request(&mock_update_cgroups_parameters,
+     "gpu", 5, argv);
+  ASSERT_NE(0, rc) << "Should fail.\n";
+
+  // Illegal container id - 2: truncated container id
+  char* argv_1[] = { (char*) "--module-gpu", (char*) "--excluded_gpus", (char*) "0,1",
+                     (char*) "--container_id", (char*) "container_1" };
+  rc = handle_gpu_request(&mock_update_cgroups_parameters,
+     "gpu", 5, argv_1);
+  ASSERT_NE(0, rc) << "Should fail.\n";
+
+  // Missing container id
+  char* argv_2[] = { (char*) "--module-gpu", (char*) "--excluded_gpus", (char*) "0,1" };
+  rc = handle_gpu_request(&mock_update_cgroups_parameters,
+     "gpu", 3, argv_2);
+  ASSERT_NE(0, rc) << "Should fail.\n";
+}
+
+TEST_F(TestGpuModule, test_gpu_module_disabled) {
+  test_gpu_module_enabled_disabled(0);
+}
+
+TEST_F(TestGpuModule, test_gpu_module_enabled) {
+  test_gpu_module_enabled_disabled(1);
+}
+} // namespace ContainerExecutor
\ No newline at end of file
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/test-container-executor.c hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/test-container-executor.c
index 9e85b3fbf52..235ea77a270 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/test-container-executor.c
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/test-container-executor.c
@@ -1392,7 +1392,6 @@ int main(int argc, char **argv) {
 #endif
 
   test_trim_function();
-  run("rm -fr " TEST_ROOT);
   printf("\nFinished tests\n");
 
   free(current_username);
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/NodeManagerTestBase.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/NodeManagerTestBase.java
new file mode 100644
index 00000000000..13b3ee91bdc
--- /dev/null
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/NodeManagerTestBase.java
@@ -0,0 +1,164 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +package org.apache.hadoop.yarn.server.nodemanager; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; +import org.apache.hadoop.net.ServerSocketUtil; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.event.Dispatcher; +import org.apache.hadoop.yarn.exceptions.YarnException; +import org.apache.hadoop.yarn.factories.RecordFactory; +import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; +import org.apache.hadoop.yarn.server.api.ResourceTracker; +import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatRequest; +import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse; +import org.apache.hadoop.yarn.server.api.protocolrecords.RegisterNodeManagerRequest; +import org.apache.hadoop.yarn.server.api.protocolrecords.RegisterNodeManagerResponse; +import org.apache.hadoop.yarn.server.api.protocolrecords.UnRegisterNodeManagerRequest; +import org.apache.hadoop.yarn.server.api.protocolrecords.UnRegisterNodeManagerResponse; +import org.apache.hadoop.yarn.server.api.protocolrecords.impl.pb.NodeHeartbeatResponsePBImpl; +import org.apache.hadoop.yarn.server.api.protocolrecords.impl.pb.RegisterNodeManagerResponsePBImpl; +import org.apache.hadoop.yarn.server.api.protocolrecords.impl.pb.UnRegisterNodeManagerResponsePBImpl; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl; +import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics; +import org.junit.Assert; +import org.junit.Before; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.File; +import java.io.IOException; +import java.net.InetAddress; +import java.net.UnknownHostException; + +public class NodeManagerTestBase { + // temp fix until metrics system can auto-detect itself running in unit test: + static { + DefaultMetricsSystem.setMiniClusterMode(true); + } + + protected static final Logger LOG = + LoggerFactory.getLogger(TestNodeStatusUpdater.class); + protected static final File basedir = + new File("target", TestNodeStatusUpdater.class.getName()); + protected static final File nmLocalDir = new File(basedir, "nm0"); + protected static final File tmpDir = new File(basedir, "tmpDir"); + protected static final File remoteLogsDir = new File(basedir, "remotelogs"); + protected static final File logsDir = new File(basedir, "logs"); + protected static final RecordFactory recordFactory = RecordFactoryProvider + .getRecordFactory(null); + protected Configuration conf; + + protected YarnConfiguration createNMConfig() throws IOException { + return createNMConfig(ServerSocketUtil.getPort(49170, 10)); + } + + protected YarnConfiguration createNMConfig(int port) throws IOException { + YarnConfiguration conf = new YarnConfiguration(); + String localhostAddress = null; + try { + localhostAddress = InetAddress.getByName("localhost") + .getCanonicalHostName(); + } catch (UnknownHostException e) { + Assert.fail("Unable to get localhost address: " + e.getMessage()); + } + conf.setInt(YarnConfiguration.NM_PMEM_MB, 5 * 1024); // 5GB + conf.set(YarnConfiguration.NM_ADDRESS, localhostAddress + ":" + port); + conf.set(YarnConfiguration.NM_LOCALIZER_ADDRESS, localhostAddress + ":" + + ServerSocketUtil.getPort(49160, 10)); + conf.set(YarnConfiguration.NM_LOG_DIRS, logsDir.getAbsolutePath()); + conf.set(YarnConfiguration.NM_REMOTE_APP_LOG_DIR, + remoteLogsDir.getAbsolutePath()); + conf.set(YarnConfiguration.NM_LOCAL_DIRS, 
nmLocalDir.getAbsolutePath()); + conf.setLong(YarnConfiguration.NM_LOG_RETAIN_SECONDS, 1); + return conf; + } + + public static class BaseResourceTrackerForTest implements ResourceTracker { + @Override + public RegisterNodeManagerResponse registerNodeManager( + RegisterNodeManagerRequest request) throws YarnException, IOException { + return new RegisterNodeManagerResponsePBImpl(); + } + + @Override + public NodeHeartbeatResponse nodeHeartbeat(NodeHeartbeatRequest request) + throws YarnException, IOException { + return new NodeHeartbeatResponsePBImpl(); + } + + @Override + public UnRegisterNodeManagerResponse unRegisterNodeManager( + UnRegisterNodeManagerRequest request) + throws YarnException, IOException { + return new UnRegisterNodeManagerResponsePBImpl(); + } + } + + protected static class BaseNodeStatusUpdaterForTest extends NodeStatusUpdaterImpl { + public ResourceTracker resourceTracker; + protected Context context; + + public BaseNodeStatusUpdaterForTest(Context context, Dispatcher dispatcher, + NodeHealthCheckerService healthChecker, NodeManagerMetrics metrics, + ResourceTracker resourceTracker) { + super(context, dispatcher, healthChecker, metrics); + this.context = context; + this.resourceTracker = resourceTracker; + } + @Override + protected ResourceTracker getRMClient() { + return resourceTracker; + } + + @Override + protected void stopRMProxy() { + return; + } + } + + public class MyContainerManager extends ContainerManagerImpl { + public boolean signaled = false; + + public MyContainerManager(Context context, ContainerExecutor exec, + DeletionService deletionContext, NodeStatusUpdater nodeStatusUpdater, + NodeManagerMetrics metrics, + LocalDirsHandlerService dirsHandler) { + super(context, exec, deletionContext, nodeStatusUpdater, + metrics, dirsHandler); + } + + @Override + public void handle(ContainerManagerEvent event) { + if (event.getType() == ContainerManagerEventType.SIGNAL_CONTAINERS) { + signaled = true; + } + } + } + + @Before + public void setUp() throws IOException { + nmLocalDir.mkdirs(); + tmpDir.mkdirs(); + logsDir.mkdirs(); + remoteLogsDir.mkdirs(); + conf = createNMConfig(); + } +} diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestDefaultContainerExecutor.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestDefaultContainerExecutor.java index 2e9eff529cd..9b180c7eff6 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestDefaultContainerExecutor.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestDefaultContainerExecutor.java @@ -178,7 +178,7 @@ public void testDirPermissions() throws Exception { FileContext lfs = FileContext.getLocalFSFileContext(conf); DefaultContainerExecutor executor = new DefaultContainerExecutor(lfs); executor.setConf(conf); - executor.init(); + executor.init(null); try { executor.createUserLocalDirs(localDirs, user); @@ -317,7 +317,7 @@ public Object answer(InvocationOnMock invocationOnMock) Path workDir = localDir; Path pidFile = new Path(workDir, "pid.txt"); - mockExec.init(); + mockExec.init(null); mockExec.activateContainer(cId, pidFile); int ret = mockExec.launchContainer(new ContainerStartContext.Builder() .setContainer(container) diff --git 
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestDockerContainerExecutorWithMocks.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestDockerContainerExecutorWithMocks.java index f1194c9c095..7e1752b737b 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestDockerContainerExecutorWithMocks.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestDockerContainerExecutorWithMocks.java @@ -116,7 +116,7 @@ public void tearDown() { public void testContainerInitSecure() throws IOException { dockerContainerExecutor.getConf().set( CommonConfigurationKeys.HADOOP_SECURITY_AUTHENTICATION, "kerberos"); - dockerContainerExecutor.init(); + dockerContainerExecutor.init(mock(Context.class)); } @Test(expected = IllegalArgumentException.class) diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutor.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutor.java index cf8d977c2bf..95c8f5e685c 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutor.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutor.java @@ -628,7 +628,7 @@ public void testPostExecuteAfterReacquisition() throws Exception { LinuxContainerExecutor lce = new LinuxContainerExecutor(); lce.setConf(conf); try { - lce.init(); + lce.init(null); } catch (IOException e) { // expected if LCE isn't setup right, but not necessary for this test } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutorWithMocks.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutorWithMocks.java index 79b88cf5eed..249e017dc43 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutorWithMocks.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutorWithMocks.java @@ -426,7 +426,7 @@ public Object answer(InvocationOnMock invocationOnMock) @Test public void testInit() throws Exception { - mockExec.init(); + mockExec.init(mock(Context.class)); assertEquals(Arrays.asList("--checksetup"), readMockParams()); } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManager.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManager.java index 92797116075..b31215b0f3d 100644 --- 
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManager.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManager.java @@ -37,7 +37,7 @@ public static final class InvalidContainerExecutor extends DefaultContainerExecutor { @Override - public void init() throws IOException { + public void init(Context nmContext) throws IOException { throw new IOException("dummy executor init called"); } } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java index 055dab44897..533cf2a6c7b 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java @@ -20,16 +20,14 @@ import static org.apache.hadoop.yarn.server.utils.YarnServerBuilderUtils.newNodeHeartbeatResponse; import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import java.io.EOFException; import java.io.File; import java.io.IOException; import java.net.InetAddress; import java.net.InetSocketAddress; -import java.net.UnknownHostException; import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Collections; @@ -80,8 +78,6 @@ import org.apache.hadoop.yarn.event.EventHandler; import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.exceptions.YarnRuntimeException; -import org.apache.hadoop.yarn.factories.RecordFactory; -import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; import org.apache.hadoop.yarn.proto.YarnServerCommonServiceProtos.NodeHeartbeatResponseProto; import org.apache.hadoop.yarn.security.ContainerTokenIdentifier; import org.apache.hadoop.yarn.server.api.ResourceTracker; @@ -117,41 +113,14 @@ import org.junit.Test; @SuppressWarnings("rawtypes") -public class TestNodeStatusUpdater { - - // temp fix until metrics system can auto-detect itself running in unit test: - static { - DefaultMetricsSystem.setMiniClusterMode(true); - } - - static final Logger LOG = - LoggerFactory.getLogger(TestNodeStatusUpdater.class); - static final File basedir = - new File("target", TestNodeStatusUpdater.class.getName()); - static final File nmLocalDir = new File(basedir, "nm0"); - static final File tmpDir = new File(basedir, "tmpDir"); - static final File remoteLogsDir = new File(basedir, "remotelogs"); - static final File logsDir = new File(basedir, "logs"); - private static final RecordFactory recordFactory = RecordFactoryProvider - .getRecordFactory(null); - +public class TestNodeStatusUpdater extends NodeManagerTestBase { volatile int heartBeatID = 0; volatile Throwable nmStartError = null; private final List registeredNodes = new ArrayList(); private boolean triggered = false; - private Configuration conf; private NodeManager nm; private AtomicBoolean assertionFailedInThread = new AtomicBoolean(false); - 
@Before - public void setUp() throws IOException { - nmLocalDir.mkdirs(); - tmpDir.mkdirs(); - logsDir.mkdirs(); - remoteLogsDir.mkdirs(); - conf = createNMConfig(); - } - @After public void tearDown() { this.registeredNodes.clear(); @@ -332,29 +301,7 @@ public UnRegisterNodeManagerResponse unRegisterNodeManager( } } - private class MyContainerManager extends ContainerManagerImpl { - public boolean signaled = false; - - public MyContainerManager(Context context, ContainerExecutor exec, - DeletionService deletionContext, NodeStatusUpdater nodeStatusUpdater, - NodeManagerMetrics metrics, - LocalDirsHandlerService dirsHandler) { - super(context, exec, deletionContext, nodeStatusUpdater, - metrics, dirsHandler); - } - - @Override - public void handle(ContainerManagerEvent event) { - if (event.getType() == ContainerManagerEventType.SIGNAL_CONTAINERS) { - signaled = true; - } - } - } - - private class MyNodeStatusUpdater extends NodeStatusUpdaterImpl { - public ResourceTracker resourceTracker; - private Context context; - + private class MyNodeStatusUpdater extends BaseNodeStatusUpdaterForTest { public MyNodeStatusUpdater(Context context, Dispatcher dispatcher, NodeHealthCheckerService healthChecker, NodeManagerMetrics metrics) { this(context, dispatcher, healthChecker, metrics, false); @@ -363,19 +310,8 @@ public MyNodeStatusUpdater(Context context, Dispatcher dispatcher, public MyNodeStatusUpdater(Context context, Dispatcher dispatcher, NodeHealthCheckerService healthChecker, NodeManagerMetrics metrics, boolean signalContainer) { - super(context, dispatcher, healthChecker, metrics); - this.context = context; - resourceTracker = new MyResourceTracker(this.context, signalContainer); - } - - @Override - protected ResourceTracker getRMClient() { - return resourceTracker; - } - - @Override - protected void stopRMProxy() { - return; + super(context, dispatcher, healthChecker, metrics, + new MyResourceTracker(context, signalContainer)); } } @@ -1818,7 +1754,6 @@ public void run() { Assert.assertTrue("Test failed with exception(s)" + exceptions, exceptions.isEmpty()); } - // Add new containers info into NM context each time node heart beats. 
private class MyNMContext extends NMContext { @@ -1922,31 +1857,6 @@ private void verifyNodeStartFailure(String errMessage) throws Exception { this.registeredNodes.size()); } - private YarnConfiguration createNMConfig(int port) throws IOException { - YarnConfiguration conf = new YarnConfiguration(); - String localhostAddress = null; - try { - localhostAddress = InetAddress.getByName("localhost") - .getCanonicalHostName(); - } catch (UnknownHostException e) { - Assert.fail("Unable to get localhost address: " + e.getMessage()); - } - conf.setInt(YarnConfiguration.NM_PMEM_MB, 5 * 1024); // 5GB - conf.set(YarnConfiguration.NM_ADDRESS, localhostAddress + ":" + port); - conf.set(YarnConfiguration.NM_LOCALIZER_ADDRESS, localhostAddress + ":" - + ServerSocketUtil.getPort(49160, 10)); - conf.set(YarnConfiguration.NM_LOG_DIRS, logsDir.getAbsolutePath()); - conf.set(YarnConfiguration.NM_REMOTE_APP_LOG_DIR, - remoteLogsDir.getAbsolutePath()); - conf.set(YarnConfiguration.NM_LOCAL_DIRS, nmLocalDir.getAbsolutePath()); - conf.setLong(YarnConfiguration.NM_LOG_RETAIN_SECONDS, 1); - return conf; - } - - private YarnConfiguration createNMConfig() throws IOException { - return createNMConfig(ServerSocketUtil.getPort(49170, 10)); - } - private NodeManager getNodeManager(final NodeAction nodeHeartBeatAction) { return new NodeManager() { @Override diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/amrmproxy/BaseAMRMProxyTest.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/amrmproxy/BaseAMRMProxyTest.java index 3c432d30338..4b4f3566f31 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/amrmproxy/BaseAMRMProxyTest.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/amrmproxy/BaseAMRMProxyTest.java @@ -18,26 +18,6 @@ package org.apache.hadoop.yarn.server.nodemanager.amrmproxy; -import java.io.IOException; -import java.security.PrivilegedExceptionAction; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.TreeSet; -import java.util.concurrent.Callable; -import java.util.concurrent.ConcurrentLinkedQueue; -import java.util.concurrent.ConcurrentMap; -import java.util.concurrent.ExecutorCompletionService; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.Future; -import java.util.concurrent.TimeUnit; - -import org.apache.hadoop.yarn.server.nodemanager.ContainerStateTransitionListener; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.security.Credentials; import org.apache.hadoop.security.UserGroupInformation; @@ -66,6 +46,7 @@ import org.apache.hadoop.yarn.server.api.records.AppCollectorData; import org.apache.hadoop.yarn.server.api.records.NodeHealthStatus; import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor; +import org.apache.hadoop.yarn.server.nodemanager.ContainerStateTransitionListener; import org.apache.hadoop.yarn.server.nodemanager.Context; import org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService; import org.apache.hadoop.yarn.server.nodemanager.NodeManager.NMContext; @@ -74,18 +55,37 @@ import 
org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManager; import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.ResourcePluginManager; import org.apache.hadoop.yarn.server.nodemanager.recovery.NMMemoryStateStoreService; import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService; import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService.RecoveredAMRMProxyState; -import org.apache.hadoop.yarn.server.scheduler.OpportunisticContainerAllocator; import org.apache.hadoop.yarn.server.nodemanager.security.NMContainerTokenSecretManager; import org.apache.hadoop.yarn.server.nodemanager.security.NMTokenSecretManagerInNM; import org.apache.hadoop.yarn.server.nodemanager.timelineservice.NMTimelinePublisher; +import org.apache.hadoop.yarn.server.scheduler.OpportunisticContainerAllocator; import org.apache.hadoop.yarn.server.security.ApplicationACLsManager; import org.apache.hadoop.yarn.util.Records; import org.junit.After; import org.junit.Assert; import org.junit.Before; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.security.PrivilegedExceptionAction; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.TreeSet; +import java.util.concurrent.Callable; +import java.util.concurrent.ConcurrentLinkedQueue; +import java.util.concurrent.ConcurrentMap; +import java.util.concurrent.ExecutorCompletionService; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.TimeUnit; /** * Base class for all the AMRMProxyService test cases. 
 * It provides utility
@@ -805,5 +805,9 @@ public void setNMTimelinePublisher(NMTimelinePublisher nmMetricsPublisher) {
     public NMTimelinePublisher getNMTimelinePublisher() {
       return null;
     }
+
+    public ResourcePluginManager getResourcePluginManager() {
+      return null;
+    }
   }
 }
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManagerRecovery.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManagerRecovery.java
index 8980a49d51b..52fa9f3eda1 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManagerRecovery.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManagerRecovery.java
@@ -31,6 +31,7 @@
 import java.io.File;
 import java.io.IOException;
 import java.io.PrintWriter;
+import java.io.Serializable;
 import java.nio.ByteBuffer;
 import java.security.PrivilegedExceptionAction;
 import java.util.ArrayList;
@@ -91,6 +92,7 @@
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationImpl;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationState;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ResourceMappings;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainersLauncher;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainersLauncherEvent;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.ResourceLocalizationService;
@@ -110,6 +112,7 @@
 import org.apache.hadoop.yarn.server.nodemanager.timelineservice.NMTimelinePublisher;
 import org.apache.hadoop.yarn.server.security.ApplicationACLsManager;
 import org.apache.hadoop.yarn.util.timeline.TimelineUtils;
+import org.junit.Assert;
 import org.junit.Before;
 import org.junit.Test;
 
@@ -457,7 +460,7 @@ public void testContainerResizeRecovery() throws Exception {
     NMStateStoreService stateStore = new NMMemoryStateStoreService();
     stateStore.init(conf);
     stateStore.start();
-    Context context = createContext(conf, stateStore);
+    context = createContext(conf, stateStore);
     ContainerManagerImpl cm = createContainerManager(context, delSrvc);
     ((NMContext) context).setContainerManager(cm);
     cm.init(conf);
@@ -467,55 +470,12 @@ public void testContainerResizeRecovery() throws Exception {
     ApplicationAttemptId attemptId =
         ApplicationAttemptId.newInstance(appId, 1);
     ContainerId cid = ContainerId.newContainerId(attemptId, 1);
-    Map<String, String> containerEnv = new HashMap<>();
-    setFlowContext(containerEnv, "app_name1", appId);
-    Map<String, ByteBuffer> serviceData = Collections.emptyMap();
-    Credentials containerCreds = new Credentials();
-    DataOutputBuffer dob = new DataOutputBuffer();
-    containerCreds.writeTokenStorageToStream(dob);
-    ByteBuffer containerTokens = ByteBuffer.wrap(dob.getData(), 0,
-        dob.getLength());
-    Map<ApplicationAccessType, String> acls = Collections.emptyMap();
-    File tmpDir = new File("target",
-        this.getClass().getSimpleName() + "-tmpDir");
-    File scriptFile = Shell.appendScriptExtension(tmpDir, "scriptFile");
-    PrintWriter fileWriter = new PrintWriter(scriptFile);
-    if (Shell.WINDOWS) {
-      fileWriter.println("@ping -n 100 127.0.0.1 >nul");
-    } else {
-      fileWriter.write("\numask 0");
-      fileWriter.write("\nexec sleep 100");
-    }
-    fileWriter.close();
-    FileContext localFS = FileContext.getLocalFSFileContext();
-    URL resource_alpha =
-        URL.fromPath(localFS
-            .makeQualified(new Path(scriptFile.getAbsolutePath())));
-    LocalResource rsrc_alpha = RecordFactoryProvider
-        .getRecordFactory(null).newRecordInstance(LocalResource.class);
-    rsrc_alpha.setResource(resource_alpha);
-    rsrc_alpha.setSize(-1);
-    rsrc_alpha.setVisibility(LocalResourceVisibility.APPLICATION);
-    rsrc_alpha.setType(LocalResourceType.FILE);
-    rsrc_alpha.setTimestamp(scriptFile.lastModified());
-    String destinationFile = "dest_file";
-    Map<String, LocalResource> localResources = new HashMap<>();
-    localResources.put(destinationFile, rsrc_alpha);
-    List<String> commands =
-        Arrays.asList(Shell.getRunScriptCommand(scriptFile));
-    ContainerLaunchContext clc = ContainerLaunchContext.newInstance(
-        localResources, containerEnv, commands, serviceData,
-        containerTokens, acls);
-    StartContainersResponse startResponse = startContainer(
-        context, cm, cid, clc, null);
-    assertTrue(startResponse.getFailedRequests().isEmpty());
-    assertEquals(1, context.getApplications().size());
+
+    commonLaunchContainer(appId, cid, cm);
+
     Application app = context.getApplications().get(appId);
     assertNotNull(app);
-    // make sure the container reaches RUNNING state
-    waitForNMContainerState(cm, cid,
-        org.apache.hadoop.yarn.server.nodemanager
-            .containermanager.container.ContainerState.RUNNING);
+
     Resource targetResource = Resource.newInstance(2048, 2);
     ContainerUpdateResponse updateResponse =
         updateContainers(context, cm, cid, targetResource);
@@ -538,6 +498,63 @@ public void testContainerResizeRecovery() throws Exception {
     assertEquals(targetResource, containerStatus.getCapability());
   }
 
+  @Test
+  public void testResourceMappingRecoveryForContainer() throws Exception {
+    conf.setBoolean(YarnConfiguration.NM_RECOVERY_ENABLED, true);
+    conf.setBoolean(YarnConfiguration.NM_RECOVERY_SUPERVISED, true);
+    NMStateStoreService stateStore = new NMMemoryStateStoreService();
+    stateStore.init(conf);
+    stateStore.start();
+    context = createContext(conf, stateStore);
+    ContainerManagerImpl cm = createContainerManager(context, delSrvc);
+    ((NMContext) context).setContainerManager(cm);
+    cm.init(conf);
+    cm.start();
+
+    // add an application by starting a container
+    ApplicationId appId = ApplicationId.newInstance(0, 1);
+    ApplicationAttemptId attemptId =
+        ApplicationAttemptId.newInstance(appId, 1);
+    ContainerId cid = ContainerId.newContainerId(attemptId, 1);
+
+    commonLaunchContainer(appId, cid, cm);
+
+    Container nmContainer = context.getContainers().get(cid);
+
+    Application app = context.getApplications().get(appId);
+    assertNotNull(app);
+
+    // store the resource mappings of the container
+    List<Serializable> gpuResources = Arrays.asList("1", "2", "3");
+    stateStore.storeAssignedResources(nmContainer, "gpu", gpuResources);
+    List<Serializable> numaResources = Arrays.asList("numa1");
+    stateStore.storeAssignedResources(nmContainer, "numa", numaResources);
+    List<Serializable> fpgaResources = Arrays.asList("fpga1", "fpga2");
+    stateStore.storeAssignedResources(nmContainer, "fpga", fpgaResources);
+
+    cm.stop();
+    context = createContext(conf, stateStore);
+    cm = createContainerManager(context);
+    ((NMContext) context).setContainerManager(cm);
+    cm.init(conf);
+    cm.start();
+    assertEquals(1, context.getApplications().size());
+    app = context.getApplications().get(appId);
+    assertNotNull(app);
+
+    Assert.assertNotNull(nmContainer);
+    ResourceMappings resourceMappings = nmContainer.getResourceMappings();
+    List<Serializable> assignedResource = resourceMappings
+        .getAssignedResources("gpu");
+    Assert.assertTrue(assignedResource.equals(gpuResources));
+    Assert.assertTrue(
+        resourceMappings.getAssignedResources("numa").equals(numaResources));
+    Assert.assertTrue(
+        resourceMappings.getAssignedResources("fpga").equals(fpgaResources));
+  }
+
   @Test
   public void testContainerCleanupOnShutdown() throws Exception {
     ApplicationId appId = ApplicationId.newInstance(0, 1);
@@ -610,6 +627,57 @@ public void testContainerCleanupOnShutdown() throws Exception {
     verify(cm, never()).handle(isA(CMgrCompletedAppsEvent.class));
   }
 
+  private void commonLaunchContainer(ApplicationId appId, ContainerId cid,
+      ContainerManagerImpl cm) throws Exception {
+    Map<String, String> containerEnv = new HashMap<>();
+    setFlowContext(containerEnv, "app_name1", appId);
+    Map<String, ByteBuffer> serviceData = Collections.emptyMap();
+    Credentials containerCreds = new Credentials();
+    DataOutputBuffer dob = new DataOutputBuffer();
+    containerCreds.writeTokenStorageToStream(dob);
+    ByteBuffer containerTokens = ByteBuffer.wrap(dob.getData(), 0,
+        dob.getLength());
+    Map<ApplicationAccessType, String> acls = Collections.emptyMap();
+    File tmpDir = new File("target",
+        this.getClass().getSimpleName() + "-tmpDir");
+    File scriptFile = Shell.appendScriptExtension(tmpDir, "scriptFile");
+    PrintWriter fileWriter = new PrintWriter(scriptFile);
+    if (Shell.WINDOWS) {
+      fileWriter.println("@ping -n 100 127.0.0.1 >nul");
+    } else {
+      fileWriter.write("\numask 0");
+      fileWriter.write("\nexec sleep 100");
+    }
+    fileWriter.close();
+    FileContext localFS = FileContext.getLocalFSFileContext();
+    URL resource_alpha =
+        URL.fromPath(localFS
+            .makeQualified(new Path(scriptFile.getAbsolutePath())));
+    LocalResource rsrc_alpha = RecordFactoryProvider
+        .getRecordFactory(null).newRecordInstance(LocalResource.class);
+    rsrc_alpha.setResource(resource_alpha);
+    rsrc_alpha.setSize(-1);
+    rsrc_alpha.setVisibility(LocalResourceVisibility.APPLICATION);
+    rsrc_alpha.setType(LocalResourceType.FILE);
+    rsrc_alpha.setTimestamp(scriptFile.lastModified());
+    String destinationFile = "dest_file";
+    Map<String, LocalResource> localResources = new HashMap<>();
+    localResources.put(destinationFile, rsrc_alpha);
+    List<String> commands =
+        Arrays.asList(Shell.getRunScriptCommand(scriptFile));
+    ContainerLaunchContext clc = ContainerLaunchContext.newInstance(
+        localResources, containerEnv, commands, serviceData,
+        containerTokens, acls);
+    StartContainersResponse startResponse = startContainer(
+        context, cm, cid, clc, null);
+    assertTrue(startResponse.getFailedRequests().isEmpty());
+    assertEquals(1, context.getApplications().size());
+    // make sure the container reaches RUNNING state
+    waitForNMContainerState(cm, cid,
+        org.apache.hadoop.yarn.server.nodemanager
+            .containermanager.container.ContainerState.RUNNING);
+  }
+
   private ContainerManagerImpl createContainerManager(Context context,
       DeletionService delSrvc) {
     return new ContainerManagerImpl(context, exec, delSrvc,
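Stepping back from the hunk above: testResourceMappingRecoveryForContainer pins down the store/restart/recover round trip for assigned devices. Schematically, assuming the state store keys assigned resources by (container, resource type) and that recovery replays them into each container's ResourceMappings (the replay step below is paraphrased, not literal API):

    // Sketch of the round trip exercised by testResourceMappingRecoveryForContainer.
    List<Serializable> gpus = Arrays.asList("1", "2", "3");
    stateStore.storeAssignedResources(nmContainer, "gpu", gpus);   // persisted

    // ... NM restarts; ContainerManagerImpl recovery replays the store ...

    ResourceMappings mappings = nmContainer.getResourceMappings();
    Assert.assertEquals(gpus, mappings.getAssignedResources("gpu")); // recovered
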
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestResourceHandlerModule.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestResourceHandlerModule.java
index e5414a587f1..0563694f004 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestResourceHandlerModule.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestResourceHandlerModule.java
@@ -22,6 +22,7 @@
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.server.nodemanager.Context;
 import org.junit.Assert;
 import org.junit.Before;
 import org.junit.Test;
@@ -30,6 +31,8 @@
 
 import java.util.List;
 
+import static org.mockito.Mockito.mock;
+
 public class TestResourceHandlerModule {
   private static final Logger LOG =
       LoggerFactory.getLogger(TestResourceHandlerModule.class);
@@ -62,7 +65,7 @@ public void testOutboundBandwidthHandler() {
     //Ensure that outbound bandwidth resource handler is present in the chain
     ResourceHandlerChain resourceHandlerChain = ResourceHandlerModule
-        .getConfiguredResourceHandlerChain(networkEnabledConf);
+        .getConfiguredResourceHandlerChain(networkEnabledConf, mock(Context.class));
     List<ResourceHandler> resourceHandlers = resourceHandlerChain
         .getResourceHandlerList();
     //Exactly one resource handler in chain
@@ -88,7 +91,8 @@ public void testDiskResourceHandler() throws Exception {
     Assert.assertNotNull(handler);
 
     ResourceHandlerChain resourceHandlerChain =
-        ResourceHandlerModule.getConfiguredResourceHandlerChain(diskConf);
+        ResourceHandlerModule.getConfiguredResourceHandlerChain(diskConf,
+            mock(Context.class));
     List<ResourceHandler> resourceHandlers =
         resourceHandlerChain.getResourceHandlerList();
     // Exactly one resource handler in chain
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/gpu/TestGpuResourceHandler.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/gpu/TestGpuResourceHandler.java
new file mode 100644
index 00000000000..7a3bd028994
--- /dev/null
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/gpu/TestGpuResourceHandler.java
@@ -0,0 +1,480 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.gpu;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.util.StringUtils;
+import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
+import org.apache.hadoop.yarn.api.records.ApplicationId;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
+import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.api.records.ResourceInformation;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.server.nodemanager.Context;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ResourceMappings;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperation;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationException;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationExecutor;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.CGroupsHandler;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandlerException;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu.GpuDevice;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu.GpuDiscoverer;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.runtime.ContainerRuntimeConstants;
+import org.apache.hadoop.yarn.server.nodemanager.recovery.NMNullStateStoreService;
+import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService;
+import org.apache.hadoop.yarn.util.resource.TestResourceUtils;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.io.IOException;
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+
+import static org.mockito.Matchers.any;
+import static org.mockito.Matchers.anyList;
+import static org.mockito.Matchers.anyListOf;
+import static org.mockito.Matchers.anyString;
+import static org.mockito.Matchers.eq;
+import static org.mockito.Mockito.doThrow;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.never;
+import static org.mockito.Mockito.times;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.when;
+
+public class TestGpuResourceHandler {
+  private CGroupsHandler mockCGroupsHandler;
+  private PrivilegedOperationExecutor mockPrivilegedExecutor;
+  private GpuResourceHandlerImpl gpuResourceHandler;
+  private NMStateStoreService mockNMStateStore;
+  private ConcurrentHashMap<ContainerId, Container> runningContainersMap;
+
+  @Before
+  public void setup() {
+    TestResourceUtils.addNewTypesToResources(ResourceInformation.GPU_URI);
+
+    mockCGroupsHandler = mock(CGroupsHandler.class);
+    mockPrivilegedExecutor = mock(PrivilegedOperationExecutor.class);
+    mockNMStateStore = mock(NMStateStoreService.class);
+
+    Context nmctx = mock(Context.class);
+    when(nmctx.getNMStateStore()).thenReturn(mockNMStateStore);
+    runningContainersMap = new ConcurrentHashMap<>();
+    when(nmctx.getContainers()).thenReturn(runningContainersMap);
+
+    gpuResourceHandler = new GpuResourceHandlerImpl(nmctx, mockCGroupsHandler,
+        mockPrivilegedExecutor);
+  }
+
+  @Test
+  public void testBootStrap() throws Exception {
+    Configuration conf = new YarnConfiguration();
+    conf.set(YarnConfiguration.NM_GPU_ALLOWED_DEVICES, "0:0");
+
+    GpuDiscoverer.getInstance().initialize(conf);
+
+    gpuResourceHandler.bootstrap(conf);
+    verify(mockCGroupsHandler, times(1)).initializeCGroupController(
+        CGroupsHandler.CGroupController.DEVICES);
+  }
+
+  private static ContainerId getContainerId(int id) {
+    return ContainerId.newContainerId(ApplicationAttemptId
+        .newInstance(ApplicationId.newInstance(1234L, 1), 1), id);
+  }
+
+  private static Container mockContainerWithGpuRequest(int id,
+      int numGpuRequest, boolean dockerContainerEnabled) {
+    Container c = mock(Container.class);
+    when(c.getContainerId()).thenReturn(getContainerId(id));
+
+    Resource res = Resource.newInstance(1024, 1);
+    ResourceMappings resMapping = new ResourceMappings();
+
+    res.setResourceValue(ResourceInformation.GPU_URI, numGpuRequest);
+    when(c.getResource()).thenReturn(res);
+    when(c.getResourceMappings()).thenReturn(resMapping);
+
+    ContainerLaunchContext clc = mock(ContainerLaunchContext.class);
+    Map<String, String> env = new HashMap<>();
+    if (dockerContainerEnabled) {
+      env.put(ContainerRuntimeConstants.ENV_CONTAINER_TYPE, "docker");
+    }
+    when(clc.getEnvironment()).thenReturn(env);
+    when(c.getLaunchContext()).thenReturn(clc);
+    return c;
+  }
+
+  private static Container mockContainerWithGpuRequest(int id,
+      int numGpuRequest) {
+    return mockContainerWithGpuRequest(id, numGpuRequest, false);
+  }
+
+  private void verifyDeniedDevices(ContainerId containerId,
+      List<GpuDevice> deniedDevices)
+      throws ResourceHandlerException, PrivilegedOperationException {
+    verify(mockCGroupsHandler, times(1)).createCGroup(
+        CGroupsHandler.CGroupController.DEVICES, containerId.toString());
+
+    if (null != deniedDevices && !deniedDevices.isEmpty()) {
+      List<Integer> deniedDevicesMinorNumber = new ArrayList<>();
+      for (GpuDevice deniedDevice : deniedDevices) {
+        deniedDevicesMinorNumber.add(deniedDevice.getMinorNumber());
+      }
+      verify(mockPrivilegedExecutor, times(1)).executePrivilegedOperation(
+          new PrivilegedOperation(PrivilegedOperation.OperationType.GPU, Arrays
+              .asList(GpuResourceHandlerImpl.CONTAINER_ID_CLI_OPTION,
+                  containerId.toString(),
+                  GpuResourceHandlerImpl.EXCLUDED_GPUS_CLI_OPTION,
+                  StringUtils.join(",", deniedDevicesMinorNumber))), true);
+    }
+  }
+
+  private void commonTestAllocation(boolean dockerContainerEnabled)
+      throws Exception {
+    Configuration conf = new YarnConfiguration();
+    conf.set(YarnConfiguration.NM_GPU_ALLOWED_DEVICES, "0:0,1:1,2:3,3:4");
+    GpuDiscoverer.getInstance().initialize(conf);
+
+    gpuResourceHandler.bootstrap(conf);
+    Assert.assertEquals(4,
+        gpuResourceHandler.getGpuAllocator().getAvailableGpus());
+
+    /* Start container 1, asks for 3 GPUs */
+    gpuResourceHandler.preStart(
+        mockContainerWithGpuRequest(1, 3, dockerContainerEnabled));
+
+    // Only the device with minor number 4 will be blocked.
+    if (dockerContainerEnabled) {
+      verifyDeniedDevices(getContainerId(1),
+          Collections.emptyList());
+    } else {
+      verifyDeniedDevices(getContainerId(1),
+          Arrays.asList(new GpuDevice(3, 4)));
+    }
+
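A note on the expectations in this method: the allowed-device list "0:0,1:1,2:3,3:4" is a list of index:minor pairs, and the assertions are consistent with a first-fit allocator, so a request for 3 GPUs receives (0,0), (1,1) and (2,3), leaving only minor 4 to be denied. A sketch of that selection logic (illustrative only; GpuResourceAllocator's real field and method names may differ):

    // Illustrative first-fit GPU selection; names are assumptions.
    List<GpuDevice> granted = new ArrayList<>();
    for (GpuDevice device : allowedGpuDevices) {
      if (granted.size() == numRequested) {
        break;
      }
      if (!deviceAllocationMapping.containsKey(device)) {
        granted.add(device);
      }
    }
    // Every allowed device NOT granted is denied via the devices cgroup.
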
+    /* Start container 2, asks for 2 GPUs; expected to fail */
+    boolean failedToAllocate = false;
+    try {
+      gpuResourceHandler.preStart(
+          mockContainerWithGpuRequest(2, 2, dockerContainerEnabled));
+    } catch (ResourceHandlerException e) {
+      failedToAllocate = true;
+    }
+    Assert.assertTrue(failedToAllocate);
+
+    /* Start container 3, asks for 1 GPU; expected to succeed */
+    gpuResourceHandler.preStart(
+        mockContainerWithGpuRequest(3, 1, dockerContainerEnabled));
+
+    // devices = 0/1/3 will be blocked
+    if (dockerContainerEnabled) {
+      verifyDeniedDevices(getContainerId(3),
+          Collections.emptyList());
+    } else {
+      verifyDeniedDevices(getContainerId(3), Arrays
+          .asList(new GpuDevice(0, 0), new GpuDevice(1, 1),
+              new GpuDevice(2, 3)));
+    }
+
+    /* Start container 4, asks for 0 GPUs; expected to succeed */
+    gpuResourceHandler.preStart(
+        mockContainerWithGpuRequest(4, 0, dockerContainerEnabled));
+
+    if (dockerContainerEnabled) {
+      verifyDeniedDevices(getContainerId(4),
+          Collections.emptyList());
+    } else {
+      // All devices will be blocked
+      verifyDeniedDevices(getContainerId(4), Arrays
+          .asList(new GpuDevice(0, 0), new GpuDevice(1, 1), new GpuDevice(2, 3),
+              new GpuDevice(3, 4)));
+    }
+
+    /* Release container-1, expect its cgroup to be deleted */
+    gpuResourceHandler.postComplete(getContainerId(1));
+
+    verify(mockCGroupsHandler, times(1)).deleteCGroup(
+        CGroupsHandler.CGroupController.DEVICES, getContainerId(1).toString());
+    Assert.assertEquals(3,
+        gpuResourceHandler.getGpuAllocator().getAvailableGpus());
+
+    /* Release container-3, expect its cgroup to be deleted */
+    gpuResourceHandler.postComplete(getContainerId(3));
+
+    verify(mockCGroupsHandler, times(1)).deleteCGroup(
+        CGroupsHandler.CGroupController.DEVICES, getContainerId(3).toString());
+    Assert.assertEquals(4,
+        gpuResourceHandler.getGpuAllocator().getAvailableGpus());
+  }
+
+  @Test
+  public void testAllocationWhenDockerContainerEnabled() throws Exception {
+    // When docker container is enabled, no devices should be written to
+    // devices.deny.
+    commonTestAllocation(true);
+  }
+
+  @Test
+  public void testAllocation() throws Exception {
+    commonTestAllocation(false);
+  }
+
+  @SuppressWarnings("unchecked")
+  @Test
+  public void testAssignedGpuWillBeCleanedupWhenStoreOpFails()
+      throws Exception {
+    Configuration conf = new YarnConfiguration();
+    conf.set(YarnConfiguration.NM_GPU_ALLOWED_DEVICES, "0:0,1:1,2:3,3:4");
+    GpuDiscoverer.getInstance().initialize(conf);
+
+    gpuResourceHandler.bootstrap(conf);
+    Assert.assertEquals(4,
+        gpuResourceHandler.getGpuAllocator().getAvailableGpus());
+
+    doThrow(new IOException("Exception ...")).when(mockNMStateStore)
+        .storeAssignedResources(
+            any(Container.class), anyString(), anyList());
+
+    boolean exception = false;
+    /* Start container 1, asks for 3 GPUs */
+    try {
+      gpuResourceHandler.preStart(mockContainerWithGpuRequest(1, 3));
+    } catch (ResourceHandlerException e) {
+      exception = true;
+    }
+
+    Assert.assertTrue("preStart should throw exception", exception);
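The final assertions of this test encode the rollback half of an allocate-persist-rollback sequence inside preStart. Roughly, under assumed method names (this is a sketch of the contract, not the actual GpuResourceAllocator implementation):

    // Illustrative sketch of the rollback contract verified by this test.
    List<Serializable> assigned = allocator.assign(containerId, numRequested);
    try {
      stateStore.storeAssignedResources(container,
          ResourceInformation.GPU_URI, assigned);
    } catch (IOException e) {
      allocator.release(assigned);            // undo the in-memory assignment
      throw new ResourceHandlerException(e);  // propagates out of preStart()
    }
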
+    // After preStart we should still have 4 available GPUs, since the store
+    // operation failed.
+    Assert.assertEquals(4,
+        gpuResourceHandler.getGpuAllocator().getAvailableGpus());
+  }
+
+  @Test
+  public void testAllocationWithoutAllowedGpus() throws Exception {
+    Configuration conf = new YarnConfiguration();
+    conf.set(YarnConfiguration.NM_GPU_ALLOWED_DEVICES, " ");
+    GpuDiscoverer.getInstance().initialize(conf);
+
+    try {
+      gpuResourceHandler.bootstrap(conf);
+      Assert.fail("Should fail because no GPU available");
+    } catch (ResourceHandlerException e) {
+      // Expected because of no resource available
+    }
+
+    /* Start container 1, asks for 0 GPUs; expected to succeed */
+    gpuResourceHandler.preStart(mockContainerWithGpuRequest(1, 0));
+    verifyDeniedDevices(getContainerId(1), Collections.emptyList());
+
+    /* Start container 2, asks for 1 GPU; expected to fail */
+    boolean failedToAllocate = false;
+    try {
+      gpuResourceHandler.preStart(mockContainerWithGpuRequest(2, 1));
+    } catch (ResourceHandlerException e) {
+      failedToAllocate = true;
+    }
+    Assert.assertTrue(failedToAllocate);
+
+    /* Release container 1, expect its cgroup to be deleted */
+    gpuResourceHandler.postComplete(getContainerId(1));
+
+    verify(mockCGroupsHandler, times(1)).deleteCGroup(
+        CGroupsHandler.CGroupController.DEVICES, getContainerId(1).toString());
+    Assert.assertEquals(0,
+        gpuResourceHandler.getGpuAllocator().getAvailableGpus());
+  }
+
+  @Test
+  public void testAllocationStored() throws Exception {
+    Configuration conf = new YarnConfiguration();
+    conf.set(YarnConfiguration.NM_GPU_ALLOWED_DEVICES, "0:0,1:1,2:3,3:4");
+    GpuDiscoverer.getInstance().initialize(conf);
+
+    gpuResourceHandler.bootstrap(conf);
+    Assert.assertEquals(4,
+        gpuResourceHandler.getGpuAllocator().getAvailableGpus());
+
+    /* Start container 1, asks for 3 GPUs */
+    Container container = mockContainerWithGpuRequest(1, 3);
+    gpuResourceHandler.preStart(container);
+
+    verify(mockNMStateStore).storeAssignedResources(container,
+        ResourceInformation.GPU_URI, Arrays
+            .asList(new GpuDevice(0, 0), new GpuDevice(1, 1),
+                new GpuDevice(2, 3)));
+
+    // Only the device with minor number 4 will be blocked.
+    verifyDeniedDevices(getContainerId(1), Arrays.asList(new GpuDevice(3, 4)));
+
+    /* Start container 2, asks for 0 GPUs; expected to succeed */
+    container = mockContainerWithGpuRequest(2, 0);
+    gpuResourceHandler.preStart(container);
+
+    verifyDeniedDevices(getContainerId(2), Arrays
+        .asList(new GpuDevice(0, 0), new GpuDevice(1, 1), new GpuDevice(2, 3),
+            new GpuDevice(3, 4)));
+    Assert.assertEquals(0, container.getResourceMappings()
+        .getAssignedResources(ResourceInformation.GPU_URI).size());
+
+    // storeAssignedResources should not be invoked for a container that was
+    // assigned nothing.
+    verify(mockNMStateStore, never()).storeAssignedResources(
+        eq(container), eq(ResourceInformation.GPU_URI),
+        anyListOf(Serializable.class));
+  }
+
+  @Test
+  public void testAllocationStoredWithNULLStateStore() throws Exception {
+    NMNullStateStoreService mockNMNULLStateStore =
+        mock(NMNullStateStoreService.class);
+
+    Context nmnctx = mock(Context.class);
+    when(nmnctx.getNMStateStore()).thenReturn(mockNMNULLStateStore);
+
+    GpuResourceHandlerImpl gpuNULLStateResourceHandler =
+        new GpuResourceHandlerImpl(nmnctx, mockCGroupsHandler,
+        mockPrivilegedExecutor);
+
+    Configuration conf = new YarnConfiguration();
+    conf.set(YarnConfiguration.NM_GPU_ALLOWED_DEVICES, "0:0,1:1,2:3,3:4");
+    GpuDiscoverer.getInstance().initialize(conf);
+
+    gpuNULLStateResourceHandler.bootstrap(conf);
+    Assert.assertEquals(4,
+        gpuNULLStateResourceHandler.getGpuAllocator().getAvailableGpus());
+
+    /* Start container 1, asks for 3 GPUs */
+    Container container = mockContainerWithGpuRequest(1, 3);
+    gpuNULLStateResourceHandler.preStart(container);
+
+    verify(nmnctx.getNMStateStore()).storeAssignedResources(container,
+        ResourceInformation.GPU_URI, Arrays
+            .asList(new GpuDevice(0, 0), new GpuDevice(1, 1),
+                new GpuDevice(2, 3)));
+  }
+
+  @Test
+  public void testRecoverResourceAllocation() throws Exception {
+    Configuration conf = new YarnConfiguration();
+    conf.set(YarnConfiguration.NM_GPU_ALLOWED_DEVICES, "0:0,1:1,2:3,3:4");
+    GpuDiscoverer.getInstance().initialize(conf);
+
+    gpuResourceHandler.bootstrap(conf);
+    Assert.assertEquals(4,
+        gpuResourceHandler.getGpuAllocator().getAvailableGpus());
+
+    Container nmContainer = mock(Container.class);
+    ResourceMappings rmap = new ResourceMappings();
+    ResourceMappings.AssignedResources ar =
+        new ResourceMappings.AssignedResources();
+    ar.updateAssignedResources(
+        Arrays.asList(new GpuDevice(1, 1), new GpuDevice(2, 3)));
+    rmap.addAssignedResources(ResourceInformation.GPU_URI, ar);
+    when(nmContainer.getResourceMappings()).thenReturn(rmap);
+
+    runningContainersMap.put(getContainerId(1), nmContainer);
+
+    // TEST CASE
+    // Reacquire the container to restore the state of the GPU resource
+    // allocator.
+    gpuResourceHandler.reacquireContainer(getContainerId(1));
+
+    Map<GpuDevice, ContainerId> deviceAllocationMapping =
+        gpuResourceHandler.getGpuAllocator().getDeviceAllocationMappingCopy();
+    Assert.assertEquals(2, deviceAllocationMapping.size());
+    Assert.assertTrue(
+        deviceAllocationMapping.keySet().contains(new GpuDevice(1, 1)));
+    Assert.assertTrue(
+        deviceAllocationMapping.keySet().contains(new GpuDevice(2, 3)));
+    Assert.assertEquals(deviceAllocationMapping.get(new GpuDevice(1, 1)),
+        getContainerId(1));
+
+    // TEST CASE
+    // Try to reacquire a container whose requested device is not in the
+    // allowed list.
+    nmContainer = mock(Container.class);
+    rmap = new ResourceMappings();
+    ar = new ResourceMappings.AssignedResources();
+    // minor number 5 is not in the allowed list.
+    ar.updateAssignedResources(
+        Arrays.asList(new GpuDevice(3, 4), new GpuDevice(4, 5)));
+    rmap.addAssignedResources(ResourceInformation.GPU_URI, ar);
+    when(nmContainer.getResourceMappings()).thenReturn(rmap);
+
+    runningContainersMap.put(getContainerId(2), nmContainer);
+
+    boolean caughtException = false;
+    try {
+      gpuResourceHandler.reacquireContainer(getContainerId(2));
+    } catch (ResourceHandlerException e) {
+      caughtException = true;
+    }
+    Assert.assertTrue(
+        "Should fail since requested device Id is not in allowed list",
+        caughtException);
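Both failure cases in this test are consistent with reacquireContainer validating every recovered device before committing anything: the device must be on the allowed list and must not already be assigned. Roughly (an illustrative sketch, not the actual implementation):

    // Illustrative validation pass behind reacquireContainer; names assumed.
    for (GpuDevice device : recoveredDevices) {
      if (!allowedGpuDevices.contains(device)) {
        throw new ResourceHandlerException(
            "Device is not in allowed list: " + device);
      }
      if (deviceAllocationMapping.containsKey(device)) {
        throw new ResourceHandlerException(
            "Device is already assigned: " + device);
      }
    }
    // Only after all devices pass validation is the mapping committed,
    // which is why the assertions below see unchanged internal state.
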
+    // Make sure the internal state did not change.
+    deviceAllocationMapping =
+        gpuResourceHandler.getGpuAllocator().getDeviceAllocationMappingCopy();
+    Assert.assertEquals(2, deviceAllocationMapping.size());
+    Assert.assertTrue(deviceAllocationMapping.keySet()
+        .containsAll(Arrays.asList(new GpuDevice(1, 1), new GpuDevice(2, 3))));
+    Assert.assertEquals(deviceAllocationMapping.get(new GpuDevice(1, 1)),
+        getContainerId(1));
+
+    // TEST CASE
+    // Try to reacquire a container whose requested device is already assigned.
+    nmContainer = mock(Container.class);
+    rmap = new ResourceMappings();
+    ar = new ResourceMappings.AssignedResources();
+    // minor number 3 is already assigned.
+    ar.updateAssignedResources(
+        Arrays.asList(new GpuDevice(3, 4), new GpuDevice(2, 3)));
+    rmap.addAssignedResources(ResourceInformation.GPU_URI, ar);
+    when(nmContainer.getResourceMappings()).thenReturn(rmap);
+
+    runningContainersMap.put(getContainerId(2), nmContainer);
+
+    caughtException = false;
+    try {
+      gpuResourceHandler.reacquireContainer(getContainerId(2));
+    } catch (ResourceHandlerException e) {
+      caughtException = true;
+    }
+    Assert.assertTrue(
+        "Should fail since requested device is already assigned",
+        caughtException);
+
+    // Make sure the internal state did not change.
+    deviceAllocationMapping =
+        gpuResourceHandler.getGpuAllocator().getDeviceAllocationMappingCopy();
+    Assert.assertEquals(2, deviceAllocationMapping.size());
+    Assert.assertTrue(deviceAllocationMapping.keySet()
+        .containsAll(Arrays.asList(new GpuDevice(1, 1), new GpuDevice(2, 3))));
+    Assert.assertEquals(deviceAllocationMapping.get(new GpuDevice(1, 1)),
+        getContainerId(1));
+  }
+}
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainersMonitorResourceChange.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainersMonitorResourceChange.java
index 318ae6bb73a..a147afb881c 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainersMonitorResourceChange.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainersMonitorResourceChange.java
@@ -70,7 +70,7 @@
   private static class MockExecutor extends ContainerExecutor {
     @Override
-    public void init() throws IOException {
+    public void init(Context nmContext) throws IOException {
     }
     @Override
     public void startLocalizer(LocalizerStartContext ctx)
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/TestResourcePluginManager.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/TestResourcePluginManager.java
new file mode 100644
index 00000000000..bcadf76e4bd
--- /dev/null
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/TestResourcePluginManager.java
@@ -0,0 +1,261 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.service.ServiceOperations; +import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.event.Dispatcher; +import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor; +import org.apache.hadoop.yarn.server.nodemanager.Context; +import org.apache.hadoop.yarn.server.nodemanager.DeletionService; +import org.apache.hadoop.yarn.server.nodemanager.LinuxContainerExecutor; +import org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService; +import org.apache.hadoop.yarn.server.nodemanager.NodeHealthCheckerService; +import org.apache.hadoop.yarn.server.nodemanager.NodeManager; +import org.apache.hadoop.yarn.server.nodemanager.NodeManagerTestBase; +import org.apache.hadoop.yarn.server.nodemanager.NodeStatusUpdater; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperation; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationExecutor; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.CGroupsHandler; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandler; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandlerChain; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandlerException; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.NodeResourceUpdaterPlugin; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.ResourcePlugin; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.ResourcePluginManager; +import org.apache.hadoop.yarn.server.security.ApplicationACLsManager; +import org.junit.After; +import org.junit.Assert; +import org.junit.Test; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import static org.mockito.Matchers.any; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +public class TestResourcePluginManager extends NodeManagerTestBase { + private NodeManager nm; + + ResourcePluginManager stubResourcePluginmanager() { + // Stub ResourcePluginManager + final ResourcePluginManager rpm = mock(ResourcePluginManager.class); + Map plugins = 
new HashMap<>(); + + // First resource plugin + ResourcePlugin resourcePlugin = mock(ResourcePlugin.class); + NodeResourceUpdaterPlugin nodeResourceUpdaterPlugin = mock( + NodeResourceUpdaterPlugin.class); + when(resourcePlugin.getNodeResourceHandlerInstance()).thenReturn( + nodeResourceUpdaterPlugin); + plugins.put("resource1", resourcePlugin); + + // Second resource plugin + resourcePlugin = mock(ResourcePlugin.class); + when(resourcePlugin.createResourceHandler(any(Context.class), any( + CGroupsHandler.class), any(PrivilegedOperationExecutor.class))) + .thenReturn(new CustomizedResourceHandler()); + plugins.put("resource2", resourcePlugin); + when(rpm.getNameToPlugins()).thenReturn(plugins); + return rpm; + } + + @After + public void tearDown() { + if (nm != null) { + try { + ServiceOperations.stop(nm); + } catch (Throwable t) { + // ignore + } + } + } + + private class CustomizedResourceHandler implements ResourceHandler { + + @Override + public List bootstrap(Configuration configuration) + throws ResourceHandlerException { + return null; + } + + @Override + public List preStart(Container container) + throws ResourceHandlerException { + return null; + } + + @Override + public List reacquireContainer(ContainerId containerId) + throws ResourceHandlerException { + return null; + } + + @Override + public List postComplete(ContainerId containerId) + throws ResourceHandlerException { + return null; + } + + @Override + public List teardown() + throws ResourceHandlerException { + return null; + } + } + + private class MyMockNM extends NodeManager { + private final ResourcePluginManager rpm; + + public MyMockNM(ResourcePluginManager rpm) { + this.rpm = rpm; + } + + @Override + protected NodeStatusUpdater createNodeStatusUpdater(Context context, + Dispatcher dispatcher, NodeHealthCheckerService healthChecker) { + ((NodeManager.NMContext)context).setResourcePluginManager(rpm); + return new BaseNodeStatusUpdaterForTest(context, dispatcher, healthChecker, + metrics, new BaseResourceTrackerForTest()); + } + + @Override + protected ContainerManagerImpl createContainerManager(Context context, + ContainerExecutor exec, DeletionService del, + NodeStatusUpdater nodeStatusUpdater, + ApplicationACLsManager aclsManager, + LocalDirsHandlerService diskhandler) { + return new MyContainerManager(context, exec, del, nodeStatusUpdater, + metrics, diskhandler); + } + + @Override + protected ResourcePluginManager createResourcePluginManager() { + return rpm; + } + } + + public class MyLCE extends LinuxContainerExecutor { + private PrivilegedOperationExecutor poe = mock(PrivilegedOperationExecutor.class); + + @Override + protected PrivilegedOperationExecutor getPrivilegedOperationExecutor() { + return poe; + } + } + + /* + * Make sure ResourcePluginManager is initialized during NM start up. + */ + @Test(timeout = 30000) + public void testResourcePluginManagerInitialization() throws Exception { + final ResourcePluginManager rpm = stubResourcePluginmanager(); + nm = new MyMockNM(rpm); + + YarnConfiguration conf = createNMConfig(); + nm.init(conf); + verify(rpm, times(1)).initialize( + any(Context.class)); + } + + /* + * Make sure ResourcePluginManager is invoked during NM update. 
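+   * The stubbed plugin registered for "resource1" supplies a
+   * NodeResourceUpdaterPlugin, which the node status updater is expected to
+   * invoke exactly once while reporting the node's resources.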
+ */ + @Test(timeout = 30000) + public void testNodeStatusUpdaterWithResourcePluginsEnabled() throws Exception { + final ResourcePluginManager rpm = stubResourcePluginmanager(); + + nm = new MyMockNM(rpm); + + YarnConfiguration conf = createNMConfig(); + nm.init(conf); + nm.start(); + + NodeResourceUpdaterPlugin nodeResourceUpdaterPlugin = + rpm.getNameToPlugins().get("resource1") + .getNodeResourceHandlerInstance(); + + verify(nodeResourceUpdaterPlugin, times(1)).updateConfiguredResource( + any(Resource.class)); + } + + /* + * Make sure ResourcePluginManager is used to initialize ResourceHandlerChain + */ + @Test(timeout = 30000) + public void testLinuxContainerExecutorWithResourcePluginsEnabled() throws Exception { + final ResourcePluginManager rpm = stubResourcePluginmanager(); + final LinuxContainerExecutor lce = new MyLCE(); + + nm = new NodeManager() { + @Override + protected NodeStatusUpdater createNodeStatusUpdater(Context context, + Dispatcher dispatcher, NodeHealthCheckerService healthChecker) { + ((NMContext)context).setResourcePluginManager(rpm); + return new BaseNodeStatusUpdaterForTest(context, dispatcher, healthChecker, + metrics, new BaseResourceTrackerForTest()); + } + + @Override + protected ContainerManagerImpl createContainerManager(Context context, + ContainerExecutor exec, DeletionService del, + NodeStatusUpdater nodeStatusUpdater, + ApplicationACLsManager aclsManager, + LocalDirsHandlerService diskhandler) { + return new MyContainerManager(context, exec, del, nodeStatusUpdater, + metrics, diskhandler); + } + + @Override + protected ContainerExecutor createContainerExecutor(Configuration conf) { + ((NMContext)this.getNMContext()).setResourcePluginManager(rpm); + lce.setConf(conf); + return lce; + } + }; + + YarnConfiguration conf = createNMConfig(); + + nm.init(conf); + nm.start(); + + ResourceHandler handler = lce.getResourceHandler(); + Assert.assertNotNull(handler); + Assert.assertTrue(handler instanceof ResourceHandlerChain); + + boolean newHandlerAdded = false; + for (ResourceHandler h : ((ResourceHandlerChain) handler) + .getResourceHandlerList()) { + if (h instanceof CustomizedResourceHandler) { + newHandlerAdded = true; + break; + } + } + Assert.assertTrue("New ResourceHandler should be added", newHandlerAdded); + } +} diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/TestGpuDiscoverer.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/TestGpuDiscoverer.java new file mode 100644 index 00000000000..4abb633a69a --- /dev/null +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/TestGpuDiscoverer.java @@ -0,0 +1,141 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu; + +import org.apache.commons.io.FileUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.exceptions.YarnException; +import org.apache.hadoop.yarn.server.nodemanager.webapp.dao.gpu.GpuDeviceInformation; +import org.junit.Assert; +import org.junit.Assume; +import org.junit.Before; +import org.junit.Test; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.util.List; + +public class TestGpuDiscoverer { + private String getTestParentFolder() { + File f = new File("target/temp/" + TestGpuDiscoverer.class.getName()); + return f.getAbsolutePath(); + } + + private void touchFile(File f) throws IOException { + new FileOutputStream(f).close(); + } + + @Before + public void before() throws IOException { + String folder = getTestParentFolder(); + File f = new File(folder); + FileUtils.deleteDirectory(f); + f.mkdirs(); + } + + @Test + public void testLinuxGpuResourceDiscoverPluginConfig() throws Exception { + // Only run this on demand. + Assume.assumeTrue(Boolean.valueOf( + System.getProperty("RunLinuxGpuResourceDiscoverPluginConfigTest"))); + + // test case 1, check default setting. + Configuration conf = new Configuration(false); + GpuDiscoverer plugin = new GpuDiscoverer(); + plugin.initialize(conf); + Assert.assertEquals(GpuDiscoverer.DEFAULT_BINARY_NAME, + plugin.getPathOfGpuBinary()); + Assert.assertNotNull(plugin.getEnvironmentToRunCommand().get("PATH")); + Assert.assertTrue( + plugin.getEnvironmentToRunCommand().get("PATH").contains("nvidia")); + + // test case 2, check mandatory set path. + File fakeBinary = new File(getTestParentFolder(), + GpuDiscoverer.DEFAULT_BINARY_NAME); + touchFile(fakeBinary); + conf.set(YarnConfiguration.NM_GPU_PATH_TO_EXEC, getTestParentFolder()); + plugin = new GpuDiscoverer(); + plugin.initialize(conf); + Assert.assertEquals(fakeBinary.getAbsolutePath(), + plugin.getPathOfGpuBinary()); + Assert.assertNull(plugin.getEnvironmentToRunCommand().get("PATH")); + + // test case 3, check mandatory set path, but binary doesn't exist so default + // path will be used. 
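+    // After deleting the fake binary, the configured directory no longer
+    // contains an executable, so the discoverer should also restore the
+    // well-known nvidia locations on PATH (asserted below).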
+    fakeBinary.delete();
+    plugin = new GpuDiscoverer();
+    plugin.initialize(conf);
+    Assert.assertEquals(GpuDiscoverer.DEFAULT_BINARY_NAME,
+        plugin.getPathOfGpuBinary());
+    Assert.assertTrue(
+        plugin.getEnvironmentToRunCommand().get("PATH").contains("nvidia"));
+  }
+
+  @Test
+  public void testGpuDiscover() throws YarnException {
+    // This test needs a real GPU and the nvidia-smi binary, so only run it
+    // on demand when runGpuDiscoverUnitTest is set
+    // (-DrunGpuDiscoverUnitTest=true).
+    Assume.assumeTrue(
+        Boolean.valueOf(System.getProperty("runGpuDiscoverUnitTest")));
+    Configuration conf = new Configuration(false);
+    GpuDiscoverer plugin = new GpuDiscoverer();
+    plugin.initialize(conf);
+    GpuDeviceInformation info = plugin.getGpuDeviceInformation();
+
+    Assert.assertTrue(info.getGpus().size() > 0);
+    Assert.assertEquals(plugin.getGpusUsableByYarn().size(),
+        info.getGpus().size());
+  }
+
+  @Test
+  public void getNumberOfUsableGpusFromConfig() throws YarnException {
+    Configuration conf = new Configuration(false);
+
+    // Illegal format
+    conf.set(YarnConfiguration.NM_GPU_ALLOWED_DEVICES, "0:0,1:1,2:2,3");
+    GpuDiscoverer plugin = new GpuDiscoverer();
+    try {
+      plugin.initialize(conf);
+      plugin.getGpusUsableByYarn();
+      Assert.fail("Illegal format, should fail.");
+    } catch (YarnException e) {
+      // Expected
+    }
+
+    // Valid format
+    conf.set(YarnConfiguration.NM_GPU_ALLOWED_DEVICES, "0:0,1:1,2:2,3:4");
+    plugin = new GpuDiscoverer();
+    plugin.initialize(conf);
+
+    List<GpuDevice> usableGpuDevices = plugin.getGpusUsableByYarn();
+    Assert.assertEquals(4, usableGpuDevices.size());
+
+    Assert.assertTrue(0 == usableGpuDevices.get(0).getIndex());
+    Assert.assertTrue(1 == usableGpuDevices.get(1).getIndex());
+    Assert.assertTrue(2 == usableGpuDevices.get(2).getIndex());
+    Assert.assertTrue(3 == usableGpuDevices.get(3).getIndex());
+
+    Assert.assertTrue(0 == usableGpuDevices.get(0).getMinorNumber());
+    Assert.assertTrue(1 == usableGpuDevices.get(1).getMinorNumber());
+    Assert.assertTrue(2 == usableGpuDevices.get(2).getMinorNumber());
+    Assert.assertTrue(4 == usableGpuDevices.get(3).getMinorNumber());
+  }
+}
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMMemoryStateStoreService.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMMemoryStateStoreService.java
index 0e46234a91f..4364709b56f 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMMemoryStateStoreService.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMMemoryStateStoreService.java
@@ -19,6 +19,7 @@ package org.apache.hadoop.yarn.server.nodemanager.recovery;
 import java.io.IOException;
+import java.io.Serializable;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.HashMap;
@@ -42,6 +43,8 @@
 import org.apache.hadoop.yarn.security.ContainerTokenIdentifier;
 import org.apache.hadoop.yarn.server.api.records.MasterKey;
 import org.apache.hadoop.yarn.server.api.records.impl.pb.MasterKeyPBImpl;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ResourceMappings;
 import org.apache.hadoop.yarn.server.utils.BuilderUtils;
@@ -124,6
+127,7 @@ public synchronized void removeApplication(ApplicationId appId) rcsCopy.setRemainingRetryAttempts(rcs.getRemainingRetryAttempts()); rcsCopy.setWorkDir(rcs.getWorkDir()); rcsCopy.setLogDir(rcs.getLogDir()); + rcsCopy.setResourceMappings(rcs.getResourceMappings()); result.add(rcsCopy); } return result; @@ -511,6 +515,20 @@ public synchronized void removeAMRMProxyAppContext( amrmProxyState.getAppContexts().remove(attempt); } + @Override + public void storeAssignedResources(Container container, + String resourceType, List assignedResources) + throws IOException { + ResourceMappings.AssignedResources ar = + new ResourceMappings.AssignedResources(); + ar.updateAssignedResources(assignedResources); + containerStates.get(container.getContainerId()).getResourceMappings() + .addAssignedResources(resourceType, ar); + + // update container resource mapping. + updateContainerResourceMapping(container, resourceType, assignedResources); + } + private static class TrackerState { Map inProgressMap = new HashMap(); diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/recovery/TestNMLeveldbStateStoreService.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/recovery/TestNMLeveldbStateStoreService.java index a5079382c1d..20c5240c30b 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/recovery/TestNMLeveldbStateStoreService.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/recovery/TestNMLeveldbStateStoreService.java @@ -29,9 +29,11 @@ import static org.mockito.Mockito.mock; import static org.mockito.Mockito.timeout; import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; import java.io.File; import java.io.IOException; +import java.io.Serializable; import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Arrays; @@ -68,6 +70,8 @@ import org.apache.hadoop.yarn.security.ContainerTokenIdentifier; import org.apache.hadoop.yarn.server.api.records.MasterKey; import org.apache.hadoop.yarn.server.nodemanager.amrmproxy.AMRMProxyTokenSecretManager; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ResourceMappings; import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService.LocalResourceTrackerState; import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService.RecoveredAMRMProxyState; import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService.RecoveredApplicationsState; @@ -1003,46 +1007,12 @@ public void testUnexpectedKeyDoesntThrowException() throws IOException { .loadContainersState(); assertTrue(recoveredContainers.isEmpty()); - // create a container request ApplicationId appId = ApplicationId.newInstance(1234, 3); ApplicationAttemptId appAttemptId = ApplicationAttemptId.newInstance(appId, 4); ContainerId containerId = ContainerId.newContainerId(appAttemptId, 5); - LocalResource lrsrc = LocalResource.newInstance( - URL.newInstance("hdfs", "somehost", 12345, "/some/path/to/rsrc"), - LocalResourceType.FILE, LocalResourceVisibility.APPLICATION, 123L, - 1234567890L); - Map localResources = - new HashMap(); - 
localResources.put("rsrc", lrsrc);
-    Map<String, String> env = new HashMap<String, String>();
-    env.put("somevar", "someval");
-    List<String> containerCmds = new ArrayList<String>();
-    containerCmds.add("somecmd");
-    containerCmds.add("somearg");
-    Map<String, ByteBuffer> serviceData = new HashMap<String, ByteBuffer>();
-    serviceData.put("someservice",
-        ByteBuffer.wrap(new byte[] { 0x1, 0x2, 0x3 }));
-    ByteBuffer containerTokens = ByteBuffer
-        .wrap(new byte[] { 0x7, 0x8, 0x9, 0xa });
-    Map<ApplicationAccessType, String> acls =
-        new HashMap<ApplicationAccessType, String>();
-    acls.put(ApplicationAccessType.VIEW_APP, "viewuser");
-    acls.put(ApplicationAccessType.MODIFY_APP, "moduser");
-    ContainerLaunchContext clc = ContainerLaunchContext.newInstance(
-        localResources, env, containerCmds,
-        serviceData, containerTokens, acls);
-    Resource containerRsrc = Resource.newInstance(1357, 3);
-    ContainerTokenIdentifier containerTokenId = new ContainerTokenIdentifier(
-        containerId, "host", "user", containerRsrc, 9876543210L, 42, 2468,
-        Priority.newInstance(7), 13579);
-    Token containerToken = Token.newInstance(containerTokenId.getBytes(),
-        ContainerTokenIdentifier.KIND.toString(), "password".getBytes(),
-        "tokenservice");
-    StartContainerRequest containerReq = StartContainerRequest.newInstance(clc,
-        containerToken);
-
-    stateStore.storeContainer(containerId, 0, 0, containerReq);
+    StartContainerRequest startContainerRequest = storeMockContainer(
+        containerId);
 
     // add a invalid key
     byte[] invalidKey = ("ContainerManager/containers/"
@@ -1055,7 +1025,7 @@
     assertEquals(RecoveredContainerStatus.REQUESTED, rcs.getStatus());
     assertEquals(ContainerExitStatus.INVALID, rcs.getExitCode());
     assertEquals(false, rcs.getKilled());
-    assertEquals(containerReq, rcs.getStartRequest());
+    assertEquals(startContainerRequest, rcs.getStartRequest());
     assertTrue(rcs.getDiagnostics().isEmpty());
     assertEquals(RecoveredContainerType.KILL, rcs.getRecoveryType());
     // assert unknown keys are cleaned up finally
@@ -1163,6 +1133,98 @@ public void testAMRMProxyStorage() throws IOException {
     }
   }
 
+  @Test
+  public void testStateStoreForResourceMapping() throws IOException {
+    // test empty when no state
+    List<RecoveredContainerState> recoveredContainers = stateStore
+        .loadContainersState();
+    assertTrue(recoveredContainers.isEmpty());
+
+    ApplicationId appId = ApplicationId.newInstance(1234, 3);
+    ApplicationAttemptId appAttemptId = ApplicationAttemptId.newInstance(appId,
+        4);
+    ContainerId containerId = ContainerId.newContainerId(appAttemptId, 5);
+    storeMockContainer(containerId);
+
+    Container container = mock(Container.class);
+    when(container.getContainerId()).thenReturn(containerId);
+    ResourceMappings resourceMappings = new ResourceMappings();
+    when(container.getResourceMappings()).thenReturn(resourceMappings);
+
+    // Store ResourceMapping
+    stateStore.storeAssignedResources(container, "gpu",
+        Arrays.asList("1", "2", "3"));
+    // This will overwrite the assignment above
+    List<Serializable> gpuRes1 = Arrays.asList("1", "2", "4");
+    stateStore.storeAssignedResources(container, "gpu", gpuRes1);
+    List<Serializable> fpgaRes =
+        Arrays.asList("3", "4", "5", "6");
+    stateStore.storeAssignedResources(container, "fpga", fpgaRes);
+    List<Serializable> numaRes = Arrays.asList("numa1");
+    stateStore.storeAssignedResources(container, "numa", numaRes);
+
+    // Restart the state store and verify the mappings are recovered
+    restartStateStore();
+    recoveredContainers = stateStore.loadContainersState();
+    assertEquals(1, recoveredContainers.size());
+    RecoveredContainerState rcs = recoveredContainers.get(0);
+    List<Serializable> res = rcs.getResourceMappings()
+        .getAssignedResources("gpu");
+    Assert.assertTrue(res.equals(gpuRes1));
+
Assert.assertTrue( + resourceMappings.getAssignedResources("gpu").equals(gpuRes1)); + + res = rcs.getResourceMappings().getAssignedResources("fpga"); + Assert.assertTrue(res.equals(fpgaRes)); + Assert.assertTrue( + resourceMappings.getAssignedResources("fpga").equals(fpgaRes)); + + res = rcs.getResourceMappings().getAssignedResources("numa"); + Assert.assertTrue(res.equals(numaRes)); + Assert.assertTrue( + resourceMappings.getAssignedResources("numa").equals(numaRes)); + } + + private StartContainerRequest storeMockContainer(ContainerId containerId) + throws IOException { + // create a container request + LocalResource lrsrc = LocalResource.newInstance( + URL.newInstance("hdfs", "somehost", 12345, "/some/path/to/rsrc"), + LocalResourceType.FILE, LocalResourceVisibility.APPLICATION, 123L, + 1234567890L); + Map localResources = + new HashMap(); + localResources.put("rsrc", lrsrc); + Map env = new HashMap(); + env.put("somevar", "someval"); + List containerCmds = new ArrayList(); + containerCmds.add("somecmd"); + containerCmds.add("somearg"); + Map serviceData = new HashMap(); + serviceData.put("someservice", + ByteBuffer.wrap(new byte[] { 0x1, 0x2, 0x3 })); + ByteBuffer containerTokens = ByteBuffer + .wrap(new byte[] { 0x7, 0x8, 0x9, 0xa }); + Map acls = + new HashMap(); + acls.put(ApplicationAccessType.VIEW_APP, "viewuser"); + acls.put(ApplicationAccessType.MODIFY_APP, "moduser"); + ContainerLaunchContext clc = ContainerLaunchContext.newInstance( + localResources, env, containerCmds, + serviceData, containerTokens, acls); + Resource containerRsrc = Resource.newInstance(1357, 3); + ContainerTokenIdentifier containerTokenId = new ContainerTokenIdentifier( + containerId, "host", "user", containerRsrc, 9876543210L, 42, 2468, + Priority.newInstance(7), 13579); + Token containerToken = Token.newInstance(containerTokenId.getBytes(), + ContainerTokenIdentifier.KIND.toString(), "password".getBytes(), + "tokenservice"); + StartContainerRequest containerReq = StartContainerRequest.newInstance(clc, + containerToken); + stateStore.storeContainer(containerId, 0, 0, containerReq); + return containerReq; + } + private static class NMTokenSecretManagerForTest extends BaseNMTokenSecretManager { public MasterKey generateKey() { diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/util/TestNodeManagerHardwareUtils.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/util/TestNodeManagerHardwareUtils.java index 4add586bbf1..767c308aeb6 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/util/TestNodeManagerHardwareUtils.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/util/TestNodeManagerHardwareUtils.java @@ -172,7 +172,7 @@ public void testGetContainerMemoryMB() throws Exception { YarnConfiguration conf = new YarnConfiguration(); conf.setBoolean(YarnConfiguration.NM_ENABLE_HARDWARE_CAPABILITY_DETECTION, true); - int mem = NodeManagerHardwareUtils.getContainerMemoryMB(null, conf); + long mem = NodeManagerHardwareUtils.getContainerMemoryMB(null, conf); Assert.assertEquals(YarnConfiguration.DEFAULT_NM_PMEM_MB, mem); mem = NodeManagerHardwareUtils.getContainerMemoryMB(plugin, conf); diff --git 
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/MockContainer.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/MockContainer.java index b9c6fffd6b2..29c20382ea8 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/MockContainer.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/MockContainer.java @@ -37,6 +37,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerState; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ResourceMappings; import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.ResourceSet; import org.apache.hadoop.yarn.server.utils.BuilderUtils; @@ -242,4 +243,9 @@ public void sendPauseEvent(String description) { public long getContainerStartTime() { return 0; } + + @Override + public ResourceMappings getResourceMappings() { + return null; + } } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServices.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServices.java index 4586a7b88c4..980eae95841 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServices.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServices.java @@ -18,25 +18,20 @@ package org.apache.hadoop.yarn.server.nodemanager.webapp; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; - -import java.io.File; -import java.io.IOException; -import java.io.PrintWriter; -import java.io.StringReader; -import java.net.HttpURLConnection; -import java.net.URI; -import java.net.URL; -import java.util.List; -import javax.servlet.http.HttpServletResponse; -import javax.ws.rs.core.MediaType; -import javax.xml.parsers.DocumentBuilder; -import javax.xml.parsers.DocumentBuilderFactory; - -import org.junit.Assert; +import com.google.inject.Guice; +import com.google.inject.Injector; +import com.google.inject.servlet.GuiceServletContextListener; +import com.google.inject.servlet.ServletModule; +import com.sun.jersey.api.client.ClientResponse; +import com.sun.jersey.api.client.ClientResponse.Status; +import com.sun.jersey.api.client.GenericType; +import com.sun.jersey.api.client.UniformInterfaceException; +import com.sun.jersey.api.client.WebResource; +import com.sun.jersey.guice.spi.container.servlet.GuiceContainer; +import com.sun.jersey.test.framework.WebAppDescriptor; +import javax.xml.bind.annotation.XmlAccessType; +import javax.xml.bind.annotation.XmlAccessorType; +import javax.xml.bind.annotation.XmlRootElement; 
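+// The JAXB annotations imported above are used by the MockNMResourceInfo DAO
+// defined in the resource info tests below.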
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileUtil; @@ -48,6 +43,7 @@ import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.event.AsyncDispatcher; +import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.logaggregation.ContainerLogAggregationType; import org.apache.hadoop.yarn.logaggregation.ContainerLogFileInfo; import org.apache.hadoop.yarn.logaggregation.TestContainerLogsUtils; @@ -59,7 +55,15 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationImpl; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerState; import org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.ResourcePlugin; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.ResourcePluginManager; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu.AssignedGpuDevice; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu.GpuDevice; import org.apache.hadoop.yarn.server.nodemanager.webapp.WebServer.NMWebApp; +import org.apache.hadoop.yarn.server.nodemanager.webapp.dao.NMResourceInfo; +import org.apache.hadoop.yarn.server.nodemanager.webapp.dao.gpu.GpuDeviceInformation; +import org.apache.hadoop.yarn.server.nodemanager.webapp.dao.gpu.NMGpuResourceInfo; +import org.apache.hadoop.yarn.server.nodemanager.webapp.dao.gpu.PerGpuDeviceInformation; import org.apache.hadoop.yarn.server.security.ApplicationACLsManager; import org.apache.hadoop.yarn.server.utils.BuilderUtils; import org.apache.hadoop.yarn.server.webapp.YarnWebServiceParams; @@ -73,6 +77,7 @@ import org.codehaus.jettison.json.JSONException; import org.codehaus.jettison.json.JSONObject; import org.junit.AfterClass; +import org.junit.Assert; import org.junit.Before; import org.junit.Test; import org.w3c.dom.Document; @@ -80,24 +85,35 @@ import org.w3c.dom.NodeList; import org.xml.sax.InputSource; -import com.google.inject.Guice; -import com.google.inject.Injector; -import com.google.inject.servlet.GuiceServletContextListener; -import com.google.inject.servlet.ServletModule; -import com.sun.jersey.api.client.ClientResponse; -import com.sun.jersey.api.client.ClientResponse.Status; -import com.sun.jersey.api.client.GenericType; -import com.sun.jersey.api.client.UniformInterfaceException; -import com.sun.jersey.api.client.WebResource; -import com.sun.jersey.guice.spi.container.servlet.GuiceContainer; -import com.sun.jersey.test.framework.WebAppDescriptor; +import javax.servlet.http.HttpServletResponse; +import javax.ws.rs.core.MediaType; +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; +import java.io.File; +import java.io.IOException; +import java.io.PrintWriter; +import java.io.StringReader; +import java.net.HttpURLConnection; +import java.net.URI; +import java.net.URL; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; /** * Test the nodemanager node info web services api's */ 
 public class TestNMWebServices extends JerseyTestBase {
-  private static Context nmContext;
+  private static NodeManager.NMContext nmContext;
   private static ResourceView resourceView;
   private static ApplicationACLsManager aclsManager;
   private static LocalDirsHandlerService dirsHandler;
@@ -411,6 +427,112 @@ public void testNMRedirect() {
     assertFalse(redirectURL.contains(YarnWebServiceParams.NM_ID));
   }
 
+  @XmlRootElement
+  @XmlAccessorType(XmlAccessType.FIELD)
+  private static class MockNMResourceInfo extends NMResourceInfo {
+    public long a = 1000L;
+    public MockNMResourceInfo() { }
+  }
+
+  @Test
+  public void testGetNMResourceInfo()
+      throws YarnException, InterruptedException, JSONException {
+    ResourcePluginManager rpm = mock(ResourcePluginManager.class);
+    Map<String, ResourcePlugin> namesToPlugins = new HashMap<>();
+    ResourcePlugin mockPlugin1 = mock(ResourcePlugin.class);
+    NMResourceInfo nmResourceInfo1 = new MockNMResourceInfo();
+    when(mockPlugin1.getNMResourceInfo()).thenReturn(nmResourceInfo1);
+    namesToPlugins.put("resource-1", mockPlugin1);
+    namesToPlugins.put("yarn.io/resource-1", mockPlugin1);
+    ResourcePlugin mockPlugin2 = mock(ResourcePlugin.class);
+    namesToPlugins.put("resource-2", mockPlugin2);
+    when(rpm.getNameToPlugins()).thenReturn(namesToPlugins);
+
+    nmContext.setResourcePluginManager(rpm);
+
+    WebResource r = resource();
+    ClientResponse response = r.path("ws").path("v1").path("node").path(
+        "resources").path("resource-2").accept(MediaType.APPLICATION_JSON).get(
+        ClientResponse.class);
+    assertEquals(MediaType.APPLICATION_JSON, response.getType().toString());
+
+    // Accessing resource-2 should fail (an empty NMResourceInfo is returned).
+    String resp = response.getEntity(String.class);
+    assertEquals("null", resp);
+
+    // Accessing resource-3 should fail (unknown plugin).
+    response = r.path("ws").path("v1").path("node").path(
+        "resources").path("resource-3").accept(MediaType.APPLICATION_JSON).get(
+        ClientResponse.class);
+    assertEquals(MediaType.APPLICATION_JSON, response.getType().toString());
+    resp = response.getEntity(String.class);
+    assertEquals("null", resp);
+
+    // Accessing resource-1 should succeed.
+    response = r.path("ws").path("v1").path("node").path(
+        "resources").path("resource-1").accept(MediaType.APPLICATION_JSON).get(
+        ClientResponse.class);
+    assertEquals(MediaType.APPLICATION_JSON, response.getType().toString());
+    JSONObject json = response.getEntity(JSONObject.class);
+    assertEquals(1000, Long.parseLong(json.get("a").toString()));
+
+    // Accessing resource-1 by its URL-encoded full name (yarn.io%2Fresource-1)
+    // should also succeed.
+    response = r.path("ws").path("v1").path("node").path("resources").path(
+        "yarn.io%2Fresource-1").accept(MediaType.APPLICATION_JSON).get(
+        ClientResponse.class);
+    assertEquals(MediaType.APPLICATION_JSON, response.getType().toString());
+    json = response.getEntity(JSONObject.class);
+    assertEquals(1000, Long.parseLong(json.get("a").toString()));
+  }
+
+  private ContainerId createContainerId(int id) {
+    ApplicationId appId = ApplicationId.newInstance(0, 0);
+    ApplicationAttemptId appAttemptId =
+        ApplicationAttemptId.newInstance(appId, 1);
+    ContainerId containerId = ContainerId.newContainerId(appAttemptId, id);
+    return containerId;
+  }
+
+  @Test
+  public void testGetYarnGpuResourceInfo()
+      throws YarnException, InterruptedException, JSONException {
+    ResourcePluginManager rpm = mock(ResourcePluginManager.class);
+    Map<String, ResourcePlugin> namesToPlugins = new HashMap<>();
+    ResourcePlugin mockPlugin1 = mock(ResourcePlugin.class);
+    GpuDeviceInformation gpuDeviceInformation = new GpuDeviceInformation();
+    gpuDeviceInformation.setDriverVersion("1.2.3");
+    gpuDeviceInformation.setGpus(Arrays.asList(new PerGpuDeviceInformation()));
+    NMResourceInfo nmResourceInfo1 = new NMGpuResourceInfo(gpuDeviceInformation,
+        Arrays.asList(new GpuDevice(1, 1), new GpuDevice(2, 2),
+            new GpuDevice(3, 3)), Arrays
+            .asList(new AssignedGpuDevice(2, 2, createContainerId(1)),
+                new AssignedGpuDevice(3, 3, createContainerId(2))));
+    when(mockPlugin1.getNMResourceInfo()).thenReturn(nmResourceInfo1);
+    namesToPlugins.put("resource-1", mockPlugin1);
+    namesToPlugins.put("yarn.io/resource-1", mockPlugin1);
+    ResourcePlugin mockPlugin2 = mock(ResourcePlugin.class);
+    namesToPlugins.put("resource-2", mockPlugin2);
+    when(rpm.getNameToPlugins()).thenReturn(namesToPlugins);
+
+    nmContext.setResourcePluginManager(rpm);
+
+    WebResource r = resource();
+    ClientResponse response;
+    JSONObject json;
+
+    // Accessing resource-1 should succeed and return the GPU resource info.
+    response = r.path("ws").path("v1").path("node").path(
+        "resources").path("resource-1").accept(MediaType.APPLICATION_JSON).get(
+        ClientResponse.class);
+    assertEquals(MediaType.APPLICATION_JSON, response.getType().toString());
+    json = response.getEntity(JSONObject.class);
+    assertEquals("1.2.3",
+        json.getJSONObject("gpuDeviceInformation").get("driver_version"));
+    assertEquals(3, json.getJSONArray("totalGpuDevices").length());
+    assertEquals(2, json.getJSONArray("assignedGpuDevices").length());
+  }
+
   private void testContainerLogs(WebResource r, ContainerId containerId)
       throws IOException {
     final String containerIdStr = containerId.toString();
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/gpu/TestGpuDeviceInformationParser.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/gpu/TestGpuDeviceInformationParser.java
new file mode 100644
index 00000000000..dc96746cf5d
--- /dev/null
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/gpu/TestGpuDeviceInformationParser.java
@@ -0,0 +1,50 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.server.nodemanager.webapp.dao.gpu; + +import org.apache.commons.io.FileUtils; +import org.apache.hadoop.yarn.exceptions.YarnException; +import org.junit.Assert; +import org.junit.Test; + +import java.io.File; +import java.io.IOException; + +public class TestGpuDeviceInformationParser { + @Test + public void testParse() throws IOException, YarnException { + File f = new File("src/test/resources/nvidia-smi-sample-xml-output"); + String s = FileUtils.readFileToString(f, "UTF-8"); + + GpuDeviceInformationParser parser = new GpuDeviceInformationParser(); + + GpuDeviceInformation info = parser.parseXml(s); + Assert.assertEquals("375.66", info.getDriverVersion()); + Assert.assertEquals(2, info.getGpus().size()); + PerGpuDeviceInformation gpu1 = info.getGpus().get(1); + Assert.assertEquals("Tesla P100-PCIE-12GB", gpu1.getProductName()); + Assert.assertEquals(12193, gpu1.getGpuMemoryUsage().getTotalMemoryMiB()); + Assert.assertEquals(10.3f, + gpu1.getGpuUtilizations().getOverallGpuUtilization(), 1e-6); + Assert.assertEquals(34f, gpu1.getTemperature().getCurrentGpuTemp(), 1e-6); + Assert.assertEquals(85f, gpu1.getTemperature().getMaxGpuTemp(), 1e-6); + Assert.assertEquals(82f, gpu1.getTemperature().getSlowThresholdGpuTemp(), + 1e-6); + } +} diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/resources/nvidia-smi-sample-xml-output hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/resources/nvidia-smi-sample-xml-output new file mode 100644 index 00000000000..5ccb72265b5 --- /dev/null +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/resources/nvidia-smi-sample-xml-output @@ -0,0 +1,547 @@ + + + + + + + Wed Sep 6 21:52:51 2017 + 375.66 + 2 + + Tesla P100-PCIE-12GB + Tesla + Disabled + Disabled + Disabled + Disabled + 1920 + + N/A + N/A + + 0320717030197 + GPU-28604e81-21ec-cc48-6759-bf2648b22e16 + 0 + 86.00.3A.00.02 + No + 0x400 + 900-2H400-0110-030 + + H400.0202.00.01 + 1.1 + 4.1 + N/A + + + N/A + N/A + + + None + + + 04 + 00 + 0000 + 15F710DE + 0000:04:00.0 + 11DA10DE + + + 3 + 3 + + + 16x + 16x + + + + N/A + N/A + + 0 + 0 KB/s + 0 KB/s + + N/A + P0 + + Active + Not Active + Not Active + Not Active + Not Active + Not Active + + + 12193 MiB + 0 MiB + 12193 MiB + + + 16384 MiB + 2 MiB + 16382 MiB + + Default + + 0 % + 0 % + 0 % + 0 % + + + 0 + 0 + 0 ms + + + Enabled + Enabled + + + + + 0 + 0 + N/A + 0 + 0 + 0 + 0 + + + 0 + 0 + N/A + 0 + 0 + 0 + 0 + + + + + 0 + 0 + N/A + 0 + 0 + 0 + 0 + + + 0 + 0 + N/A + 0 + 0 + 0 + 0 + + + + + + 0 + + + + + 0 + + + + No + + + 31 C + 85 C + 82 C + + + P0 + Supported + 24.84 W + 250.00 W + 250.00 W + 250.00 W + 125.00 W + 250.00 W + + + 405 MHz + 405 MHz + 715 MHz + 835 MHz + + + 1189 MHz + 715 MHz + + + 1189 MHz + 715 MHz + + + 1328 MHz + 1328 MHz + 715 MHz + 1328 MHz + + + N/A + N/A + + + + 715 MHz + 1328 MHz + 1316 MHz + 
1303 MHz + 1290 MHz + 1278 MHz + 1265 MHz + 1252 MHz + 1240 MHz + 1227 MHz + 1215 MHz + 1202 MHz + 1189 MHz + 1177 MHz + 1164 MHz + 1151 MHz + 1139 MHz + 1126 MHz + 1113 MHz + 1101 MHz + 1088 MHz + 1075 MHz + 1063 MHz + 1050 MHz + 1037 MHz + 1025 MHz + 1012 MHz + 999 MHz + 987 MHz + 974 MHz + 961 MHz + 949 MHz + 936 MHz + 923 MHz + 911 MHz + 898 MHz + 885 MHz + 873 MHz + 860 MHz + 847 MHz + 835 MHz + 822 MHz + 810 MHz + 797 MHz + 784 MHz + 772 MHz + 759 MHz + 746 MHz + 734 MHz + 721 MHz + 708 MHz + 696 MHz + 683 MHz + 670 MHz + 658 MHz + 645 MHz + 632 MHz + 620 MHz + 607 MHz + 594 MHz + 582 MHz + 569 MHz + 556 MHz + 544 MHz + + + + + + + + + + Tesla P100-PCIE-12GB + Tesla + Disabled + Disabled + Disabled + Disabled + 1920 + + N/A + N/A + + 0320717031755 + GPU-46915a82-3fd2-8e11-ae26-a80b607c04f3 + 1 + 86.00.3A.00.02 + No + 0x8200 + 900-2H400-0110-030 + + H400.0202.00.01 + 1.1 + 4.1 + N/A + + + N/A + N/A + + + None + + + 82 + 00 + 0000 + 15F710DE + 0000:82:00.0 + 11DA10DE + + + 3 + 3 + + + 16x + 16x + + + + N/A + N/A + + 0 + 0 KB/s + 0 KB/s + + N/A + P0 + + Active + Not Active + Not Active + Not Active + Not Active + Not Active + + + 12193 MiB + 0 MiB + 12193 MiB + + + 16384 MiB + 2 MiB + 16382 MiB + + Default + + 10.3 % + 0 % + 0 % + 0 % + + + 0 + 0 + 0 ms + + + Enabled + Enabled + + + + + 0 + 0 + N/A + 0 + 0 + 0 + 0 + + + 0 + 0 + N/A + 0 + 0 + 0 + 0 + + + + + 0 + 0 + N/A + 0 + 0 + 0 + 0 + + + 0 + 0 + N/A + 0 + 0 + 0 + 0 + + + + + + 0 + + + + + 0 + + + + No + + + 34 C + 85 C + 82 C + + + P0 + Supported + 25.54 W + 250.00 W + 250.00 W + 250.00 W + 125.00 W + 250.00 W + + + 405 MHz + 405 MHz + 715 MHz + 835 MHz + + + 1189 MHz + 715 MHz + + + 1189 MHz + 715 MHz + + + 1328 MHz + 1328 MHz + 715 MHz + 1328 MHz + + + N/A + N/A + + + + 715 MHz + 1328 MHz + 1316 MHz + 1303 MHz + 1290 MHz + 1278 MHz + 1265 MHz + 1252 MHz + 1240 MHz + 1227 MHz + 1215 MHz + 1202 MHz + 1189 MHz + 1177 MHz + 1164 MHz + 1151 MHz + 1139 MHz + 1126 MHz + 1113 MHz + 1101 MHz + 1088 MHz + 1075 MHz + 1063 MHz + 1050 MHz + 1037 MHz + 1025 MHz + 1012 MHz + 999 MHz + 987 MHz + 974 MHz + 961 MHz + 949 MHz + 936 MHz + 923 MHz + 911 MHz + 898 MHz + 885 MHz + 873 MHz + 860 MHz + 847 MHz + 835 MHz + 822 MHz + 810 MHz + 797 MHz + 784 MHz + 772 MHz + 759 MHz + 746 MHz + 734 MHz + 721 MHz + 708 MHz + 696 MHz + 683 MHz + 670 MHz + 658 MHz + 645 MHz + 632 MHz + 620 MHz + 607 MHz + 594 MHz + 582 MHz + 569 MHz + 556 MHz + 544 MHz + + + + + + + + + \ No newline at end of file diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/AMSProcessingChain.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/AMSProcessingChain.java index 931b1c8b7d5..7ae23e7bb63 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/AMSProcessingChain.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/AMSProcessingChain.java @@ -82,7 +82,7 @@ public synchronized void addProcessor( public void registerApplicationMaster( ApplicationAttemptId applicationAttemptId, RegisterApplicationMasterRequest request, - RegisterApplicationMasterResponse resp) throws IOException { + RegisterApplicationMasterResponse resp) throws IOException, YarnException { 
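+    // Processors in the chain may now surface YarnException to callers of
+    // registerApplicationMaster.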
    this.head.registerApplicationMaster(applicationAttemptId, request, resp);
  }
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/AdminService.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/AdminService.java
index 6c0a8541223..3c117bc4b07 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/AdminService.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/AdminService.java
@@ -400,14 +400,32 @@ public RefreshQueuesResponse refreshQueues(RefreshQueuesRequest request)
     }
   }
 
+  protected Configuration loadNewConfiguration()
+      throws IOException, YarnException {
+    // Retrieve yarn-site.xml in order to refresh scheduling monitor properties.
+    Configuration conf = getConfiguration(new Configuration(false),
+        YarnConfiguration.YARN_SITE_CONFIGURATION_FILE,
+        YarnConfiguration.RESOURCE_TYPES_CONFIGURATION_FILE);
+    // We call Configuration#size() because getConfiguration() invokes
+    // Configuration#addResource, which in turn calls
+    // Configuration#reloadConfiguration. That reload is lazy: properties are
+    // only re-read when they are next accessed. Calling Configuration#size()
+    // forces Configuration#getProps to be invoked, so all properties are
+    // reloaded immediately.
+    conf.size();
+    return conf;
+  }
+
   @Private
   public void refreshQueues() throws IOException, YarnException {
-    rm.getRMContext().getScheduler().reinitialize(getConfig(),
+    Configuration conf = loadNewConfiguration();
+    rm.getRMContext().getScheduler().reinitialize(conf,
         this.rm.getRMContext());
     // refresh the reservation system
     ReservationSystem rSystem = rm.getRMContext().getReservationSystem();
     if (rSystem != null) {
-      rSystem.reinitialize(getConfig(), rm.getRMContext());
+      rSystem.reinitialize(conf, rm.getRMContext());
     }
   }
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java
index a10413ba143..16d6416f89f 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java
@@ -112,6 +112,8 @@
 import org.apache.hadoop.yarn.api.protocolrecords.UpdateApplicationPriorityResponse;
 import org.apache.hadoop.yarn.api.protocolrecords.UpdateApplicationTimeoutsRequest;
 import org.apache.hadoop.yarn.api.protocolrecords.UpdateApplicationTimeoutsResponse;
+import org.apache.hadoop.yarn.api.protocolrecords.GetAllResourceTypeInfoRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.GetAllResourceTypeInfoResponse;
 import org.apache.hadoop.yarn.api.records.ApplicationAccessType;
 import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
 import
org.apache.hadoop.yarn.api.records.ApplicationAttemptReport;
@@ -174,6 +176,7 @@
 import org.apache.hadoop.yarn.util.Clock;
 import org.apache.hadoop.yarn.util.Records;
 import org.apache.hadoop.yarn.util.UTCClock;
+import org.apache.hadoop.yarn.util.resource.ResourceUtils;
 import org.apache.hadoop.yarn.util.timeline.TimelineUtils;
 
 import com.google.common.annotations.VisibleForTesting;
@@ -1783,4 +1786,12 @@ public void setDisplayPerUserApps(boolean displayPerUserApps) {
     this.displayPerUserApps = displayPerUserApps;
   }
+
+  @Override
+  public GetAllResourceTypeInfoResponse getResourceTypeInfo(
+      GetAllResourceTypeInfoRequest request) throws YarnException, IOException {
+    GetAllResourceTypeInfoResponse response =
+        GetAllResourceTypeInfoResponse.newInstance();
+    response.setResourceTypeInfo(ResourceUtils.getResourcesTypeInfo());
+    return response;
+  }
 }
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/DefaultAMSProcessor.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/DefaultAMSProcessor.java
index 273e0cda8b7..0baf17aa952 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/DefaultAMSProcessor.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/DefaultAMSProcessor.java
@@ -111,7 +111,8 @@ public void init(ApplicationMasterServiceContext amsContext,
   public void registerApplicationMaster(
       ApplicationAttemptId applicationAttemptId,
       RegisterApplicationMasterRequest request,
-      RegisterApplicationMasterResponse response) throws IOException {
+      RegisterApplicationMasterResponse response)
+      throws IOException, YarnException {
 
     RMApp app = getRmContext().getRMApps().get(
         applicationAttemptId.getApplicationId());
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/OpportunisticContainerAllocatorAMService.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/OpportunisticContainerAllocatorAMService.java
index 208300c789d..a9136d65cde 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/OpportunisticContainerAllocatorAMService.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/OpportunisticContainerAllocatorAMService.java
@@ -127,7 +127,8 @@ public void init(ApplicationMasterServiceContext amsContext,
   public void registerApplicationMaster(
       ApplicationAttemptId applicationAttemptId,
       RegisterApplicationMasterRequest request,
-      RegisterApplicationMasterResponse response) throws IOException {
+      RegisterApplicationMasterResponse response)
+      throws IOException, YarnException {
     final SchedulerApplicationAttempt appAttempt = ((AbstractYarnScheduler)
         getScheduler()).getApplicationAttempt(applicationAttemptId);
     if (appAttempt.getOpportunisticContainerContext() == null) {
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java
index d6a4d2f782d..61e8a6d499a 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java
@@ -69,6 +69,7 @@
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.util.concurrent.Futures;
 import com.google.common.util.concurrent.SettableFuture;
+import org.apache.hadoop.yarn.util.StringHelper;
 
 /**
  * This class manages the list of applications for the resource manager.
@@ -189,7 +190,12 @@ public static SummaryBuilder createAppSummary(RMApp app) {
         .add("preemptedAMContainers", metrics.getNumAMContainersPreempted())
         .add("preemptedNonAMContainers", metrics.getNumNonAMContainersPreempted())
         .add("preemptedResources", metrics.getResourcePreempted())
-        .add("applicationType", app.getApplicationType());
+        .add("applicationType", app.getApplicationType())
+        .add("resourceSeconds", StringHelper
+            .getResourceSecondsString(metrics.getResourceSecondsMap()))
+        .add("preemptedResourceSeconds", StringHelper
+            .getResourceSecondsString(
+                metrics.getPreemptedResourceSecondsMap()));
     return summary;
   }
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMServerUtils.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMServerUtils.java
index 5b074488931..4a853682902 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMServerUtils.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMServerUtils.java
@@ -486,7 +486,7 @@ public static YarnApplicationAttemptState createApplicationAttemptState(
       DUMMY_APPLICATION_RESOURCE_USAGE_REPORT =
       BuilderUtils.newApplicationResourceUsageReport(-1, -1,
           Resources.createResource(-1, -1), Resources.createResource(-1, -1),
-          Resources.createResource(-1, -1), 0, 0, 0, 0);
+          Resources.createResource(-1, -1), new HashMap<>(), new HashMap<>());
 
   /**
@@ -630,4 +630,12 @@ public static int getApplicableNodeCountForAM(RMContext rmContext,
       return labelsToNodes.get(label);
     }
   }
+
+  public static Long getOrDefault(Map<String, Long> map, String key,
+      Long defaultValue) {
+    if (map.containsKey(key)) {
+      return map.get(key);
+    }
+    return defaultValue;
+  }
 }
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceTrackerService.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceTrackerService.java
index de8386d195e..42a7e014907 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceTrackerService.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceTrackerService.java
@@ -378,10 +378,11 @@ public RegisterNodeManagerResponse registerNodeManager(
     // Check if this node has minimum allocations
     if (capability.getMemorySize() < minAllocMb
         || capability.getVirtualCores() < minAllocVcores) {
-      String message =
-          "NodeManager from " + host
-              + " doesn't satisfy minimum allocations, Sending SHUTDOWN"
-              + " signal to the NodeManager.";
+      String message = "NodeManager from " + host
+          + " doesn't satisfy minimum allocations, Sending SHUTDOWN"
+          + " signal to the NodeManager. Node capabilities are " + capability
+          + "; minimums are " + minAllocMb + "mb and " + minAllocVcores
+          + " vcores";
       LOG.info(message);
       response.setDiagnosticsMessage(message);
       response.setNodeAction(NodeAction.SHUTDOWN);
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStore.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStore.java
index e8ed0b7ee65..f0ab324ace8 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStore.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStore.java
@@ -853,11 +853,8 @@ public void storeNewApplicationAttempt(RMAppAttempt appAttempt) {
             appAttempt.getAppAttemptId(),
             appAttempt.getMasterContainer(),
             credentials, appAttempt.getStartTime(),
-            resUsage.getMemorySeconds(),
-            resUsage.getVcoreSeconds(),
-            attempMetrics.getPreemptedMemory(),
-            attempMetrics.getPreemptedVcore()
-            );
+            resUsage.getResourceUsageSecondsMap(),
+            attempMetrics.getPreemptedResourceSecondsMap());
 
     getRMStateStoreEventHandler().handle(
         new RMStateStoreAppAttemptEvent(attemptState));
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/records/ApplicationAttemptStateData.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/records/ApplicationAttemptStateData.java
index 67aaf947127..2de071ad2ec 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/records/ApplicationAttemptStateData.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/records/ApplicationAttemptStateData.java
@@ -25,23 +25,28 @@
 import org.apache.hadoop.yarn.api.records.Container;
 import org.apache.hadoop.yarn.api.records.ContainerExitStatus;
 import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
+import org.apache.hadoop.yarn.api.records.ResourceInformation;
 import org.apache.hadoop.yarn.proto.YarnServerResourceManagerRecoveryProtos.ApplicationAttemptStateDataProto;
+import org.apache.hadoop.yarn.server.resourcemanager.RMServerUtils;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState;
 import org.apache.hadoop.yarn.util.Records;
 
+import java.util.Map;
+
 /*
  * Contains the state data that needs to be persisted for an ApplicationAttempt
  */
 @Public
 @Unstable
 public abstract class ApplicationAttemptStateData {
+
   public static ApplicationAttemptStateData newInstance(
       ApplicationAttemptId attemptId, Container container,
       Credentials attemptTokens, long startTime, RMAppAttemptState finalState,
       String finalTrackingUrl, String diagnostics,
       FinalApplicationStatus amUnregisteredFinalStatus, int exitStatus,
-      long finishTime, long memorySeconds, long vcoreSeconds,
-      long preemptedMemorySeconds, long preemptedVcoreSeconds) {
+      long finishTime, Map<String, Long> resourceSecondsMap,
+      Map<String, Long> preemptedResourceSecondsMap) {
     ApplicationAttemptStateData attemptStateData =
         Records.newRecord(ApplicationAttemptStateData.class);
     attemptStateData.setAttemptId(attemptId);
@@ -54,23 +59,33 @@ public static ApplicationAttemptStateData newInstance(
     attemptStateData.setFinalApplicationStatus(amUnregisteredFinalStatus);
     attemptStateData.setAMContainerExitStatus(exitStatus);
     attemptStateData.setFinishTime(finishTime);
-    attemptStateData.setMemorySeconds(memorySeconds);
-    attemptStateData.setVcoreSeconds(vcoreSeconds);
-    attemptStateData.setPreemptedMemorySeconds(preemptedMemorySeconds);
-    attemptStateData.setPreemptedVcoreSeconds(preemptedVcoreSeconds);
+    attemptStateData.setMemorySeconds(RMServerUtils
+        .getOrDefault(resourceSecondsMap,
+            ResourceInformation.MEMORY_MB.getName(), 0L));
+    attemptStateData.setVcoreSeconds(RMServerUtils
+        .getOrDefault(resourceSecondsMap, ResourceInformation.VCORES.getName(),
+            0L));
+    attemptStateData.setPreemptedMemorySeconds(RMServerUtils
+        .getOrDefault(preemptedResourceSecondsMap,
+            ResourceInformation.MEMORY_MB.getName(), 0L));
+    attemptStateData.setPreemptedVcoreSeconds(RMServerUtils
+        .getOrDefault(preemptedResourceSecondsMap,
+            ResourceInformation.VCORES.getName(), 0L));
+    attemptStateData.setResourceSecondsMap(resourceSecondsMap);
+    attemptStateData
+        .setPreemptedResourceSecondsMap(preemptedResourceSecondsMap);
     return attemptStateData;
   }
 
   public static ApplicationAttemptStateData newInstance(
       ApplicationAttemptId attemptId, Container masterContainer,
-      Credentials attemptTokens, long startTime, long memorySeconds,
-      long vcoreSeconds, long preemptedMemorySeconds,
-      long preemptedVcoreSeconds) {
-    return newInstance(attemptId, masterContainer, attemptTokens,
-        startTime, null, "N/A", "", null, ContainerExitStatus.INVALID, 0,
-        memorySeconds, vcoreSeconds,
-        preemptedMemorySeconds, preemptedVcoreSeconds);
-  }
+      Credentials attemptTokens, long startTime,
+      Map<String, Long> resourceSecondsMap,
+      Map<String, Long> preemptedResourceSecondsMap) {
+    return newInstance(attemptId, masterContainer, attemptTokens, startTime,
+        null, "N/A", "", null, ContainerExitStatus.INVALID, 0,
+        resourceSecondsMap, preemptedResourceSecondsMap);
+  }
 
   public abstract ApplicationAttemptStateDataProto getProto();
 
@@ -215,4 +230,50 @@ public abstract void setFinalApplicationStatus(
   @Public
   @Unstable
   public abstract void setPreemptedVcoreSeconds(long vcoreSeconds);
+
+  /**
+   * Get the aggregated number of resources that the application has
+   * allocated times the number of seconds the application has been running.
+   *
+   * @return map containing the resource name and aggregated resource-seconds
+   */
+  @Public
+  @Unstable
+  public abstract Map<String, Long> getResourceSecondsMap();
+
+  /**
+   * Set the aggregated number of resources that the application has
+   * allocated times the number of seconds the application has been running.
+   *
+   * @param resourceSecondsMap map containing the resource name and aggregated
+   *          resource-seconds
+   */
+  @Public
+  @Unstable
+  public abstract void setResourceSecondsMap(
+      Map<String, Long> resourceSecondsMap);
+
+  /**
+   * Get the aggregated number of resources preempted that the application has
+   * allocated times the number of seconds the application has been running.
+   *
+   * @return map containing the resource name and aggregated preempted
+   *         resource-seconds
+   */
+  @Public
+  @Unstable
+  public abstract Map<String, Long> getPreemptedResourceSecondsMap();
+
+  /**
+   * Set the aggregated number of resources preempted that the application has
+   * allocated times the number of seconds the application has been running.
+   *
+   * @param preemptedResourceSecondsMap map containing the resource name and
+   *          aggregated preempted resource-seconds
+   */
+  @Public
+  @Unstable
+  public abstract void setPreemptedResourceSecondsMap(
+      Map<String, Long> preemptedResourceSecondsMap);
 }
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/records/impl/pb/ApplicationAttemptStateDataPBImpl.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/records/impl/pb/ApplicationAttemptStateDataPBImpl.java
index e89726f91ad..ed71ea2f016 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/records/impl/pb/ApplicationAttemptStateDataPBImpl.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/records/impl/pb/ApplicationAttemptStateDataPBImpl.java
@@ -20,6 +20,7 @@
 
 import java.io.IOException;
 import java.nio.ByteBuffer;
+import java.util.Map;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -55,6 +56,9 @@
   private Container masterContainer = null;
   private ByteBuffer appAttemptTokens = null;
 
+  private Map<String, Long> resourceSecondsMap;
+  private Map<String, Long> preemptedResourceSecondsMap;
+
   public ApplicationAttemptStateDataPBImpl() {
     builder = ApplicationAttemptStateDataProto.newBuilder();
   }
@@ -404,4 +408,50 @@ private static Credentials convertCredentialsFromByteBuffer(
       IOUtils.closeStream(dibb);
     }
   }
+
+  @Override
+  public Map<String, Long> getResourceSecondsMap() {
+    if (this.resourceSecondsMap != null) {
+      return this.resourceSecondsMap;
+    }
+    ApplicationAttemptStateDataProtoOrBuilder p = viaProto ? proto : builder;
+    this.resourceSecondsMap = ProtoUtils.convertStringLongMapProtoListToMap(
+        p.getApplicationResourceUsageMapList());
+    return this.resourceSecondsMap;
+  }
+
+  @Override
+  public void setResourceSecondsMap(Map<String, Long> resourceSecondsMap) {
+    maybeInitBuilder();
+    builder.clearApplicationResourceUsageMap();
+    this.resourceSecondsMap = resourceSecondsMap;
+    if (resourceSecondsMap != null) {
+      builder.addAllApplicationResourceUsageMap(
+          ProtoUtils.convertMapToStringLongMapProtoList(resourceSecondsMap));
+    }
+  }
+
+  @Override
+  public Map<String, Long> getPreemptedResourceSecondsMap() {
+    if (this.preemptedResourceSecondsMap != null) {
+      return this.preemptedResourceSecondsMap;
+    }
+    ApplicationAttemptStateDataProtoOrBuilder p = viaProto ? proto : builder;
+    this.preemptedResourceSecondsMap = ProtoUtils
+        .convertStringLongMapProtoListToMap(
+            p.getPreemptedResourceUsageMapList());
+    return this.preemptedResourceSecondsMap;
+  }
+
+  @Override
+  public void setPreemptedResourceSecondsMap(
+      Map<String, Long> preemptedResourceSecondsMap) {
+    maybeInitBuilder();
+    builder.clearPreemptedResourceUsageMap();
+    this.preemptedResourceSecondsMap = preemptedResourceSecondsMap;
+    if (preemptedResourceSecondsMap != null) {
+      builder.addAllPreemptedResourceUsageMap(ProtoUtils
+          .convertMapToStringLongMapProtoList(preemptedResourceSecondsMap));
+    }
+  }
 }
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java
index 63acaeab100..3143423516b 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java
@@ -754,14 +754,10 @@ public ApplicationReport createAndGetApplicationReport(String clientUserName,
       }
 
       RMAppMetrics rmAppMetrics = getRMAppMetrics();
-      appUsageReport.setMemorySeconds(rmAppMetrics.getMemorySeconds());
-      appUsageReport.setVcoreSeconds(rmAppMetrics.getVcoreSeconds());
-      appUsageReport.
-          setPreemptedMemorySeconds(rmAppMetrics.
-              getPreemptedMemorySeconds());
-      appUsageReport.
-          setPreemptedVcoreSeconds(rmAppMetrics.
-              getPreemptedVcoreSeconds());
+      appUsageReport
+          .setResourceSecondsMap(rmAppMetrics.getResourceSecondsMap());
+      appUsageReport.setPreemptedResourceSecondsMap(
+          rmAppMetrics.getPreemptedResourceSecondsMap());
     }
 
     if (currentApplicationAttemptId == null) {
@@ -1650,10 +1646,9 @@ public RMAppMetrics getRMAppMetrics() {
     Resource resourcePreempted = Resource.newInstance(0, 0);
     int numAMContainerPreempted = 0;
     int numNonAMContainerPreempted = 0;
-    long memorySeconds = 0;
-    long vcoreSeconds = 0;
-    long preemptedMemorySeconds = 0;
-    long preemptedVcoreSeconds = 0;
+    Map<String, Long> resourceSecondsMap = new HashMap<>();
+    Map<String, Long> preemptedSecondsMap = new HashMap<>();
+
     this.readLock.lock();
     try {
       for (RMAppAttempt attempt : attempts.values()) {
@@ -1669,20 +1664,28 @@ public RMAppMetrics getRMAppMetrics() {
           // for both running and finished containers.
           AggregateAppResourceUsage resUsage =
               attempt.getRMAppAttemptMetrics().getAggregateAppResourceUsage();
-          memorySeconds += resUsage.getMemorySeconds();
-          vcoreSeconds += resUsage.getVcoreSeconds();
-          preemptedMemorySeconds += attemptMetrics.getPreemptedMemory();
-          preemptedVcoreSeconds += attemptMetrics.getPreemptedVcore();
+          for (Map.Entry<String, Long> entry : resUsage
+              .getResourceUsageSecondsMap().entrySet()) {
+            long value = RMServerUtils
+                .getOrDefault(resourceSecondsMap, entry.getKey(), 0L);
+            value += entry.getValue();
+            resourceSecondsMap.put(entry.getKey(), value);
+          }
+          for (Map.Entry<String, Long> entry : attemptMetrics
+              .getPreemptedResourceSecondsMap().entrySet()) {
+            long value = RMServerUtils
+                .getOrDefault(preemptedSecondsMap, entry.getKey(), 0L);
+            value += entry.getValue();
+            preemptedSecondsMap.put(entry.getKey(), value);
+          }
         }
       }
     } finally {
       this.readLock.unlock();
     }
 
-    return new RMAppMetrics(resourcePreempted,
-        numNonAMContainerPreempted, numAMContainerPreempted,
-        memorySeconds, vcoreSeconds,
-        preemptedMemorySeconds, preemptedVcoreSeconds);
+    return new RMAppMetrics(resourcePreempted, numNonAMContainerPreempted,
+        numAMContainerPreempted, resourceSecondsMap, preemptedSecondsMap);
   }
 
   @Private
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppMetrics.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppMetrics.java
index fa068ea2d88..2bb7fd1ae10 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppMetrics.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppMetrics.java
@@ -19,27 +19,27 @@
 package org.apache.hadoop.yarn.server.resourcemanager.rmapp;
 
 import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.api.records.ResourceInformation;
+import org.apache.hadoop.yarn.server.resourcemanager.RMServerUtils;
+
+import java.util.Map;
 
 public class RMAppMetrics {
   final Resource resourcePreempted;
   final int numNonAMContainersPreempted;
   final int numAMContainersPreempted;
-  final long memorySeconds;
-  final long vcoreSeconds;
-  private final long preemptedMemorySeconds;
-  private final long preemptedVcoreSeconds;
+  private final Map<String, Long> resourceSecondsMap;
+  private final Map<String, Long> preemptedResourceSecondsMap;
 
   public RMAppMetrics(Resource resourcePreempted,
       int numNonAMContainersPreempted, int numAMContainersPreempted,
-      long memorySeconds, long vcoreSeconds, long preemptedMemorySeconds,
-      long preemptedVcoreSeconds) {
+      Map<String, Long> resourceSecondsMap,
+      Map<String, Long> preemptedResourceSecondsMap) {
     this.resourcePreempted = resourcePreempted;
     this.numNonAMContainersPreempted = numNonAMContainersPreempted;
     this.numAMContainersPreempted = numAMContainersPreempted;
-    this.memorySeconds = memorySeconds;
-    this.vcoreSeconds = vcoreSeconds;
-    this.preemptedMemorySeconds = preemptedMemorySeconds;
-    this.preemptedVcoreSeconds = preemptedVcoreSeconds;
+    this.resourceSecondsMap = resourceSecondsMap;
+    this.preemptedResourceSecondsMap = preemptedResourceSecondsMap;
   }
 
   public Resource getResourcePreempted() {
@@ -55,19 +55,32 @@ public int getNumAMContainersPreempted() {
   }
 
   public long getMemorySeconds() {
-    return memorySeconds;
+    return RMServerUtils.getOrDefault(resourceSecondsMap,
+        ResourceInformation.MEMORY_MB.getName(), 0L);
   }
 
   public long getVcoreSeconds() {
-    return vcoreSeconds;
+    return RMServerUtils
+        .getOrDefault(resourceSecondsMap, ResourceInformation.VCORES.getName(),
+            0L);
   }
 
   public long getPreemptedMemorySeconds() {
-    return preemptedMemorySeconds;
+    return RMServerUtils.getOrDefault(preemptedResourceSecondsMap,
+        ResourceInformation.MEMORY_MB.getName(), 0L);
   }
 
   public long getPreemptedVcoreSeconds() {
-    return preemptedVcoreSeconds;
+    return RMServerUtils.getOrDefault(preemptedResourceSecondsMap,
+        ResourceInformation.VCORES.getName(), 0L);
+  }
+
+  public Map<String, Long> getResourceSecondsMap() {
+    return resourceSecondsMap;
+  }
+
+  public Map<String, Long> getPreemptedResourceSecondsMap() {
+    return preemptedResourceSecondsMap;
   }
 }
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/AggregateAppResourceUsage.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/AggregateAppResourceUsage.java
index f0c2b348c32..b858712f7d6 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/AggregateAppResourceUsage.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/AggregateAppResourceUsage.java
@@ -19,42 +19,38 @@
 package org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt;
 
 import org.apache.hadoop.classification.InterfaceAudience.Private;
+import org.apache.hadoop.yarn.api.records.ResourceInformation;
+import org.apache.hadoop.yarn.server.resourcemanager.RMServerUtils;
+
+import java.util.HashMap;
+import java.util.Map;
 
 @Private
 public class AggregateAppResourceUsage {
-  long memorySeconds;
-  long vcoreSeconds;
+  private Map<String, Long> resourceSecondsMap = new HashMap<>();
 
-  public AggregateAppResourceUsage(long memorySeconds, long vcoreSeconds) {
-    this.memorySeconds = memorySeconds;
-    this.vcoreSeconds = vcoreSeconds;
+  public AggregateAppResourceUsage(Map<String, Long> resourceSecondsMap) {
+    this.resourceSecondsMap.putAll(resourceSecondsMap);
   }
 
   /**
    * @return the memorySeconds
    */
   public long getMemorySeconds() {
-    return memorySeconds;
-  }
-
-  /**
-   * @param memorySeconds the memorySeconds to set
-   */
-  public void setMemorySeconds(long memorySeconds) {
-    this.memorySeconds = memorySeconds;
+    return RMServerUtils.getOrDefault(resourceSecondsMap,
+        ResourceInformation.MEMORY_MB.getName(), 0L);
   }
 
   /**
    * @return the vcoreSeconds
    */
   public long getVcoreSeconds() {
-    return vcoreSeconds;
+    return RMServerUtils
+        .getOrDefault(resourceSecondsMap, ResourceInformation.VCORES.getName(),
+            0L);
   }
 
-  /**
-   * @param vcoreSeconds the vcoreSeconds to set
-   */
-  public void setVcoreSeconds(long vcoreSeconds) {
-    this.vcoreSeconds = vcoreSeconds;
+  public Map<String, Long> getResourceUsageSecondsMap() {
+    return resourceSecondsMap;
   }
 }
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java
index 93b5383dccf..d09be8b91d6 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java
@@ -939,12 +939,9 @@ public ApplicationResourceUsageReport getApplicationResourceUsageReport() {
       }
       AggregateAppResourceUsage resUsage =
           this.attemptMetrics.getAggregateAppResourceUsage();
-      report.setMemorySeconds(resUsage.getMemorySeconds());
-      report.setVcoreSeconds(resUsage.getVcoreSeconds());
-      report.setPreemptedMemorySeconds(
-          this.attemptMetrics.getPreemptedMemory());
-      report.setPreemptedVcoreSeconds(
-          this.attemptMetrics.getPreemptedVcore());
+      report.setResourceSecondsMap(resUsage.getResourceUsageSecondsMap());
+      report.setPreemptedResourceSecondsMap(
+          this.attemptMetrics.getPreemptedResourceSecondsMap());
       return report;
     } finally {
       this.readLock.unlock();
@@ -981,11 +978,10 @@ public void recover(RMState state) {
     this.finalStatus = attemptState.getFinalApplicationStatus();
     this.startTime = attemptState.getStartTime();
     this.finishTime = attemptState.getFinishTime();
-    this.attemptMetrics.updateAggregateAppResourceUsage(
-        attemptState.getMemorySeconds(), attemptState.getVcoreSeconds());
+    this.attemptMetrics
+        .updateAggregateAppResourceUsage(attemptState.getResourceSecondsMap());
     this.attemptMetrics.updateAggregatePreemptedAppResourceUsage(
-        attemptState.getPreemptedMemorySeconds(),
-        attemptState.getPreemptedVcoreSeconds());
+        attemptState.getPreemptedResourceSecondsMap());
   }
 
   public void transferStateFromAttempt(RMAppAttempt attempt) {
@@ -1361,16 +1357,12 @@ private void rememberTargetTransitionsAndStoreState(RMAppAttemptEvent event,
     RMStateStore rmStore = rmContext.getStateStore();
     setFinishTime(System.currentTimeMillis());
 
-    ApplicationAttemptStateData attemptState =
-        ApplicationAttemptStateData.newInstance(
-            applicationAttemptId, getMasterContainer(),
-            rmStore.getCredentialsFromAppAttempt(this),
-            startTime, stateToBeStored, finalTrackingUrl, diags.toString(),
-            finalStatus, exitStatus,
-            getFinishTime(), resUsage.getMemorySeconds(),
-            resUsage.getVcoreSeconds(),
-            this.attemptMetrics.getPreemptedMemory(),
-            this.attemptMetrics.getPreemptedVcore());
+    ApplicationAttemptStateData attemptState = ApplicationAttemptStateData
+        .newInstance(applicationAttemptId, getMasterContainer(),
+            rmStore.getCredentialsFromAppAttempt(this), startTime,
+            stateToBeStored, finalTrackingUrl, diags.toString(), finalStatus,
+            exitStatus, getFinishTime(), resUsage.getResourceUsageSecondsMap(),
+            this.attemptMetrics.getPreemptedResourceSecondsMap());
     LOG.info("Updating application attempt " + applicationAttemptId
         + " with final state: " + targetedFinalState + ", and exit status: "
         + exitStatus);
@@ -1822,8 +1814,6 @@ public void transition(RMAppAttemptImpl appAttempt,
       if (newTrackingUrl != null
           && !newTrackingUrl.equals(appAttempt.originalTrackingUrl)) {
         appAttempt.originalTrackingUrl = newTrackingUrl;
-        AggregateAppResourceUsage resUsage =
-            appAttempt.attemptMetrics.getAggregateAppResourceUsage();
         ApplicationAttemptStateData attemptState = ApplicationAttemptStateData
             .newInstance(appAttempt.applicationAttemptId,
                 appAttempt.getMasterContainer(),
@@ -1832,9 +1822,9 @@ public void transition(RMAppAttemptImpl appAttempt,
                 appAttempt.startTime, appAttempt.recoveredFinalState,
                 newTrackingUrl, appAttempt.getDiagnostics(), null,
                 ContainerExitStatus.INVALID, appAttempt.getFinishTime(),
-                resUsage.getMemorySeconds(), resUsage.getVcoreSeconds(),
-                appAttempt.attemptMetrics.getPreemptedMemory(),
-                appAttempt.attemptMetrics.getPreemptedVcore());
+                appAttempt.attemptMetrics.getAggregateAppResourceUsage()
+                    .getResourceUsageSecondsMap(),
+                appAttempt.attemptMetrics.getPreemptedResourceSecondsMap());
         appAttempt.rmContext.getStateStore()
             .updateApplicationAttemptState(attemptState);
       }
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptMetrics.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptMetrics.java
index 0655609a893..e68c5d7412a 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptMetrics.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptMetrics.java
@@ -18,6 +18,9 @@
 
 package org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt;
 
+import java.util.HashMap;
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.concurrent.atomic.AtomicLong;
@@ -25,11 +28,13 @@
 import java.util.concurrent.locks.ReentrantReadWriteLock.ReadLock;
 import java.util.concurrent.locks.ReentrantReadWriteLock.WriteLock;
 
+import org.apache.commons.lang.time.DateUtils;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
 import org.apache.hadoop.yarn.api.records.ApplicationResourceUsageReport;
 import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.api.records.ResourceInformation;
 import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
 import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
@@ -49,10 +54,8 @@
   private ReadLock readLock;
   private WriteLock writeLock;
 
-  private AtomicLong finishedMemorySeconds = new AtomicLong(0);
-  private AtomicLong finishedVcoreSeconds = new AtomicLong(0);
-  private AtomicLong preemptedMemorySeconds = new AtomicLong(0);
-  private AtomicLong preemptedVcoreSeconds = new AtomicLong(0);
+  private Map<String, AtomicLong> resourceUsageMap = new ConcurrentHashMap<>();
+  private Map<String, AtomicLong> preemptedResourceMap = new ConcurrentHashMap<>();
   private RMContext rmContext;
 
   private int[][] localityStatistics =
@@ -95,18 +98,23 @@ public void updatePreemptionInfo(Resource resource, RMContainer container) {
   public Resource getResourcePreempted() {
     try {
       readLock.lock();
-      return resourcePreempted;
+      return Resource.newInstance(resourcePreempted);
     } finally {
       readLock.unlock();
    }
   }
 
   public long getPreemptedMemory() {
-    return preemptedMemorySeconds.get();
+    return preemptedResourceMap.get(ResourceInformation.MEMORY_MB.getName())
+        .get();
   }
 
   public long getPreemptedVcore() {
-    return preemptedVcoreSeconds.get();
+    return preemptedResourceMap.get(ResourceInformation.VCORES.getName()).get();
+  }
+
+  public Map<String, Long> getPreemptedResourceSecondsMap() {
+    return convertAtomicLongMaptoLongMap(preemptedResourceMap);
   }
 
   public int getNumNonAMContainersPreempted() {
@@ -122,35 +130,89 @@ public boolean getIsPreempted() {
   }
 
   public AggregateAppResourceUsage getAggregateAppResourceUsage() {
-    long memorySeconds = finishedMemorySeconds.get();
-    long vcoreSeconds = finishedVcoreSeconds.get();
+    Map<String, Long> resourcesUsed =
+        convertAtomicLongMaptoLongMap(resourceUsageMap);
 
     // Only add in the running containers if this is the active attempt.
     RMApp rmApp = rmContext.getRMApps().get(attemptId.getApplicationId());
-    if (null != rmApp) {
+    if (rmApp != null) {
       RMAppAttempt currentAttempt = rmApp.getCurrentAppAttempt();
       if (currentAttempt.getAppAttemptId().equals(attemptId)) {
-        ApplicationResourceUsageReport appResUsageReport = rmContext
-            .getScheduler().getAppResourceUsageReport(attemptId);
+        ApplicationResourceUsageReport appResUsageReport =
+            rmContext.getScheduler().getAppResourceUsageReport(attemptId);
         if (appResUsageReport != null) {
-          memorySeconds += appResUsageReport.getMemorySeconds();
-          vcoreSeconds += appResUsageReport.getVcoreSeconds();
+          Map<String, Long> tmp = appResUsageReport.getResourceSecondsMap();
+          for (Map.Entry<String, Long> entry : tmp.entrySet()) {
+            if (resourcesUsed.containsKey(entry.getKey())) {
+              Long value = resourcesUsed.get(entry.getKey());
+              value += entry.getValue();
+              resourcesUsed.put(entry.getKey(), value);
+            } else {
+              resourcesUsed.put(entry.getKey(), entry.getValue());
+            }
+          }
         }
       }
     }
-    return new AggregateAppResourceUsage(memorySeconds, vcoreSeconds);
+    return new AggregateAppResourceUsage(resourcesUsed);
+  }
+
+  public void updateAggregateAppResourceUsage(Resource allocated,
+      long deltaUsedMillis) {
+    updateUsageMap(allocated, deltaUsedMillis, resourceUsageMap);
+  }
+
+  public void updateAggregatePreemptedAppResourceUsage(Resource allocated,
+      long deltaUsedMillis) {
+    updateUsageMap(allocated, deltaUsedMillis, preemptedResourceMap);
   }
 
-  public void updateAggregateAppResourceUsage(long finishedMemorySeconds,
-      long finishedVcoreSeconds) {
-    this.finishedMemorySeconds.addAndGet(finishedMemorySeconds);
-    this.finishedVcoreSeconds.addAndGet(finishedVcoreSeconds);
+  public void updateAggregateAppResourceUsage(
+      Map<String, Long> resourceSecondsMap) {
+    updateUsageMap(resourceSecondsMap, resourceUsageMap);
   }
 
   public void updateAggregatePreemptedAppResourceUsage(
-      long preemptedMemorySeconds, long preemptedVcoreSeconds) {
-    this.preemptedMemorySeconds.addAndGet(preemptedMemorySeconds);
-    this.preemptedVcoreSeconds.addAndGet(preemptedVcoreSeconds);
+      Map<String, Long> preemptedResourceSecondsMap) {
+    updateUsageMap(preemptedResourceSecondsMap, preemptedResourceMap);
+  }
+
+  private void updateUsageMap(Resource allocated, long deltaUsedMillis,
+      Map<String, AtomicLong> targetMap) {
+    for (ResourceInformation entry : allocated.getResources()) {
+      AtomicLong resourceUsed;
+      if (!targetMap.containsKey(entry.getName())) {
+        resourceUsed = new AtomicLong(0);
+        targetMap.put(entry.getName(), resourceUsed);
+      }
+      resourceUsed = targetMap.get(entry.getName());
+      resourceUsed.addAndGet((entry.getValue() * deltaUsedMillis)
+          / DateUtils.MILLIS_PER_SECOND);
+    }
+  }
+
+  private void updateUsageMap(Map<String, Long> sourceMap,
+      Map<String, AtomicLong> targetMap) {
+    for (Map.Entry<String, Long> entry : sourceMap.entrySet()) {
+      AtomicLong resourceUsed;
+      if (!targetMap.containsKey(entry.getKey())) {
+        resourceUsed = new AtomicLong(0);
+        targetMap.put(entry.getKey(), resourceUsed);
+      }
+      resourceUsed = targetMap.get(entry.getKey());
+      resourceUsed.set(entry.getValue());
+    }
+  }
+
+  private Map<String, Long> convertAtomicLongMaptoLongMap(
+      Map<String, AtomicLong> source) {
+    Map<String, Long> ret = new HashMap<>();
+    for (Map.Entry<String, AtomicLong> entry : source.entrySet()) {
+      ret.put(entry.getKey(), entry.getValue().get());
+    }
+    return ret;
   }
 
   public void incNumAllocatedContainers(NodeType containerType,
@@ -168,7 +230,7 @@ public int getTotalAllocatedContainers() {
   }
 
   public Resource getApplicationAttemptHeadroom() {
-    return applicationHeadroom;
+    return Resource.newInstance(applicationHeadroom);
   }
 
   public void setApplicationAttemptHeadRoom(Resource headRoom) {
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerImpl.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerImpl.java
index b185495271a..76933f1d12c 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerImpl.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerImpl.java
@@ -25,7 +25,6 @@
 import java.util.concurrent.locks.ReentrantReadWriteLock.ReadLock;
 import java.util.concurrent.locks.ReentrantReadWriteLock.WriteLock;
 
-import org.apache.commons.lang.time.DateUtils;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
@@ -716,20 +715,15 @@ private static void updateAttemptMetrics(RMContainerImpl container) {
 
     if (rmAttempt != null) {
       long usedMillis = container.finishTime - container.creationTime;
-      long memorySeconds = resource.getMemorySize()
-          * usedMillis / DateUtils.MILLIS_PER_SECOND;
-      long vcoreSeconds = resource.getVirtualCores()
-          * usedMillis / DateUtils.MILLIS_PER_SECOND;
       rmAttempt.getRMAppAttemptMetrics()
-          .updateAggregateAppResourceUsage(memorySeconds,vcoreSeconds);
+          .updateAggregateAppResourceUsage(resource, usedMillis);
       // If this is a preempted container, update preemption metrics
       if (ContainerExitStatus.PREEMPTED == container.finishedStatus
           .getExitStatus()) {
-        rmAttempt.getRMAppAttemptMetrics().updatePreemptionInfo(resource,
-            container);
+        rmAttempt.getRMAppAttemptMetrics()
+            .updatePreemptionInfo(resource, container);
         rmAttempt.getRMAppAttemptMetrics()
-            .updateAggregatePreemptedAppResourceUsage(memorySeconds,
-                vcoreSeconds);
+            .updateAggregatePreemptedAppResourceUsage(resource, usedMillis);
       }
     }
   }
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java
index 4f51e4e2f1b..fd50f203d9b 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java
@@ -96,6 +96,7 @@
 import org.apache.hadoop.yarn.server.utils.Lock;
 import org.apache.hadoop.yarn.util.Clock;
 import org.apache.hadoop.yarn.util.SystemClock;
+import org.apache.hadoop.yarn.util.resource.ResourceUtils;
 import org.apache.hadoop.yarn.util.resource.Resources;
 
 import com.google.common.annotations.VisibleForTesting;
@@ -795,7 +796,7 @@ public void killAllAppsInQueue(String queueName)
       writeLock.unlock();
     }
   }
-  
+
   /**
    * Process resource update on a node.
    */
@@ -898,12 +899,12 @@ public void setClusterMaxPriority(Configuration conf)
     LOG.info("Updated the cluste max priority to maxClusterLevelAppPriority = "
         + maxClusterLevelAppPriority);
   }
-  
+
  /**
   * Sanity check increase/decrease request, and return
   * SchedulerContainerResourceChangeRequest according to given
  * UpdateContainerRequest.
-   * 
+   *
   * <pre>
    * - Returns non-null value means validation succeeded
    * - Throw exception when any other error happens
@@ -1327,8 +1328,31 @@ protected void rollbackContainerUpdate(
    * @param container Container.
    */
   public void asyncContainerRelease(RMContainer container) {
-    this.rmContext.getDispatcher().getEventHandler()
-        .handle(new ReleaseContainerEvent(container));
+    this.rmContext.getDispatcher().getEventHandler().handle(
+        new ReleaseContainerEvent(container));
+  }
+
+  /**
+   * Get a Resource object with the minimum allocation possible.
+   *
+   * @return a Resource object with the minimum allocation for the scheduler
+   */
+  public Resource getMinimumAllocation() {
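+    // Let ResourceUtils derive the minimum allocation from the configured
+    // resource types (memory and vcores plus any custom resource types).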
+    Resource ret = ResourceUtils.getResourceTypesMinimumAllocation();
+    LOG.info("Minimum allocation = " + ret);
+    return ret;
+  }
+
+  /**
+   * Get a Resource object with the maximum allocation possible.
+   *
+   * @return a Resource object with the maximum allocation for the scheduler
+   */
+  public Resource getMaximumAllocation() {
+    Resource ret = ResourceUtils.getResourceTypesMaximumAllocation();
+    LOG.info("Maximum allocation = " + ret);
+    return ret;
   }
 
   @Override
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/ClusterNodeTracker.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/ClusterNodeTracker.java
index b23b2be687c..07b5f5ca272 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/ClusterNodeTracker.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/ClusterNodeTracker.java
@@ -56,7 +56,7 @@
   private Map<String, N> nodeNameToNodeMap = new HashMap<>();
   private Map<String, List<N>> nodesPerRack = new HashMap<>();
 
-  private Resource clusterCapacity = Resources.clone(Resources.none());
+  private Resource clusterCapacity = Resources.createResource(0, 0);
   private Resource staleClusterCapacity = null;
 
   // Max allocation
@@ -220,10 +220,15 @@ public Resource getMaxAllowedAllocation() {
         return configuredMaxAllocation;
       }
 
-      return Resources.createResource(
-          Math.min(configuredMaxAllocation.getMemorySize(), maxNodeMemory),
-          Math.min(configuredMaxAllocation.getVirtualCores(), maxNodeVCores)
-      );
+      Resource ret = Resources.clone(configuredMaxAllocation);
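+      // Cap only memory and vcores at the largest values seen on any node;
+      // other resource types from the configured maximum are left untouched.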
+      if (ret.getMemorySize() > maxNodeMemory) {
+        ret.setMemorySize(maxNodeMemory);
+      }
+      if (ret.getVirtualCores() > maxNodeVCores) {
+        ret.setVirtualCores(maxNodeVCores);
+      }
+
+      return ret;
     } finally {
       readLock.unlock();
     }
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplicationAttempt.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplicationAttempt.java
index 05dc8343c4b..1e1f32790e8 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplicationAttempt.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplicationAttempt.java
@@ -55,11 +55,13 @@
 import org.apache.hadoop.yarn.api.records.NodeId;
 import org.apache.hadoop.yarn.api.records.Priority;
 import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.api.records.ResourceInformation;
 import org.apache.hadoop.yarn.api.records.ResourceRequest;
 import org.apache.hadoop.yarn.api.records.UpdateContainerError;
 import org.apache.hadoop.yarn.nodelabels.CommonNodeLabelsManager;
 import org.apache.hadoop.yarn.server.api.ContainerType;
 import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
+import org.apache.hadoop.yarn.server.resourcemanager.RMServerUtils;
 import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.AggregateAppResourceUsage;
@@ -107,9 +109,7 @@
 
   private static final long MEM_AGGREGATE_ALLOCATION_CACHE_MSECS = 3000;
   protected long lastMemoryAggregateAllocationUpdateTime = 0;
-  private long lastMemorySeconds = 0;
-  private long lastVcoreSeconds = 0;
-
+  private Map<String, Long> lastResourceSecondsMap = new HashMap<>();
   protected final AppSchedulingInfo appSchedulingInfo;
   protected ApplicationAttemptId attemptId;
   protected Map<ContainerId, RMContainer> liveContainers =
@@ -1002,22 +1002,23 @@ private AggregateAppResourceUsage getRunningAggregateAppResourceUsage() {
     // recently.
     if ((currentTimeMillis - lastMemoryAggregateAllocationUpdateTime)
         > MEM_AGGREGATE_ALLOCATION_CACHE_MSECS) {
-      long memorySeconds = 0;
-      long vcoreSeconds = 0;
+      Map<String, Long> resourceSecondsMap = new HashMap<>();
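+      // Accumulate resource-seconds per resource name across all live
+      // containers: the value of each resource multiplied by seconds in use.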
       for (RMContainer rmContainer : this.liveContainers.values()) {
         long usedMillis = currentTimeMillis - rmContainer.getCreationTime();
         Resource resource = rmContainer.getContainer().getResource();
-        memorySeconds += resource.getMemorySize() * usedMillis /
-            DateUtils.MILLIS_PER_SECOND;
-        vcoreSeconds += resource.getVirtualCores() * usedMillis  
-            / DateUtils.MILLIS_PER_SECOND;
+        for (ResourceInformation entry : resource.getResources()) {
+          long value = RMServerUtils
+              .getOrDefault(resourceSecondsMap, entry.getName(), 0L);
+          value += entry.getValue() * usedMillis
+              / DateUtils.MILLIS_PER_SECOND;
+          resourceSecondsMap.put(entry.getName(), value);
+        }
       }
 
       lastMemoryAggregateAllocationUpdateTime = currentTimeMillis;
-      lastMemorySeconds = memorySeconds;
-      lastVcoreSeconds = vcoreSeconds;
+      lastResourceSecondsMap = resourceSecondsMap;
     }
-    return new AggregateAppResourceUsage(lastMemorySeconds, lastVcoreSeconds);
+    return new AggregateAppResourceUsage(lastResourceSecondsMap);
   }
 
   public ApplicationResourceUsageReport getResourceUsageReport() {
@@ -1032,6 +1033,11 @@ public ApplicationResourceUsageReport getResourceUsageReport() {
       Resource cluster = rmContext.getScheduler().getClusterResource();
       ResourceCalculator calc =
           rmContext.getScheduler().getResourceCalculator();
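+      // Preempted resource-seconds are not tracked while the attempt is
+      // running, so report zero for the mandatory resources here.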
+      Map<String, Long> preemptedResourceSecondsMaps = new HashMap<>();
+      preemptedResourceSecondsMaps
+          .put(ResourceInformation.MEMORY_MB.getName(), 0L);
+      preemptedResourceSecondsMaps
+          .put(ResourceInformation.VCORES.getName(), 0L);
       float queueUsagePerc = 0.0f;
       float clusterUsagePerc = 0.0f;
       if (!calc.isInvalidDivisor(cluster)) {
@@ -1041,15 +1047,15 @@ public ApplicationResourceUsageReport getResourceUsageReport() {
           queueUsagePerc = calc.divide(cluster, usedResourceClone,
               Resources.multiply(cluster, queueCapacityPerc)) * 100;
         }
-        clusterUsagePerc = calc.divide(cluster, usedResourceClone, cluster)
-            * 100;
+        clusterUsagePerc =
+            calc.divide(cluster, usedResourceClone, cluster) * 100;
       }
-      return ApplicationResourceUsageReport.newInstance(liveContainers.size(),
-          reservedContainers.size(), usedResourceClone, reservedResourceClone,
-          Resources.add(usedResourceClone, reservedResourceClone),
-          runningResourceUsage.getMemorySeconds(),
-          runningResourceUsage.getVcoreSeconds(), queueUsagePerc,
-          clusterUsagePerc, 0, 0);
+      return ApplicationResourceUsageReport
+          .newInstance(liveContainers.size(), reservedContainers.size(),
+              usedResourceClone, reservedResourceClone,
+              Resources.add(usedResourceClone, reservedResourceClone),
+              runningResourceUsage.getResourceUsageSecondsMap(), queueUsagePerc,
+              clusterUsagePerc, preemptedResourceSecondsMaps);
     } finally {
       writeLock.unlock();
     }
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java
index e70e7cc30ec..86b9591d0f9 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java
@@ -147,6 +147,7 @@
 import org.apache.hadoop.yarn.server.utils.Lock;
 import org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator;
 import org.apache.hadoop.yarn.util.resource.ResourceCalculator;
+import org.apache.hadoop.yarn.util.resource.ResourceUtils;
 import org.apache.hadoop.yarn.util.resource.Resources;
 
 @LimitedPrivate("yarn")
@@ -318,8 +319,8 @@ void initScheduler(Configuration configuration) throws
       this.csConfProvider.init(configuration);
       this.conf = this.csConfProvider.loadConfiguration(configuration);
       validateConf(this.conf);
-      this.minimumAllocation = this.conf.getMinimumAllocation();
-      initMaximumResourceCapability(this.conf.getMaximumAllocation());
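+      // The base scheduler now computes minimum/maximum allocations so that
+      // all configured resource types are taken into account.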
+      this.minimumAllocation = super.getMinimumAllocation();
+      initMaximumResourceCapability(super.getMaximumAllocation());
       this.calculator = this.conf.getResourceCalculator();
       this.usePortForNodeName = this.conf.getUsePortForNodeName();
       this.applications = new ConcurrentHashMap<>();
@@ -433,12 +434,15 @@ public void reinitialize(Configuration newConf, RMContext rmContext)
       validateConf(this.conf);
       try {
         LOG.info("Re-initializing queues...");
-        refreshMaximumAllocation(this.conf.getMaximumAllocation());
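+        // Re-read the maximum allocation so changes to resource types in the
+        // new configuration take effect.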
+        refreshMaximumAllocation(
+            ResourceUtils.fetchMaximumAllocationFromConfig(this.conf));
         reinitializeQueues(this.conf);
       } catch (Throwable t) {
         this.conf = oldConf;
-        refreshMaximumAllocation(this.conf.getMaximumAllocation());
-        throw new IOException("Failed to re-init queues : "+ t.getMessage(), t);
+        refreshMaximumAllocation(
+            ResourceUtils.fetchMaximumAllocationFromConfig(this.conf));
+        throw new IOException("Failed to re-init queues : " + t.getMessage(),
+            t);
       }
 
       // update lazy preemption
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerConfiguration.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerConfiguration.java
index 1e22e0b7ed4..8f3d59d080a 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerConfiguration.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerConfiguration.java
@@ -49,6 +49,7 @@
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.policy.SchedulableEntity;
 import org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator;
 import org.apache.hadoop.yarn.util.resource.ResourceCalculator;
+import org.apache.hadoop.yarn.util.resource.ResourceUtils;
 import org.apache.hadoop.yarn.util.resource.Resources;
 
 import java.util.ArrayList;
@@ -773,16 +774,6 @@ public Resource getMinimumAllocation() {
     return Resources.createResource(minimumMemory, minimumCores);
   }
 
-  public Resource getMaximumAllocation() {
-    int maximumMemory = getInt(
-        YarnConfiguration.RM_SCHEDULER_MAXIMUM_ALLOCATION_MB,
-        YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_MB);
-    int maximumCores = getInt(
-        YarnConfiguration.RM_SCHEDULER_MAXIMUM_ALLOCATION_VCORES,
-        YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_VCORES);
-    return Resources.createResource(maximumMemory, maximumCores);
-  }
-
   @Private
   public Priority getQueuePriority(String queue) {
     String queuePolicyPrefix = getQueuePrefix(queue);
@@ -806,6 +797,8 @@ public void setQueuePriority(String queue, int priority) {
    * @return setting specified per queue else falls back to the cluster setting
    */
   public Resource getMaximumAllocationPerQueue(String queue) {
+    // Only support to specify memory and vcores maximum allocation per queue
+    // for now.
     String queuePrefix = getQueuePrefix(queue);
     long maxAllocationMbPerQueue = getInt(queuePrefix + MAXIMUM_ALLOCATION_MB,
         (int)UNDEFINED);
@@ -817,7 +810,7 @@ public Resource getMaximumAllocationPerQueue(String queue) {
       LOG.debug("max alloc vcores per queue for " + queue + " is "
           + maxAllocationVcoresPerQueue);
     }
-    Resource clusterMax = getMaximumAllocation();
+    Resource clusterMax = ResourceUtils.fetchMaximumAllocationFromConfig(this);
     if (maxAllocationMbPerQueue == (int)UNDEFINED) {
       LOG.info("max alloc mb per queue for " + queue + " is undefined");
       maxAllocationMbPerQueue = clusterMax.getMemorySize();
@@ -826,8 +819,11 @@ public Resource getMaximumAllocationPerQueue(String queue) {
        LOG.info("max alloc vcore per queue for " + queue + " is undefined");
       maxAllocationVcoresPerQueue = clusterMax.getVirtualCores();
     }
-    Resource result = Resources.createResource(maxAllocationMbPerQueue,
-        maxAllocationVcoresPerQueue);
+    // Copy from clusterMax and overwrite per-queue's maximum memory/vcore
+    // allocation.
+    Resource result = Resources.clone(clusterMax);
+    result.setMemorySize(maxAllocationMbPerQueue);
+    result.setVirtualCores(maxAllocationVcoresPerQueue);
     if (maxAllocationMbPerQueue > clusterMax.getMemorySize()
         || maxAllocationVcoresPerQueue > clusterMax.getVirtualCores()) {
       throw new IllegalArgumentException(
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java
index c352ba4bcff..993f0894923 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java
@@ -540,8 +540,8 @@ public void reinitialize(
       // since we have already told running AM's the size
       Resource oldMax = getMaximumAllocation();
       Resource newMax = newlyParsedLeafQueue.getMaximumAllocation();
-      if (newMax.getMemorySize() < oldMax.getMemorySize()
-          || newMax.getVirtualCores() < oldMax.getVirtualCores()) {
+
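+      // Resources.fitsIn compares every resource type, not just memory and
+      // vcores, so shrinking any resource type is rejected.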
+      if (!Resources.fitsIn(oldMax, newMax)) {
         throw new IOException("Trying to reinitialize " + getQueuePath()
             + " the maximum allocation size can not be decreased!"
             + " Current setting: " + oldMax + ", trying to set it to: "
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java
index 04aae19199c..a1ee8a19a59 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java
@@ -1282,8 +1282,8 @@ private void initScheduler(Configuration conf) throws IOException {
       this.conf = new FairSchedulerConfiguration(conf);
       validateConf(this.conf);
       authorizer = YarnAuthorizationProvider.getInstance(conf);
-      minimumAllocation = this.conf.getMinimumAllocation();
-      initMaximumResourceCapability(this.conf.getMaximumAllocation());
+      minimumAllocation = super.getMinimumAllocation();
+      initMaximumResourceCapability(super.getMaximumAllocation());
       incrAllocation = this.conf.getIncrementAllocation();
       updateReservationThreshold();
       continuousSchedulingEnabled = this.conf.isContinuousSchedulingEnabled();
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java
index 5c923552fc2..6d176c1a154 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java
@@ -241,17 +241,8 @@ private synchronized void initScheduler(Configuration conf) {
     //Use ConcurrentSkipListMap because applications need to be ordered
     this.applications =
         new ConcurrentSkipListMap<>();
-    this.minimumAllocation =
-        Resources.createResource(conf.getInt(
-            YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB,
-            YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_MB));
-    initMaximumResourceCapability(
-        Resources.createResource(conf.getInt(
-            YarnConfiguration.RM_SCHEDULER_MAXIMUM_ALLOCATION_MB,
-            YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_MB),
-          conf.getInt(
-            YarnConfiguration.RM_SCHEDULER_MAXIMUM_ALLOCATION_VCORES,
-            YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_VCORES)));
+    this.minimumAllocation = super.getMinimumAllocation();
+    initMaximumResourceCapability(super.getMaximumAllocation());
     this.usePortForNodeName = conf.getBoolean(
         YarnConfiguration.RM_SCHEDULER_INCLUDE_PORT_IN_NODE_NAME,
         YarnConfiguration.DEFAULT_RM_SCHEDULER_USE_PORT_FOR_NODE_NAME);
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/MetricsOverviewTable.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/MetricsOverviewTable.java
index fe7b2470044..aebac0cf4a4 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/MetricsOverviewTable.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/MetricsOverviewTable.java
@@ -19,17 +19,21 @@
 package org.apache.hadoop.yarn.server.resourcemanager.webapp;
 
 import org.apache.hadoop.util.StringUtils;
+import org.apache.hadoop.yarn.api.records.ResourceTypeInfo;
 import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
 import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ClusterMetricsInfo;
 import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.SchedulerInfo;
 import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.UserMetricsInfo;
 
+import org.apache.hadoop.yarn.util.resource.ResourceUtils;
 import org.apache.hadoop.yarn.webapp.hamlet.Hamlet;
 import org.apache.hadoop.yarn.webapp.hamlet.Hamlet.DIV;
 import org.apache.hadoop.yarn.webapp.view.HtmlBlock;
 
 import com.google.inject.Inject;
 
+import java.util.Arrays;
+
 /**
  * Provides an table with an overview of many cluster wide metrics and if
  * per user metrics are enabled it will show an overview of what the
@@ -168,8 +172,8 @@ protected void render(Block html) {
         
       }
     }
-    
-    SchedulerInfo schedulerInfo=new SchedulerInfo(this.rm);
+
+    SchedulerInfo schedulerInfo = new SchedulerInfo(this.rm);
     
     div.h3("Scheduler Metrics").
     table("#schedulermetricsoverview").
@@ -186,7 +190,8 @@ protected void render(Block html) {
     tbody().$class("ui-widget-content").
       tr().
         td(String.valueOf(schedulerInfo.getSchedulerType())).
-        td(String.valueOf(schedulerInfo.getSchedulerResourceTypes())).
+        td(Arrays.toString(ResourceUtils.getResourcesTypeInfo()
+            .toArray(new ResourceTypeInfo[0]))).
         td(schedulerInfo.getMinAllocation().toString()).
         td(schedulerInfo.getMaxAllocation().toString()).
         td(String.valueOf(schedulerInfo.getMaxClusterLevelAppPriority())).
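The scheduler overview cell now prints the registered resource types rather than the scheduler's memory/cpu enum. A small sketch of what that expression evaluates to; with only the two mandatory types registered it prints memory-mb and vcores:

```java
import java.util.Arrays;

import org.apache.hadoop.yarn.api.records.ResourceTypeInfo;
import org.apache.hadoop.yarn.util.resource.ResourceUtils;

public class ResourceTypesCellSketch {
  public static void main(String[] args) {
    // Same expression the table cell above uses.
    System.out.println(Arrays.toString(ResourceUtils.getResourcesTypeInfo()
        .toArray(new ResourceTypeInfo[0])));
  }
}
```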
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMAppBlock.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMAppBlock.java
index 3f774e52947..bbc35fc3b9e 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMAppBlock.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMAppBlock.java
@@ -44,6 +44,7 @@
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptMetrics;
 import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.AppAttemptInfo;
 import org.apache.hadoop.yarn.server.webapp.AppBlock;
+import org.apache.hadoop.yarn.util.StringHelper;
 import org.apache.hadoop.yarn.util.resource.Resources;
 import org.apache.hadoop.yarn.webapp.hamlet.Hamlet;
 import org.apache.hadoop.yarn.webapp.hamlet.Hamlet.DIV;
@@ -106,15 +107,12 @@ protected void createApplicationMetricsTable(Block html){
           attemptResourcePreempted)
         ._("Number of Non-AM Containers Preempted from Current Attempt:",
           attemptNumNonAMContainerPreempted)
-        ._("Aggregate Resource Allocation:",
-          String.format("%d MB-seconds, %d vcore-seconds",
-              appMetrics == null ? "N/A" : appMetrics.getMemorySeconds(),
-              appMetrics == null ? "N/A" : appMetrics.getVcoreSeconds()))
+        ._("Aggregate Resource Allocation:", appMetrics == null ? "N/A" :
+            StringHelper
+                .getResourceSecondsString(appMetrics.getResourceSecondsMap()))
         ._("Aggregate Preempted Resource Allocation:",
-          String.format("%d MB-seconds, %d vcore-seconds",
-            appMetrics == null ? "N/A" : appMetrics.getPreemptedMemorySeconds(),
-            appMetrics == null ? "N/A" :
-                appMetrics.getPreemptedVcoreSeconds()));
+            appMetrics == null ? "N/A" : StringHelper.getResourceSecondsString(
+                appMetrics.getPreemptedResourceSecondsMap()));
 
     pdiv._();
   }
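The fixed "%d MB-seconds, %d vcore-seconds" format could not represent custom resource types; the helper renders one entry per resource in the map. A hedged sketch of its use, assuming getResourceSecondsString accepts the Map<String, Long> returned by getResourceSecondsMap(), which is how it is invoked above:

```java
import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.yarn.api.records.ResourceInformation;
import org.apache.hadoop.yarn.util.StringHelper;

public class ResourceSecondsStringSketch {
  public static void main(String[] args) {
    Map<String, Long> secondsMap = new HashMap<>();
    secondsMap.put(ResourceInformation.MEMORY_MB.getName(), 16384L);
    secondsMap.put(ResourceInformation.VCORES.getName(), 64L);
    // Entries for any additional resource types are rendered as well.
    System.out.println(StringHelper.getResourceSecondsString(secondsMap));
  }
}
```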
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/AppInfo.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/AppInfo.java
index 71fbfc44e5e..880d22fd3f0 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/AppInfo.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/AppInfo.java
@@ -102,6 +102,7 @@
   private long vcoreSeconds;
   protected float queueUsagePercentage;
   protected float clusterUsagePercentage;
+  protected Map<String, Long> resourceSecondsMap;
 
   // preemption info fields
   private long preemptedResourceMB;
@@ -110,6 +111,7 @@
   private int numAMContainerPreempted;
   private long preemptedMemorySeconds;
   private long preemptedVcoreSeconds;
+  protected Map<String, Long> preemptedResourceSecondsMap;
 
   // list of resource requests
   @XmlElement(name = "resourceRequests")
@@ -236,8 +238,10 @@ public AppInfo(ResourceManager rm, RMApp app, Boolean hasAccess,
           appMetrics.getResourcePreempted().getVirtualCores();
       memorySeconds = appMetrics.getMemorySeconds();
       vcoreSeconds = appMetrics.getVcoreSeconds();
+      resourceSecondsMap = appMetrics.getResourceSecondsMap();
       preemptedMemorySeconds = appMetrics.getPreemptedMemorySeconds();
       preemptedVcoreSeconds = appMetrics.getPreemptedVcoreSeconds();
+      preemptedResourceSecondsMap = appMetrics.getPreemptedResourceSecondsMap();
       ApplicationSubmissionContext appSubmissionContext =
           app.getApplicationSubmissionContext();
       unmanagedApplication = appSubmissionContext.getUnmanagedAM();
@@ -460,6 +464,22 @@ public long getReservedVCores() {
     return this.reservedVCores;
   }
 
+  public long getPreemptedMB() {
+    return preemptedResourceMB;
+  }
+
+  public long getPreemptedVCores() {
+    return preemptedResourceVCores;
+  }
+
+  public int getNumNonAMContainersPreempted() {
+    return numNonAMContainerPreempted;
+  }
+  
+  public int getNumAMContainersPreempted() {
+    return numAMContainerPreempted;
+  }
+
   public long getMemorySeconds() {
     return memorySeconds;
   }
@@ -468,6 +488,10 @@ public long getVcoreSeconds() {
     return vcoreSeconds;
   }
 
+  public Map<String, Long> getResourceSecondsMap() {
+    return resourceSecondsMap;
+  }
+
   public long getPreemptedMemorySeconds() {
     return preemptedMemorySeconds;
   }
@@ -476,6 +500,10 @@ public long getPreemptedVcoreSeconds() {
     return preemptedVcoreSeconds;
   }
 
+  public Map<String, Long> getPreemptedResourceSecondsMap() {
+    return preemptedResourceSecondsMap;
+  }
+
   public List getResourceRequests() {
     return this.resourceRequests;
   }
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/ResourceInfo.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/ResourceInfo.java
index 5083943b65a..dd80d205a17 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/ResourceInfo.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/ResourceInfo.java
@@ -20,46 +20,71 @@
 
 import javax.xml.bind.annotation.XmlAccessType;
 import javax.xml.bind.annotation.XmlAccessorType;
+import javax.xml.bind.annotation.XmlElement;
 import javax.xml.bind.annotation.XmlRootElement;
 
 import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.util.resource.Resources;
 
 @XmlRootElement
-@XmlAccessorType(XmlAccessType.FIELD)
+@XmlAccessorType(XmlAccessType.NONE)
 public class ResourceInfo {
+
+  @XmlElement
   long memory;
+  @XmlElement
   int vCores;
-  
+
+  private Resource resources;
+
   public ResourceInfo() {
   }
 
   public ResourceInfo(Resource res) {
     memory = res.getMemorySize();
     vCores = res.getVirtualCores();
+    resources = Resources.clone(res);
   }
 
   public long getMemorySize() {
-    return memory;
+    if (resources == null) {
+      resources = Resource.newInstance(memory, vCores);
+    }
+    return resources.getMemorySize();
   }
 
   public int getvCores() {
-    return vCores;
+    if (resources == null) {
+      resources = Resource.newInstance(memory, vCores);
+    }
+    return resources.getVirtualCores();
   }
-  
+
   @Override
   public String toString() {
-    return "";
+    return getResource().toString();
   }
 
   public void setMemory(int memory) {
+    if (resources == null) {
+      resources = Resource.newInstance(memory, vCores);
+    }
     this.memory = memory;
+    resources.setMemorySize(memory);
   }
 
   public void setvCores(int vCores) {
+    if (resources == null) {
+      resources = Resource.newInstance(memory, vCores);
+    }
     this.vCores = vCores;
+    resources.setVirtualCores(vCores);
   }
 
   public Resource getResource() {
-    return Resource.newInstance(memory, vCores);
+    if (resources == null) {
+      resources = Resource.newInstance(memory, vCores);
+    }
+    return Resource.newInstance(resources);
   }
 }
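Two properties of the rewritten class are worth noting: the lazy Resource.newInstance(memory, vCores) fallback keeps JAXB-built instances (no-arg constructor plus field injection) working, and getResource() hands out a copy so callers cannot mutate the DAO's internal state. A short sketch of that copy contract:

```java
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ResourceInfo;

public class ResourceInfoCopySketch {
  public static void main(String[] args) {
    ResourceInfo info = new ResourceInfo(Resource.newInstance(4096, 4));
    Resource copy = info.getResource();
    copy.setMemorySize(1);                    // mutates only the copy
    System.out.println(info.getMemorySize()); // still 4096
  }
}
```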
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/SchedulerInfo.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/SchedulerInfo.java
index cf93edd2c1e..81491b14ce1 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/SchedulerInfo.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/SchedulerInfo.java
@@ -18,6 +18,7 @@
 
 package org.apache.hadoop.yarn.server.resourcemanager.webapp.dao;
 
+import java.util.Arrays;
 import java.util.EnumSet;
 
 import javax.xml.bind.annotation.XmlRootElement;
@@ -73,7 +74,7 @@ public ResourceInfo getMaxAllocation() {
   }
 
   public String getSchedulerResourceTypes() {
-    return this.schedulingResourceTypes.toString();
+    return Arrays.toString(minAllocResource.getResource().getResources());
   }
 
   public int getMaxClusterLevelAppPriority() {
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/proto/yarn_server_resourcemanager_recovery.proto hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/proto/yarn_server_resourcemanager_recovery.proto
index 0300d65f857..35c77ab78b4 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/proto/yarn_server_resourcemanager_recovery.proto
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/proto/yarn_server_resourcemanager_recovery.proto
@@ -88,6 +88,8 @@ message ApplicationAttemptStateDataProto {
     optional int64 finish_time = 12;
     optional int64 preempted_memory_seconds = 13;
     optional int64 preempted_vcore_seconds = 14;
+    repeated StringLongMapProto application_resource_usage_map = 15;
+    repeated StringLongMapProto preempted_resource_usage_map = 16;
 }
 
 message EpochProto {
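A hedged sketch of how a state store might fill the new repeated fields when persisting an attempt. It assumes StringLongMapProto is the generic string-to-int64 map-entry message from yarn_protos with key/value builder setters; its definition is not part of this hunk.

```java
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.yarn.proto.YarnProtos.StringLongMapProto;

public final class ResourceSecondsProtoSketch {
  // Converts an in-memory resource-seconds map into proto entries.
  public static List<StringLongMapProto> toProto(Map<String, Long> map) {
    List<StringLongMapProto> entries = new ArrayList<>();
    for (Map.Entry<String, Long> e : map.entrySet()) {
      entries.add(StringLongMapProto.newBuilder()
          .setKey(e.getKey())
          .setValue(e.getValue())
          .build());
    }
    return entries;
  }
}
```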
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockNM.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockNM.java
index db92d7c98dc..a02ac8429de 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockNM.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockNM.java
@@ -47,14 +47,14 @@
 import org.apache.hadoop.yarn.server.utils.BuilderUtils;
 import org.apache.hadoop.yarn.util.Records;
 import org.apache.hadoop.yarn.util.YarnVersionInfo;
+import org.apache.hadoop.yarn.util.resource.Resources;
 import org.mortbay.log.Log;
 
 public class MockNM {
 
   private int responseId;
   private NodeId nodeId;
-  private long memory;
-  private int vCores;
+  private Resource capability;
   private ResourceTrackerService resourceTracker;
   private int httpPort = 2;
   private MasterKey currentContainerTokenMasterKey;
@@ -75,13 +75,25 @@ public MockNM(String nodeIdStr, int memory, ResourceTrackerService resourceTrack
 
   public MockNM(String nodeIdStr, int memory, int vcores,
       ResourceTrackerService resourceTracker) {
-    this(nodeIdStr, memory, vcores, resourceTracker, YarnVersionInfo.getVersion());
+    this(nodeIdStr, memory, vcores, resourceTracker,
+        YarnVersionInfo.getVersion());
   }
 
   public MockNM(String nodeIdStr, int memory, int vcores,
       ResourceTrackerService resourceTracker, String version) {
-    this.memory = memory;
-    this.vCores = vcores;
+    this(nodeIdStr, Resource.newInstance(memory, vcores), resourceTracker,
+        version);
+  }
+
+  public MockNM(String nodeIdStr, Resource capability,
+      ResourceTrackerService resourceTracker) {
+    this(nodeIdStr, capability, resourceTracker,
+        YarnVersionInfo.getVersion());
+  }
+
+  public MockNM(String nodeIdStr, Resource capability,
+      ResourceTrackerService resourceTracker, String version) {
+    this.capability = capability;
     this.resourceTracker = resourceTracker;
     this.version = version;
     String[] splits = nodeIdStr.split(":");
@@ -146,8 +158,7 @@ public RegisterNodeManagerResponse registerNode(
         RegisterNodeManagerRequest.class);
     req.setNodeId(nodeId);
     req.setHttpPort(httpPort);
-    Resource resource = BuilderUtils.newResource(memory, vCores);
-    req.setResource(resource);
+    req.setResource(capability);
     req.setContainerStatuses(containerReports);
     req.setNMVersion(version);
     req.setRunningApplications(runningApplications);
@@ -158,8 +169,7 @@ public RegisterNodeManagerResponse registerNode(
     this.currentNMTokenMasterKey = registrationResponse.getNMTokenMasterKey();
     Resource newResource = registrationResponse.getResource();
     if (newResource != null) {
-      memory = (int) newResource.getMemorySize();
-      vCores = newResource.getVirtualCores();
+      capability = Resources.clone(newResource);
     }
     containerStats.clear();
     if (containerReports != null) {
@@ -185,7 +195,7 @@ public NodeHeartbeatResponse nodeHeartbeat(ApplicationAttemptId attemptId,
       long containerId, ContainerState containerState) throws Exception {
     ContainerStatus containerStatus = BuilderUtils.newContainerStatus(
         BuilderUtils.newContainerId(attemptId, containerId), containerState,
-        "Success", 0, BuilderUtils.newResource(memory, vCores));
+        "Success", 0, capatibility);
     ArrayList containerStatusList =
         new ArrayList(1);
     containerStatusList.add(containerStatus);
@@ -266,19 +276,22 @@ public NodeHeartbeatResponse nodeHeartbeat(List updatedStats,
 
     Resource newResource = heartbeatResponse.getResource();
     if (newResource != null) {
-      memory = newResource.getMemorySize();
-      vCores = newResource.getVirtualCores();
+      capability = Resources.clone(newResource);
     }
 
     return heartbeatResponse;
   }
 
   public long getMemory() {
-    return memory;
+    return capability.getMemorySize();
   }
 
   public int getvCores() {
-    return vCores;
+    return capability.getVirtualCores();
+  }
+
+  public Resource getCapability() {
+    return capability;
   }
 
   public String getVersion() {
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java
index ef7cb9a0e4a..fd327f1df2c 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java
@@ -104,6 +104,8 @@
 import org.apache.hadoop.yarn.server.resourcemanager.security.RMContainerTokenSecretManager;
 import org.apache.hadoop.yarn.util.Records;
 import org.apache.hadoop.yarn.util.YarnVersionInfo;
+import org.apache.hadoop.yarn.util.resource.ResourceUtils;
+import org.apache.hadoop.yarn.util.resource.TestResourceUtils;
 import org.apache.log4j.Level;
 import org.apache.log4j.LogManager;
 import org.apache.log4j.Logger;
@@ -150,6 +152,10 @@ public MockRM(Configuration conf, RMStateStore store,
   public MockRM(Configuration conf, RMStateStore store,
       boolean useNullRMNodeLabelsManager, boolean useRealElector) {
     super();
+    if (conf.getBoolean(TestResourceUtils.TEST_CONF_RESET_RESOURCE_TYPES,
+        true)) {
+      ResourceUtils.resetResourceTypes(conf);
+    }
     this.useNullRMNodeLabelsManager = useNullRMNodeLabelsManager;
     this.useRealElector = useRealElector;
     init(conf instanceof YarnConfiguration ? conf : new YarnConfiguration(conf));
@@ -842,6 +848,15 @@ public MockNM registerNode(String nodeIdStr, int memory, int vCores,
     return nm;
   }
 
+  public MockNM registerNode(String nodeIdStr, Resource nodeCapability)
+      throws Exception {
+    MockNM nm = new MockNM(nodeIdStr, nodeCapability,
+        getResourceTrackerService());
+    nm.registerNode();
+    drainEventsImplicitly();
+    return nm;
+  }
+
   public void sendNodeStarted(MockNM nm) throws Exception {
     RMNodeImpl node = (RMNodeImpl) getRMContext().getRMNodes().get(
         nm.getNodeId());
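With the new overloads, a test can register a node whose capability is a full Resource record instead of a (memory, vcores) pair, so nodes can advertise custom resource types. A minimal sketch using the overloads added above (the capability values are illustrative):

```java
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.server.resourcemanager.MockNM;
import org.apache.hadoop.yarn.server.resourcemanager.MockRM;

public class RegisterNodeSketch {
  public static void main(String[] args) throws Exception {
    MockRM rm = new MockRM(new YarnConfiguration());
    rm.start();
    // Register a node with an 8 GB / 8 vcore capability.
    MockNM nm = rm.registerNode("127.0.0.1:1234",
        Resource.newInstance(8 * 1024, 8));
    System.out.println("registered capability: " + nm.getCapability());
    rm.stop();
  }
}
```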
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/RMHATestBase.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/RMHATestBase.java
index 4ac4fc306b5..e756c49fd71 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/RMHATestBase.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/RMHATestBase.java
@@ -105,9 +105,34 @@ protected MockAM launchAM(RMApp app, MockRM rm, MockNM nm)
     return am;
   }
 
+  private MockRM initMockRMWithOldConf(final Configuration confForRM) {
+    return new MockRM(confForRM, null, false, false) {
+      @Override
+      protected AdminService createAdminService() {
+        return new AdminService(this) {
+          @Override
+          protected void startServer() {
+            // override to not start rpc handler
+          }
+
+          @Override
+          protected void stopServer() {
+            // don't do anything
+          }
+
+          @Override
+          protected Configuration loadNewConfiguration() throws IOException, YarnException {
+            return confForRM;
+          }
+        };
+      }
+    };
+  }
+
   protected void startRMs() throws IOException {
-    rm1 = new MockRM(confForRM1, null, false, false);
-    rm2 = new MockRM(confForRM2, null, false, false);
+    rm1 = initMockRMWithOldConf(confForRM1);
+    rm2 = initMockRMWithOldConf(confForRM2);
+
     startRMs(rm1, confForRM1, rm2, confForRM2);
   }
 
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestAppManager.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestAppManager.java
index 91f20c3f405..187fa3ec7fd 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestAppManager.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestAppManager.java
@@ -35,6 +35,7 @@
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
+import java.util.Map;
 import java.util.concurrent.ConcurrentMap;
 
 import org.apache.commons.logging.Log;
@@ -56,6 +57,7 @@
 import org.apache.hadoop.yarn.api.records.ExecutionTypeRequest;
 import org.apache.hadoop.yarn.api.records.Priority;
 import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.api.records.ResourceInformation;
 import org.apache.hadoop.yarn.api.records.ResourceRequest;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.hadoop.yarn.event.AsyncDispatcher;
@@ -247,6 +249,8 @@ public void setUp() {
     ResourceScheduler scheduler = mockResourceScheduler();
     ((RMContextImpl)rmContext).setScheduler(scheduler);
     Configuration conf = new Configuration();
+    conf.setBoolean(YarnConfiguration.NODE_LABELS_ENABLED, true);
+    ((RMContextImpl) rmContext).setYarnConfiguration(conf);
     ApplicationMasterService masterService =
         new ApplicationMasterService(rmContext, scheduler);
     appMonitor = new TestRMAppManager(rmContext,
@@ -827,9 +831,12 @@ public void testEscapeApplicationSummary() {
     when(app.getApplicationType()).thenReturn("MAPREDUCE");
     when(app.getSubmitTime()).thenReturn(1000L);
     when(app.getLaunchTime()).thenReturn(2000L);
+    Map<String, Long> resourceSecondsMap = new HashMap<>();
+    resourceSecondsMap.put(ResourceInformation.MEMORY_MB.getName(), 16384L);
+    resourceSecondsMap.put(ResourceInformation.VCORES.getName(), 64L);
     RMAppMetrics metrics =
         new RMAppMetrics(Resource.newInstance(1234, 56),
-            10, 1, 16384, 64, 0, 0);
+            10, 1, resourceSecondsMap, new HashMap<>());
     when(app.getRMAppMetrics()).thenReturn(metrics);
 
     RMAppManager.ApplicationSummary.SummaryBuilder summary =
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationMasterService.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationMasterService.java
index 65e517d0680..aa0085ba648 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationMasterService.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationMasterService.java
@@ -102,9 +102,11 @@ public void init(ApplicationMasterServiceContext amsContext,
     }
 
     @Override
-    public void registerApplicationMaster(ApplicationAttemptId
-        applicationAttemptId, RegisterApplicationMasterRequest request,
-        RegisterApplicationMasterResponse response) throws IOException {
+    public void registerApplicationMaster(
+        ApplicationAttemptId applicationAttemptId,
+        RegisterApplicationMasterRequest request,
+        RegisterApplicationMasterResponse response)
+        throws IOException, YarnException {
       nextProcessor.registerApplicationMaster(
           applicationAttemptId, request, response);
     }
@@ -144,7 +146,8 @@ public void init(ApplicationMasterServiceContext amsContext,
     public void registerApplicationMaster(
         ApplicationAttemptId applicationAttemptId,
         RegisterApplicationMasterRequest request,
-        RegisterApplicationMasterResponse response) throws IOException {
+        RegisterApplicationMasterResponse response)
+        throws IOException, YarnException {
       beforeRegCount.incrementAndGet();
       nextProcessor.registerApplicationMaster(applicationAttemptId,
               request, response);
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestClientRMService.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestClientRMService.java
index 6d2e4f45a15..fbd830fb1da 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestClientRMService.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestClientRMService.java
@@ -54,6 +54,8 @@
 import org.apache.hadoop.yarn.MockApps;
 import org.apache.hadoop.yarn.api.ApplicationClientProtocol;
 import org.apache.hadoop.yarn.api.protocolrecords.ApplicationsRequestScope;
+import org.apache.hadoop.yarn.api.protocolrecords.GetAllResourceTypeInfoRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.GetAllResourceTypeInfoResponse;
 import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationAttemptReportRequest;
 import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationAttemptReportResponse;
 import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationAttemptsRequest;
@@ -114,6 +116,7 @@
 import org.apache.hadoop.yarn.api.records.ReservationRequest;
 import org.apache.hadoop.yarn.api.records.ReservationRequests;
 import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.api.records.ResourceInformation;
 import org.apache.hadoop.yarn.api.records.ResourceRequest;
 import org.apache.hadoop.yarn.api.records.YarnApplicationState;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
@@ -1894,4 +1897,47 @@ public void handle(Event event) {
         rmService.getApplications(request).getApplicationList().size());
     rmService.setDisplayPerUserApps(false);
   }
+
+  @Test
+  public void testGetResourceTypesInfoWhenResourceProfileDisabled()
+      throws Exception {
+    YarnConfiguration conf = new YarnConfiguration();
+    MockRM rm = new MockRM(conf) {
+      protected ClientRMService createClientRMService() {
+        return new ClientRMService(this.rmContext, scheduler,
+            this.rmAppManager, this.applicationACLsManager, this.queueACLsManager,
+            this.getRMContext().getRMDelegationTokenSecretManager());
+      }
+    };
+    rm.start();
+
+    YarnRPC rpc = YarnRPC.create(conf);
+    InetSocketAddress rmAddress = rm.getClientRMService().getBindAddress();
+    LOG.info("Connecting to ResourceManager at " + rmAddress);
+    ApplicationClientProtocol client =
+        (ApplicationClientProtocol) rpc
+            .getProxy(ApplicationClientProtocol.class, rmAddress, conf);
+
+    // Make call
+    GetAllResourceTypeInfoRequest request =
+        GetAllResourceTypeInfoRequest.newInstance();
+    GetAllResourceTypeInfoResponse response = client.getResourceTypeInfo(request);
+
+    Assert.assertEquals(2, response.getResourceTypeInfo().size());
+
+    // Check memory
+    Assert.assertEquals(ResourceInformation.MEMORY_MB.getName(),
+        response.getResourceTypeInfo().get(0).getName());
+    Assert.assertEquals(ResourceInformation.MEMORY_MB.getUnits(),
+        response.getResourceTypeInfo().get(0).getDefaultUnit());
+
+    // Check vcores
+    Assert.assertEquals(ResourceInformation.VCORES.getName(),
+        response.getResourceTypeInfo().get(1).getName());
+    Assert.assertEquals(ResourceInformation.VCORES.getUnits(),
+        response.getResourceTypeInfo().get(1).getDefaultUnit());
+
+    rm.stop();
+    rpc.stopProxy(client, conf);
+  }
 }
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestContainerResourceUsage.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestContainerResourceUsage.java
index 11fe0561769..3508ab4760c 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestContainerResourceUsage.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestContainerResourceUsage.java
@@ -23,6 +23,7 @@
 import java.util.Collection;
 import java.util.HashMap;
 import java.util.List;
+import java.util.Map;
 
 import org.apache.commons.lang.time.DateUtils;
 import org.apache.hadoop.security.UserGroupInformation;
@@ -32,6 +33,7 @@
 import org.apache.hadoop.yarn.api.records.ContainerId;
 import org.apache.hadoop.yarn.api.records.ContainerState;
 import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.api.records.ResourceInformation;
 import org.apache.hadoop.yarn.api.records.ResourceRequest;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore;
@@ -424,6 +426,9 @@ private AggregateAppResourceUsage calculateContainerResourceMetrics(
                           * usedMillis / DateUtils.MILLIS_PER_SECOND;
     long vcoreSeconds = resource.getVirtualCores()
                           * usedMillis / DateUtils.MILLIS_PER_SECOND;
-    return new AggregateAppResourceUsage(memorySeconds, vcoreSeconds);
+    Map<String, Long> map = new HashMap<>();
+    map.put(ResourceInformation.MEMORY_MB.getName(), memorySeconds);
+    map.put(ResourceInformation.VCORES.getName(), vcoreSeconds);
+    return new AggregateAppResourceUsage(map);
   }
 }
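AggregateAppResourceUsage now wraps a resource-name-keyed map rather than a (memorySeconds, vcoreSeconds) pair. A hedged sketch of the new shape; it assumes the class retains memory/vcore convenience getters backed by the map, since only the map constructor is visible in this patch:

```java
import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.yarn.api.records.ResourceInformation;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.AggregateAppResourceUsage;

public class AggregateUsageSketch {
  public static void main(String[] args) {
    Map<String, Long> seconds = new HashMap<>();
    seconds.put(ResourceInformation.MEMORY_MB.getName(), 2048L);
    seconds.put(ResourceInformation.VCORES.getName(), 2L);
    AggregateAppResourceUsage usage = new AggregateAppResourceUsage(seconds);
    // getMemorySeconds()/getVcoreSeconds() are assumed convenience getters.
    System.out.println(usage.getMemorySeconds() + " MB-seconds, "
        + usage.getVcoreSeconds() + " vcore-seconds");
  }
}
```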
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/MockAsm.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/MockAsm.java
index 9d8151aefce..760ee8e1c38 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/MockAsm.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/MockAsm.java
@@ -18,6 +18,7 @@
 package org.apache.hadoop.yarn.server.resourcemanager.applicationsmanager;
 
 import java.util.Collection;
+import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
@@ -194,7 +195,8 @@ public YarnApplicationState createApplicationState() {
 
     @Override
     public RMAppMetrics getRMAppMetrics() {
-      return new RMAppMetrics(Resource.newInstance(0, 0), 0, 0, 0, 0, 0, 0);
+      return new RMAppMetrics(Resource.newInstance(0, 0), 0, 0,
+          new HashMap<>(), new HashMap<>());
     }
 
     @Override
@@ -354,8 +356,9 @@ public int getMaxAppAttempts() {
       public ApplicationReport createAndGetApplicationReport(
           String clientUserName, boolean allowAccess) {
         ApplicationResourceUsageReport usageReport =
-            ApplicationResourceUsageReport.newInstance(0, 0, null, null, null, 
-            0, 0, 0, 0, 0, 0);
+            ApplicationResourceUsageReport
+                .newInstance(0, 0, null, null, null, new HashMap<>(), 0, 0,
+                    new HashMap<>());
         ApplicationReport report = ApplicationReport.newInstance(
             getApplicationId(), appAttemptId, getUser(), getQueue(), 
             getName(), null, 0, null, null, getDiagnostics().toString(), 
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/metrics/TestCombinedSystemMetricsPublisher.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/metrics/TestCombinedSystemMetricsPublisher.java
index 4f55a6b0d8f..fdb32385e5a 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/metrics/TestCombinedSystemMetricsPublisher.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/metrics/TestCombinedSystemMetricsPublisher.java
@@ -30,7 +30,9 @@
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.EnumSet;
+import java.util.HashMap;
 import java.util.List;
+import java.util.Map;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.ConcurrentMap;
 
@@ -46,6 +48,7 @@
 import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
 import org.apache.hadoop.yarn.api.records.Priority;
 import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.api.records.ResourceInformation;
 import org.apache.hadoop.yarn.api.records.YarnApplicationAttemptState;
 import org.apache.hadoop.yarn.api.records.timeline.TimelineEntity;
 import org.apache.hadoop.yarn.api.records.timelineservice.TimelineEntityType;
@@ -481,9 +484,16 @@ private static RMApp createRMApp(ApplicationId appId) {
     when(rmApp.getCurrentAppAttempt()).thenReturn(appAttempt);
     when(rmApp.getFinalApplicationStatus()).thenReturn(
         FinalApplicationStatus.UNDEFINED);
+    Map<String, Long> resourceMap = new HashMap<>();
+    resourceMap
+        .put(ResourceInformation.MEMORY_MB.getName(), (long) Integer.MAX_VALUE);
+    resourceMap.put(ResourceInformation.VCORES.getName(), Long.MAX_VALUE);
+    Map<String, Long> preemptedMap = new HashMap<>();
+    preemptedMap
+        .put(ResourceInformation.MEMORY_MB.getName(), (long) Integer.MAX_VALUE);
     when(rmApp.getRMAppMetrics()).thenReturn(
-        new RMAppMetrics(Resource.newInstance(0, 0), 0, 0, Integer.MAX_VALUE,
-            Long.MAX_VALUE, Integer.MAX_VALUE, Long.MAX_VALUE));
+        new RMAppMetrics(Resource.newInstance(0, 0), 0, 0, resourceMap,
+            preemptedMap));
     when(rmApp.getApplicationTags()).thenReturn(
         Collections.<String> emptySet());
     ApplicationSubmissionContext appSubmissionContext =
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/metrics/TestSystemMetricsPublisher.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/metrics/TestSystemMetricsPublisher.java
index 5321916ca33..36e35d46484 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/metrics/TestSystemMetricsPublisher.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/metrics/TestSystemMetricsPublisher.java
@@ -24,6 +24,7 @@
 import java.util.Collection;
 import java.util.Collections;
 import java.util.EnumSet;
+import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Map;
 import java.util.Set;
@@ -40,6 +41,7 @@
 import org.apache.hadoop.yarn.api.records.NodeId;
 import org.apache.hadoop.yarn.api.records.Priority;
 import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.api.records.ResourceInformation;
 import org.apache.hadoop.yarn.api.records.ResourceRequest;
 import org.apache.hadoop.yarn.api.records.YarnApplicationAttemptState;
 import org.apache.hadoop.yarn.api.records.YarnApplicationState;
@@ -505,9 +507,16 @@ private static RMApp createRMApp(ApplicationId appId) {
     when(app.getCurrentAppAttempt()).thenReturn(appAttempt);
     when(app.getFinalApplicationStatus()).thenReturn(
         FinalApplicationStatus.UNDEFINED);
-    when(app.getRMAppMetrics()).thenReturn(
-        new RMAppMetrics(null, 0, 0, Integer.MAX_VALUE, Long.MAX_VALUE,
-            Integer.MAX_VALUE, Long.MAX_VALUE));
+    Map<String, Long> resourceMap = new HashMap<>();
+    resourceMap
+        .put(ResourceInformation.MEMORY_MB.getName(), (long) Integer.MAX_VALUE);
+    resourceMap.put(ResourceInformation.VCORES.getName(), Long.MAX_VALUE);
+    Map<String, Long> preemptedMap = new HashMap<>();
+    preemptedMap
+        .put(ResourceInformation.MEMORY_MB.getName(), (long) Integer.MAX_VALUE);
+    preemptedMap.put(ResourceInformation.VCORES.getName(), Long.MAX_VALUE);
+    when(app.getRMAppMetrics())
+        .thenReturn(new RMAppMetrics(null, 0, 0, resourceMap, preemptedMap));
     Set<String> appTags = new HashSet<String>();
     appTags.add("test");
     appTags.add("tags");
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/metrics/TestSystemMetricsPublisherForV2.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/metrics/TestSystemMetricsPublisherForV2.java
index ad71f6e0405..3aeafc96aa2 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/metrics/TestSystemMetricsPublisherForV2.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/metrics/TestSystemMetricsPublisherForV2.java
@@ -29,6 +29,8 @@
 import java.io.FileReader;
 import java.io.IOException;
 import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.ConcurrentMap;
 
@@ -46,6 +48,7 @@
 import org.apache.hadoop.yarn.api.records.NodeId;
 import org.apache.hadoop.yarn.api.records.Priority;
 import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.api.records.ResourceInformation;
 import org.apache.hadoop.yarn.api.records.timelineservice.TimelineEntity;
 import org.apache.hadoop.yarn.api.records.timelineservice.TimelineEntityType;
 import org.apache.hadoop.yarn.api.records.timelineservice.TimelineEvent;
@@ -358,15 +361,20 @@ private static RMApp createRMApp(ApplicationId appId) {
     when(app.getDiagnostics()).thenReturn(
         new StringBuilder("test diagnostics info"));
     RMAppAttempt appAttempt = mock(RMAppAttempt.class);
-    when(appAttempt.getAppAttemptId()).thenReturn(
-        ApplicationAttemptId.newInstance(appId, 1));
+    when(appAttempt.getAppAttemptId())
+        .thenReturn(ApplicationAttemptId.newInstance(appId, 1));
     when(app.getCurrentAppAttempt()).thenReturn(appAttempt);
-    when(app.getFinalApplicationStatus()).thenReturn(
-        FinalApplicationStatus.UNDEFINED);
+    when(app.getFinalApplicationStatus())
+        .thenReturn(FinalApplicationStatus.UNDEFINED);
+    Map<String, Long> resourceSecondsMap = new HashMap<>();
+    resourceSecondsMap
+        .put(ResourceInformation.MEMORY_MB.getName(), (long) Integer.MAX_VALUE);
+    resourceSecondsMap
+        .put(ResourceInformation.VCORES.getName(), Long.MAX_VALUE);
     when(app.getRMAppMetrics()).thenReturn(
-        new RMAppMetrics(Resource.newInstance(0, 0), 0, 0, Integer.MAX_VALUE,
-            Long.MAX_VALUE, Long.MAX_VALUE, Long.MAX_VALUE));
-    when(app.getApplicationTags()).thenReturn(Collections.<String> emptySet());
+        new RMAppMetrics(Resource.newInstance(0, 0), 0, 0, resourceSecondsMap,
+            new HashMap<>()));
+    when(app.getApplicationTags()).thenReturn(Collections.emptySet());
     ApplicationSubmissionContext appSubmissionContext =
         mock(ApplicationSubmissionContext.class);
     when(appSubmissionContext.getPriority())
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStoreTestBase.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStoreTestBase.java
index f01f730bb26..10391b1507f 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStoreTestBase.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStoreTestBase.java
@@ -194,7 +194,7 @@ protected RMAppAttempt storeAttempt(RMStateStore store,
     when(mockAttempt.getRMAppAttemptMetrics())
         .thenReturn(mockRmAppAttemptMetrics);
     when(mockRmAppAttemptMetrics.getAggregateAppResourceUsage())
-        .thenReturn(new AggregateAppResourceUsage(0, 0));
+        .thenReturn(new AggregateAppResourceUsage(new HashMap<>()));
     dispatcher.attemptId = attemptId;
     store.storeNewApplicationAttempt(mockAttempt);
     waitNotify(dispatcher);
@@ -292,7 +292,7 @@ void testRMAppStateStore(RMStateStoreHelper stateStoreHelper,
     when(mockRemovedAttempt.getRMAppAttemptMetrics())
         .thenReturn(mockRmAppAttemptMetrics);
     when(mockRmAppAttemptMetrics.getAggregateAppResourceUsage())
-        .thenReturn(new AggregateAppResourceUsage(0,0));
+        .thenReturn(new AggregateAppResourceUsage(new HashMap<>()));
     attempts.put(attemptIdRemoved, mockRemovedAttempt);
     store.removeApplication(mockRemovedApp);
 
@@ -369,7 +369,7 @@ void testRMAppStateStore(RMStateStoreHelper stateStoreHelper,
             oldAttemptState.getStartTime(), RMAppAttemptState.FINISHED,
             "myTrackingUrl", "attemptDiagnostics",
             FinalApplicationStatus.SUCCEEDED, 100,
-            oldAttemptState.getFinishTime(), 0, 0, 0, 0);
+            oldAttemptState.getFinishTime(), new HashMap<>(), new HashMap<>());
     store.updateApplicationAttemptState(newAttemptState);
 
     // test updating the state of an app/attempt whose initial state was not
@@ -393,7 +393,7 @@ void testRMAppStateStore(RMStateStoreHelper stateStoreHelper,
             oldAttemptState.getStartTime(), RMAppAttemptState.FINISHED,
             "myTrackingUrl", "attemptDiagnostics",
             FinalApplicationStatus.SUCCEEDED, 111,
-            oldAttemptState.getFinishTime(), 0, 0, 0, 0);
+            oldAttemptState.getFinishTime(), new HashMap<>(), new HashMap<>());
     store.updateApplicationAttemptState(dummyAttempt);
 
     // let things settle down
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/TestZKRMStateStore.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/TestZKRMStateStore.java
index b5b792578e7..9e005006b04 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/TestZKRMStateStore.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/TestZKRMStateStore.java
@@ -34,12 +34,7 @@
 import org.apache.hadoop.security.token.delegation.DelegationKey;
 import org.apache.hadoop.service.Service;
 import org.apache.hadoop.test.GenericTestUtils;
-import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
-import org.apache.hadoop.yarn.api.records.ApplicationId;
-import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
-import org.apache.hadoop.yarn.api.records.Container;
-import org.apache.hadoop.yarn.api.records.ContainerId;
-import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
+import org.apache.hadoop.yarn.api.records.*;
 import org.apache.hadoop.yarn.api.records.impl.pb.ApplicationSubmissionContextPBImpl;
 import org.apache.hadoop.yarn.api.records.impl.pb.ContainerPBImpl;
 import org.apache.hadoop.yarn.conf.HAUtil;
@@ -554,7 +549,7 @@ public void testFencedState() throws Exception {
     when(mockAttempt.getRMAppAttemptMetrics())
         .thenReturn(mockRmAppAttemptMetrics);
     when(mockRmAppAttemptMetrics.getAggregateAppResourceUsage())
-        .thenReturn(new AggregateAppResourceUsage(0,0));
+        .thenReturn(new AggregateAppResourceUsage(new HashMap<>()));
     store.storeNewApplicationAttempt(mockAttempt);
     assertEquals("RMStateStore should have been in fenced state",
             true, store.isFencedState());
@@ -566,7 +561,7 @@ public void testFencedState() throws Exception {
             store.getCredentialsFromAppAttempt(mockAttempt),
             startTime, RMAppAttemptState.FINISHED, "testUrl", 
             "test", FinalApplicationStatus.SUCCEEDED, 100, 
-            finishTime, 0, 0, 0, 0);
+            finishTime, new HashMap<>(), new HashMap<>());
     store.updateApplicationAttemptState(newAttemptState);
     assertEquals("RMStateStore should have been in fenced state",
             true, store.isFencedState());
@@ -803,10 +798,20 @@ private static ApplicationStateData createAppState(
   private static ApplicationAttemptStateData createFinishedAttempt(
       ApplicationAttemptId attemptId, Container container, long startTime,
       int amExitStatus) {
+    Map<String, Long> resourceSecondsMap = new HashMap<>();
+    Map<String, Long> preemptedResourceSecondsMap = new HashMap<>();
+    resourceSecondsMap
+        .put(ResourceInformation.MEMORY_MB.getName(), 0L);
+    resourceSecondsMap
+        .put(ResourceInformation.VCORES.getName(), 0L);
+    preemptedResourceSecondsMap.put(ResourceInformation.MEMORY_MB.getName(),
+        0L);
+    preemptedResourceSecondsMap
+        .put(ResourceInformation.VCORES.getName(), 0L);
     return ApplicationAttemptStateData.newInstance(attemptId,
         container, null, startTime, RMAppAttemptState.FINISHED,
         "myTrackingUrl", "attemptDiagnostics", FinalApplicationStatus.SUCCEEDED,
-        amExitStatus, 0, 0, 0, 0, 0);
+        amExitStatus, 0, resourceSecondsMap, preemptedResourceSecondsMap);
   }
 
   private ApplicationAttemptId storeAttempt(RMStateStore store,
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/resource/TestResources.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/resource/TestResources.java
deleted file mode 100644
index 2a10747ac9d..00000000000
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/resource/TestResources.java
+++ /dev/null
@@ -1,43 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.yarn.server.resourcemanager.resource;
-
-import static org.apache.hadoop.yarn.util.resource.Resources.*;
-import static org.junit.Assert.*;
-import org.junit.Test;
-
-public class TestResources {
-  @Test(timeout=10000)
-  public void testFitsIn() {
-    assertTrue(fitsIn(createResource(1, 1), createResource(2, 2)));
-    assertTrue(fitsIn(createResource(2, 2), createResource(2, 2)));
-    assertFalse(fitsIn(createResource(2, 2), createResource(1, 1)));
-    assertFalse(fitsIn(createResource(1, 2), createResource(2, 1)));
-    assertFalse(fitsIn(createResource(2, 1), createResource(1, 2)));
-  }
-  
-  @Test(timeout=10000)
-  public void testComponentwiseMin() {
-    assertEquals(createResource(1, 1),
-        componentwiseMin(createResource(1, 1), createResource(2, 2)));
-    assertEquals(createResource(1, 1),
-        componentwiseMin(createResource(2, 2), createResource(1, 1)));
-    assertEquals(createResource(1, 1),
-        componentwiseMin(createResource(1, 2), createResource(2, 1)));
-  }
-}
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java
index d23ef59b3a5..a1ef3a0f42e 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java
@@ -135,6 +135,7 @@
 import org.apache.hadoop.yarn.server.utils.BuilderUtils;
 import org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator;
 import org.apache.hadoop.yarn.util.resource.DominantResourceCalculator;
+import org.apache.hadoop.yarn.util.resource.ResourceUtils;
 import org.apache.hadoop.yarn.util.resource.Resources;
 import org.apache.log4j.Level;
 import org.apache.log4j.LogManager;
@@ -2941,7 +2942,7 @@ public void testRefreshQueuesMaxAllocationRefresh() throws Exception {
         conf.getMaximumAllocationPerQueue(A1).getMemorySize());
     assertEquals("max allocation",
         YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_MB,
-        conf.getMaximumAllocation().getMemorySize());
+        ResourceUtils.fetchMaximumAllocationFromConfig(conf).getMemorySize());
 
     CSQueue rootQueue = cs.getRootQueue();
     CSQueue queueA = findQueue(rootQueue, A);
@@ -3042,10 +3043,10 @@ public void testRefreshQueuesMaxAllocationRefreshLarger() throws Exception {
         conf.getMaximumAllocationPerQueue(A1).getVirtualCores());
     assertEquals("cluster max allocation MB",
         YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_MB,
-        conf.getMaximumAllocation().getMemorySize());
+        ResourceUtils.fetchMaximumAllocationFromConfig(conf).getMemorySize());
     assertEquals("cluster max allocation vcores",
         YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_VCORES,
-        conf.getMaximumAllocation().getVirtualCores());
+        ResourceUtils.fetchMaximumAllocationFromConfig(conf).getVirtualCores());
 
     CSQueue rootQueue = cs.getRootQueue();
     CSQueue queueA = findQueue(rootQueue, A);
@@ -3064,10 +3065,10 @@ public void testRefreshQueuesMaxAllocationRefreshLarger() throws Exception {
         conf.getMaximumAllocationPerQueue(A1).getVirtualCores());
     assertEquals("max allocation MB cluster",
         YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_MB,
-        conf.getMaximumAllocation().getMemorySize());
+        ResourceUtils.fetchMaximumAllocationFromConfig(conf).getMemorySize());
     assertEquals("max allocation vcores cluster",
         YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_VCORES,
-        conf.getMaximumAllocation().getVirtualCores());
+        ResourceUtils.fetchMaximumAllocationFromConfig(conf).getVirtualCores());
     assertEquals("queue max allocation MB", 6144,
         ((LeafQueue) queueA1).getMaximumAllocation().getMemorySize());
     assertEquals("queue max allocation vcores", 3,
@@ -4341,143 +4342,6 @@ public void testCSReservationWithRootUnblocked() throws Exception {
     rm.stop();
   }
 
-  @Test (timeout = 300000)
-  public void testUserLimitThroughput() throws Exception {
-    // Since this is more of a performance unit test, only run if
-    // RunUserLimitThroughput is set (-DRunUserLimitThroughput=true)
-    Assume.assumeTrue(Boolean.valueOf(
-        System.getProperty("RunUserLimitThroughput")));
-
-    CapacitySchedulerConfiguration csconf =
-        new CapacitySchedulerConfiguration();
-    csconf.setMaximumApplicationMasterResourcePerQueuePercent("root", 100.0f);
-    csconf.setMaximumAMResourcePercentPerPartition("root", "", 100.0f);
-    csconf.setMaximumApplicationMasterResourcePerQueuePercent("root.default",
-        100.0f);
-    csconf.setMaximumAMResourcePercentPerPartition("root.default", "", 100.0f);
-    csconf.setResourceComparator(DominantResourceCalculator.class);
-
-    YarnConfiguration conf = new YarnConfiguration(csconf);
-      conf.setClass(YarnConfiguration.RM_SCHEDULER, CapacityScheduler.class,
-          ResourceScheduler.class);
-
-    MockRM rm = new MockRM(conf);
-    rm.start();
-
-    CapacityScheduler cs = (CapacityScheduler) rm.getResourceScheduler();
-    LeafQueue qb = (LeafQueue)cs.getQueue("default");
-
-    // For now make user limit large so we can activate all applications
-    qb.setUserLimitFactor((float)100.0);
-    qb.setupConfigurableCapacities();
-
-    SchedulerEvent addAppEvent;
-    SchedulerEvent addAttemptEvent;
-    Container container = mock(Container.class);
-    ApplicationSubmissionContext submissionContext =
-        mock(ApplicationSubmissionContext.class);
-
-    final int appCount = 100;
-    ApplicationId[] appids = new ApplicationId[appCount];
-    RMAppAttemptImpl[] attempts = new RMAppAttemptImpl[appCount];
-    ApplicationAttemptId[] appAttemptIds = new ApplicationAttemptId[appCount];
-    RMAppImpl[] apps = new RMAppImpl[appCount];
-    RMAppAttemptMetrics[] attemptMetrics = new RMAppAttemptMetrics[appCount];
-    for (Enumeration<?> loggers=LogManager.getCurrentLoggers();
-        loggers.hasMoreElements(); )  {
-      Logger logger = (Logger) loggers.nextElement();
-      logger.setLevel(Level.WARN);
-    }
-    final int topn = 20;
-    final int iterations = 2000000;
-    final int printInterval = 20000;
-    final float numerator = 1000.0f * printInterval;
-    PriorityQueue<Long> queue = new PriorityQueue<>(topn,
-        Collections.reverseOrder());
-
-    long n = Time.monotonicNow();
-    long timespent = 0;
-    for (int i = 0; i < iterations; i+=2) {
-      if (i > 0  && i % printInterval == 0){
-        long ts = (Time.monotonicNow() - n);
-        if (queue.size() < topn) {
-          queue.offer(ts);
-        } else {
-          Long last = queue.peek();
-          if (last > ts) {
-            queue.poll();
-            queue.offer(ts);
-          }
-        }
-        System.out.println(i + " " + (numerator / ts));
-        n= Time.monotonicNow();
-      }
-    cs.handle(new NodeUpdateSchedulerEvent(node));
-    cs.handle(new NodeUpdateSchedulerEvent(node2));
-    }
-    timespent=0;
-    int entries = queue.size();
-    while(queue.size() > 0){
-      long l = queue.poll();
-      timespent += l;
-    }
-    System.out.println("Avg of fastest " + entries + ": "
-        + numerator / (timespent / entries));
-    rm.stop();
-  }
-
   @Test
   public void testCSQueueBlocked() throws Exception {
     CapacitySchedulerConfiguration conf = new CapacitySchedulerConfiguration();
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacitySchedulerPerf.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacitySchedulerPerf.java
new file mode 100644
index 00000000000..0837fd7205a
--- /dev/null
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacitySchedulerPerf.java
@@ -0,0 +1,265 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity;
+
+import org.apache.hadoop.util.Time;
+import org.apache.hadoop.yarn.api.protocolrecords.ResourceTypes;
+import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
+import org.apache.hadoop.yarn.api.records.ApplicationId;
+import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
+import org.apache.hadoop.yarn.api.records.Container;
+import org.apache.hadoop.yarn.api.records.Priority;
+import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.api.records.ResourceInformation;
+import org.apache.hadoop.yarn.api.records.ResourceRequest;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.factories.RecordFactory;
+import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
+import org.apache.hadoop.yarn.server.resourcemanager.MockNodes;
+import org.apache.hadoop.yarn.server.resourcemanager.MockRM;
+import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppImpl;
+import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptImpl;
+import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptMetrics;
+import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAddedSchedulerEvent;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptAddedSchedulerEvent;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEvent;
+import org.apache.hadoop.yarn.server.utils.BuilderUtils;
+import org.apache.hadoop.yarn.util.resource.DominantResourceCalculator;
+import org.apache.hadoop.yarn.util.resource.ResourceUtils;
+import org.apache.log4j.Level;
+import org.apache.log4j.LogManager;
+import org.apache.log4j.Logger;
+import org.junit.Assume;
+import org.junit.Test;
+
+import java.util.Collections;
+import java.util.Enumeration;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.PriorityQueue;
+
+import static org.apache.hadoop.yarn.util.resource.TestResourceUtils.TEST_CONF_RESET_RESOURCE_TYPES;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+public class TestCapacitySchedulerPerf {
+  private final int GB = 1024;
+
+  private String getResourceName(int idx) {
+    return "resource-" + idx;
+  }
+
+  private void testUserLimitThroughputWithNumberOfResourceTypes(
+      int numOfResourceTypes)
+      throws Exception {
+    if (numOfResourceTypes > 2) {
+      // Initialize resource map
+      Map<String, ResourceInformation> riMap = new HashMap<>();
+
+      // Initialize mandatory resources
+      riMap.put(ResourceInformation.MEMORY_URI, ResourceInformation.MEMORY_MB);
+      riMap.put(ResourceInformation.VCORES_URI, ResourceInformation.VCORES);
+
+      for (int i = 2; i < numOfResourceTypes; i++) {
+        String resourceName = getResourceName(i);
+        riMap.put(resourceName, ResourceInformation
+            .newInstance(resourceName, "", 0, ResourceTypes.COUNTABLE, 0,
+                Integer.MAX_VALUE));
+      }
+
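+      // Note (editorial): initializeResourcesFromResourceInformationMap
+      // (re)registers the global resource-type registry in ResourceUtils, so
+      // the scheduler under test sees resource-2..resource-(n-1) in addition
+      // to memory and vcores.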
+      ResourceUtils.initializeResourcesFromResourceInformationMap(riMap);
+    }
+
+    // Since this is more of a performance unit test, only run if
+    // RunCapacitySchedulerPerfTests is set (-DRunCapacitySchedulerPerfTests=true)
+    Assume.assumeTrue(Boolean.valueOf(
+        System.getProperty("RunCapacitySchedulerPerfTests")));
+
+    CapacitySchedulerConfiguration csconf =
+        new CapacitySchedulerConfiguration();
+    csconf.setMaximumApplicationMasterResourcePerQueuePercent("root", 100.0f);
+    csconf.setMaximumAMResourcePercentPerPartition("root", "", 100.0f);
+    csconf.setMaximumApplicationMasterResourcePerQueuePercent("root.default",
+        100.0f);
+    csconf.setMaximumAMResourcePercentPerPartition("root.default", "", 100.0f);
+    csconf.setResourceComparator(DominantResourceCalculator.class);
+
+    YarnConfiguration conf = new YarnConfiguration(csconf);
+    // Don't reset resource types since we have already configured resource types
+    conf.setBoolean(TEST_CONF_RESET_RESOURCE_TYPES, false);
+    conf.setClass(YarnConfiguration.RM_SCHEDULER, CapacityScheduler.class,
+        ResourceScheduler.class);
+
+    MockRM rm = new MockRM(conf);
+    rm.start();
+
+    CapacityScheduler cs = (CapacityScheduler) rm.getResourceScheduler();
+    LeafQueue qb = (LeafQueue)cs.getQueue("default");
+
+    // For now make user limit large so we can activate all applications
+    qb.setUserLimitFactor((float)100.0);
+    qb.setupConfigurableCapacities();
+
+    SchedulerEvent addAppEvent;
+    SchedulerEvent addAttemptEvent;
+    Container container = mock(Container.class);
+    ApplicationSubmissionContext submissionContext =
+        mock(ApplicationSubmissionContext.class);
+
+    final int appCount = 100;
+    ApplicationId[] appids = new ApplicationId[appCount];
+    RMAppAttemptImpl[] attempts = new RMAppAttemptImpl[appCount];
+    ApplicationAttemptId[] appAttemptIds = new ApplicationAttemptId[appCount];
+    RMAppImpl[] apps = new RMAppImpl[appCount];
+    RMAppAttemptMetrics[] attemptMetrics = new RMAppAttemptMetrics[appCount];
+    for (int i=0; i<appCount; i++) {
+      appids[i] = BuilderUtils.newApplicationId(100, i);
+      appAttemptIds[i] =
+      BuilderUtils.newApplicationAttemptId(appids[i], 1);
+
+      attemptMetrics[i] = new RMAppAttemptMetrics(appAttemptIds[i],
+          rm.getRMContext());
+      apps[i] = mock(RMAppImpl.class);
+      when(apps[i].getApplicationId()).thenReturn(appids[i]);
+      attempts[i] = mock(RMAppAttemptImpl.class);
+      when(attempts[i].getMasterContainer()).thenReturn(container);
+      when(attempts[i].getSubmissionContext()).thenReturn(submissionContext);
+      when(attempts[i].getAppAttemptId()).thenReturn(appAttemptIds[i]);
+      when(attempts[i].getRMAppAttemptMetrics()).thenReturn(attemptMetrics[i]);
+      when(apps[i].getCurrentAppAttempt()).thenReturn(attempts[i]);
+
+      rm.getRMContext().getRMApps().put(appids[i], apps[i]);
+      addAppEvent = new AppAddedSchedulerEvent(appids[i], "default", "user1");
+      cs.handle(addAppEvent);
+      addAttemptEvent = new AppAttemptAddedSchedulerEvent(appAttemptIds[i],
+          false);
+      cs.handle(addAttemptEvent);
+    }
+
+    // add nodes to cluster, so cluster has 20GB and 20 vcores
+    Resource nodeResource = Resource.newInstance(10 * GB, 10);
+    if (numOfResourceTypes > 2) {
+      for (int i = 2; i < numOfResourceTypes; i++) {
+        nodeResource.setResourceValue(getResourceName(i), 10);
+      }
+    }
+
+    RMNode node = MockNodes.newNodeInfo(0, nodeResource, 1, "127.0.0.1");
+    cs.handle(new NodeAddedSchedulerEvent(node));
+
+    RMNode node2 = MockNodes.newNodeInfo(0, nodeResource, 1, "127.0.0.2");
+    cs.handle(new NodeAddedSchedulerEvent(node2));
+
+    Priority u0Priority = TestUtils.createMockPriority(1);
+    RecordFactory recordFactory =
+        RecordFactoryProvider.getRecordFactory(null);
+
+    FiCaSchedulerApp[] fiCaApps = new FiCaSchedulerApp[appCount];
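+    // Note (editorial): each app submits a single ANY request for a
+    // 1GB/1-vcore container, plus 10 units of each custom resource type when
+    // more than two resource types are configured.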
+    for (int i=0;i<appCount;i++) {
+      fiCaApps[i] =
+          cs.getSchedulerApplications().get(apps[i].getApplicationId())
+              .getCurrentAppAttempt();
+
+      ResourceRequest resourceRequest = TestUtils.createResourceRequest(
+          ResourceRequest.ANY, 1 * GB, 1, true, u0Priority, recordFactory);
+      if (numOfResourceTypes > 2) {
+        for (int j = 2; j < numOfResourceTypes; j++) {
+          resourceRequest.getCapability().setResourceValue(getResourceName(j),
+              10);
+        }
+      }
+
+      // allocate container for app2 with 1GB memory and 1 vcore
+      fiCaApps[i].updateResourceRequests(
+          Collections.singletonList(resourceRequest));
+    }
+    // Now force everything to be over user limit
+    qb.setUserLimitFactor((float)0.0);
+
+    // Quiet the loggers while measuring throughput
+    for (Enumeration<Logger> loggers = LogManager.getCurrentLoggers();
+         loggers.hasMoreElements(); )  {
+      Logger logger = (Logger) loggers.nextElement();
+      logger.setLevel(Level.WARN);
+    }
+    final int topn = 20;
+    final int iterations = 2000000;
+    final int printInterval = 20000;
+    final float numerator = 1000.0f * printInterval;
+    PriorityQueue<Long> queue = new PriorityQueue<>(topn,
+        Collections.reverseOrder());
+
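+    // Note (editorial): the loop below drives scheduling by replaying node
+    // heartbeats. Every printInterval updates it records the elapsed time for
+    // that batch and keeps the topn fastest batches; numerator / ts converts
+    // a batch time in milliseconds into node updates per second.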
+    long n = Time.monotonicNow();
+    long timespent = 0;
+    for (int i = 0; i < iterations; i+=2) {
+      if (i > 0  && i % printInterval == 0){
+        long ts = (Time.monotonicNow() - n);
+        if (queue.size() < topn) {
+          queue.offer(ts);
+        } else {
+          Long last = queue.peek();
+          if (last > ts) {
+            queue.poll();
+            queue.offer(ts);
+          }
+        }
+        System.out.println(i + " " + (numerator / ts));
+        n= Time.monotonicNow();
+      }
+      cs.handle(new NodeUpdateSchedulerEvent(node));
+      cs.handle(new NodeUpdateSchedulerEvent(node2));
+    }
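+    // Note (editorial): averaging only the fastest batches filters out
+    // intervals distorted by JVM warm-up and GC pauses.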
+    timespent=0;
+    int entries = queue.size();
+    while(queue.size() > 0){
+      long l = queue.poll();
+      timespent += l;
+    }
+    System.out.println(
+        "#ResourceTypes = " + numOfResourceTypes + ". Avg of fastest " + entries
+            + ": " + numerator / (timespent / entries));
+    rm.stop();
+  }
+
+  @Test(timeout = 300000)
+  public void testUserLimitThroughputForTwoResources() throws Exception {
+    testUserLimitThroughputWithNumberOfResourceTypes(2);
+  }
+
+  @Test(timeout = 300000)
+  public void testUserLimitThroughputForThreeResources() throws Exception {
+    testUserLimitThroughputWithNumberOfResourceTypes(3);
+  }
+
+  @Test(timeout = 300000)
+  public void testUserLimitThroughputForFourResources() throws Exception {
+    testUserLimitThroughputWithNumberOfResourceTypes(4);
+  }
+
+  @Test(timeout = 300000)
+  public void testUserLimitThroughputForFiveResources() throws Exception {
+    testUserLimitThroughputWithNumberOfResourceTypes(5);
+  }
+}
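Review note: the four throughput tests above are gated by the `Assume` check, so a plain `mvn test` run reports them as skipped. A minimal sketch of a local invocation, assuming the standard surefire pass-through of `-D` system properties (the module path and flags below are illustrative, not part of this patch):

```
# Illustrative only: run the gated perf tests from the resourcemanager module.
cd hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager
mvn test -Dtest=TestCapacitySchedulerPerf -DRunCapacitySchedulerPerfTests=true
```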
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacitySchedulerWithMultiResourceTypes.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacitySchedulerWithMultiResourceTypes.java
new file mode 100644
index 00000000000..1a30e1da359
--- /dev/null
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacitySchedulerWithMultiResourceTypes.java
@@ -0,0 +1,190 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity;
+
+import com.google.common.collect.ImmutableMap;
+import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
+import org.apache.hadoop.yarn.api.protocolrecords.ResourceTypes;
+import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.api.records.ResourceInformation;
+import org.apache.hadoop.yarn.api.records.ResourceRequest;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.security.ContainerTokenIdentifier;
+import org.apache.hadoop.yarn.server.resourcemanager.MockAM;
+import org.apache.hadoop.yarn.server.resourcemanager.MockNM;
+import org.apache.hadoop.yarn.server.resourcemanager.MockRM;
+import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
+import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent;
+import org.apache.hadoop.yarn.server.utils.BuilderUtils;
+import org.apache.hadoop.yarn.util.resource.DominantResourceCalculator;
+import org.apache.hadoop.yarn.util.resource.ResourceUtils;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Test Capacity Scheduler with multiple resource types.
+ */
+public class TestCapacitySchedulerWithMultiResourceTypes {
+  private static String RESOURCE_1 = "res1";
+  private final int GB = 1024;
+
+  @Test
+  public void testMaximumAllocationRefreshWithMultipleResourceTypes() throws Exception {
+
+    // Initialize resource map
+    Map<String, ResourceInformation> riMap = new HashMap<>();
+
+    // Initialize mandatory resources
+    ResourceInformation memory = ResourceInformation.newInstance(
+        ResourceInformation.MEMORY_MB.getName(),
+        ResourceInformation.MEMORY_MB.getUnits(),
+        YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_MB,
+        YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_MB);
+    ResourceInformation vcores = ResourceInformation.newInstance(
+        ResourceInformation.VCORES.getName(),
+        ResourceInformation.VCORES.getUnits(),
+        YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES,
+        YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_VCORES);
+    riMap.put(ResourceInformation.MEMORY_URI, memory);
+    riMap.put(ResourceInformation.VCORES_URI, vcores);
+    riMap.put(RESOURCE_1, ResourceInformation.newInstance(RESOURCE_1, "", 0,
+        ResourceTypes.COUNTABLE, 0, 3333L));
+
+    ResourceUtils.initializeResourcesFromResourceInformationMap(riMap);
+
+    CapacitySchedulerConfiguration csconf =
+        new CapacitySchedulerConfiguration();
+    csconf.setMaximumApplicationMasterResourcePerQueuePercent("root", 100.0f);
+    csconf.setMaximumAMResourcePercentPerPartition("root", "", 100.0f);
+    csconf.setMaximumApplicationMasterResourcePerQueuePercent("root.default",
+        100.0f);
+    csconf.setMaximumAMResourcePercentPerPartition("root.default", "", 100.0f);
+    csconf.setResourceComparator(DominantResourceCalculator.class);
+    csconf.set(YarnConfiguration.RESOURCE_TYPES, RESOURCE_1);
+    csconf.setInt(YarnConfiguration.RESOURCE_TYPES + "." + RESOURCE_1 +
+        ".maximum-allocation", 3333);
+
+    YarnConfiguration conf = new YarnConfiguration(csconf);
+    // Don't reset resource types since we have already configured resource
+    // types
+    conf.setClass(YarnConfiguration.RM_SCHEDULER, CapacityScheduler.class,
+        ResourceScheduler.class);
+
+    MockRM rm = new MockRM(conf);
+    rm.start();
+
+    CapacityScheduler cs = (CapacityScheduler) rm.getResourceScheduler();
+    Assert.assertEquals(3333L,
+        cs.getMaximumResourceCapability().getResourceValue(RESOURCE_1));
+    Assert.assertEquals(3333L,
+        cs.getMaximumAllocation().getResourceValue(RESOURCE_1));
+    Assert.assertEquals(
+        YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_MB,
+        cs.getMaximumResourceCapability()
+            .getResourceValue(ResourceInformation.MEMORY_URI));
+    Assert.assertEquals(
+        YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_MB,
+        cs.getMaximumAllocation()
+            .getResourceValue(ResourceInformation.MEMORY_URI));
+    Assert.assertEquals(
+        YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_VCORES,
+        cs.getMaximumResourceCapability()
+            .getResourceValue(ResourceInformation.VCORES_URI));
+    Assert.assertEquals(
+        YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_VCORES,
+        cs.getMaximumAllocation()
+            .getResourceValue(ResourceInformation.VCORES_URI));
+
+    // Set RES_1 to 3332 (less than 3333) and refresh CS, failures expected.
+    csconf.set(YarnConfiguration.RESOURCE_TYPES, RESOURCE_1);
+    csconf.setInt(YarnConfiguration.RESOURCE_TYPES + "." + RESOURCE_1 +
+        ".maximum-allocation", 3332);
+
+    boolean exception = false;
+    try {
+      cs.reinitialize(csconf, rm.getRMContext());
+    } catch (IOException e) {
+      exception = true;
+    }
+
+    Assert.assertTrue("Should have exception in CS", exception);
+
+    // Maximum allocation won't be updated
+    Assert.assertEquals(3333L,
+        cs.getMaximumResourceCapability().getResourceValue(RESOURCE_1));
+    Assert.assertEquals(3333L,
+        cs.getMaximumAllocation().getResourceValue(RESOURCE_1));
+    Assert.assertEquals(
+        YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_MB,
+        cs.getMaximumResourceCapability()
+            .getResourceValue(ResourceInformation.MEMORY_URI));
+    Assert.assertEquals(
+        YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_MB,
+        cs.getMaximumAllocation()
+            .getResourceValue(ResourceInformation.MEMORY_URI));
+    Assert.assertEquals(
+        YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_VCORES,
+        cs.getMaximumResourceCapability()
+            .getResourceValue(ResourceInformation.VCORES_URI));
+    Assert.assertEquals(
+        YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_VCORES,
+        cs.getMaximumAllocation()
+            .getResourceValue(ResourceInformation.VCORES_URI));
+
+    // Set RES_1 to 3334 and refresh CS, should success
+    csconf.set(YarnConfiguration.RESOURCE_TYPES, RESOURCE_1);
+    csconf.setInt(YarnConfiguration.RESOURCE_TYPES + "." + RESOURCE_1 +
+        ".maximum-allocation", 3334);
+    cs.reinitialize(csconf, rm.getRMContext());
+
+    // Maximum allocation will be updated
+    Assert.assertEquals(3334,
+        cs.getMaximumResourceCapability().getResourceValue(RESOURCE_1));
+
+    // Since we haven't updated the real configuration of ResourceUtils,
+    // cs.getMaximumAllocation won't be updated.
+    Assert.assertEquals(3333,
+        cs.getMaximumAllocation().getResourceValue(RESOURCE_1));
+    Assert.assertEquals(
+        YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_MB,
+        cs.getMaximumResourceCapability()
+            .getResourceValue(ResourceInformation.MEMORY_URI));
+    Assert.assertEquals(
+        YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_MB,
+        cs.getMaximumAllocation()
+            .getResourceValue(ResourceInformation.MEMORY_URI));
+    Assert.assertEquals(
+        YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_VCORES,
+        cs.getMaximumResourceCapability()
+            .getResourceValue(ResourceInformation.VCORES_URI));
+    Assert.assertEquals(
+        YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_VCORES,
+        cs.getMaximumAllocation()
+            .getResourceValue(ResourceInformation.VCORES_URI));
+
+    rm.close();
+  }
+}
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestUtils.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestUtils.java
index e81ffbd5354..cb150e0363b 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestUtils.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestUtils.java
@@ -18,16 +18,7 @@
 package org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity;
 
-import static org.mockito.Matchers.any;
-import static org.mockito.Mockito.doReturn;
-import static org.mockito.Mockito.mock;
-import static org.mockito.Mockito.spy;
-import static org.mockito.Mockito.when;
-
-import java.io.IOException;
-import java.util.Map;
-import java.util.Set;
-
+import com.google.common.collect.Sets;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
@@ -38,6 +29,7 @@
 import org.apache.hadoop.yarn.api.records.NodeId;
 import org.apache.hadoop.yarn.api.records.Priority;
 import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.api.records.ResourceInformation;
 import org.apache.hadoop.yarn.api.records.ResourceRequest;
 import org.apache.hadoop.yarn.event.Dispatcher;
 import org.apache.hadoop.yarn.event.Event;
@@ -53,20 +45,28 @@
 import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.ContainerAllocationExpirer;
 import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler;
-import org.apache.hadoop.yarn.server.scheduler.SchedulerRequestKey;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode;
 import org.apache.hadoop.yarn.server.resourcemanager.security.AMRMTokenSecretManager;
 import org.apache.hadoop.yarn.server.resourcemanager.security.ClientToAMTokenSecretManagerInRM;
 import org.apache.hadoop.yarn.server.resourcemanager.security.NMTokenSecretManagerInRM;
 import org.apache.hadoop.yarn.server.resourcemanager.security.RMContainerTokenSecretManager;
+import org.apache.hadoop.yarn.server.scheduler.SchedulerRequestKey;
 import org.apache.hadoop.yarn.server.utils.BuilderUtils;
 import org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator;
 import org.apache.hadoop.yarn.util.resource.Resources;
 import org.mockito.invocation.InvocationOnMock;
 import org.mockito.stubbing.Answer;
-import com.google.common.collect.Sets;
+import java.io.IOException;
+import java.util.Map;
+import java.util.Set;
+
+import static org.mockito.Matchers.any;
+import static org.mockito.Mockito.doReturn;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.spy;
+import static org.mockito.Mockito.when;
 
 public class TestUtils {
   private static final Log LOG = LogFactory.getLog(TestUtils.class);
@@ -456,4 +456,21 @@ public FiCaSchedulerApp getApplicationAttempt(
     cs.submitResourceCommitRequest(clusterResource, csAssignment);
   }
+
+  /**
+   * An easy way to create resources other than memory and vcores for tests.
+   * @param memory memory
+   * @param vcores vcores
+   * @param nameToValues resource types other than memory and vcores.
+   * @return created resource
+   */
+  public static Resource createResource(long memory, int vcores,
+      Map<String, Integer> nameToValues) {
+    Resource res = Resource.newInstance(memory, vcores);
+    for (Map.Entry<String, Integer> entry : nameToValues.entrySet()) {
+      res.setResourceInformation(entry.getKey(), ResourceInformation
+          .newInstance(entry.getKey(), "", entry.getValue()));
+    }
+    return res;
+  }
 }
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java
index da3f1609ff2..283fc00dcea 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java
@@ -116,6 +116,7 @@
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.policies.FifoPolicy;
 import org.apache.hadoop.yarn.server.utils.BuilderUtils;
 import org.apache.hadoop.yarn.util.ControlledClock;
+import org.apache.hadoop.yarn.util.resource.ResourceUtils;
 import org.apache.hadoop.yarn.util.resource.Resources;
 import org.junit.After;
 import org.junit.Assert;
@@ -215,6 +216,7 @@ public void testLoadConfigurationOnInitialize() throws IOException {
     conf.setInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, 512);
     conf.setInt(FairSchedulerConfiguration.RM_SCHEDULER_INCREMENT_ALLOCATION_MB,
         128);
+    ResourceUtils.resetResourceTypes(conf);
     scheduler.init(conf);
     scheduler.start();
     scheduler.reinitialize(conf, resourceManager.getRMContext());
@@ -243,6 +245,7 @@ public void testNonMinZeroResourcesSettings() throws IOException {
       FairSchedulerConfiguration.RM_SCHEDULER_INCREMENT_ALLOCATION_MB, 512);
     conf.setInt(
       FairSchedulerConfiguration.RM_SCHEDULER_INCREMENT_ALLOCATION_VCORES, 2);
+    ResourceUtils.resetResourceTypes(conf);
     scheduler.init(conf);
     scheduler.reinitialize(conf, null);
     Assert.assertEquals(256, scheduler.getMinimumResourceCapability().getMemorySize());
@@ -260,6 +263,7 @@ public void testMinZeroResourcesSettings() throws IOException {
       FairSchedulerConfiguration.RM_SCHEDULER_INCREMENT_ALLOCATION_MB, 512);
     conf.setInt(
       FairSchedulerConfiguration.RM_SCHEDULER_INCREMENT_ALLOCATION_VCORES, 2);
+    ResourceUtils.resetResourceTypes(conf);
     scheduler.init(conf);
     scheduler.reinitialize(conf, null);
     Assert.assertEquals(0, scheduler.getMinimumResourceCapability().getMemorySize());
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestAppPage.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestAppPage.java
index d9ed073e95b..e67e1cb4ca8 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestAppPage.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestAppPage.java
@@ -22,6 +22,7 @@
 import static org.mockito.Mockito.when;
 
 import java.io.IOException;
+import java.util.HashMap;
 
 import org.apache.hadoop.yarn.api.records.ApplicationId;
 import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
@@ -62,9 +63,10 @@ public void testAppBlockRenderWithNullCurrentAppAttempt() throws Exception {
     when(app.getStartTime()).thenReturn(0L);
     when(app.getFinishTime()).thenReturn(0L);
     when(app.createApplicationState()).thenReturn(YarnApplicationState.FAILED);
-
-    RMAppMetrics appMetrics = new RMAppMetrics(
-        Resource.newInstance(0, 0), 0, 0, 0, 0, 0, 0);
+
+    RMAppMetrics appMetrics =
+        new RMAppMetrics(Resource.newInstance(0, 0), 0, 0, new HashMap(),
+            new HashMap());
     when(app.getRMAppMetrics()).thenReturn(appMetrics);
 
     // initialize RM Context, and create RMApp, without creating RMAppAttempt
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebAppFairScheduler.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebAppFairScheduler.java
index 8c00b39c4ba..6bd78c1acbc 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebAppFairScheduler.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebAppFairScheduler.java
@@ -51,6 +51,7 @@
 import java.io.IOException;
 import java.util.Arrays;
+import java.util.HashMap;
 import java.util.List;
 import java.util.concurrent.ConcurrentMap;
@@ -136,8 +137,8 @@
       MockRMApp app = new MockRMApp(i, i, state) {
         @Override
         public RMAppMetrics getRMAppMetrics() {
-          return new RMAppMetrics(Resource.newInstance(0, 0),
-              0, 0, 0, 0, 0, 0);
+          return new RMAppMetrics(Resource.newInstance(0, 0), 0, 0,
+              new HashMap(), new HashMap());
         }
         @Override
         public YarnApplicationState createApplicationState() {
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesApps.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesApps.java
index aea158a0a1e..dc921201b3d 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesApps.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesApps.java
@@ -1551,7 +1551,7 @@ public void verifyAppsXML(NodeList nodes, RMApp app, boolean hasResourceReq)
 
   public void verifyAppInfo(JSONObject info, RMApp app, boolean hasResourceReqs)
       throws JSONException, Exception {
-    int expectedNumberOfElements = 37 + (hasResourceReqs ? 2 : 0);
+    int expectedNumberOfElements = 39 + (hasResourceReqs ? 2 : 0);
     String appNodeLabelExpression = null;
     String amNodeLabelExpression = null;
     if (app.getApplicationSubmissionContext()
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/clientrm/DefaultClientRequestInterceptor.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/clientrm/DefaultClientRequestInterceptor.java
index 71de6b470e9..5fa6a3e491c 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/clientrm/DefaultClientRequestInterceptor.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/clientrm/DefaultClientRequestInterceptor.java
@@ -27,6 +27,8 @@
 import org.apache.hadoop.yarn.api.protocolrecords.CancelDelegationTokenResponse;
 import org.apache.hadoop.yarn.api.protocolrecords.FailApplicationAttemptRequest;
 import org.apache.hadoop.yarn.api.protocolrecords.FailApplicationAttemptResponse;
+import org.apache.hadoop.yarn.api.protocolrecords.GetAllResourceTypeInfoRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.GetAllResourceTypeInfoResponse;
 import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationAttemptReportRequest;
 import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationAttemptReportResponse;
 import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationAttemptsRequest;
@@ -303,6 +305,12 @@ public UpdateApplicationTimeoutsResponse updateApplicationTimeouts(
     return clientRMProxy.updateApplicationTimeouts(request);
   }
 
+  @Override
+  public GetAllResourceTypeInfoResponse getResourceTypeInfo(
+      GetAllResourceTypeInfoRequest request) throws YarnException, IOException {
+    return clientRMProxy.getResourceTypeInfo(request);
+  }
+
   @VisibleForTesting
   public void setRMClient(ApplicationClientProtocol clientRM) {
     this.clientRMProxy = clientRM;
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/clientrm/FederationClientInterceptor.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/clientrm/FederationClientInterceptor.java
index 3a36eec66ac..7f953c74009 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/clientrm/FederationClientInterceptor.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/clientrm/FederationClientInterceptor.java
@@ -32,6 +32,8 @@
 import org.apache.hadoop.yarn.api.protocolrecords.CancelDelegationTokenResponse;
 import org.apache.hadoop.yarn.api.protocolrecords.FailApplicationAttemptRequest;
 import org.apache.hadoop.yarn.api.protocolrecords.FailApplicationAttemptResponse;
+import org.apache.hadoop.yarn.api.protocolrecords.GetAllResourceTypeInfoRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.GetAllResourceTypeInfoResponse;
 import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationAttemptReportRequest;
 import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationAttemptReportResponse;
 import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationAttemptsRequest;
@@ -709,4 +711,9 @@ public UpdateApplicationTimeoutsResponse updateApplicationTimeouts(
     throw new NotImplementedException();
   }
 
+  @Override
+  public GetAllResourceTypeInfoResponse getResourceTypeInfo(
+      GetAllResourceTypeInfoRequest request) throws YarnException, IOException {
+    throw new NotImplementedException();
+  }
 }
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/clientrm/RouterClientRMService.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/clientrm/RouterClientRMService.java
index fd2c610c7fe..92b43d5eeb0 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/clientrm/RouterClientRMService.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/clientrm/RouterClientRMService.java
@@ -38,6 +38,8 @@
 import org.apache.hadoop.yarn.api.protocolrecords.CancelDelegationTokenResponse;
 import org.apache.hadoop.yarn.api.protocolrecords.FailApplicationAttemptRequest;
 import org.apache.hadoop.yarn.api.protocolrecords.FailApplicationAttemptResponse;
+import org.apache.hadoop.yarn.api.protocolrecords.GetAllResourceTypeInfoRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.GetAllResourceTypeInfoResponse;
 import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationAttemptReportRequest;
 import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationAttemptReportResponse;
 import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationAttemptsRequest;
@@ -403,6 +405,13 @@ public UpdateApplicationTimeoutsResponse updateApplicationTimeouts(
     return pipeline.getRootInterceptor().updateApplicationTimeouts(request);
   }
 
+  @Override
+  public GetAllResourceTypeInfoResponse getResourceTypeInfo(
+      GetAllResourceTypeInfoRequest request) throws YarnException, IOException {
+    RequestInterceptorChainWrapper pipeline = getInterceptorChain();
+    return pipeline.getRootInterceptor().getResourceTypeInfo(request);
+  }
+
   private RequestInterceptorChainWrapper getInterceptorChain()
       throws IOException {
     String user = UserGroupInformation.getCurrentUser().getUserName();
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/clientrm/PassThroughClientRequestInterceptor.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/clientrm/PassThroughClientRequestInterceptor.java
index c403bd5006c..76faf947187 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/clientrm/PassThroughClientRequestInterceptor.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/clientrm/PassThroughClientRequestInterceptor.java
@@ -24,6 +24,8 @@
 import org.apache.hadoop.yarn.api.protocolrecords.CancelDelegationTokenResponse;
 import org.apache.hadoop.yarn.api.protocolrecords.FailApplicationAttemptRequest;
 import org.apache.hadoop.yarn.api.protocolrecords.FailApplicationAttemptResponse;
+import org.apache.hadoop.yarn.api.protocolrecords.GetAllResourceTypeInfoRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.GetAllResourceTypeInfoResponse;
 import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationAttemptReportRequest;
 import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationAttemptReportResponse;
 import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationAttemptsRequest;
@@ -264,4 +266,10 @@ public UpdateApplicationTimeoutsResponse updateApplicationTimeouts(
       throws YarnException, IOException {
     return getNextInterceptor().updateApplicationTimeouts(request);
   }
+
+  @Override
+  public GetAllResourceTypeInfoResponse getResourceTypeInfo(
+      GetAllResourceTypeInfoRequest request) throws YarnException, IOException {
+    return getNextInterceptor().getResourceTypeInfo(request);
+  }
 }
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/MiniYARNCluster.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/MiniYARNCluster.java
index 0a66b74342c..611cfcc4d38 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/MiniYARNCluster.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/MiniYARNCluster.java
@@ -96,11 +96,14 @@
 import org.apache.hadoop.yarn.server.timeline.TimelineStore;
 import org.apache.hadoop.yarn.server.timeline.recovery.MemoryTimelineStateStore;
 import org.apache.hadoop.yarn.server.timeline.recovery.TimelineStateStore;
+import org.apache.hadoop.yarn.util.resource.ResourceUtils;
 import org.apache.hadoop.yarn.util.timeline.TimelineUtils;
 import org.apache.hadoop.yarn.webapp.util.WebAppUtils;
 
 import com.google.common.annotations.VisibleForTesting;
 
+import static org.apache.hadoop.yarn.util.resource.TestResourceUtils.TEST_CONF_RESET_RESOURCE_TYPES;
+
 /**
  * Embedded Yarn minicluster for testcases that need to interact with a
  * cluster.
@@ -261,6 +264,10 @@ public void serviceInit(Configuration conf) throws Exception {
     failoverTimeout = conf.getInt(YarnConfiguration.RM_ZK_TIMEOUT_MS,
         YarnConfiguration.DEFAULT_RM_ZK_TIMEOUT_MS);
 
+    if (conf.getBoolean(TEST_CONF_RESET_RESOURCE_TYPES, true)) {
+      ResourceUtils.resetResourceTypes(conf);
+    }
+
     if (useRpc && !useFixedPorts) {
       throw new YarnRuntimeException("Invalid configuration!" +
           " Minicluster can use rpc only when configured to use fixed ports");
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/ResourceProfiles.md hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/ResourceProfiles.md
new file mode 100644
index 00000000000..e7b38e13399
--- /dev/null
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/ResourceProfiles.md
@@ -0,0 +1,79 @@
+<!---
+  Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License. See accompanying LICENSE file.
+-->
+
+Hadoop: YARN Resource Types
+===========================
+
+Overview
+--------
+Resource types support in YARN extends the YARN resource model to a more flexible model which makes it easier to add new countable resource types. It also makes it easier for users to specify the resources a job needs when submitting it.
+
+Resource model of YARN
+----------------------
+The Resource Manager loads a new configuration file named `resource-types.xml` to determine the set of resource types for which scheduling is enabled. A sample XML looks like the following.
+
+```xml
+<configuration>
+  <property>
+    <name>yarn.resource-types</name>
+    <value>resource1, resource2</value>
+  </property>
+
+  <property>
+    <name>yarn.resource-types.resource1.units</name>
+    <value>G</value>
+  </property>
+</configuration>
+```
+
+Similarly, a new configuration file `node-resources.xml` is loaded by the Node Manager; in it the resource capabilities of a node can be specified.
+
+```xml
+<configuration>
+  <property>
+    <name>yarn.nodemanager.resource-type.resource1</name>
+    <value>5G</value>
+  </property>
+
+  <property>
+    <name>yarn.nodemanager.resource-type.resource2</name>
+    <value>2m</value>
+  </property>
+</configuration>
+```
+
+The Node Manager uses these custom resource types and registers its capability with the Resource Manager.
+
+Configurations
+--------------
+
+Note that `resource-types.xml` and `node-resources.xml` also need to be placed in the conf directory if new resources are to be added to YARN.
+
+*In `resource-types.xml`*
+
+| Configuration Property | Value | Description |
+|:---- |:---- |:---- |
+| `yarn.resource-types` | resource1 | Custom resource |
+| `yarn.resource-types.resource1.units` | G | Default unit for resource1 type |
+
+*In `node-resources.xml`*
+
+| Configuration Property | Value | Description |
+|:---- |:---- |:---- |
+| `yarn.nodemanager.resource-type.resource1` | 5G | Resource capability for resource named 'resource1'. |
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/UsingGpus.md hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/UsingGpus.md
new file mode 100644
index 00000000000..f6000e7c35e
--- /dev/null
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/UsingGpus.md
@@ -0,0 +1,230 @@
+<!---
+  Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License. See accompanying LICENSE file.
+-->
+
+# Using GPU On YARN
+# Prerequisites
+
+- As of now, only Nvidia GPUs are supported by YARN.
+- YARN node managers have to be pre-installed with Nvidia drivers.
+- When Docker is used as the container runtime, nvidia-docker 1.0 needs to be installed (the version of nvidia-docker currently supported by YARN).
+
+# Configs
+
+## GPU scheduling
+
+In `resource-types.xml`
+
+Add the following properties:
+
+```
+<configuration>
+  <property>
+     <name>yarn.resource-types</name>
+     <value>yarn.io/gpu</value>
+  </property>
+</configuration>
+```
+
+In `yarn-site.xml`
+
+`DominantResourceCalculator` MUST be configured to enable GPU scheduling/isolation.
+
+For `Capacity Scheduler`, use the following property to configure `DominantResourceCalculator` (in `capacity-scheduler.xml`):
+
+| Property | Default value |
+| --- | --- |
+| yarn.scheduler.capacity.resource-calculator | org.apache.hadoop.yarn.util.resource.DominantResourceCalculator |
+
+
+## GPU Isolation
+
+### In `yarn-site.xml`
+
+```
+<property>
+  <name>yarn.nodemanager.resource-plugins</name>
+  <value>yarn.io/gpu</value>
+</property>
+```
+
+This enables the GPU isolation module on the NodeManager side.
+
+By default, YARN automatically detects and configures GPUs when the above config is set. The following configs need to be set in `yarn-site.xml` only if the admin has specialized requirements.
+
+**1) Allowed GPU Devices**
+
+| Property | Default value |
+| --- | --- |
+| yarn.nodemanager.resource-plugins.gpu.allowed-gpu-devices | auto |
+
+  Specifies the GPU devices which can be managed by the YARN NodeManager
+  (split by comma). The number of GPU devices will be reported to the RM to
+  make scheduling decisions. Set to auto (the default) to let YARN
+  automatically discover GPU resources from the system.
+
+  Manually specify GPU devices if auto-detection fails or if the admin only
+  wants a subset of GPU devices to be managed by YARN. A GPU device is
+  identified by its minor device number and index. A common approach to get
+  the minor device numbers of GPUs is to use `nvidia-smi -q` and search for
+  the `Minor Number` output.
+
+  When minor numbers are specified manually, the admin needs to include the
+  indices of the GPUs as well; the format is
+  `index:minor_number[,index:minor_number...]`. An example of manual
+  specification is `0:0,1:1,2:2,3:4`, which allows the YARN NodeManager to
+  manage the GPU devices with indices `0/1/2/3` and minor numbers `0/1/2/4`.
+
+**2) Executable to discover GPUs**
+
+| Property | value |
+| --- | --- |
+| yarn.nodemanager.resource-plugins.gpu.path-to-discovery-executables | /absolute/path/to/nvidia-smi |
+
+When `yarn.nodemanager.resource-plugins.gpu.allowed-gpu-devices=auto` is specified,
+the YARN NodeManager needs to run a GPU discovery binary (currently only
+`nvidia-smi` is supported) to get GPU-related information.
+When the value is empty (the default), the YARN NodeManager tries to locate
+the discovery executable itself.
+An example of the config value is: `/usr/local/bin/nvidia-smi`
+
+**3) Docker Plugin Related Configs**
+
+The following configs can be customized when users need to run GPU applications inside Docker containers. They're not required if the admin follows the default installation/configuration of `nvidia-docker`.
+
+| Property | Default value |
+| --- | --- |
+| yarn.nodemanager.resource-plugins.gpu.docker-plugin | nvidia-docker-v1 |
+
+Specifies the Docker command plugin for GPU. By default it uses Nvidia Docker v1.0.
+
+| Property | Default value |
+| --- | --- |
+| yarn.nodemanager.resource-plugins.gpu.docker-plugin.nvidia-docker-v1.endpoint | http://localhost:3476/v1.0/docker/cli |
+
+Specifies the endpoint of `nvidia-docker-plugin`. Please see the documentation at https://github.com/NVIDIA/nvidia-docker/wiki for more details.
+
+**4) CGroups mount**
+
+GPU isolation uses the CGroup [devices controller](https://www.kernel.org/doc/Documentation/cgroup-v1/devices.txt) to do per-GPU device isolation. The following config should be added to `yarn-site.xml` to automatically mount CGroup sub-devices; otherwise the admin has to manually create the devices subfolder in order to use this feature.
+
+| Property | Default value |
+| --- | --- |
+| yarn.nodemanager.linux-container-executor.cgroups.mount | true |
+
+
+### In `container-executor.cfg`
+
+In general, the following config needs to be added to `container-executor.cfg`:
+
+```
+[gpu]
+module.enabled=true
+```
+
+When users need to run GPU applications in a non-Docker environment:
+
+```
+[cgroups]
+# This should be same as yarn.nodemanager.linux-container-executor.cgroups.mount-path inside yarn-site.xml
+root=/sys/fs/cgroup
+# This should be same as yarn.nodemanager.linux-container-executor.cgroups.hierarchy inside yarn-site.xml
+yarn-hierarchy=yarn
+```
+
+When users need to run GPU applications in a Docker environment:
+
+**1) Add GPU related devices to the docker section:**
+
+Values are separated by commas; you can get the list by running `ls /dev/nvidia*`:
+
+```
+[docker]
+docker.allowed.devices=/dev/nvidiactl,/dev/nvidia-uvm,/dev/nvidia-uvm-tools,/dev/nvidia1,/dev/nvidia0
+```
+
+**2) Add `nvidia-docker` to the volume-driver whitelist:**
+
+```
+[docker]
+...
+docker.allowed.volume-drivers
+```
+
+**3) Add `nvidia_driver_<version>` to the read-only mounts whitelist:**
+
+```
+[docker]
+...
+docker.allowed.ro-mounts=nvidia_driver_375.66
+```
+
+# Use it
+
+## Distributed-shell + GPU
+
+Distributed shell currently supports specifying additional resource types other than memory and vcores.
+
+### Distributed-shell + GPU without Docker
+
+Run distributed shell without using a Docker container (asks for 2 tasks, each with 3GB memory, 1 vcore and 2 GPU device resources):
+
+```
+yarn jar <path/to/hadoop-yarn-applications-distributedshell.jar> \
+  -jar <path/to/hadoop-yarn-applications-distributedshell.jar> \
+  -shell_command /usr/local/nvidia/bin/nvidia-smi \
+  -container_resources memory-mb=3072,vcores=1,yarn.io/gpu=2 \
+  -num_containers 2
+```
+
+You should be able to see output like the following from the launched container task:
+
+```
+Tue Dec  5 22:21:47 2017
++-----------------------------------------------------------------------------+
+| NVIDIA-SMI 375.66                 Driver Version: 375.66                    |
+|-------------------------------+----------------------+----------------------+
+| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
+| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
+|===============================+======================+======================|
+|   0  Tesla P100-PCIE...  Off  | 0000:04:00.0     Off |                    0 |
+| N/A   30C    P0    24W / 250W |      0MiB / 12193MiB |      0%      Default |
++-------------------------------+----------------------+----------------------+
+|   1  Tesla P100-PCIE...  Off  | 0000:82:00.0     Off |                    0 |
+| N/A   34C    P0    25W / 250W |      0MiB / 12193MiB |      0%      Default |
++-------------------------------+----------------------+----------------------+
+
++-----------------------------------------------------------------------------+
+| Processes:                                                       GPU Memory |
+|  GPU       PID  Type  Process name                               Usage      |
+|=============================================================================|
+|  No running processes found                                                 |
++-----------------------------------------------------------------------------+
+```
+
+### Distributed-shell + GPU with Docker
+
+You can also run distributed shell with a Docker container. `YARN_CONTAINER_RUNTIME_TYPE`/`YARN_CONTAINER_RUNTIME_DOCKER_IMAGE` must be specified to use a Docker container.
+
+```
+yarn jar <path/to/hadoop-yarn-applications-distributedshell.jar> \
+  -jar <path/to/hadoop-yarn-applications-distributedshell.jar> \
+  -shell_env YARN_CONTAINER_RUNTIME_TYPE=docker \
+  -shell_env YARN_CONTAINER_RUNTIME_DOCKER_IMAGE=<docker-image-name> \
+  -shell_command nvidia-smi \
+  -container_resources memory-mb=3072,vcores=1,yarn.io/gpu=2 \
+  -num_containers 2
+```
\ No newline at end of file
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/adapters/yarn-nm-gpu.js hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/adapters/yarn-nm-gpu.js
new file mode 100644
index 00000000000..bf6307a664c
--- /dev/null
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/adapters/yarn-nm-gpu.js
@@ -0,0 +1,33 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import AbstractAdapter from './abstract';
+
+export default AbstractAdapter.extend({
+
+  address: "localBaseAddress",
+  restNameSpace: "node",
+  serverName: "NM",
+
+  urlForFindRecord(id/*, modelName, snapshot*/) {
+    var url = this._buildURL();
+    url = url.replace("{nodeAddress}", id) + "/resources/yarn.io%2Fgpu";
+    return url;
+  }
+
+});
\ No newline at end of file
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/components/donut-chart.js hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/components/donut-chart.js
index ce268117fc6..5236ca05571 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/components/donut-chart.js
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/components/donut-chart.js
@@ -20,6 +20,7 @@
 import Ember from 'ember';
 import BaseChartComponent from 'yarn-ui/components/base-chart-component';
 import ColorUtils from 'yarn-ui/utils/color-utils';
 import Converter from 'yarn-ui/utils/converter';
+import {Entities} from 'yarn-ui/constants';
 
 export default BaseChartComponent.extend({
   /*
@@ -41,8 +42,10 @@
     }
 
     if (!middleValue) {
-      if (this.get("type") === "memory") {
+      if (this.get(Entities.Type) === Entities.Memory) {
         middleValue = Converter.memoryToSimpliedUnit(total);
+      } else if (this.get(Entities.Type) === Entities.Resource) {
+        middleValue = Converter.resourceToSimplifiedUnit(total, this.get(Entities.Unit));
       } else {
         middleValue = total;
       }
@@ -151,7 +154,10 @@
           var value = d.value;
           if (this.get("type") === "memory") {
             value = Converter.memoryToSimpliedUnit(value);
+          } else if (this.get("type") === "resource") {
+            value = Converter.resourceToSimplifiedUnit(value, this.get(Entities.Unit));
           }
+
           return d.label + ' = ' + value + suffix;
         }.bind(this));
     }
@@ -185,10 +191,18 @@
     }
 
     this.renderDonutChart(this.get("data"), this.get("title"), this.get("showLabels"),
-        this.get("middleLabel"), this.get("middleValue"));
this.get("middleLabel"), this.get("middleValue"), this.get("suffix")); }, didInsertElement: function() { + // When parentIdPrefix is specified, use parentidPrefix + name as new parent + // id + if (this.get("parentIdPrefix")) { + var newParentId = this.get("parentIdPrefix") + this.get("id"); + this.set("parentId", newParentId); + console.log(newParentId); + } + this.initChart(); this.draw(); }, diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/components/gpu-donut-chart.js hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/components/gpu-donut-chart.js new file mode 100644 index 00000000000..fa5ca8ac1dc --- /dev/null +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/components/gpu-donut-chart.js @@ -0,0 +1,66 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import DonutChart from 'yarn-ui/components/donut-chart'; +import ColorUtils from 'yarn-ui/utils/color-utils'; + +export default DonutChart.extend({ + draw: function() { + // Construct data + var data = []; + if (this.get("gpu-render-type") === "gpu-memory") { + data.push({ + label: "Used", + value: parseFloat(this.get("gpuInfo").gpuMemoryUsage.usedMemoryMiB), + }); + data.push({ + label: "Available", + value: parseFloat(this.get("gpuInfo").gpuMemoryUsage.availMemoryMiB) + }); + } else if (this.get("gpu-render-type") === "gpu-utilization") { + var utilization = parseFloat(this.get("gpuInfo").gpuUtilizations.overallGpuUtilization); + data.push({ + label: "Utilized", + value: utilization, + }); + data.push({ + label: "Available", + value: 100 - utilization + }); + } + + var colorTargets = this.get("colorTargets"); + if (colorTargets) { + var colorTargetReverse = Boolean(this.get("colorTargetReverse")); + var targets = colorTargets.split(" "); + this.colors = ColorUtils.getColors(data.length, targets, colorTargetReverse); + } + + this.renderDonutChart(data, this.get("title"), this.get("showLabels"), + this.get("middleLabel"), this.get("middleValue"), this.get("suffix")); + }, + + didInsertElement: function() { + // ParentId includes minorNumber + var newParentId = this.get("parentId") + this.get("gpuInfo").minorNumber; + this.set("parentId", newParentId); + + this.initChart(); + this.draw(); + }, +}); \ No newline at end of file diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/constants.js hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/constants.js index d2937a0441f..29ad4bc2d93 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/constants.js +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/constants.js @@ -22,3 +22,16 @@ export default { PARAM_SEPARATOR: '!', }; + +const BASE_UNIT = 1024 + +export const Type = 'type'; 
+export const Memory = 'memory';
+export const Resource = 'resource';
+export const Unit = 'unit';
+export const Entities = {
+  Type: 'type',
+  Memory:'memory',
+  Resource: 'resource',
+  Unit: 'unit'
+}
\ No newline at end of file
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/controllers/yarn-nodes/table.js hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/controllers/yarn-nodes/table.js
index 3fae5961f87..f4bd5788433 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/controllers/yarn-nodes/table.js
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/controllers/yarn-nodes/table.js
@@ -60,7 +60,7 @@
     getCellContent: function(row) {
       var node_id = row.get("id"),
           node_addr = row.get("nodeHTTPAddress"),
-          href = `#/yarn-node/${node_id}/${node_addr}`;
+          href = `#/yarn-node/${node_id}/${node_addr}/info`;
       switch(row.get("nodeState")) {
       case "SHUTDOWN":
       case "LOST":
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/models/cluster-metric.js hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/models/cluster-metric.js
index dcc0c2997d8..d9a5eefd769 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/models/cluster-metric.js
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/models/cluster-metric.js
@@ -43,6 +43,8 @@
   decommissionedNodes: DS.attr('number'),
   rebootedNodes: DS.attr('number'),
   activeNodes: DS.attr('number'),
+  totalUsedResourcesAcrossPartition: DS.attr('object'),
+  totalClusterResourcesAcrossPartition: DS.attr('object'),
 
   getFinishedAppsDataForDonutChart: function() {
     var arr = [];
@@ -135,4 +137,71 @@
     return arr;
   }.property("allocatedVirtualCores", "reservedVirtualCores", "availableVirtualCores"),
+
+  getResourceTypes: function() {
+    var types = [];
+    if (this.get("totalClusterResourcesAcrossPartition")) {
+
+      console.log(types);
+    }
+  }.property("totalClusterResourcesAcrossPartition"),
+
+  /*
+   * Returned format
+   * [
+   *  {
+   *    name: <resource-name>,
+   *    unit: <resource-unit>,
+   *    [
+   *      {
+   *        label: <label>,
+   *        value: <value>
+   *      },
+   *      ...
+   *    ]
+   *  },
+   *  ...
+   * ]
+   */
+      "<p>Rack: " + this.get("rack") + '</p>' +
+      "<p>Host: " + this.get("nodeHostName") + '</p>';
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/router.js hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/router.js
index 901314289f2..1a01b863756 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/router.js
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/router.js
@@ -37,7 +37,10 @@ Router.map(function() {
     this.route('apps');
   });
   this.route('yarn-nodes-heatmap');
-  this.route('yarn-node', { path: '/yarn-node/:node_id/:node_addr' });
+  this.route('yarn-node', { path: '/yarn-node/:node_id/:node_addr' }, function() {
+    this.route("info");
+    this.route("yarn-nm-gpu");
+  });
   this.route('yarn-node-apps', { path: '/yarn-node-apps/:node_id/:node_addr' });
   this.route('yarn-node-app',
       { path: '/yarn-node-app/:node_id/:node_addr/:app_id' });
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/routes/cluster-overview.js hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/routes/cluster-overview.js
index d03ea0daa2d..254ece43479 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/routes/cluster-overview.js
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/routes/cluster-overview.js
@@ -31,7 +31,7 @@
       queues: this.store.query("yarn-queue.yarn-queue", {}).then((model) => {
         let type = model.get('firstObject').get('type');
         return this.store.query("yarn-queue." + type + "-queue", {});
-      }),
+      })
     });
   },
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/routes/yarn-node.js hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/routes/yarn-node.js
index 3d548460d4f..7ce615c83fe 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/routes/yarn-node.js
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/routes/yarn-node.js
@@ -25,6 +25,7 @@
     // Fetches data from both NM and RM. RM is queried to get node usage info.
     return Ember.RSVP.hash({
       nodeInfo: { id: param.node_id, addr: param.node_addr },
+      nmGpuInfo: this.store.findRecord('yarn-nm-gpu', param.node_addr, {reload:true}),
       node: this.store.findRecord('yarn-node', param.node_addr, {reload: true}),
       rmNode: this.store.findRecord('yarn-rm-node', param.node_id, {reload: true})
     });
@@ -33,5 +34,6 @@
   unloadAll() {
     this.store.unloadAll('yarn-node');
     this.store.unloadAll('yarn-rm-node');
+    this.store.unloadAll('yarn-nm-gpu');
   }
 });
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/routes/yarn-node/yarn-nm-gpu.js hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/routes/yarn-node/yarn-nm-gpu.js
new file mode 100644
index 00000000000..38ae5d15f4c
--- /dev/null
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/routes/yarn-node/yarn-nm-gpu.js
@@ -0,0 +1,22 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import Ember from 'ember';
+
+export default Ember.Route.extend({
+});
\ No newline at end of file
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/serializers/yarn-nm-gpu.js hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/serializers/yarn-nm-gpu.js
new file mode 100644
index 00000000000..3567c683013
--- /dev/null
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/serializers/yarn-nm-gpu.js
@@ -0,0 +1,43 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import DS from 'ember-data';
+
+export default DS.JSONAPISerializer.extend({
+  internalNormalizeSingleResponse(store, primaryModelClass, payload, id) {
+    if (payload.nodeInfo) {
+      payload = payload.nodeInfo;
+    }
+
+    var fixedPayload = {
+      id: id,
+      type: primaryModelClass.modelName,
+      attributes: {
+        info: payload
+      }
+    };
+    return fixedPayload;
+  },
+
+  normalizeSingleResponse(store, primaryModelClass, payload, id/*, requestType*/) {
+    // payload is of the form {"nodeInfo":{}}
+    var p = this.internalNormalizeSingleResponse(store,
+        primaryModelClass, payload, id);
+    return { data: p };
+  },
+});
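A usage sketch of the normalization above; the node address and GPU field values are illustrative, while the field names (totalGpuDevices, gpuDeviceInformation) are the ones the GPU page later in this patch reads via model.nmGpuInfo.info:

    // Input REST payload, in the {"nodeInfo": {...}} shape the comment describes.
    var payload = {
      nodeInfo: {
        totalGpuDevices: 2,
        gpuDeviceInformation: { gpus: [] }
      }
    };

    // internalNormalizeSingleResponse unwraps payload.nodeInfo and stores the
    // whole object under a single "info" attribute, so normalizeSingleResponse
    // hands the store this JSON:API document:
    var normalized = {
      data: {
        id: "localhost:8042",          // illustrative node address
        type: "yarn-nm-gpu",
        attributes: { info: payload.nodeInfo }
      }
    };
    console.log(normalized.data.attributes.info.totalGpuDevices); // 2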
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/serializers/yarn-queue/capacity-queue.js hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/serializers/yarn-queue/capacity-queue.js
index c7350ef03bc..7626598e092 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/serializers/yarn-queue/capacity-queue.js
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/serializers/yarn-queue/capacity-queue.js
@@ -72,6 +72,7 @@ export default DS.JSONAPISerializer.extend({
         preemptionDisabled: payload.preemptionDisabled,
         numPendingApplications: payload.numPendingApplications,
         numActiveApplications: payload.numActiveApplications,
+        resources: payload.resources,
         type: "capacity",
       },
       // Relationships
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/serializers/yarn-rm-node.js hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/serializers/yarn-rm-node.js
index 1c6d1be859a..a3a1d59168f 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/serializers/yarn-rm-node.js
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/serializers/yarn-rm-node.js
@@ -41,7 +41,9 @@ export default DS.JSONAPISerializer.extend({
         usedVirtualCores: payload.usedVirtualCores,
         availableVirtualCores: payload.availableVirtualCores,
         version: payload.version,
-        nodeLabels: payload.nodeLabels
+        nodeLabels: payload.nodeLabels,
+        usedResource: payload.used,
+        availableResource: payload.avail
       }
     };
     return fixedPayload;
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/templates/cluster-overview.hbs hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/templates/cluster-overview.hbs
index e549ce568a2..ff4682a5b88 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/templates/cluster-overview.hbs
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/templates/cluster-overview.hbs
@@ -90,41 +90,71 @@
-    <div class="col-lg-4 container-fluid">
-      <div class="panel panel-default">
-        <div class="panel-heading">
-          Resource - Memory
-        </div>
-        <div class="container-fluid" id="mem-donut-chart">
-          {{donut-chart data=model.clusterMetrics.firstObject.getMemoryDataForDonutChart
-          showLabels=true
-          parentId="mem-donut-chart"
-          ratio=0.6
-          maxHeight=350
-          colorTargets="good"
-          colorTargetReverse=true
-          type="memory"}}
-        </div>
-      </div>
-    </div>
-
-    <div class="col-lg-4 container-fluid">
-      <div class="panel panel-default">
-        <div class="panel-heading">
-          Resource - VCores
-        </div>
-        <div class="container-fluid" id="vcore-donut-chart">
-          {{donut-chart data=model.clusterMetrics.firstObject.getVCoreDataForDonutChart
-          showLabels=true
-          parentId="vcore-donut-chart"
-          ratio=0.6
-          maxHeight=350
-          colorTargets="good"
-          colorTargetReverse=true}}
-        </div>
-      </div>
-    </div>
+    {{#if model.clusterMetrics.firstObject.getAllResourceTypesDonutChart}}
+      {{#each
+        model.clusterMetrics.firstObject.getAllResourceTypesDonutChart as |perTypeUsage|}}
+        <div class="col-lg-4 container-fluid">
+          <div class="panel panel-default">
+            <div class="panel-heading">
+              {{perTypeUsage.name}} - Usages
+            </div>
+            <div class="container-fluid" id="resource-type-{{perTypeUsage.id}}">
+              {{donut-chart
+                data=perTypeUsage.data
+                showLabels=true
+                parentIdPrefix="resource-type-"
+                id=perTypeUsage.id
+                ratio=0.6
+                unit=perTypeUsage.unit
+                type="resource"
+                maxHeight=350
+                colorTargets="good"
+                colorTargetReverse=true}}
+            </div>
+          </div>
+        </div>
+      {{/each}}
+    {{else}}
+      <div class="col-lg-4 container-fluid">
+        <div class="panel panel-default">
+          <div class="panel-heading">
+            Resource - Memory
+          </div>
+          <div class="container-fluid" id="mem-donut-chart">
+            {{donut-chart
+              data=model.clusterMetrics.firstObject.getMemoryDataForDonutChart
+              showLabels=true
+              parentId="mem-donut-chart"
+              ratio=0.6
+              maxHeight=350
+              colorTargets="good"
+              colorTargetReverse=true
+              type="memory"}}
+          </div>
+        </div>
+      </div>
+
+      <div class="col-lg-4 container-fluid">
+        <div class="panel panel-default">
+          <div class="panel-heading">
+            Resource - VCores
+          </div>
+          <div class="container-fluid" id="vcore-donut-chart">
+            {{donut-chart
+              data=model.clusterMetrics.firstObject.getVCoreDataForDonutChart
+              showLabels=true
+              parentId="vcore-donut-chart"
+              ratio=0.6
+              maxHeight=350
+              colorTargets="good"
+              colorTargetReverse=true}}
+          </div>
+        </div>
+      </div>
+    {{/if}}
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/templates/components/node-menu-panel.hbs hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/templates/components/node-menu-panel.hbs
index d2486c9ff6f..966e408d2cf 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/templates/components/node-menu-panel.hbs
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/templates/components/node-menu-panel.hbs
@@ -24,8 +24,8 @@
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/templates/components/yarn-nm-gpu-info.hbs hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/templates/components/yarn-nm-gpu-info.hbs
new file mode 100644
index 00000000000..4118b1e7c81
--- /dev/null
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/templates/components/yarn-nm-gpu-info.hbs
@@ -0,0 +1,69 @@
+{{!
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+}}
+
+<div class="panel panel-default">
+  <div class="panel-heading">
+    Gpu Information - (Minor
+    Number {{gpu.minorNumber}})
+  </div>
+  <table class="table">
+    <tbody>
+      <tr>
+        <td>Product Name</td>
+        <td>{{gpu.productName}}</td>
+      </tr>
+      <tr>
+        <td>UUID</td>
+        <td>{{gpu.uuid}}</td>
+      </tr>
+      <tr>
+        <td>Current Temperature</td>
+        <td>{{gpu.temperature.currentGpuTemp}}</td>
+      </tr>
+      <tr>
+        <td>Max Temperature</td>
+        <td>{{gpu.temperature.maxGpuTemp}}</td>
+      </tr>
+    </tbody>
+  </table>
+
+  <div class="col-md-5 container-fluid">
+    {{gpu-donut-chart gpuInfo=gpu
+    showLabels=true
+    parentId="mem-donut-chart"
+    middleLabel = "Gpu Memory"
+    ratio=0.6
+    type="memory"
+    gpu-render-type = "gpu-memory"
+    colorTargets="good"
+    colorTargetReverse=true
+    maxHeight=350}}
+  </div>
+
+  <div class="col-md-5 container-fluid">
+    {{gpu-donut-chart gpuInfo=gpu
+    showLabels=true
+    parentId="utilization-donut-chart"
+    middleLabel = "Gpu Utilization"
+    ratio=0.6
+    gpu-render-type = "gpu-utilization"
+    colorTargets="good"
+    colorTargetReverse=true
+    suffix="%"
+    maxHeight=350}}
+  </div>
+</div>
\ No newline at end of file
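The bindings above imply the shape of the gpu object each caller must hand in; an illustrative sketch (all values invented, only the bound field names are taken from the template):

    // Illustrative gpu entry matching the component's bindings.
    var gpu = {
      minorNumber: 0,
      productName: "Tesla V100-PCIE-16GB",
      uuid: "GPU-8e9f0f52-2ea4-11e7-afd8-98f2b3e5cfa2",
      temperature: {
        currentGpuTemp: "34 C",
        maxGpuTemp: "85 C"
      }
      // gpu-donut-chart additionally reads memory and utilization figures for
      // the two charts; their exact field names are not shown in this hunk.
    };
    console.log(gpu.productName);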
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/templates/yarn-node-apps.hbs hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/templates/yarn-node-apps.hbs
index 52f0c86c8e8..919e54df545 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/templates/yarn-node-apps.hbs
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/templates/yarn-node-apps.hbs
@@ -20,7 +20,7 @@
-    {{node-menu-panel path="yarn-node-apps" nodeAddr=model.nodeInfo.addr nodeId=model.nodeInfo.id}}
+    {{node-menu-panel path="yarn-node-apps" nodeAddr=model.nodeInfo.addr nodeId=model.nodeInfo.id nmGpuInfo=model.nmGpuInfo}}
     {{#if model.apps}}
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/templates/yarn-node-containers.hbs hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/templates/yarn-node-containers.hbs
index f520c46b9b4..1f312722072 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/templates/yarn-node-containers.hbs
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/templates/yarn-node-containers.hbs
@@ -20,7 +20,7 @@
-    {{node-menu-panel path="yarn-node-containers" nodeAddr=model.nodeInfo.addr nodeId=model.nodeInfo.id}}
+    {{node-menu-panel path="yarn-node-containers" nodeAddr=model.nodeInfo.addr nodeId=model.nodeInfo.id nmGpuInfo=model.nmGpuInfo}}
     {{#if model.containers}}
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/templates/yarn-node.hbs hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/templates/yarn-node.hbs
deleted file mode 100644
index 1e8549bd87f..00000000000
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/templates/yarn-node.hbs
+++ /dev/null
@@ -1,125 +0,0 @@
-{{!--
-  Licensed to the Apache Software Foundation (ASF) under one
-  or more contributor license agreements. See the NOTICE file
-  distributed with this work for additional information
-  regarding copyright ownership. The ASF licenses this file
-  to you under the Apache License, Version 2.0 (the
-  "License"); you may not use this file except in compliance
-  with the License. You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing, software
-  distributed under the License is distributed on an "AS IS" BASIS,
-  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-  See the License for the specific language governing permissions and
-  limitations under the License.
---}}
-
-{{breadcrumb-bar breadcrumbs=breadcrumbs}}
-
-<div class="col-md-12 container-fluid">
-  <div class="row">
-
-    {{node-menu-panel path="yarn-node" nodeId=model.rmNode.id nodeAddr=model.node.id}}
-
-    <div class="col-md-10 container-fluid">
-      <div class="row">
-        <div class="panel panel-default">
-          <div class="panel-heading">Node Information: {{model.rmNode.id}}</div>
-          <table class="display table table-striped table-bordered">
-            <tbody>
-              <tr><td>Total Vmem allocated for Containers</td><td>{{divide num=model.node.totalVmemAllocatedContainersMB den=1024}} GB</td></tr>
-              <tr><td>Vmem enforcement enabled</td><td>{{model.node.vmemCheckEnabled}}</td></tr>
-              <tr><td>Total Pmem allocated for Containers</td><td>{{divide num=model.node.totalPmemAllocatedContainersMB den=1024}} GB</td></tr>
-              <tr><td>Pmem enforcement enabled</td><td>{{model.node.pmemCheckEnabled}}</td></tr>
-              <tr><td>Total VCores allocated for Containers</td><td>{{model.node.totalVCoresAllocatedContainers}}</td></tr>
-              <tr><td>Node Healthy Status</td><td>{{model.node.nodeHealthy}}</td></tr>
-              <tr><td>Last Node Health Report Time</td><td>{{model.node.lastNodeUpdateTime}}</td></tr>
-              <tr><td>Node Health Report</td><td>{{model.node.healthReport}}</td></tr>
-              {{#if model.node.nmStartupTime}}
-                <tr><td>Node Manager Start Time</td><td>{{model.node.nmStartupTime}}</td></tr>
-              {{/if}}
-              <tr><td>Node Manager Version</td><td>{{model.node.nodeManagerBuildVersion}}</td></tr>
-              <tr><td>Hadoop Version</td><td>{{model.node.hadoopBuildVersion}}</td></tr>
-            </tbody>
-          </table>
-        </div>
-      </div>
-
-      <div class="row">
-        <div class="col-lg-4 container-fluid">
-          <div class="panel panel-default">
-            <div class="panel-heading">
-              Resource - Memory
-            </div>
-            <div class="container-fluid" id="mem-donut-chart">
-              {{donut-chart data=model.rmNode.getMemoryDataForDonutChart
-              showLabels=true
-              parentId="mem-donut-chart"
-              ratio=0.6
-              type="memory"
-              colorTargets="good"
-              colorTargetReverse=true
-              maxHeight=350}}
-            </div>
-          </div>
-        </div>
-
-        <div class="col-lg-4 container-fluid">
-          <div class="panel panel-default">
-            <div class="panel-heading">
-              Resource - VCores
-            </div>
-            <div class="container-fluid" id="vcore-donut-chart">
-              {{donut-chart data=model.rmNode.getVCoreDataForDonutChart
-              showLabels=true
-              parentId="vcore-donut-chart"
-              ratio=0.6
-              colorTargets="good"
-              colorTargetReverse=true
-              maxHeight=350}}
-            </div>
-          </div>
-        </div>
-      </div>
-    </div>
-  </div>
-</div>
-{{outlet}}
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/templates/yarn-node/info.hbs hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/templates/yarn-node/info.hbs
new file mode 100644
index 00000000000..ad411c096a1
--- /dev/null
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/templates/yarn-node/info.hbs
@@ -0,0 +1,154 @@
+{{!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements. See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership. The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License. You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+--}}
+
+{{breadcrumb-bar breadcrumbs=breadcrumbs}}
+
+<div class="col-md-12 container-fluid">
+  <div class="row">
+
+    {{node-menu-panel path="yarn-node" nodeId=model.rmNode.id
+        nodeAddr=model.node.id nmGpuInfo=model.nmGpuInfo}}
+
+    <div class="col-md-10 container-fluid">
+      <div class="row">
+        <div class="panel panel-default">
+          <div class="panel-heading">Node
+            Information: {{model.rmNode.id}}</div>
+          <table class="display table table-striped table-bordered">
+            <tbody>
+              <tr><td>Total Vmem allocated for Containers</td><td>{{divide num=model.node.totalVmemAllocatedContainersMB den=1024}} GB</td></tr>
+              <tr><td>Vmem enforcement enabled</td><td>{{model.node.vmemCheckEnabled}}</td></tr>
+              <tr><td>Total Pmem allocated for Containers</td><td>{{divide num=model.node.totalPmemAllocatedContainersMB den=1024}} GB</td></tr>
+              <tr><td>Pmem enforcement enabled</td><td>{{model.node.pmemCheckEnabled}}</td></tr>
+              <tr><td>Total VCores allocated for Containers</td><td>{{model.node.totalVCoresAllocatedContainers}}</td></tr>
+              <tr><td>Node Healthy Status</td><td>{{model.node.nodeHealthy}}</td></tr>
+              <tr><td>Last Node Health Report Time</td><td>{{model.node.lastNodeUpdateTime}}</td></tr>
+              <tr><td>Node Health Report</td><td>{{model.node.healthReport}}</td></tr>
+              {{#if model.node.nmStartupTime}}
+                <tr><td>Node Manager Start Time</td><td>{{model.node.nmStartupTime}}</td></tr>
+              {{/if}}
+              <tr><td>Node Manager Version</td><td>{{model.node.nodeManagerBuildVersion}}</td></tr>
+              <tr><td>Hadoop Version</td><td>{{model.node.hadoopBuildVersion}}</td></tr>
+            </tbody>
+          </table>
+        </div>
+      </div>
+
+      <div class="row">
+        <div class="col-lg-4 container-fluid">
+          <div class="panel panel-default">
+            <div class="panel-heading">
+              Resource - Memory
+            </div>
+            <div class="container-fluid" id="mem-donut-chart">
+              {{donut-chart data=model.rmNode.getMemoryDataForDonutChart
+              showLabels=true
+              parentId="mem-donut-chart"
+              ratio=0.6
+              type="memory"
+              colorTargets="good"
+              colorTargetReverse=true
+              maxHeight=350}}
+            </div>
+          </div>
+        </div>
+
+        <div class="col-lg-4 container-fluid">
+          <div class="panel panel-default">
+            <div class="panel-heading">
+              Resource - VCores
+            </div>
+            <div class="container-fluid" id="vcore-donut-chart">
+              {{donut-chart data=model.rmNode.getVCoreDataForDonutChart
+              showLabels=true
+              parentId="vcore-donut-chart"
+              ratio=0.6
+              colorTargets="good"
+              colorTargetReverse=true
+              maxHeight=350}}
+            </div>
+          </div>
+        </div>
+      </div>
+
+      {{#if model.nmGpuInfo}}
+        <div class="row">
+          <div class="col-lg-4 container-fluid">
+            <div class="panel panel-default">
+              <div class="panel-heading">
+                Resources - yarn.io/gpu
+              </div>
+              <div class="container-fluid" id="gpu-donut-chart">
+                {{donut-chart data=model.rmNode.getGpuDataForDonutChart
+                showLabels=true
+                parentId="gpu-donut-chart"
+                ratio=0.6
+                colorTargets="good"
+                colorTargetReverse=true
+                maxHeight=350}}
+              </div>
+            </div>
+          </div>
+        </div>
+      {{/if}}
+    </div>
+  </div>
+</div>
+{{outlet}}
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/templates/yarn-node/yarn-nm-gpu.hbs hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/templates/yarn-node/yarn-nm-gpu.hbs
new file mode 100644
index 00000000000..0464cc8db50
--- /dev/null
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/templates/yarn-node/yarn-nm-gpu.hbs
@@ -0,0 +1,57 @@
+{{!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements. See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership. The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License. You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+--}}
+
+{{breadcrumb-bar breadcrumbs=breadcrumbs}}
+
+<div class="col-md-12 container-fluid">
+  <div class="row">
+
+    {{node-menu-panel path="yarn-node" nodeId=model.rmNode.id
+        nodeAddr=model.node.id nmGpuInfo=model.nmGpuInfo}}
+    {{#if model.nmGpuInfo.info.totalGpuDevices}}
+
+      <div class="col-md-10 container-fluid">
+        <div class="panel panel-default">
+          <div class="panel-heading">Gpu Information</div>
+          <table class="display table table-striped table-bordered">
+            <tbody>
+              <tr><td>Vendor</td><td>NVIDIA</td></tr>
+              <tr><td>Driver Version</td><td>{{model.nmGpuInfo.info.gpuDeviceInformation.driverVersion}}</td></tr>
+              <tr><td>Total Number Of Gpus</td><td>{{model.nmGpuInfo.info.totalGpuDevices.length}}</td></tr>
+            </tbody>
+          </table>
+
+          {{#each model.nmGpuInfo.info.gpuDeviceInformation.gpus as |gpu|}}
+            {{yarn-nm-gpu-info gpu=gpu}}
+          {{/each}}
+        </div>
+      </div>
+    {{else}}
+      No GPUs are found on this node.
+    {{/if}}
+  </div>
+</div>
\ No newline at end of file
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/utils/converter.js hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/utils/converter.js
index 7c9a1f81781..e47edad8494 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/utils/converter.js
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/utils/converter.js
@@ -130,6 +130,57 @@ export default {
     }
     return value.toFixed(1) + " " + unit;
   },
+  resourceToSimplifiedUnit: function (value, unit) {
+    // First convert the value to the base unit ("").
+    var normalizedValue = value;
+    if (unit === "Ki") {
+      normalizedValue = normalizedValue * 1024;
+    } else if (unit === "Mi") {
+      normalizedValue = normalizedValue * 1024 * 1024;
+    } else if (unit === "Gi") {
+      normalizedValue = normalizedValue * 1024 * 1024 * 1024;
+    } else if (unit === "Ti") {
+      normalizedValue = normalizedValue * 1024 * 1024 * 1024 * 1024;
+    } else if (unit === "Pi") {
+      normalizedValue = normalizedValue * 1024 * 1024 * 1024 * 1024 * 1024;
+    } else if (unit === "K" || unit === "k") {
+      normalizedValue = normalizedValue * 1000;
+    } else if (unit === "M" || unit === "m") {
+      normalizedValue = normalizedValue * 1000 * 1000;
+    } else if (unit === "G" || unit === "g") {
+      normalizedValue = normalizedValue * 1000 * 1000 * 1000;
+    } else if (unit === "T" || unit === "t") {
+      normalizedValue = normalizedValue * 1000 * 1000 * 1000 * 1000;
+    } else if (unit === "P" || unit === "p") {
+      normalizedValue = normalizedValue * 1000 * 1000 * 1000 * 1000 * 1000;
+    }
+
+    // From the base unit (""), convert to the most human-readable unit
+    // (the largest unit for which the value stays >= 1024 * 0.9).
+    var finalUnit = "";
+    if (normalizedValue / 1024 >= 0.9) {
+      normalizedValue = normalizedValue / 1024;
+      finalUnit = "Ki";
+    }
+    if (normalizedValue / 1024 >= 0.9) {
+      normalizedValue = normalizedValue / 1024;
+      finalUnit = "Mi";
+    }
+    if (normalizedValue / 1024 >= 0.9) {
+      normalizedValue = normalizedValue / 1024;
+      finalUnit = "Gi";
+    }
+    if (normalizedValue / 1024 >= 0.9) {
+      normalizedValue = normalizedValue / 1024;
+      finalUnit = "Ti";
+    }
+    if (normalizedValue / 1024 >= 0.9) {
+      normalizedValue = normalizedValue / 1024;
+      finalUnit = "Pi";
+    }
+
+    return normalizedValue.toFixed(1) + " " + finalUnit;
+  },
   msToElapsedTimeUnit: function(millisecs, short) {
     var seconds = Math.floor(millisecs / 1000);
     var days = Math.floor(seconds / (3600 * 24));