diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskAttemptImpl.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskAttemptImpl.java index a603a80..7dd91ce 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskAttemptImpl.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskAttemptImpl.java @@ -543,6 +542,8 @@ public abstract class TaskAttemptImpl implements getMemoryRequired(conf, taskId.getTaskType())); this.resourceCapability.setVirtualCores( getCpuRequired(conf, taskId.getTaskType())); + this.resourceCapability.setGpuCores( + getGpuRequired(conf, taskId.getTaskType())); this.dataLocalHosts = resolveHosts(dataLocalHosts); RackResolver.init(conf); @@ -589,6 +590,21 @@ public abstract class TaskAttemptImpl implements return vcores; } + private int getGpuRequired(Configuration conf, TaskType taskType) { + int gcores = 0; + if (taskType == TaskType.MAP) { + gcores = + conf.getInt(MRJobConfig.MAP_GPU_CORES, + MRJobConfig.DEFAULT_MAP_GPU_CORES); + } else if (taskType == TaskType.REDUCE) { + gcores = + conf.getInt(MRJobConfig.REDUCE_GPU_CORES, + MRJobConfig.DEFAULT_REDUCE_GPU_CORES); + } + + return gcores; + } + /** * Create a {@link LocalResource} record with all the given parameters. 
*/ @@ -1284,6 +1300,7 @@ public abstract class TaskAttemptImpl implements int mbRequired = taskAttempt.getMemoryRequired(taskAttempt.conf, taskType); int vcoresRequired = taskAttempt.getCpuRequired(taskAttempt.conf, taskType); + int gcoresRequired = taskAttempt.getGpuRequired(taskAttempt.conf, taskType); int minSlotMemSize = taskAttempt.conf.getInt( YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, @@ -1297,11 +1314,13 @@ public abstract class TaskAttemptImpl implements jce.addCounterUpdate(JobCounter.SLOTS_MILLIS_MAPS, simSlotsRequired * duration); jce.addCounterUpdate(JobCounter.MB_MILLIS_MAPS, duration * mbRequired); jce.addCounterUpdate(JobCounter.VCORES_MILLIS_MAPS, duration * vcoresRequired); + jce.addCounterUpdate(JobCounter.GCORES_MILLIS_MAPS, duration * gcoresRequired); jce.addCounterUpdate(JobCounter.MILLIS_MAPS, duration); } else { jce.addCounterUpdate(JobCounter.SLOTS_MILLIS_REDUCES, simSlotsRequired * duration); jce.addCounterUpdate(JobCounter.MB_MILLIS_REDUCES, duration * mbRequired); jce.addCounterUpdate(JobCounter.VCORES_MILLIS_REDUCES, duration * vcoresRequired); + jce.addCounterUpdate(JobCounter.GCORES_MILLIS_REDUCES, duration * gcoresRequired); jce.addCounterUpdate(JobCounter.MILLIS_REDUCES, duration); } } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMContainerAllocator.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMContainerAllocator.java index dc5198b..23d5cb9 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMContainerAllocator.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMContainerAllocator.java @@ -351,7 +350,8 @@ public class RMContainerAllocator extends 
RMContainerRequestor if (mapResourceRequest.getMemory() > supportedMaxContainerCapability .getMemory() || mapResourceRequest.getVirtualCores() > supportedMaxContainerCapability - .getVirtualCores()) { + .getVirtualCores() + || mapResourceRequest.getGpuCores() > supportedMaxContainerCapability.getGpuCores()) { String diagMsg = "MAP capability required is more than the supported " + "max container capability in the cluster. Killing the Job. mapResourceRequest: " @@ -366,6 +366,7 @@ public class RMContainerAllocator extends RMContainerRequestor reqEvent.getCapability().setMemory(mapResourceRequest.getMemory()); reqEvent.getCapability().setVirtualCores( mapResourceRequest.getVirtualCores()); + reqEvent.getCapability().setGpuCores(mapResourceRequest.getGpuCores()); scheduledRequests.addMap(reqEvent);//maps are immediately scheduled } else { if (reduceResourceRequest.equals(Resources.none())) { @@ -378,7 +379,8 @@ public class RMContainerAllocator extends RMContainerRequestor if (reduceResourceRequest.getMemory() > supportedMaxContainerCapability .getMemory() || reduceResourceRequest.getVirtualCores() > supportedMaxContainerCapability - .getVirtualCores()) { + .getVirtualCores() + || reduceResourceRequest.getGpuCores() > supportedMaxContainerCapability.getGpuCores()) { String diagMsg = "REDUCE capability required is more than the " + "supported max container capability in the cluster. 
Killing the " @@ -394,6 +396,7 @@ public class RMContainerAllocator extends RMContainerRequestor reqEvent.getCapability().setMemory(reduceResourceRequest.getMemory()); reqEvent.getCapability().setVirtualCores( reduceResourceRequest.getVirtualCores()); + reqEvent.getCapability().setGpuCores(reduceResourceRequest.getGpuCores()); if (reqEvent.getEarlierAttemptFailed()) { //add to the front of queue for fail fast pendingReduces.addFirst(new ContainerRequest(reqEvent, diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/ResourceCalculatorUtils.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/ResourceCalculatorUtils.java index b9bc8b5..781fbcb 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/ResourceCalculatorUtils.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/ResourceCalculatorUtils.java @@ -35,8 +35,15 @@ public class ResourceCalculatorUtils { public static int computeAvailableContainers(Resource available, Resource required, EnumSet resourceTypes) { if (resourceTypes.contains(SchedulerResourceTypes.CPU)) { - return Math.min(available.getMemory() / required.getMemory(), - available.getVirtualCores() / required.getVirtualCores()); + if (required.getGpuCores() == 0) { + return Math.min(available.getMemory() / required.getMemory(), + available.getVirtualCores() / required.getVirtualCores()); + } + else { + return Math.min(available.getMemory() / required.getMemory(), + Math.min(available.getVirtualCores() / required.getVirtualCores(), + available.getGpuCores() / required.getGpuCores())); + } } return available.getMemory() / required.getMemory(); } @@ -44,8 +51,15 @@ public class ResourceCalculatorUtils { public 
static int divideAndCeilContainers(Resource required, Resource factor, EnumSet resourceTypes) { if (resourceTypes.contains(SchedulerResourceTypes.CPU)) { - return Math.max(divideAndCeil(required.getMemory(), factor.getMemory()), - divideAndCeil(required.getVirtualCores(), factor.getVirtualCores())); + if (factor.getGpuCores() == 0) { + return Math.max(divideAndCeil(required.getMemory(), factor.getMemory()), + divideAndCeil(required.getVirtualCores(), factor.getVirtualCores())); + } + else { + return Math.max(divideAndCeil(required.getMemory(), factor.getMemory()), + Math.max(divideAndCeil(required.getVirtualCores(), factor.getVirtualCores()), + divideAndCeil(required.getGpuCores(), factor.getGpuCores()))); + } } return divideAndCeil(required.getMemory(), factor.getMemory()); } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/MRApp.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/MRApp.java index 3100d12..26c8a48 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/MRApp.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/MRApp.java @@ -269,7 +269,7 @@ public class MRApp extends MRAppMaster { this.clusterInfo.getMaxContainerCapability()); } else { getContext().getClusterInfo().setMaxContainerCapability( - Resource.newInstance(10240, 1)); + Resource.newInstance(10240, 1, 1)); } } @@ -568,7 +568,7 @@ public class MRApp extends MRAppMaster { ContainerId.newContainerId(getContext().getApplicationAttemptId(), containerCount++); NodeId nodeId = NodeId.newInstance(NM_HOST, NM_PORT); - Resource resource = Resource.newInstance(1234, 2); + Resource resource = Resource.newInstance(1234, 2, 2); ContainerTokenIdentifier 
containerTokenIdentifier = new ContainerTokenIdentifier(cId, nodeId.toString(), "user", resource, System.currentTimeMillis() + 10000, 42, 42, diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/MRAppBenchmark.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/MRAppBenchmark.java index 744ca10..c50cd84 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/MRAppBenchmark.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/MRAppBenchmark.java @@ -205,7 +205,7 @@ public class MRAppBenchmark { RegisterApplicationMasterResponse response = Records.newRecord(RegisterApplicationMasterResponse.class); response.setMaximumResourceCapability(Resource.newInstance( - 10240, 1)); + 10240, 1, 0)); return response; } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestJobImpl.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestJobImpl.java index cae9663..9e07287 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestJobImpl.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestJobImpl.java @@ -664,6 +664,7 @@ public class TestJobImpl { conf.setInt(MRJobConfig.NUM_REDUCES, 0); conf.setInt(MRJobConfig.REDUCE_MEMORY_MB, 2048); conf.setInt(MRJobConfig.REDUCE_CPU_VCORES, 10); + conf.setInt(MRJobConfig.REDUCE_GPU_CORES, 10); isUber = testUberDecision(conf); 
Assert.assertTrue(isUber); } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestTaskAttempt.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestTaskAttempt.java index 1807c1c..f58c3ad 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestTaskAttempt.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestTaskAttempt.java @@ -202,7 +202,7 @@ public class TestTaskAttempt{ conf.setInt(MRJobConfig.REDUCE_MEMORY_MB, reduceMemMb); conf.setInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, minContainerSize); - app.setClusterInfo(new ClusterInfo(Resource.newInstance(10240, 1))); + app.setClusterInfo(new ClusterInfo(Resource.newInstance(10240, 1, 0))); Job job = app.submit(conf); app.waitForState(job, JobState.RUNNING); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/launcher/TestContainerLauncherImpl.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/launcher/TestContainerLauncherImpl.java index 184f1b2..c018f31 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/launcher/TestContainerLauncherImpl.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/launcher/TestContainerLauncherImpl.java @@ -410,7 +406,7 @@ public class TestContainerLauncherImpl { return MRApp.newContainerToken(NodeId.newInstance("127.0.0.1", 1234), "password".getBytes(), new 
ContainerTokenIdentifier( contId, containerManagerAddr, "user", - Resource.newInstance(1024, 1), + Resource.newInstance(1024, 1, 1), currentTime + 10000L, 123, currentTime, Priority.newInstance(0), 0)); } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/local/TestLocalContainerAllocator.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/local/TestLocalContainerAllocator.java index f901ed8..fcfdb02 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/local/TestLocalContainerAllocator.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/local/TestLocalContainerAllocator.java @@ -211,7 +211,7 @@ public class TestLocalContainerAllocator { when(ctx.getApplicationAttemptId()).thenReturn(attemptId); when(ctx.getJob(isA(JobId.class))).thenReturn(job); when(ctx.getClusterInfo()).thenReturn( - new ClusterInfo(Resource.newInstance(10240, 1))); + new ClusterInfo(Resource.newInstance(10240, 1, 0))); when(ctx.getEventHandler()).thenReturn(eventHandler); return ctx; } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/rm/TestRMContainerAllocator.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/rm/TestRMContainerAllocator.java index 4644a86..85f3e0a 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/rm/TestRMContainerAllocator.java +++ 
b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/rm/TestRMContainerAllocator.java @@ -2719,7 +2719,7 @@ public class TestRMContainerAllocator { request.getResponseId(), containersToComplete, containersToAllocate, Collections.emptyList(), - Resource.newInstance(512000, 1024), null, 10, null, + Resource.newInstance(512000, 1024, 1024), null, 10, null, Collections.emptyList()); containersToComplete.clear(); containersToAllocate.clear(); @@ -2733,7 +2733,7 @@ public class TestRMContainerAllocator { : RMContainerAllocator.PRIORITY_MAP; Container container = Container.newInstance(containerId, NodeId.newInstance(nodeName, 1234), nodeName + ":5678", - Resource.newInstance(1024, 1), priority, null); + Resource.newInstance(1024, 1, 1), priority, null); containersToAllocate.add(container); return containerId; } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/avro/Events.avpr b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/avro/Events.avpr index c7b3eb8..8d14542 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/avro/Events.avpr +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/avro/Events.avpr @@ -155,6 +155,7 @@ {"name": "counters", "type": "JhCounters"}, {"name": "clockSplits", "type": { "type": "array", "items": "int"}}, {"name": "cpuUsages", "type": { "type": "array", "items": "int"}}, + {"name": "gpuUsages", "type": { "type": "array", "items": "int"}}, {"name": "vMemKbytes", "type": { "type": "array", "items": "int"}}, {"name": "physMemKbytes", "type": { "type": "array", "items": "int"}} ] @@ -176,6 +177,7 @@ {"name": "counters", "type": "JhCounters"}, {"name": "clockSplits", "type": { "type": "array", "items": "int"}}, {"name": "cpuUsages", "type": { "type": "array", "items": "int"}}, + {"name": "gpuUsages", 
"type": { "type": "array", "items": "int"}}, {"name": "vMemKbytes", "type": { "type": "array", "items": "int"}}, {"name": "physMemKbytes", "type": { "type": "array", "items": "int"}} ] @@ -224,6 +226,7 @@ {"name": "counters", "type": ["null","JhCounters"], "default": null}, {"name": "clockSplits", "type": { "type": "array", "items": "int"}}, {"name": "cpuUsages", "type": { "type": "array", "items": "int"}}, + {"name": "gpuUsages", "type": { "type": "array", "items": "int"}}, {"name": "vMemKbytes", "type": { "type": "array", "items": "int"}}, {"name": "physMemKbytes", "type": { "type": "array", "items": "int"}} ] diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/ProgressSplitsBlock.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/ProgressSplitsBlock.java index ccc016a..e141697 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/ProgressSplitsBlock.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/ProgressSplitsBlock.java @@ -31,6 +31,7 @@ import org.apache.hadoop.classification.InterfaceStability.Unstable; public class ProgressSplitsBlock { final PeriodicStatsAccumulator progressWallclockTime; final PeriodicStatsAccumulator progressCPUTime; + final PeriodicStatsAccumulator progressGPUTime; final PeriodicStatsAccumulator progressVirtualMemoryKbytes; final PeriodicStatsAccumulator progressPhysicalMemoryKbytes; @@ -38,6 +39,7 @@ public class ProgressSplitsBlock { static final int WALLCLOCK_TIME_INDEX = 0; static final int CPU_TIME_INDEX = 1; + static final int GPU_TIME_INDEX = 4; static final int VIRTUAL_MEMORY_KBYTES_INDEX = 2; static final int PHYSICAL_MEMORY_KBYTES_INDEX = 3; @@ -48,6 +50,8 @@ public class ProgressSplitsBlock { = new 
CumulativePeriodicStats(numberSplits); progressCPUTime = new CumulativePeriodicStats(numberSplits); + progressGPUTime + = new CumulativePeriodicStats(numberSplits); progressVirtualMemoryKbytes = new StatePeriodicStats(numberSplits); progressPhysicalMemoryKbytes @@ -56,10 +60,11 @@ public class ProgressSplitsBlock { // this coordinates with LoggedTaskAttempt.SplitVectorKind int[][] burst() { - int[][] result = new int[4][]; + int[][] result = new int[5][]; result[WALLCLOCK_TIME_INDEX] = progressWallclockTime.getValues(); result[CPU_TIME_INDEX] = progressCPUTime.getValues(); + result[GPU_TIME_INDEX] = progressGPUTime.getValues(); result[VIRTUAL_MEMORY_KBYTES_INDEX] = progressVirtualMemoryKbytes.getValues(); result[PHYSICAL_MEMORY_KBYTES_INDEX] = progressPhysicalMemoryKbytes.getValues(); @@ -78,6 +83,10 @@ public class ProgressSplitsBlock { return arrayGet(burstedBlock, CPU_TIME_INDEX); } + static public int[] arrayGetGPUTime(int[][] burstedBlock) { + return arrayGet(burstedBlock, GPU_TIME_INDEX); + } + static public int[] arrayGetVMemKbytes(int[][] burstedBlock) { return arrayGet(burstedBlock, VIRTUAL_MEMORY_KBYTES_INDEX); } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/JobCounter.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/JobCounter.java index fea4535..04142aa 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/JobCounter.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/JobCounter.java @@ -51,5 +51,7 @@ public enum JobCounter { VCORES_MILLIS_MAPS, VCORES_MILLIS_REDUCES, MB_MILLIS_MAPS, - MB_MILLIS_REDUCES + MB_MILLIS_REDUCES, + GCORES_MILLIS_MAPS, + GCORES_MILLIS_REDUCES } diff --git 
a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java index 6e079c1..3310046 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java @@ -263,6 +263,9 @@ public interface MRJobConfig { public static final String MAP_CPU_VCORES = "mapreduce.map.cpu.vcores"; public static final int DEFAULT_MAP_CPU_VCORES = 1; + public static final String MAP_GPU_CORES = "mapreduce.map.gpu.cores"; + public static final int DEFAULT_MAP_GPU_CORES = 0; + public static final String MAP_ENV = "mapreduce.map.env"; public static final String MAP_JAVA_OPTS = "mapreduce.map.java.opts"; @@ -311,6 +314,9 @@ public interface MRJobConfig { public static final String REDUCE_CPU_VCORES = "mapreduce.reduce.cpu.vcores"; public static final int DEFAULT_REDUCE_CPU_VCORES = 1; + public static final String REDUCE_GPU_CORES = "mapreduce.reduce.gpu.cores"; + public static final int DEFAULT_REDUCE_GPU_CORES = 0; + public static final String REDUCE_MEMORY_TOTAL_BYTES = "mapreduce.reduce.memory.totalbytes"; public static final String SHUFFLE_INPUT_BUFFER_PERCENT = "mapreduce.reduce.shuffle.input.buffer.percent"; @@ -496,6 +502,11 @@ public interface MRJobConfig { MR_AM_PREFIX+"resource.cpu-vcores"; public static final int DEFAULT_MR_AM_CPU_VCORES = 1; + /** The number of GPU cores the MR app master needs.*/ + public static final String MR_AM_GPU_GCORES = + MR_AM_PREFIX+"resource.gpu-cores"; + public static final int DEFAULT_MR_AM_GPU_GCORES = 0; + /** Command line arguments passed to the MR app master.*/ public static final String MR_AM_COMMAND_OPTS = 
MR_AM_PREFIX+"command-opts"; diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/jobhistory/MapAttemptFinishedEvent.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/jobhistory/MapAttemptFinishedEvent.java index 62df2aa..78429e1 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/jobhistory/MapAttemptFinishedEvent.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/jobhistory/MapAttemptFinishedEvent.java @@ -50,6 +50,7 @@ public class MapAttemptFinishedEvent implements HistoryEvent { int[][] allSplits; int[] clockSplits; int[] cpuUsages; + int[] gpuUsages; int[] vMemKbytes; int[] physMemKbytes; @@ -90,6 +91,7 @@ public class MapAttemptFinishedEvent implements HistoryEvent { this.allSplits = allSplits; this.clockSplits = ProgressSplitsBlock.arrayGetWallclockTime(allSplits); this.cpuUsages = ProgressSplitsBlock.arrayGetCPUTime(allSplits); + this.gpuUsages = ProgressSplitsBlock.arrayGetGPUTime(allSplits); this.vMemKbytes = ProgressSplitsBlock.arrayGetVMemKbytes(allSplits); this.physMemKbytes = ProgressSplitsBlock.arrayGetPhysMemKbytes(allSplits); } @@ -143,6 +145,8 @@ public class MapAttemptFinishedEvent implements HistoryEvent { .arrayGetWallclockTime(allSplits)); datum.cpuUsages = AvroArrayUtils.toAvro(ProgressSplitsBlock .arrayGetCPUTime(allSplits)); + datum.gpuUsages = AvroArrayUtils.toAvro(ProgressSplitsBlock + .arrayGetGPUTime(allSplits)); datum.vMemKbytes = AvroArrayUtils.toAvro(ProgressSplitsBlock .arrayGetVMemKbytes(allSplits)); datum.physMemKbytes = AvroArrayUtils.toAvro(ProgressSplitsBlock @@ -165,6 +169,7 @@ public class MapAttemptFinishedEvent implements HistoryEvent { this.counters = EventReader.fromAvro(datum.counters); this.clockSplits = 
AvroArrayUtils.fromAvro(datum.clockSplits); this.cpuUsages = AvroArrayUtils.fromAvro(datum.cpuUsages); + this.gpuUsages = AvroArrayUtils.fromAvro(datum.gpuUsages); this.vMemKbytes = AvroArrayUtils.fromAvro(datum.vMemKbytes); this.physMemKbytes = AvroArrayUtils.fromAvro(datum.physMemKbytes); } @@ -211,6 +216,9 @@ public class MapAttemptFinishedEvent implements HistoryEvent { public int[] getCpuUsages() { return cpuUsages; } + public int[] getGpuUsages() { + return gpuUsages; + } public int[] getVMemKbytes() { return vMemKbytes; } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/jobhistory/ReduceAttemptFinishedEvent.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/jobhistory/ReduceAttemptFinishedEvent.java index a779fca..e511c49 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/jobhistory/ReduceAttemptFinishedEvent.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/jobhistory/ReduceAttemptFinishedEvent.java @@ -51,6 +51,7 @@ public class ReduceAttemptFinishedEvent implements HistoryEvent { int[][] allSplits; int[] clockSplits; int[] cpuUsages; + int[] gpuUsages; int[] vMemKbytes; int[] physMemKbytes; @@ -91,6 +92,7 @@ public class ReduceAttemptFinishedEvent implements HistoryEvent { this.allSplits = allSplits; this.clockSplits = ProgressSplitsBlock.arrayGetWallclockTime(allSplits); this.cpuUsages = ProgressSplitsBlock.arrayGetCPUTime(allSplits); + this.gpuUsages = ProgressSplitsBlock.arrayGetGPUTime(allSplits); this.vMemKbytes = ProgressSplitsBlock.arrayGetVMemKbytes(allSplits); this.physMemKbytes = ProgressSplitsBlock.arrayGetPhysMemKbytes(allSplits); } @@ -145,6 +147,8 @@ public class ReduceAttemptFinishedEvent implements HistoryEvent { 
.arrayGetWallclockTime(allSplits)); datum.cpuUsages = AvroArrayUtils.toAvro(ProgressSplitsBlock .arrayGetCPUTime(allSplits)); + datum.gpuUsages = AvroArrayUtils.toAvro(ProgressSplitsBlock + .arrayGetGPUTime(allSplits)); datum.vMemKbytes = AvroArrayUtils.toAvro(ProgressSplitsBlock .arrayGetVMemKbytes(allSplits)); datum.physMemKbytes = AvroArrayUtils.toAvro(ProgressSplitsBlock @@ -168,6 +172,7 @@ public class ReduceAttemptFinishedEvent implements HistoryEvent { this.counters = EventReader.fromAvro(datum.counters); this.clockSplits = AvroArrayUtils.fromAvro(datum.clockSplits); this.cpuUsages = AvroArrayUtils.fromAvro(datum.cpuUsages); + this.gpuUsages = AvroArrayUtils.fromAvro(datum.gpuUsages); this.vMemKbytes = AvroArrayUtils.fromAvro(datum.vMemKbytes); this.physMemKbytes = AvroArrayUtils.fromAvro(datum.physMemKbytes); } @@ -216,6 +221,9 @@ public class ReduceAttemptFinishedEvent implements HistoryEvent { public int[] getCpuUsages() { return cpuUsages; } + public int[] getGpuUsages() { + return gpuUsages; + } public int[] getVMemKbytes() { return vMemKbytes; } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/jobhistory/TaskAttemptUnsuccessfulCompletionEvent.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/jobhistory/TaskAttemptUnsuccessfulCompletionEvent.java index 9b5617c..f14b2f0 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/jobhistory/TaskAttemptUnsuccessfulCompletionEvent.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/jobhistory/TaskAttemptUnsuccessfulCompletionEvent.java @@ -52,6 +52,7 @@ public class TaskAttemptUnsuccessfulCompletionEvent implements HistoryEvent { int[][] allSplits; int[] clockSplits; int[] cpuUsages; + int[] 
gpuUsages; int[] vMemKbytes; int[] physMemKbytes; private static final Counters EMPTY_COUNTERS = new Counters(); @@ -91,6 +92,8 @@ public class TaskAttemptUnsuccessfulCompletionEvent implements HistoryEvent { ProgressSplitsBlock.arrayGetWallclockTime(allSplits); this.cpuUsages = ProgressSplitsBlock.arrayGetCPUTime(allSplits); + this.gpuUsages = + ProgressSplitsBlock.arrayGetGPUTime(allSplits); this.vMemKbytes = ProgressSplitsBlock.arrayGetVMemKbytes(allSplits); this.physMemKbytes = @@ -151,6 +154,8 @@ public class TaskAttemptUnsuccessfulCompletionEvent implements HistoryEvent { .arrayGetWallclockTime(allSplits)); datum.cpuUsages = AvroArrayUtils.toAvro(ProgressSplitsBlock .arrayGetCPUTime(allSplits)); + datum.gpuUsages = AvroArrayUtils.toAvro(ProgressSplitsBlock + .arrayGetGPUTime(allSplits)); datum.vMemKbytes = AvroArrayUtils.toAvro(ProgressSplitsBlock .arrayGetVMemKbytes(allSplits)); datum.physMemKbytes = AvroArrayUtils.toAvro(ProgressSplitsBlock @@ -180,6 +185,8 @@ public class TaskAttemptUnsuccessfulCompletionEvent implements HistoryEvent { AvroArrayUtils.fromAvro(datum.clockSplits); this.cpuUsages = AvroArrayUtils.fromAvro(datum.cpuUsages); + this.gpuUsages = + AvroArrayUtils.fromAvro(datum.gpuUsages); this.vMemKbytes = AvroArrayUtils.fromAvro(datum.vMemKbytes); this.physMemKbytes = @@ -241,6 +248,9 @@ public class TaskAttemptUnsuccessfulCompletionEvent implements HistoryEvent { public int[] getCpuUsages() { return cpuUsages; } + public int[] getGpuUsages() { + return gpuUsages; + } public int[] getVMemKbytes() { return vMemKbytes; } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml index 38a7317..9d1926a 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml +++ 
b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml @@ -465,6 +465,14 @@ + mapreduce.map.gpu.cores + 0 + The number of gpu cores to request from the scheduler for + each map task. + + + + mapreduce.reduce.memory.mb 1024 The amount of memory to request from the scheduler for each @@ -481,6 +489,14 @@ + mapreduce.reduce.gpu.cores + 0 + The number of gpu cores to request from the scheduler for + each reduce task. + + + + mapreduce.jobtracker.retiredjobs.cache.size 1000 The number of retired job status to keep in the cache. @@ -1872,6 +1888,14 @@ + yarn.app.mapreduce.am.resource.gpu-cores + 0 + + The number of GPU cores the MR AppMaster needs. + + + + yarn.app.mapreduce.am.hard-kill-timeout-ms 10000 diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/org/apache/hadoop/mapreduce/JobCounter.properties b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/org/apache/hadoop/mapreduce/JobCounter.properties index 7a493a8..ab3f0bf 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/org/apache/hadoop/mapreduce/JobCounter.properties +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/org/apache/hadoop/mapreduce/JobCounter.properties @@ -31,5 +31,7 @@ MB_MILLIS_MAPS.name= Total megabyte-seconds taken by all map tasks MB_MILLIS_REDUCES.name= Total megabyte-seconds taken by all reduce tasks VCORES_MILLIS_MAPS.name= Total vcore-seconds taken by all map tasks VCORES_MILLIS_REDUCES.name= Total vcore-seconds taken by all reduce tasks +GCORES_MILLIS_MAPS.name= Total gcore-seconds taken by all map tasks +GCORES_MILLIS_REDUCES.name= Total gcore-seconds taken by all reduce tasks FALLOW_SLOTS_MILLIS_MAPS.name= Total time spent by all maps waiting after reserving slots (ms) FALLOW_SLOTS_MILLIS_REDUCES.name= Total time 
spent by all reduces waiting after reserving slots (ms) diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/YARNRunner.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/YARNRunner.java index 2bb2483..41a14cd 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/YARNRunner.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/YARNRunner.java @@ -347,6 +346,11 @@ public class YARNRunner implements ClientProtocol { MRJobConfig.MR_AM_CPU_VCORES, MRJobConfig.DEFAULT_MR_AM_CPU_VCORES ) ); + capability.setGpuCores( + conf.getInt( + MRJobConfig.MR_AM_GPU_GCORES, MRJobConfig.DEFAULT_MR_AM_GPU_GCORES + ) + ); LOG.debug("AppMaster capability = " + capability); // Setup LocalResources diff --git a/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/JobBuilder.java b/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/JobBuilder.java index c5ae2fc..1a89de1 100644 --- a/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/JobBuilder.java +++ b/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/JobBuilder.java @@ -505,6 +505,7 @@ public class JobBuilder { counters == null ? 
EMPTY_COUNTERS : counters); attempt.arraySetClockSplits(event.getClockSplits()); attempt.arraySetCpuUsages(event.getCpuUsages()); + attempt.arraySetGpuUsages(event.getGpuUsages()); attempt.arraySetVMemKbytes(event.getVMemKbytes()); attempt.arraySetPhysMemKbytes(event.getPhysMemKbytes()); TaskAttemptUnsuccessfulCompletion t = @@ -568,6 +569,7 @@ public class JobBuilder { .incorporateCounters(((ReduceAttemptFinished) event.getDatum()).counters); attempt.arraySetClockSplits(event.getClockSplits()); attempt.arraySetCpuUsages(event.getCpuUsages()); + attempt.arraySetGpuUsages(event.getGpuUsages()); attempt.arraySetVMemKbytes(event.getVMemKbytes()); attempt.arraySetPhysMemKbytes(event.getPhysMemKbytes()); } @@ -596,6 +598,7 @@ public class JobBuilder { .incorporateCounters(((MapAttemptFinished) event.getDatum()).counters); attempt.arraySetClockSplits(event.getClockSplits()); attempt.arraySetCpuUsages(event.getCpuUsages()); + attempt.arraySetGpuUsages(event.getGpuUsages()); attempt.arraySetVMemKbytes(event.getVMemKbytes()); attempt.arraySetPhysMemKbytes(event.getPhysMemKbytes()); } diff --git a/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/LoggedTaskAttempt.java b/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/LoggedTaskAttempt.java index c21eb39..8187153 100644 --- a/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/LoggedTaskAttempt.java +++ b/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/LoggedTaskAttempt.java @@ -78,6 +78,7 @@ public class LoggedTaskAttempt implements DeepCompare { List clockSplits = new ArrayList(); List cpuUsages = new ArrayList(); + List gpuUsages = new ArrayList(); List vMemKbytes = new ArrayList(); List physMemKbytes = new ArrayList(); @@ -121,6 +122,17 @@ public class LoggedTaskAttempt implements DeepCompare { } }, + GPU_USAGE { + @Override + public List get(LoggedTaskAttempt attempt) { + return attempt.getGpuUsages(); + } + @Override + public void 
set(LoggedTaskAttempt attempt, List newValue) { + attempt.setGpuUsages(newValue); + } + }, + VIRTUAL_MEMORY_KBYTES { @Override public List get(LoggedTaskAttempt attempt) { @@ -237,6 +249,24 @@ public class LoggedTaskAttempt implements DeepCompare { this.cpuUsages = result; } + public List getGpuUsages() { + return gpuUsages; + } + + void setGpuUsages(List gpuUsages) { + this.gpuUsages = gpuUsages; + } + + void arraySetGpuUsages(int[] gpuUsages) { + List result = new ArrayList(); + + for (int i = 0; i < gpuUsages.length; ++i) { + result.add(gpuUsages[i]); + } + + this.gpuUsages = result; + } + public List getVMemKbytes() { return vMemKbytes; } @@ -575,7 +605,15 @@ public class LoggedTaskAttempt implements DeepCompare { metrics.setCumulativeCpuUsage(val); } }, counters, "CPU_MILLISECONDS"); - + + // incorporate GPU usage + incorporateCounter(new SetField(this) { + @Override + void set(long val) { + metrics.setCumulativeGpuUsage(val); + } + }, counters, "GPU_MILLISECONDS"); + // incorporate virtual memory usage incorporateCounter(new SetField(this) { @Override @@ -766,6 +804,7 @@ public class LoggedTaskAttempt implements DeepCompare { compare1(clockSplits, other.clockSplits, loc, "clockSplits"); compare1(cpuUsages, other.cpuUsages, loc, "cpuUsages"); + compare1(gpuUsages, other.gpuUsages, loc, "gpuUsages"); compare1(vMemKbytes, other.vMemKbytes, loc, "vMemKbytes"); compare1(physMemKbytes, other.physMemKbytes, loc, "physMemKbytes"); } diff --git a/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/ResourceUsageMetrics.java b/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/ResourceUsageMetrics.java index a0944c8..55a1b40 100644 --- a/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/ResourceUsageMetrics.java +++ b/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/ResourceUsageMetrics.java @@ -29,6 +29,7 @@ */ public class ResourceUsageMetrics implements Writable, DeepCompare { private long 
cumulativeCpuUsage; + private long cumulativeGpuUsage; private long virtualMemoryUsage; private long physicalMemoryUsage; private long heapUsage; @@ -49,7 +50,21 @@ public class ResourceUsageMetrics implements Writable, DeepCompare { public void setCumulativeCpuUsage(long usage) { cumulativeCpuUsage = usage; } - + + /** + * Get the cumulative GPU usage. + */ + public long getCumulativeGpuUsage() { + return cumulativeGpuUsage; + } + + /** + * Set the cumulative GPU usage. + */ + public void setCumulativeGpuUsage(long usage) { + cumulativeGpuUsage = usage; + } + /** * Get the virtual memory usage. */ @@ -98,6 +113,7 @@ public class ResourceUsageMetrics implements Writable, DeepCompare { public int size() { int size = 0; size += WritableUtils.getVIntSize(cumulativeCpuUsage); // long #1 + size += WritableUtils.getVIntSize(cumulativeGpuUsage); size += WritableUtils.getVIntSize(virtualMemoryUsage); // long #2 size += WritableUtils.getVIntSize(physicalMemoryUsage); // long #3 size += WritableUtils.getVIntSize(heapUsage); // long #4 @@ -107,6 +123,7 @@ public class ResourceUsageMetrics implements Writable, DeepCompare { @Override public void readFields(DataInput in) throws IOException { cumulativeCpuUsage = WritableUtils.readVLong(in); // long #1 + cumulativeGpuUsage = WritableUtils.readVLong(in); virtualMemoryUsage = WritableUtils.readVLong(in); // long #2 physicalMemoryUsage = WritableUtils.readVLong(in); // long #3 heapUsage = WritableUtils.readVLong(in); // long #4 @@ -116,6 +133,7 @@ public class ResourceUsageMetrics implements Writable, DeepCompare { public void write(DataOutput out) throws IOException { //TODO Write resources version no too WritableUtils.writeVLong(out, cumulativeCpuUsage); // long #1 + WritableUtils.writeVLong(out, cumulativeGpuUsage); WritableUtils.writeVLong(out, virtualMemoryUsage); // long #2 WritableUtils.writeVLong(out, physicalMemoryUsage); // long #3 WritableUtils.writeVLong(out, heapUsage); // long #4 @@ -148,6 +166,8 @@ public class 
ResourceUsageMetrics implements Writable, DeepCompare { ResourceUsageMetrics metrics2 = (ResourceUsageMetrics) other; compareMetric(getCumulativeCpuUsage(), metrics2.getCumulativeCpuUsage(), new TreePath(loc, "cumulativeCpu")); + compareMetric(getCumulativeGpuUsage(), metrics2.getCumulativeGpuUsage(), + new TreePath(loc, "cumulativeGpu")); compareMetric(getVirtualMemoryUsage(), metrics2.getVirtualMemoryUsage(), new TreePath(loc, "virtualMemory")); compareMetric(getPhysicalMemoryUsage(), metrics2.getPhysicalMemoryUsage(), diff --git a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/ResourceSchedulerWrapper.java b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/ResourceSchedulerWrapper.java index e729363..f12a4fc 100644 --- a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/ResourceSchedulerWrapper.java +++ b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/ResourceSchedulerWrapper.java @@ -343,8 +342,8 @@ public class ResourceSchedulerWrapper List resourceRequests, List containerIds) throws IOException { // update queue information - Resource pendingResource = Resources.createResource(0, 0); - Resource allocatedResource = Resources.createResource(0, 0); + Resource pendingResource = Resources.createResource(0, 0, 0); + Resource allocatedResource = Resources.createResource(0, 0, 0); String queueName = appQueueMap.get(attemptId.getApplicationId()); // container requested for (ResourceRequest request : resourceRequests) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ApplicationResourceUsageReport.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ApplicationResourceUsageReport.java index b20d832..d1564d8 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ApplicationResourceUsageReport.java 
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ApplicationResourceUsageReport.java @@ -36,7 +36,7 @@ public abstract class ApplicationResourceUsageReport { public static ApplicationResourceUsageReport newInstance( int numUsedContainers, int numReservedContainers, Resource usedResources, Resource reservedResources, Resource neededResources, long memorySeconds, - long vcoreSeconds) { + long vcoreSeconds, long gcoreSeconds) { ApplicationResourceUsageReport report = Records.newRecord(ApplicationResourceUsageReport.class); report.setNumUsedContainers(numUsedContainers); @@ -46,6 +46,7 @@ public abstract class ApplicationResourceUsageReport { report.setNeededResources(neededResources); report.setMemorySeconds(memorySeconds); report.setVcoreSeconds(vcoreSeconds); + report.setGcoreSeconds(gcoreSeconds); return report; } @@ -152,4 +153,12 @@ public abstract class ApplicationResourceUsageReport { @Public @Unstable public abstract long getVcoreSeconds(); + + @Private + @Unstable + public abstract void setGcoreSeconds(long gcore_seconds); + + @Public + @Unstable + public abstract long getGcoreSeconds(); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/Resource.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/Resource.java index 88b57f1..692ee79 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/Resource.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/Resource.java @@ -54,9 +54,16 @@ public abstract class Resource implements Comparable { @Public @Stable public static Resource newInstance(int memory, int vCores) { + return newInstance(memory, vCores, 0); + } + + @Public + @Stable + public static Resource newInstance(int memory, int vCores, int gCores) { Resource resource = 
Records.newRecord(Resource.class); resource.setMemory(memory); resource.setVirtualCores(vCores); + resource.setGpuCores(gCores); return resource; } @@ -105,12 +112,21 @@ public abstract class Resource implements Comparable { @Evolving public abstract void setVirtualCores(int vCores); + @Public + @Evolving + public abstract int getGpuCores(); + + @Public + @Evolving + public abstract void setGpuCores(int gCores); + @Override public int hashCode() { final int prime = 263167; int result = 3571; result = 939769357 + getMemory(); // prime * result = 939769357 initially result = prime * result + getVirtualCores(); + result = prime * result + getGpuCores(); return result; } @@ -124,7 +140,8 @@ public abstract class Resource implements Comparable { return false; Resource other = (Resource) obj; if (getMemory() != other.getMemory() || - getVirtualCores() != other.getVirtualCores()) { + getVirtualCores() != other.getVirtualCores() || + getGpuCores() != other.getGpuCores()) { return false; } return true; @@ -132,6 +149,6 @@ public abstract class Resource implements Comparable { @Override public String toString() { - return ""; + return ""; } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java index da076eb..5362634 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java @@ -171,6 +171,9 @@ public class YarnConfiguration extends Configuration { public static final String RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES = YARN_PREFIX + "scheduler.minimum-allocation-vcores"; public static final int DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES = 1; + public static final String RM_SCHEDULER_MINIMUM_ALLOCATION_GCORES 
= + YARN_PREFIX + "scheduler.minimum-allocation-gcores"; + public static final int DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_GCORES = 0; /** Maximum request grant-able by the RM scheduler. */ public static final String RM_SCHEDULER_MAXIMUM_ALLOCATION_MB = @@ -179,7 +182,9 @@ public class YarnConfiguration extends Configuration { public static final String RM_SCHEDULER_MAXIMUM_ALLOCATION_VCORES = YARN_PREFIX + "scheduler.maximum-allocation-vcores"; public static final int DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_VCORES = 4; - + public static final String RM_SCHEDULER_MAXIMUM_ALLOCATION_GCORES = + YARN_PREFIX + "scheduler.maximum-allocation-gcores"; + public static final int DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_GCORES = 8; /** Number of threads to handle scheduler interface.*/ public static final String RM_SCHEDULER_CLIENT_THREAD_COUNT = RM_PREFIX + "scheduler.client.thread-count"; @@ -828,12 +833,18 @@ public class YarnConfiguration extends Configuration { /** Number of Virtual CPU Cores which can be allocated for containers.*/ public static final String NM_VCORES = NM_PREFIX + "resource.cpu-vcores"; public static final int DEFAULT_NM_VCORES = 8; + public static final String NM_GCORES = NM_PREFIX + "resource.gcores"; + public static final int DEFAULT_NM_GCORES = 0; /** Percentage of overall CPU which can be allocated for containers. 
*/ public static final String NM_RESOURCE_PERCENTAGE_PHYSICAL_CPU_LIMIT = NM_PREFIX + "resource.percentage-physical-cpu-limit"; public static final int DEFAULT_NM_RESOURCE_PERCENTAGE_PHYSICAL_CPU_LIMIT = 100; + public static final String NM_RESOURCE_PERCENTAGE_GPU_LIMIT = + NM_PREFIX + "resource.percentage-gpu-limit"; + public static final int DEFAULT_NM_RESOURCE_PERCENTAGE_GPU_LIMIT = 100; + /** NM Webapp address.**/ public static final String NM_WEBAPP_ADDRESS = NM_PREFIX + "webapp.address"; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto index 2edff99..9c7916e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto @@ -56,6 +56,7 @@ message ContainerIdProto { message ResourceProto { optional int32 memory = 1; optional int32 virtual_cores = 2; + optional int32 gpu_cores = 3; } message ResourceOptionProto { @@ -171,6 +172,7 @@ message ApplicationResourceUsageReportProto { optional ResourceProto needed_resources = 5; optional int64 memory_seconds = 6; optional int64 vcore_seconds = 7; + optional int64 gcore_seconds = 8; } message ApplicationReportProto { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_service_protos.proto b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_service_protos.proto index 33d1207..8a2acaf 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_service_protos.proto +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_service_protos.proto @@ -92,6 +92,7 @@ message AllocateResponseProto { enum SchedulerResourceTypes { MEMORY = 0; CPU = 1; + GPU = 2; } ////////////////////////////////////////////////////// diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/ApplicationMaster.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/ApplicationMaster.java index f5b3d0a..6927415 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/ApplicationMaster.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/ApplicationMaster.java @@ -217,6 +216,8 @@ public class ApplicationMaster { private int containerMemory = 10; // VirtualCores to request for the container on which the shell command will run private int containerVirtualCores = 1; + // GpuCores to request for the container + private int containerGpuCores = 0; // Priority of the request private int requestPriority; @@ -358,6 +359,8 @@ public class ApplicationMaster { "Amount of memory in MB to be requested to run the shell command"); opts.addOption("container_vcores", true, "Amount of virtual cores to be requested to run the shell command"); + opts.addOption("container_gcores", true, + "Amount of gpu cores to be requested"); opts.addOption("num_containers", true, "No. of containers on which the shell command needs to be executed"); opts.addOption("priority", true, "Application Priority. 
Default 0"); @@ -490,6 +493,7 @@ public class ApplicationMaster { "container_memory", "10")); containerVirtualCores = Integer.parseInt(cliParser.getOptionValue( "container_vcores", "1")); + containerGpuCores = Integer.parseInt(cliParser.getOptionValue("container_gcores", "0")); numTotalContainers = Integer.parseInt(cliParser.getOptionValue( "num_containers", "1")); if (numTotalContainers == 0) { @@ -582,6 +586,9 @@ public class ApplicationMaster { int maxVCores = response.getMaximumResourceCapability().getVirtualCores(); LOG.info("Max vcores capabililty of resources in this cluster " + maxVCores); + int maxGCores = response.getMaximumResourceCapability().getGpuCores(); + LOG.info("Max gcores capability of resources in this cluster " + maxGCores); + // A resource ask cannot exceed the max. if (containerMemory > maxMem) { LOG.info("Container memory specified above max threshold of cluster." @@ -597,6 +604,13 @@ public class ApplicationMaster { containerVirtualCores = maxVCores; } + if (containerGpuCores > maxGCores) { + LOG.info("Container gpu cores specified above max threshold of cluster." + + " Using max value." 
+ ", specified=" + containerGpuCores + ", max=" + + maxGCores); + containerGpuCores = maxGCores; + } + List previousAMRunningContainers = response.getContainersFromPreviousAttempts(); LOG.info(appAttemptID + " received " + previousAMRunningContainers.size() @@ -792,7 +806,9 @@ public class ApplicationMaster { + ", containerResourceMemory" + allocatedContainer.getResource().getMemory() + ", containerResourceVirtualCores" - + allocatedContainer.getResource().getVirtualCores()); + + allocatedContainer.getResource().getVirtualCores() + + ", containerResourceGpuCores" + + allocatedContainer.getResource().getGpuCores()); // + ", containerToken" // +allocatedContainer.getContainerToken().getIdentifier().toString()); @@ -1056,7 +1072,7 @@ public class ApplicationMaster { // Set up resource type requirements // For now, memory and CPU are supported so we set memory and cpu requirements Resource capability = Resource.newInstance(containerMemory, - containerVirtualCores); + containerVirtualCores, containerGpuCores); ContainerRequest request = new ContainerRequest(capability, null, null, pri); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/Client.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/Client.java index 0e9a4e4..d9f8193 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/Client.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/Client.java @@ -129,6 +129,8 @@ public class Client { private int amMemory = 10; // Amt. 
of virtual core resource to request for to run the App Master private int amVCores = 1; + // Amt. of gpu core resource to request for to run the App Master + private int amGCores = 0; // Application master jar file private String appMasterJar = ""; @@ -150,6 +152,8 @@ public class Client { private int containerMemory = 10; // Amt. of virtual cores to request for container in which shell script will be executed private int containerVirtualCores = 1; + // Amt. of gpu cores to request for container in which shell script will be executed + private int containerGpuCores = 0; // No. of containers in which the shell script needs to be executed private int numContainers = 1; private String nodeLabelExpression = null; @@ -245,6 +249,7 @@ public class Client { opts.addOption("timeout", true, "Application timeout in milliseconds"); opts.addOption("master_memory", true, "Amount of memory in MB to be requested to run the application master"); opts.addOption("master_vcores", true, "Amount of virtual cores to be requested to run the application master"); + opts.addOption("master_gcores", true, "Amount of gpu cores to be requested to run the application master"); opts.addOption("jar", true, "Jar file containing the application master"); opts.addOption("shell_command", true, "Shell command to be executed by " + "the Application Master. Can only specify either --shell_command " + @@ -258,6 +263,7 @@ public class Client { opts.addOption("shell_cmd_priority", true, "Priority for the shell command containers"); opts.addOption("container_memory", true, "Amount of memory in MB to be requested to run the shell command"); opts.addOption("container_vcores", true, "Amount of virtual cores to be requested to run the shell command"); + opts.addOption("container_gcores", true, "Amount of gpu cores to be requested to run the shell command"); opts.addOption("num_containers", true, "No. 
of containers on which the shell command needs to be executed"); opts.addOption("log_properties", true, "log4j.properties file"); opts.addOption("keep_containers_across_application_attempts", false, @@ -345,6 +351,7 @@ public class Client { amQueue = cliParser.getOptionValue("queue", "default"); amMemory = Integer.parseInt(cliParser.getOptionValue("master_memory", "10")); amVCores = Integer.parseInt(cliParser.getOptionValue("master_vcores", "1")); + amGCores = Integer.parseInt(cliParser.getOptionValue("master_gcores", "0")); if (amMemory < 0) { throw new IllegalArgumentException("Invalid memory specified for application master, exiting." @@ -354,7 +361,10 @@ public class Client { throw new IllegalArgumentException("Invalid virtual cores specified for application master, exiting." + " Specified virtual cores=" + amVCores); } - + if (amGCores < 0) { + throw new IllegalArgumentException("Invalid gpu cores specified for application master, exiting." + + " Specified gpu cores=" + amGCores); + } if (!cliParser.hasOption("jar")) { throw new IllegalArgumentException("No jar file specified for application master"); } @@ -396,14 +406,16 @@ public class Client { containerMemory = Integer.parseInt(cliParser.getOptionValue("container_memory", "10")); containerVirtualCores = Integer.parseInt(cliParser.getOptionValue("container_vcores", "1")); + containerGpuCores = Integer.parseInt(cliParser.getOptionValue("container_gcores", "0")); numContainers = Integer.parseInt(cliParser.getOptionValue("num_containers", "1")); - if (containerMemory < 0 || containerVirtualCores < 0 || numContainers < 1) { + if (containerMemory < 0 || containerVirtualCores < 0 || containerGpuCores < 0 || numContainers < 1) { throw new IllegalArgumentException("Invalid no. of containers or container memory/vcores specified," + " exiting." 
+ " Specified containerMemory=" + containerMemory + ", containerVirtualCores=" + containerVirtualCores + + ", containerGpuCores=" + containerGpuCores + ", numContainer=" + numContainers); } @@ -507,7 +519,17 @@ public class Client { + ", max=" + maxVCores); amVCores = maxVCores; } - + + int maxGCores = appResponse.getMaximumResourceCapability().getGpuCores(); + LOG.info("Max gpu cores capability of resources in this cluster " + maxGCores); + + if (amGCores > maxGCores) { + LOG.info("AM gpu cores specified above max threshold of cluster. " + + "Using max value." + ", specified=" + amGCores + + ", max=" + maxGCores); + amGCores = maxGCores; + } + // set the application name ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext(); ApplicationId appId = appContext.getApplicationId(); @@ -625,6 +647,7 @@ public class Client { // Set params for Application Master vargs.add("--container_memory " + String.valueOf(containerMemory)); vargs.add("--container_vcores " + String.valueOf(containerVirtualCores)); + vargs.add("--container_gcores " + String.valueOf(containerGpuCores)); vargs.add("--num_containers " + String.valueOf(numContainers)); if (null != nodeLabelExpression) { appContext.setNodeLabelExpression(nodeLabelExpression); @@ -658,7 +681,7 @@ public class Client { // Set up resource type requirements // For now, both memory and vcores are supported, so we set memory and // vcores requirements - Resource capability = Resource.newInstance(amMemory, amVCores); + Resource capability = Resource.newInstance(amMemory, amVCores, amGCores); appContext.setResource(capability); // Service data is a binary blob that can be passed to the application diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDistributedShell.java
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDistributedShell.java index 5e6fa46..359147c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDistributedShell.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDistributedShell.java @@ -160,10 +160,14 @@ public class TestDistributedShell { "512", "--master_vcores", "2", + "--master_gcores", + "0", "--container_memory", "128", "--container_vcores", - "1" + "1", + "--container_gcores", + "0" }; if (haveDomain) { String[] domainArgs = { @@ -470,10 +474,14 @@ public class TestDistributedShell { "512", "--master_vcores", "2", + "--master_gcores", + "0", "--container_memory", "128", "--container_vcores", - "1" + "1", + "--container_gcores", + "0" }; //Before run the DS, the default the log level is INFO @@ -514,10 +522,14 @@ public class TestDistributedShell { "512", "--master_vcores", "2", + "--master_gcores", + "0", "--container_memory", "128", "--container_vcores", - "1" + "1", + "--container_gcores", + "0" }; LOG.info("Initializing DS Client"); @@ -548,10 +560,14 @@ public class TestDistributedShell { "512", "--master_vcores", "2", + "--master_gcores", + "0", "--container_memory", "128", "--container_vcores", - "1" + "1", + "--container_gcores", + "0" }; LOG.info("Initializing DS Client"); @@ -596,10 +612,14 @@ public class TestDistributedShell { "512", "--master_vcores", "2", + "--master_gcores", + "0", "--container_memory", "128", "--container_vcores", - "1" + "1", + "--container_gcores", + "0" }; LOG.info("Initializing DS Client"); @@ -700,10 +720,14 @@ public class TestDistributedShell { "512", "--master_vcores", "-2", + 
"--master_gcores", + "-2", "--container_memory", "128", "--container_vcores", - "1" + "1", + "--container_gcores", + "0" }; client.init(args); Assert.fail("Exception is expected"); @@ -751,10 +775,14 @@ public class TestDistributedShell { "512", "--master_vcores", "2", + "--master_gcores", + "0", "--container_memory", "128", "--container_vcores", - "1" + "1", + "--container_gcores", + "0" }; client.init(args); Assert.fail("Exception is expected"); @@ -818,10 +846,14 @@ public class TestDistributedShell { "512", "--master_vcores", "2", + "--master_gcores", + "0", "--container_memory", "128", "--container_vcores", "1", + "--container_gcores", + "0", "--debug" }; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/AMRMClientImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/AMRMClientImpl.java index b1324c1..4c9f8c0 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/AMRMClientImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/AMRMClientImpl.java @@ -113,18 +112,26 @@ public class AMRMClientImpl extends AMRMClient { /** - * Class compares Resource by memory then cpu in reverse order + * Class compares Resource by memory then cpu then gpu in reverse order */ - class ResourceReverseMemoryThenCpuComparator implements Comparator { + class ResourceReverseMemoryThenCpuThenGpuComparator implements Comparator { @Override public int compare(Resource arg0, Resource arg1) { int mem0 = arg0.getMemory(); int mem1 = arg1.getMemory(); int cpu0 = arg0.getVirtualCores(); int cpu1 = arg1.getVirtualCores(); + int gpu0 = arg0.getGpuCores(); + int gpu1 = arg1.getGpuCores(); if(mem0 == mem1) { if(cpu0 == cpu1) { - return 0; + if(gpu0 == gpu1) { + return 0; + } + if(gpu0 < gpu1) { + return 1; + } + return -1; } if(cpu0 < cpu1) { 
return 1; @@ -143,8 +150,10 @@ public class AMRMClientImpl extends AMRMClient { int mem1 = arg1.getMemory(); int cpu0 = arg0.getVirtualCores(); int cpu1 = arg1.getVirtualCores(); + int gpu0 = arg0.getGpuCores(); + int gpu1 = arg1.getGpuCores(); - if(mem0 <= mem1 && cpu0 <= cpu1) { + if(mem0 <= mem1 && cpu0 <= cpu1 && gpu0 <= gpu1) { return true; } return false; @@ -655,7 +664,7 @@ public class AMRMClientImpl extends AMRMClient { if (reqMap == null) { // capabilities are stored in reverse sorted order. smallest last. reqMap = new TreeMap( - new ResourceReverseMemoryThenCpuComparator()); + new ResourceReverseMemoryThenCpuThenGpuComparator()); remoteRequests.put(resourceName, reqMap); } ResourceRequestInfo resourceRequestInfo = reqMap.get(capability); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/cli/ApplicationCLI.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/cli/ApplicationCLI.java index dd4a949..df980d9 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/cli/ApplicationCLI.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/cli/ApplicationCLI.java @@ -527,6 +526,7 @@ public class ApplicationCLI extends YarnCLI { //completed app report in the timeline server doesn't have usage report appReportStr.print(usageReport.getMemorySeconds() + " MB-seconds, "); appReportStr.println(usageReport.getVcoreSeconds() + " vcore-seconds"); + appReportStr.println(usageReport.getGcoreSeconds() + " gcore-seconds"); } else { appReportStr.println("N/A"); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/cli/NodeCLI.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/cli/NodeCLI.java index 4f0ddfe..281fc99 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/cli/NodeCLI.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/cli/NodeCLI.java @@ -211,6 +211,11 @@ public class NodeCLI extends YarnCLI { : (nodeReport.getUsed().getVirtualCores() + " vcores")); nodeReportStr.print("\tCPU-Capacity : "); nodeReportStr.println(nodeReport.getCapability().getVirtualCores() + " vcores"); + nodeReportStr.print("\tGPU-Used : "); + nodeReportStr.println((nodeReport.getUsed() == null) ? "0 gcores" + : (nodeReport.getUsed().getGpuCores() + " gcores")); + nodeReportStr.print("\tGPU-Capacity : "); + nodeReportStr.println(nodeReport.getCapability().getGpuCores() + " gcores"); nodeReportStr.print("\tNode-Labels : "); // Create a List for node labels since we need it get sorted diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/ProtocolHATestBase.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/ProtocolHATestBase.java index f468bc1..d04be07 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/ProtocolHATestBase.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/ProtocolHATestBase.java @@ -807,8 +803,8 @@ public abstract class ProtocolHATestBase extends ClientBaseWithFixes { public RegisterApplicationMasterResponse createFakeRegisterApplicationMasterResponse() { - Resource minCapability = Resource.newInstance(2048, 2); - Resource maxCapability = Resource.newInstance(4096, 4); + Resource minCapability = Resource.newInstance(2048, 2, 0); + Resource maxCapability = Resource.newInstance(4096, 4, 4); Map acls = new HashMap(); acls.put(ApplicationAccessType.MODIFY_APP, "*"); @@ -827,7 +823,7 @@ public abstract class ProtocolHATestBase extends ClientBaseWithFixes { return 
AllocateResponse.newInstance(-1, new ArrayList(), new ArrayList(), new ArrayList(), - Resource.newInstance(1024, 2), null, 1, + Resource.newInstance(1024, 2, 2), null, 1, null, new ArrayList()); } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestApplicationClientProtocolOnHA.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestApplicationClientProtocolOnHA.java index bfc6656..4c83ec9 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestApplicationClientProtocolOnHA.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestApplicationClientProtocolOnHA.java @@ -169,6 +169,7 @@ public class TestApplicationClientProtocolOnHA extends ProtocolHATestBase { Resource capability = Records.newRecord(Resource.class); capability.setMemory(10); capability.setVirtualCores(1); + capability.setGpuCores(1); appContext.setResource(capability); ApplicationId appId = client.submitApplication(appContext); Assert.assertTrue(getActiveRM().getRMContext().getRMApps() diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestResourceTrackerOnHA.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestResourceTrackerOnHA.java index 8885769..16a7a72 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestResourceTrackerOnHA.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestResourceTrackerOnHA.java @@ -55,7 +54,7 @@ public class TestResourceTrackerOnHA extends ProtocolHATestBase{ @Test(timeout = 15000) public void testResourceTrackerOnHA() throws Exception { NodeId nodeId = NodeId.newInstance("localhost", 0); - Resource resource = 
Resource.newInstance(2048, 4); + Resource resource = Resource.newInstance(2048, 4, 4); // make sure registerNodeManager works when failover happens RegisterNodeManagerRequest request = diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestAMRMClient.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestAMRMClient.java index 7d29d05..75554f0 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestAMRMClient.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestAMRMClient.java @@ -141,7 +138,7 @@ public class TestAMRMClient { priority = Priority.newInstance(1); priority2 = Priority.newInstance(2); - capability = Resource.newInstance(1024, 1); + capability = Resource.newInstance(1024, 1, 1); node = nodeReports.get(0).getNodeId().getHost(); rack = nodeReports.get(0).getRackName(); @@ -171,7 +168,7 @@ public class TestAMRMClient { new HashMap(), null, new HashMap()); appContext.setAMContainerSpec(amContainer); - appContext.setResource(Resource.newInstance(1024, 1)); + appContext.setResource(Resource.newInstance(1024, 1, 1)); // Create the request to send to the applications manager SubmitApplicationRequest appRequest = Records .newRecord(SubmitApplicationRequest.class); @@ -233,13 +230,13 @@ public class TestAMRMClient { amClient.start(); amClient.registerApplicationMaster("Host", 10000, ""); - Resource capability1 = Resource.newInstance(1024, 2); - Resource capability2 = Resource.newInstance(1024, 1); - Resource capability3 = Resource.newInstance(1000, 2); - Resource capability4 = Resource.newInstance(2000, 1); - Resource capability5 = Resource.newInstance(1000, 3); - Resource capability6 = Resource.newInstance(2000, 1); - Resource capability7 = Resource.newInstance(2000, 1); + Resource capability1 = 
Resource.newInstance(1024, 2, 2); + Resource capability2 = Resource.newInstance(1024, 1, 1); + Resource capability3 = Resource.newInstance(1000, 2, 2); + Resource capability4 = Resource.newInstance(2000, 1, 1); + Resource capability5 = Resource.newInstance(1000, 3, 3); + Resource capability6 = Resource.newInstance(2000, 1, 1); + Resource capability7 = Resource.newInstance(2000, 1, 1); ContainerRequest storedContainer1 = new ContainerRequest(capability1, nodes, racks, priority); @@ -267,7 +264,7 @@ public class TestAMRMClient { List> matches; ContainerRequest storedRequest; // exact match - Resource testCapability1 = Resource.newInstance(1024, 2); + Resource testCapability1 = Resource.newInstance(1024, 2, 2); matches = amClient.getMatchingRequests(priority, node, testCapability1); verifyMatches(matches, 1); storedRequest = matches.get(0).iterator().next(); @@ -275,7 +272,7 @@ public class TestAMRMClient { amClient.removeContainerRequest(storedContainer1); // exact matching with order maintained - Resource testCapability2 = Resource.newInstance(2000, 1); + Resource testCapability2 = Resource.newInstance(2000, 1, 1); matches = amClient.getMatchingRequests(priority, node, testCapability2); verifyMatches(matches, 2); // must be returned in the order they were made @@ -290,11 +287,11 @@ public class TestAMRMClient { amClient.removeContainerRequest(storedContainer6); // matching with larger container. 
all requests returned - Resource testCapability3 = Resource.newInstance(4000, 4); + Resource testCapability3 = Resource.newInstance(4000, 4, 4); matches = amClient.getMatchingRequests(priority, node, testCapability3); assert(matches.size() == 4); - Resource testCapability4 = Resource.newInstance(1024, 2); + Resource testCapability4 = Resource.newInstance(1024, 2, 2); matches = amClient.getMatchingRequests(priority, node, testCapability4); assert(matches.size() == 2); // verify non-fitting containers are not returned and fitting ones are @@ -307,13 +304,13 @@ public class TestAMRMClient { testRequest == storedContainer3); } - Resource testCapability5 = Resource.newInstance(512, 4); + Resource testCapability5 = Resource.newInstance(512, 4, 4); matches = amClient.getMatchingRequests(priority, node, testCapability5); assert(matches.size() == 0); // verify requests without relaxed locality are only returned at specific // locations - Resource testCapability7 = Resource.newInstance(2000, 1); + Resource testCapability7 = Resource.newInstance(2000, 1, 1); matches = amClient.getMatchingRequests(priority2, ResourceRequest.ANY, testCapability7); assert(matches.size() == 0); @@ -347,7 +344,7 @@ public class TestAMRMClient { amClient.start(); amClient.registerApplicationMaster("Host", 10000, ""); - Resource capability = Resource.newInstance(1024, 2); + Resource capability = Resource.newInstance(1024, 2, 2); ContainerRequest storedContainer1 = new ContainerRequest(capability, nodes, null, priority); @@ -552,7 +549,7 @@ public class TestAMRMClient { // create a invalid ContainerRequest - memory value is minus ContainerRequest invalidContainerRequest = - new ContainerRequest(Resource.newInstance(-1024, 1), + new ContainerRequest(Resource.newInstance(-1024, 1, 1), nodes, racks, priority); amClient.addContainerRequest(invalidContainerRequest); amClient.updateBlacklist(localNodeBlacklist, null); diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestAMRMClientContainerRequest.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestAMRMClientContainerRequest.java index cb8c86a..7246bae 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestAMRMClientContainerRequest.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestAMRMClientContainerRequest.java @@ -46,7 +46,7 @@ public class TestAMRMClientContainerRequest { MyResolver.class, DNSToSwitchMapping.class); client.init(conf); - Resource capability = Resource.newInstance(1024, 1); + Resource capability = Resource.newInstance(1024, 1, 1); ContainerRequest request = new ContainerRequest(capability, new String[] {"host1", "host2"}, new String[] {"/rack2"}, Priority.newInstance(1)); @@ -68,7 +68,7 @@ public class TestAMRMClientContainerRequest { MyResolver.class, DNSToSwitchMapping.class); client.init(conf); - Resource capability = Resource.newInstance(1024, 1); + Resource capability = Resource.newInstance(1024, 1, 1); ContainerRequest nodeLevelRequest = new ContainerRequest(capability, new String[] {"host1", "host2"}, null, Priority.newInstance(1), false); @@ -135,7 +135,7 @@ public class TestAMRMClientContainerRequest { MyResolver.class, DNSToSwitchMapping.class); client.init(conf); - Resource capability = Resource.newInstance(1024, 1); + Resource capability = Resource.newInstance(1024, 1, 1); ContainerRequest request1 = new ContainerRequest(capability, new String[] {"host1", "host2"}, null, Priority.newInstance(1), false); @@ -156,7 +156,7 @@ public class TestAMRMClientContainerRequest { MyResolver.class, DNSToSwitchMapping.class); client.init(conf); - Resource capability = Resource.newInstance(1024, 1); + Resource capability = Resource.newInstance(1024, 1, 1); 
ContainerRequest request1 = new ContainerRequest(capability, new String[] {"host1", "host2"}, null, Priority.newInstance(1), false); @@ -195,7 +195,7 @@ public class TestAMRMClientContainerRequest { MyResolver.class, DNSToSwitchMapping.class); client.init(conf); - Resource capability = Resource.newInstance(1024, 1); + Resource capability = Resource.newInstance(1024, 1, 1); ContainerRequest request1 = new ContainerRequest(capability, new String[] {"host1", "host2"}, null, Priority.newInstance(1), false); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/cli/TestYarnCLI.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/cli/TestYarnCLI.java index 4b60c52..99fb51a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/cli/TestYarnCLI.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/cli/TestYarnCLI.java @@ -97,7 +91,7 @@ public class TestYarnCLI { ApplicationId applicationId = ApplicationId.newInstance(1234, 5); ApplicationResourceUsageReport usageReport = i == 0 ? null : ApplicationResourceUsageReport.newInstance( - 2, 0, null, null, null, 123456, 4567); + 2, 0, null, null, null, 123456, 4567, 4567); ApplicationReport newApplicationReport = ApplicationReport.newInstance( applicationId, ApplicationAttemptId.newInstance(applicationId, 1), "user", "queue", "appname", "host", 124, null, @@ -126,7 +120,7 @@ public class TestYarnCLI { pw.println("\tRPC Port : 124"); pw.println("\tAM Host : host"); pw.println("\tAggregate Resource Allocation : " + - (i == 0 ? "N/A" : "123456 MB-seconds, 4567 vcore-seconds")); + (i == 0 ? 
"N/A" : "123456 MB-seconds, 4567 vcore-seconds, 4567 gcore-seconds")); pw.println("\tDiagnostics : diagnostics"); pw.close(); String appReportStr = baos.toString("UTF-8"); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ApplicationResourceUsageReportPBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ApplicationResourceUsageReportPBImpl.java index 1cabadd..0703df7 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ApplicationResourceUsageReportPBImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ApplicationResourceUsageReportPBImpl.java @@ -223,7 +223,19 @@ extends ApplicationResourceUsageReport { ApplicationResourceUsageReportProtoOrBuilder p = viaProto ? proto : builder; return (p.getVcoreSeconds()); } - + + @Override + public synchronized void setGcoreSeconds(long gcore_seconds) { + maybeInitBuilder(); + builder.setGcoreSeconds(gcore_seconds); + } + + @Override + public synchronized long getGcoreSeconds() { + ApplicationResourceUsageReportProtoOrBuilder p = viaProto ? 
proto : builder; + return (p.getGcoreSeconds()); + } + private ResourcePBImpl convertFromProtoFormat(ResourceProto p) { return new ResourcePBImpl(p); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ResourcePBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ResourcePBImpl.java index a28c6ed..d4a11bb 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ResourcePBImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ResourcePBImpl.java @@ -80,10 +80,25 @@ public class ResourcePBImpl extends Resource { } @Override + public int getGpuCores() { + ResourceProtoOrBuilder p = viaProto ? proto : builder; + return (p.getGpuCores()); + } + + @Override + public void setGpuCores(int gCores) { + maybeInitBuilder(); + builder.setGpuCores((gCores)); + } + + @Override public int compareTo(Resource other) { int diff = this.getMemory() - other.getMemory(); if (diff == 0) { diff = this.getVirtualCores() - other.getVirtualCores(); + if (diff == 0) { + diff = this.getGpuCores() - other.getGpuCores(); + } } return diff; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/nodelabels/CommonNodeLabelsManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/nodelabels/CommonNodeLabelsManager.java index b9031d6..51741ff 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/nodelabels/CommonNodeLabelsManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/nodelabels/CommonNodeLabelsManager.java @@ -125,7 +124,7 @@ public class CommonNodeLabelsManager extends AbstractService { protected Node(NodeId nodeid) { labels = null; - 
resource = Resource.newInstance(0, 0); + resource = Resource.newInstance(0, 0, 0); running = false; nodeId = nodeid; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/nodelabels/NodeLabel.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/nodelabels/NodeLabel.java index 1765a65..d189414 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/nodelabels/NodeLabel.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/nodelabels/NodeLabel.java @@ -32,7 +33,7 @@ public class NodeLabel implements Comparable { private Set nodeIds; public NodeLabel(String labelName) { - this(labelName, Resource.newInstance(0, 0), 0); + this(labelName, Resource.newInstance(0, 0, 0), 0); } protected NodeLabel(String labelName, Resource res, int activeNMs) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/DominantResourceCalculator.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/DominantResourceCalculator.java index 2ee95ce..5b8a1c0 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/DominantResourceCalculator.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/DominantResourceCalculator.java @@ -17,10 +17,14 @@ */ package org.apache.hadoop.yarn.util.resource; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.classification.InterfaceStability.Unstable; import org.apache.hadoop.yarn.api.records.Resource; +import java.util.Arrays; + /** * A {@link ResourceCalculator} which uses the concept of * dominant resource to compare 
multi-dimensional resources. @@ -45,89 +49,71 @@ import org.apache.hadoop.yarn.api.records.Resource; @Private @Unstable public class DominantResourceCalculator extends ResourceCalculator { - + + private static final Log LOG = LogFactory.getLog(DominantResourceCalculator.class); + @Override public int compare(Resource clusterResource, Resource lhs, Resource rhs) { if (lhs.equals(rhs)) { return 0; } - - if (isInvalidDivisor(clusterResource)) { - if ((lhs.getMemory() < rhs.getMemory() && lhs.getVirtualCores() > rhs - .getVirtualCores()) - || (lhs.getMemory() > rhs.getMemory() && lhs.getVirtualCores() < rhs - .getVirtualCores())) { - return 0; - } else if (lhs.getMemory() > rhs.getMemory() - || lhs.getVirtualCores() > rhs.getVirtualCores()) { - return 1; - } else if (lhs.getMemory() < rhs.getMemory() - || lhs.getVirtualCores() < rhs.getVirtualCores()) { - return -1; - } - } - float l = getResourceAsValue(clusterResource, lhs, true); - float r = getResourceAsValue(clusterResource, rhs, true); - - if (l < r) { - return -1; - } else if (l > r) { - return 1; - } else { - l = getResourceAsValue(clusterResource, lhs, false); - r = getResourceAsValue(clusterResource, rhs, false); + float[] lValues = new float[] { + (clusterResource.getMemory() != 0) ? (float) lhs.getMemory() / clusterResource.getMemory() : lhs.getMemory(), + (clusterResource.getVirtualCores() != 0) ? (float) lhs.getVirtualCores() / clusterResource.getVirtualCores() : lhs.getVirtualCores(), + (clusterResource.getGpuCores() != 0) ? (float) lhs.getGpuCores() / clusterResource.getGpuCores() : 0.0f }; + Arrays.sort(lValues); + + float[] rValues = new float[] { + (clusterResource.getMemory() != 0) ? (float) rhs.getMemory() / clusterResource.getMemory() : rhs.getMemory(), + (clusterResource.getVirtualCores() != 0) ? (float) rhs.getVirtualCores() / clusterResource.getVirtualCores() : rhs.getVirtualCores(), + (clusterResource.getGpuCores() != 0) ? 
(float) rhs.getGpuCores() / clusterResource.getGpuCores() : 0.0f }; + Arrays.sort(rValues); + + int diff = 0; + for(int i = 0; i < 3; i++) { + float l = lValues[i]; + float r = rValues[i]; if (l < r) { - return -1; + diff = -1; } else if (l > r) { - return 1; + diff = 1; } } - return 0; + return diff; } - /** - * Use 'dominant' for now since we only have 2 resources - gives us a slight - * performance boost. - * - * Once we add more resources, we'll need a more complicated (and slightly - * less performant algorithm). - */ - protected float getResourceAsValue( - Resource clusterResource, Resource resource, boolean dominant) { - // Just use 'dominant' resource - return (dominant) ? - Math.max( - (float)resource.getMemory() / clusterResource.getMemory(), - (float)resource.getVirtualCores() / clusterResource.getVirtualCores() - ) - : - Math.min( - (float)resource.getMemory() / clusterResource.getMemory(), - (float)resource.getVirtualCores() / clusterResource.getVirtualCores() - ); + protected float getResourceAsValueMax( Resource clusterResource, + Resource resource) { + return Math.max((float) resource.getMemory() / clusterResource.getMemory(), + (float) resource.getVirtualCores() / clusterResource.getVirtualCores()); } - + @Override public int computeAvailableContainers(Resource available, Resource required) { - return Math.min( - available.getMemory() / required.getMemory(), + int min = Math.min( + available.getMemory() / required.getMemory(), available.getVirtualCores() / required.getVirtualCores()); + if (required.getGpuCores() != 0) { + min = Math.min(min, + available.getGpuCores() / required.getGpuCores()); + } + return min; } @Override public float divide(Resource clusterResource, Resource numerator, Resource denominator) { return - getResourceAsValue(clusterResource, numerator, true) / - getResourceAsValue(clusterResource, denominator, true); + getResourceAsValueMax(clusterResource, numerator) / + getResourceAsValueMax(clusterResource, denominator); } 
@Override public boolean isInvalidDivisor(Resource r) { - if (r.getMemory() == 0.0f || r.getVirtualCores() == 0.0f) { + if (r.getMemory() == 0.0f || r.getVirtualCores() == 0.0f || r.getGpuCores() == 0.0f) { return true; } return false; @@ -135,17 +121,22 @@ public class DominantResourceCalculator extends ResourceCalculator { @Override public float ratio(Resource a, Resource b) { - return Math.max( - (float)a.getMemory()/b.getMemory(), - (float)a.getVirtualCores()/b.getVirtualCores() - ); + float max = Math.max( + (float) a.getMemory() / b.getMemory(), + (float) a.getVirtualCores() / b.getVirtualCores()); + if (b.getGpuCores() != 0) { + max = Math.max(max, + (float) a.getGpuCores() / b.getGpuCores()); + } + return max; } @Override public Resource divideAndCeil(Resource numerator, int denominator) { return Resources.createResource( divideAndCeil(numerator.getMemory(), denominator), - divideAndCeil(numerator.getVirtualCores(), denominator) + divideAndCeil(numerator.getVirtualCores(), denominator), + divideAndCeil(numerator.getGpuCores(), denominator) ); } @@ -162,15 +153,21 @@ public class DominantResourceCalculator extends ResourceCalculator { Math.max(r.getVirtualCores(), minimumResource.getVirtualCores()), stepFactor.getVirtualCores()), maximumResource.getVirtualCores()); + int normalizedGCores = Math.min( + roundUpWithZero( + Math.max(r.getGpuCores(), minimumResource.getGpuCores()), + stepFactor.getGpuCores()), + maximumResource.getGpuCores()); return Resources.createResource(normalizedMemory, - normalizedCores); + normalizedCores, normalizedGCores); } @Override public Resource roundUp(Resource r, Resource stepFactor) { return Resources.createResource( roundUp(r.getMemory(), stepFactor.getMemory()), - roundUp(r.getVirtualCores(), stepFactor.getVirtualCores()) + roundUp(r.getVirtualCores(), stepFactor.getVirtualCores()), + roundUpWithZero(r.getGpuCores(), stepFactor.getGpuCores()) ); } @@ -178,7 +175,8 @@ public class DominantResourceCalculator extends 
ResourceCalculator { public Resource roundDown(Resource r, Resource stepFactor) { return Resources.createResource( roundDown(r.getMemory(), stepFactor.getMemory()), - roundDown(r.getVirtualCores(), stepFactor.getVirtualCores()) + roundDown(r.getVirtualCores(), stepFactor.getVirtualCores()), + roundDownWithZero(r.getGpuCores(), stepFactor.getGpuCores()) ); } @@ -190,7 +188,10 @@ public class DominantResourceCalculator extends ResourceCalculator { (int)Math.ceil(r.getMemory() * by), stepFactor.getMemory()), roundUp( (int)Math.ceil(r.getVirtualCores() * by), - stepFactor.getVirtualCores()) + stepFactor.getVirtualCores()), + roundUpWithZero( + (int)Math.ceil(r.getGpuCores() * by), + stepFactor.getGpuCores()) ); } @@ -205,6 +206,10 @@ public class DominantResourceCalculator extends ResourceCalculator { roundDown( (int)(r.getVirtualCores() * by), stepFactor.getVirtualCores() + ), + roundDownWithZero( + (int) (r.getGpuCores() * by), + stepFactor.getGpuCores() ) ); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/ResourceCalculator.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/ResourceCalculator.java index 442196c..f58fe91 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/ResourceCalculator.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/ResourceCalculator.java @@ -46,6 +46,19 @@ public abstract class ResourceCalculator { return (a / b) * b; } + public static int roundUpWithZero(int a, int b) { + if (b == 0) { + return a; + } + return ((a + (b - 1)) / b) * b; + } + + public static int roundDownWithZero(int a, int b) { + if (b==0) { + return a; + } + return roundDown(a, b); + } /** * Compute the number of containers which can be allocated given * available and required resources. 
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/Resources.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/Resources.java index bcb0421..09400dc 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/Resources.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/Resources.java @@ -51,10 +51,19 @@ public class Resources { } @Override + public int getGpuCores() { return 0; } + + @Override + public void setGpuCores(int gcores) { throw new RuntimeException("NONE cannot be modified!"); } + + @Override public int compareTo(Resource o) { int diff = 0 - o.getMemory(); if (diff == 0) { diff = 0 - o.getVirtualCores(); + if (diff == 0) { + diff = 0 - o.getGpuCores(); + } } return diff; } @@ -84,10 +93,23 @@ public class Resources { } @Override + public int getGpuCores() { + return Integer.MAX_VALUE; + } + + @Override + public void setGpuCores(int gcores) { + throw new RuntimeException("NONE cannot be modified!"); + } + + @Override public int compareTo(Resource o) { int diff = 0 - o.getMemory(); if (diff == 0) { diff = 0 - o.getVirtualCores(); + if (diff == 0) { + diff = 0 - o.getGpuCores(); + } } return diff; } @@ -95,13 +117,18 @@ public class Resources { }; public static Resource createResource(int memory) { - return createResource(memory, (memory > 0) ? 1 : 0); + return createResource(memory, (memory > 0) ? 
1 : 0, 0); } public static Resource createResource(int memory, int cores) { + return createResource(memory, cores, 0); + } + + public static Resource createResource(int memory, int cores, int gcores) { Resource resource = Records.newRecord(Resource.class); resource.setMemory(memory); resource.setVirtualCores(cores); + resource.setGpuCores(gcores); return resource; } @@ -114,12 +141,13 @@ public class Resources { } public static Resource clone(Resource res) { - return createResource(res.getMemory(), res.getVirtualCores()); + return createResource(res.getMemory(), res.getVirtualCores(), res.getGpuCores()); } public static Resource addTo(Resource lhs, Resource rhs) { lhs.setMemory(lhs.getMemory() + rhs.getMemory()); lhs.setVirtualCores(lhs.getVirtualCores() + rhs.getVirtualCores()); + lhs.setGpuCores(lhs.getGpuCores() + rhs.getGpuCores()); return lhs; } @@ -130,6 +158,7 @@ public class Resources { public static Resource subtractFrom(Resource lhs, Resource rhs) { lhs.setMemory(lhs.getMemory() - rhs.getMemory()); lhs.setVirtualCores(lhs.getVirtualCores() - rhs.getVirtualCores()); + lhs.setGpuCores(lhs.getGpuCores() - rhs.getGpuCores()); return lhs; } @@ -144,6 +173,7 @@ public class Resources { public static Resource multiplyTo(Resource lhs, double by) { lhs.setMemory((int)(lhs.getMemory() * by)); lhs.setVirtualCores((int)(lhs.getVirtualCores() * by)); + lhs.setGpuCores((int) (lhs.getGpuCores() * by)); return lhs; } @@ -165,6 +195,7 @@ public class Resources { Resource out = clone(lhs); out.setMemory((int)(lhs.getMemory() * by)); out.setVirtualCores((int)(lhs.getVirtualCores() * by)); + out.setGpuCores((int)(lhs.getGpuCores() * by)); return out; } @@ -253,16 +284,19 @@ public class Resources { public static boolean fitsIn(Resource smaller, Resource bigger) { return smaller.getMemory() <= bigger.getMemory() && - smaller.getVirtualCores() <= bigger.getVirtualCores(); + smaller.getVirtualCores() <= bigger.getVirtualCores() && + smaller.getGpuCores() <= 
bigger.getGpuCores(); } public static Resource componentwiseMin(Resource lhs, Resource rhs) { return createResource(Math.min(lhs.getMemory(), rhs.getMemory()), - Math.min(lhs.getVirtualCores(), rhs.getVirtualCores())); + Math.min(lhs.getVirtualCores(), rhs.getVirtualCores()), + Math.min(lhs.getGpuCores(), rhs.getGpuCores())); } public static Resource componentwiseMax(Resource lhs, Resource rhs) { return createResource(Math.max(lhs.getMemory(), rhs.getMemory()), - Math.max(lhs.getVirtualCores(), rhs.getVirtualCores())); + Math.max(lhs.getVirtualCores(), rhs.getVirtualCores()), + Math.max(lhs.getGpuCores(), rhs.getGpuCores())); } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml index 079d4d2..55d5eaa 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml @@ -272,6 +272,22 @@ + The minimum allocation for every container request at the RM, + in terms of GPU cores. Requests lower than this will throw a + InvalidResourceRequestException. + yarn.scheduler.minimum-allocation-gcores + 0 + + + + The maximum allocation for every container request at the RM, + in terms of GPU cores. Requests higher than this will throw a + InvalidResourceRequestException. + yarn.scheduler.maximum-allocation-gcores + 8 + + + Enable RM to recover state after starting. If true, then yarn.resourcemanager.store.class must be specified. yarn.resourcemanager.recovery.enabled @@ -925,6 +941,15 @@ + Number of gcores that can be allocated + for containers. This is used by the RM scheduler when allocating + resources for containers. This is not used to limit the number of + gpu cores used by YARN containers. + yarn.nodemanager.resource.gcores + 0 + + + Percentage of CPU that can be allocated for containers. 
This setting allows users to limit the amount of CPU that YARN containers use. Currently functional only diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/TestContainerLaunchRPC.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/TestContainerLaunchRPC.java index e2071dd..3b74df7 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/TestContainerLaunchRPC.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/TestContainerLaunchRPC.java @@ -99,7 +99,7 @@ public class TestContainerLaunchRPC { ContainerId containerId = ContainerId.newContainerId(applicationAttemptId, 100); NodeId nodeId = NodeId.newInstance("localhost", 1234); - Resource resource = Resource.newInstance(1234, 2); + Resource resource = Resource.newInstance(1234, 2, 3); ContainerTokenIdentifier containerTokenIdentifier = new ContainerTokenIdentifier(containerId, "localhost", "user", resource, System.currentTimeMillis() + 10000, 42, 42, diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/TestContainerResourceDecrease.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/TestContainerResourceDecrease.java index 29b0ffe..cb776f4 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/TestContainerResourceDecrease.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/TestContainerResourceDecrease.java @@ -35,7 +35,7 @@ public class TestContainerResourceDecrease { ContainerId containerId = ContainerId .newContainerId(ApplicationAttemptId.newInstance( ApplicationId.newInstance(1234, 3), 3), 7); - Resource resource = Resource.newInstance(1023, 3); + Resource resource = Resource.newInstance(1023, 3, 4); ContainerResourceDecrease ctx = 
ContainerResourceDecrease.newInstance( containerId, resource); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/TestContainerResourceIncrease.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/TestContainerResourceIncrease.java index 932d5a7..2d2a7af 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/TestContainerResourceIncrease.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/TestContainerResourceIncrease.java @@ -40,7 +40,7 @@ public class TestContainerResourceIncrease { ContainerId containerId = ContainerId .newContainerId(ApplicationAttemptId.newInstance( ApplicationId.newInstance(1234, 3), 3), 7); - Resource resource = Resource.newInstance(1023, 3); + Resource resource = Resource.newInstance(1023, 3, 4); ContainerResourceIncrease ctx = ContainerResourceIncrease.newInstance( containerId, resource, token); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/TestContainerResourceIncreaseRequest.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/TestContainerResourceIncreaseRequest.java index cf4dabf..041e3d7 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/TestContainerResourceIncreaseRequest.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/TestContainerResourceIncreaseRequest.java @@ -35,7 +35,7 @@ public class TestContainerResourceIncreaseRequest { ContainerId containerId = ContainerId .newContainerId(ApplicationAttemptId.newInstance( ApplicationId.newInstance(1234, 3), 3), 7); - Resource resource = Resource.newInstance(1023, 3); + Resource resource = Resource.newInstance(1023, 3, 4); ContainerResourceIncreaseRequest context = 
ContainerResourceIncreaseRequest .newInstance(containerId, resource); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/main/java/org/apache/hadoop/yarn/server/applicationhistoryservice/ApplicationHistoryManagerOnTimelineStore.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/main/java/org/apache/hadoop/yarn/server/applicationhistoryservice/ApplicationHistoryManagerOnTimelineStore.java index 5f4510b..03695e4 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/main/java/org/apache/hadoop/yarn/server/applicationhistoryservice/ApplicationHistoryManagerOnTimelineStore.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/main/java/org/apache/hadoop/yarn/server/applicationhistoryservice/ApplicationHistoryManagerOnTimelineStore.java @@ -286,8 +285,10 @@ public class ApplicationHistoryManagerOnTimelineStore extends AbstractService ApplicationMetricsConstants.APP_CPU_METRICS).toString()); long memorySeconds=Long.parseLong(entityInfo.get( ApplicationMetricsConstants.APP_MEM_METRICS).toString()); + long gcoreSeconds=Long.parseLong(entityInfo.get( + ApplicationMetricsConstants.APP_GPU_METRICS).toString()); appResources=ApplicationResourceUsageReport - .newInstance(0, 0, null, null, null, memorySeconds, vcoreSeconds); + .newInstance(0, 0, null, null, null, memorySeconds, vcoreSeconds, gcoreSeconds); } } List events = entity.getEvents(); @@ -426,6 +427,7 @@ public class ApplicationHistoryManagerOnTimelineStore extends AbstractService TimelineEntity entity, String serverHttpAddress, String user) { int allocatedMem = 0; int allocatedVcore = 0; + int allocatedGcore = 0; String allocatedHost = null; int allocatedPort = -1; int allocatedPriority = 0; @@ -448,6 +450,11 @@ public class ApplicationHistoryManagerOnTimelineStore extends AbstractService 
ContainerMetricsConstants.ALLOCATED_VCORE_ENTITY_INFO); } if (entityInfo + .containsKey(ContainerMetricsConstants.ALLOCATED_GCORE_ENTITY_INFO)) { + allocatedGcore = (Integer) entityInfo.get( + ContainerMetricsConstants.ALLOCATED_GCORE_ENTITY_INFO); + } + if (entityInfo .containsKey(ContainerMetricsConstants.ALLOCATED_HOST_ENTITY_INFO)) { allocatedHost = entityInfo @@ -516,7 +523,7 @@ public class ApplicationHistoryManagerOnTimelineStore extends AbstractService user); return ContainerReport.newInstance( ConverterUtils.toContainerId(entity.getEntityId()), - Resource.newInstance(allocatedMem, allocatedVcore), + Resource.newInstance(allocatedMem, allocatedVcore, allocatedGcore), NodeId.newInstance(allocatedHost, allocatedPort), Priority.newInstance(allocatedPriority), createdTime, finishedTime, diagnosticsInfo, logUrl, exitStatus, state, diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/test/java/org/apache/hadoop/yarn/server/applicationhistoryservice/ApplicationHistoryStoreTestUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/test/java/org/apache/hadoop/yarn/server/applicationhistoryservice/ApplicationHistoryStoreTestUtils.java index de4051a..8308b51 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/test/java/org/apache/hadoop/yarn/server/applicationhistoryservice/ApplicationHistoryStoreTestUtils.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/test/java/org/apache/hadoop/yarn/server/applicationhistoryservice/ApplicationHistoryStoreTestUtils.java @@ -72,7 +72,7 @@ public class ApplicationHistoryStoreTestUtils { protected void writeContainerStartData(ContainerId containerId) throws IOException { store.containerStarted(ContainerStartData.newInstance(containerId, - Resource.newInstance(0, 0), NodeId.newInstance("localhost", 
0), + Resource.newInstance(0, 0, 0), NodeId.newInstance("localhost", 0), Priority.newInstance(containerId.getId()), 0)); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/test/java/org/apache/hadoop/yarn/server/applicationhistoryservice/TestApplicationHistoryClientService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/test/java/org/apache/hadoop/yarn/server/applicationhistoryservice/TestApplicationHistoryClientService.java index ba701a1..604ac98 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/test/java/org/apache/hadoop/yarn/server/applicationhistoryservice/TestApplicationHistoryClientService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/test/java/org/apache/hadoop/yarn/server/applicationhistoryservice/TestApplicationHistoryClientService.java @@ -152,6 +152,8 @@ public class TestApplicationHistoryClientService { .getMemorySeconds()); Assert.assertEquals(345, appReport.getApplicationResourceUsageReport() .getVcoreSeconds()); + Assert.assertEquals(345, appReport.getApplicationResourceUsageReport() + .getGcoreSeconds()); Assert.assertEquals("application_0_0001", appReport.getApplicationId() .toString()); Assert.assertEquals("test app type", diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/test/java/org/apache/hadoop/yarn/server/applicationhistoryservice/TestApplicationHistoryManagerOnTimelineStore.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/test/java/org/apache/hadoop/yarn/server/applicationhistoryservice/TestApplicationHistoryManagerOnTimelineStore.java index fcdafc2..7556204 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/test/java/org/apache/hadoop/yarn/server/applicationhistoryservice/TestApplicationHistoryManagerOnTimelineStore.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/test/java/org/apache/hadoop/yarn/server/applicationhistoryservice/TestApplicationHistoryManagerOnTimelineStore.java @@ -216,6 +216,8 @@ public class TestApplicationHistoryManagerOnTimelineStore { applicationResourceUsageReport.getMemorySeconds()); Assert .assertEquals(345, applicationResourceUsageReport.getVcoreSeconds()); + Assert + .assertEquals(345, applicationResourceUsageReport.getGcoreSeconds()); Assert.assertEquals(FinalApplicationStatus.UNDEFINED, app.getFinalApplicationStatus()); Assert.assertEquals(YarnApplicationState.FINISHED, @@ -318,7 +320,7 @@ public class TestApplicationHistoryManagerOnTimelineStore { Assert.assertNotNull(container); Assert.assertEquals(Integer.MAX_VALUE + 1L, container.getCreationTime()); Assert.assertEquals(Integer.MAX_VALUE + 2L, container.getFinishTime()); - Assert.assertEquals(Resource.newInstance(-1, -1), + Assert.assertEquals(Resource.newInstance(-1, -1, -1), container.getAllocatedResource()); Assert.assertEquals(NodeId.newInstance("test host", 100), container.getAssignedNode()); @@ -549,6 +551,7 @@ public class TestApplicationHistoryManagerOnTimelineStore { Map entityInfo = new HashMap(); entityInfo.put(ContainerMetricsConstants.ALLOCATED_MEMORY_ENTITY_INFO, -1); entityInfo.put(ContainerMetricsConstants.ALLOCATED_VCORE_ENTITY_INFO, -1); + entityInfo.put(ContainerMetricsConstants.ALLOCATED_GCORE_ENTITY_INFO, -1); entityInfo.put(ContainerMetricsConstants.ALLOCATED_HOST_ENTITY_INFO, "test host"); entityInfo.put(ContainerMetricsConstants.ALLOCATED_PORT_ENTITY_INFO, 100); diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/metrics/ApplicationMetricsConstants.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/metrics/ApplicationMetricsConstants.java index df8eecb..5f72786 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/metrics/ApplicationMetricsConstants.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/metrics/ApplicationMetricsConstants.java @@ -70,6 +70,9 @@ public class ApplicationMetricsConstants { public static final String APP_MEM_METRICS = "YARN_APPLICATION_MEM_METRIC"; + public static final String APP_GPU_METRICS = + "YARN_APPLICATION_GPU_METRIC"; + public static final String LATEST_APP_ATTEMPT_EVENT_INFO = "YARN_APPLICATION_LATEST_APP_ATTEMPT"; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/metrics/ContainerMetricsConstants.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/metrics/ContainerMetricsConstants.java index 0d5540d..5ba5247 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/metrics/ContainerMetricsConstants.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/metrics/ContainerMetricsConstants.java @@ -39,6 +39,9 @@ public class ContainerMetricsConstants { public static final String ALLOCATED_VCORE_ENTITY_INFO = "YARN_CONTAINER_ALLOCATED_VCORE"; + public static final String ALLOCATED_GCORE_ENTITY_INFO = + "YARN_CONTAINER_ALLOCATED_GCORE"; + public static final String 
ALLOCATED_HOST_ENTITY_INFO = "YARN_CONTAINER_ALLOCATED_HOST"; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/utils/BuilderUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/utils/BuilderUtils.java index c366e94..bf56c57 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/utils/BuilderUtils.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/utils/BuilderUtils.java @@ -394,7 +393,7 @@ public class BuilderUtils { public static ApplicationResourceUsageReport newApplicationResourceUsageReport( int numUsedContainers, int numReservedContainers, Resource usedResources, Resource reservedResources, Resource neededResources, long memorySeconds, - long vcoreSeconds) { + long vcoreSeconds, long gcoreSeconds) { ApplicationResourceUsageReport report = recordFactory.newRecordInstance(ApplicationResourceUsageReport.class); report.setNumUsedContainers(numUsedContainers); @@ -404,6 +403,7 @@ public class BuilderUtils { report.setNeededResources(neededResources); report.setMemorySeconds(memorySeconds); report.setVcoreSeconds(vcoreSeconds); + report.setGcoreSeconds(gcoreSeconds); return report; } @@ -411,6 +411,15 @@ public class BuilderUtils { Resource resource = recordFactory.newRecordInstance(Resource.class); resource.setMemory(memory); resource.setVirtualCores(vCores); + resource.setGpuCores(0); + return resource; + } + + public static Resource newResource(int memory, int vCores, int gCores) { + Resource resource = recordFactory.newRecordInstance(Resource.class); + resource.setMemory(memory); + resource.setVirtualCores(vCores); + resource.setGpuCores(gCores); return resource; } diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/webapp/ContainerBlock.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/webapp/ContainerBlock.java index cae8d2e..b94f534 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/webapp/ContainerBlock.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/webapp/ContainerBlock.java @@ -119,7 +118,8 @@ public class ContainerBlock extends HtmlBlock { ._( "Resource:", container.getAllocatedMB() + " Memory, " - + container.getAllocatedVCores() + " VCores") + + container.getAllocatedVCores() + " VCores, " + + container.getAllocatedGCores() + " GCores") ._("Logs:", container.getLogUrl() == null ? "#" : container.getLogUrl(), container.getLogUrl() == null ? "N/A" : "Logs") ._("Diagnostics:", container.getDiagnosticsInfo() == null ? 
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/webapp/dao/ContainerInfo.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/webapp/dao/ContainerInfo.java index d0d4df6..23358f1 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/webapp/dao/ContainerInfo.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/webapp/dao/ContainerInfo.java @@ -38,6 +37,7 @@ public class ContainerInfo { protected String containerId; protected int allocatedMB; protected int allocatedVCores; + protected int allocatedGCores; protected String assignedNodeId; protected int priority; protected long startedTime; @@ -58,6 +58,7 @@ public class ContainerInfo { if (container.getAllocatedResource() != null) { allocatedMB = container.getAllocatedResource().getMemory(); allocatedVCores = container.getAllocatedResource().getVirtualCores(); + allocatedGCores = container.getAllocatedResource().getGpuCores(); } if (container.getAssignedNode() != null) { assignedNodeId = container.getAssignedNode().toString(); @@ -85,6 +86,10 @@ public class ContainerInfo { return allocatedVCores; } + public int getAllocatedGCores() { + return allocatedGCores; + } + public String getAssignedNodeId() { return assignedNodeId; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/yarn/TestYarnServerApiClasses.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/yarn/TestYarnServerApiClasses.java index 20983b6..86d6e98 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/yarn/TestYarnServerApiClasses.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/yarn/TestYarnServerApiClasses.java @@ -133,6 +133,7 @@ public class TestYarnServerApiClasses { Resource resource = recordFactory.newRecordInstance(Resource.class); resource.setMemory(10000); resource.setVirtualCores(2); + resource.setGpuCores(3); original.setResource(resource); RegisterNodeManagerRequestPBImpl copy = new RegisterNodeManagerRequestPBImpl( original.getProto()); @@ -141,6 +142,7 @@ public class TestYarnServerApiClasses { assertEquals(9090, copy.getNodeId().getPort()); assertEquals(10000, copy.getResource().getMemory()); assertEquals(2, copy.getResource().getVirtualCores()); + assertEquals(3, copy.getResource().getGpuCores()); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/yarn/server/api/protocolrecords/TestProtocolRecords.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/yarn/server/api/protocolrecords/TestProtocolRecords.java index 86e49f0..4581cef 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/yarn/server/api/protocolrecords/TestProtocolRecords.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/yarn/server/api/protocolrecords/TestProtocolRecords.java @@ -52,7 +52,7 @@ public class TestProtocolRecords { ApplicationId appId = ApplicationId.newInstance(123456789, 1); ApplicationAttemptId attemptId = ApplicationAttemptId.newInstance(appId, 1); ContainerId containerId = ContainerId.newContainerId(attemptId, 1); - Resource resource = Resource.newInstance(1000, 200); + Resource resource = Resource.newInstance(1000, 200, 300); NMContainerStatus report = NMContainerStatus.newInstance(containerId, @@ -80,13 +80,13 @@ public class TestProtocolRecords { 
NMContainerStatus containerReport = NMContainerStatus.newInstance(containerId, - ContainerState.RUNNING, Resource.newInstance(1024, 1), "diagnostics", + ContainerState.RUNNING, Resource.newInstance(1024, 1, 2), "diagnostics", 0, Priority.newInstance(10), 1234); List reports = Arrays.asList(containerReport); RegisterNodeManagerRequest request = RegisterNodeManagerRequest.newInstance( NodeId.newInstance("1.1.1.1", 1000), 8080, - Resource.newInstance(1024, 1), "NM-version-id", reports, + Resource.newInstance(1024, 1, 2), "NM-version-id", reports, Arrays.asList(appId)); RegisterNodeManagerRequest requestProto = new RegisterNodeManagerRequestPBImpl( @@ -97,7 +97,7 @@ public class TestProtocolRecords { Assert.assertEquals("NM-version-id", requestProto.getNMVersion()); Assert.assertEquals(NodeId.newInstance("1.1.1.1", 1000), requestProto.getNodeId()); - Assert.assertEquals(Resource.newInstance(1024, 1), + Assert.assertEquals(Resource.newInstance(1024, 1, 2), requestProto.getResource()); Assert.assertEquals(1, requestProto.getRunningApplications().size()); Assert.assertEquals(appId, requestProto.getRunningApplications().get(0)); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java index 6ddd7e4..ddabcb3 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java @@ -148,7 +147,11 @@ public class NodeStatusUpdaterImpl extends AbstractService implements conf.getInt( YarnConfiguration.NM_VCORES, 
YarnConfiguration.DEFAULT_NM_VCORES); - this.totalResource = Resource.newInstance(memoryMb, virtualCores); + int gpuCores = + conf.getInt( + YarnConfiguration.NM_GCORES, YarnConfiguration.DEFAULT_NM_GCORES); + + this.totalResource = Resource.newInstance(memoryMb, virtualCores, gpuCores); metrics.addResource(totalResource); this.tokenKeepAliveEnabled = isTokenKeepAliveEnabled(conf); this.tokenRemovalDelayMs = @@ -179,7 +182,7 @@ public class NodeStatusUpdaterImpl extends AbstractService implements super.serviceInit(conf); LOG.info("Initialized nodemanager for " + nodeId + ":" + " physical-memory=" + memoryMb + " virtual-memory=" + virtualMemoryMb + - " virtual-cores=" + virtualCores); + " virtual-cores=" + virtualCores + " gpu-cores=" + gpuCores); } @Override diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ResourceView.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ResourceView.java index 4fde7b9..63ef4f7 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ResourceView.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ResourceView.java @@ -29,4 +29,6 @@ public interface ResourceView { boolean isPmemCheckEnabled(); long getVCoresAllocatedForContainers(); + + long getGCoresAllocatedForContainers(); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java 
index f55e0e5..017fff9 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java @@ -498,10 +498,11 @@ public class ContainerImpl implements Container { YarnConfiguration.DEFAULT_NM_VMEM_PMEM_RATIO); long vmemBytes = (long) (pmemRatio * pmemBytes); int cpuVcores = getResource().getVirtualCores(); + int gpuGcores = getResource().getGpuCores(); dispatcher.getEventHandler().handle( new ContainerStartMonitoringEvent(containerId, - vmemBytes, pmemBytes, cpuVcores)); + vmemBytes, pmemBytes, cpuVcores, gpuGcores)); } private void addDiagnostics(String... diags) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainerMetrics.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainerMetrics.java index ffa72a4..e6f0d27 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainerMetrics.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainerMetrics.java @@ -45,6 +45,7 @@ public class ContainerMetrics implements MetricsSource { public static final String PMEM_LIMIT_METRIC_NAME = "pMemLimitMBs"; public static final String VMEM_LIMIT_METRIC_NAME = "vMemLimitMBs"; public static final String VCORE_LIMIT_METRIC_NAME = "vCoreLimit"; + public static final String 
GCORE_LIMIT_METRIC_NAME = "gCoreLimit"; public static final String PMEM_USAGE_METRIC_NAME = "pMemUsageMBs"; private static final String PHY_CPU_USAGE_METRIC_NAME = "pCpuUsagePercent"; @@ -74,6 +75,9 @@ public class ContainerMetrics implements MetricsSource { @Metric public MutableGaugeInt cpuVcoreLimit; + @Metric + public MutableGaugeInt gpuGcoreLimit; + static final MetricsInfo RECORD_INFO = info("ContainerResource", "Resource limit and usage by container"); @@ -122,6 +126,8 @@ public class ContainerMetrics implements MetricsSource { VMEM_LIMIT_METRIC_NAME, "Virtual memory limit in MBs", 0); this.cpuVcoreLimit = registry.newGauge( VCORE_LIMIT_METRIC_NAME, "CPU limit in number of vcores", 0); + this.gpuGcoreLimit = registry.newGauge( + GCORE_LIMIT_METRIC_NAME, "GPU limit in number of gcores", 0); } ContainerMetrics tag(MetricsInfo info, ContainerId containerId) { @@ -207,10 +213,11 @@ public class ContainerMetrics implements MetricsSource { registry.tag(PROCESSID_INFO, processId); } - public void recordResourceLimit(int vmemLimit, int pmemLimit, int cpuVcores) { + public void recordResourceLimit(int vmemLimit, int pmemLimit, int cpuVcores, int gpuGcores) { this.vMemLimitMbs.set(vmemLimit); this.pMemLimitMbs.set(pmemLimit); this.cpuVcoreLimit.set(cpuVcores); + this.gpuGcoreLimit.set(gpuGcores); } private synchronized void scheduleTimerTaskIfRequired() { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainerStartMonitoringEvent.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainerStartMonitoringEvent.java index 56e2d8e..c89e1e7 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainerStartMonitoringEvent.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainerStartMonitoringEvent.java @@ -25,13 +25,15 @@ public class ContainerStartMonitoringEvent extends ContainersMonitorEvent { private final long vmemLimit; private final long pmemLimit; private final int cpuVcores; + private final int gpuGcores; public ContainerStartMonitoringEvent(ContainerId containerId, - long vmemLimit, long pmemLimit, int cpuVcores) { + long vmemLimit, long pmemLimit, int cpuVcores, int gpuGcores) { super(containerId, ContainersMonitorEventType.START_MONITORING_CONTAINER); this.vmemLimit = vmemLimit; this.pmemLimit = pmemLimit; this.cpuVcores = cpuVcores; + this.gpuGcores = gpuGcores; } public long getVmemLimit() { @@ -45,4 +47,8 @@ public class ContainerStartMonitoringEvent extends ContainersMonitorEvent { public int getCpuVcores() { return this.cpuVcores; } + + public int getGpuGcores() { + return this.gpuGcores; + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java index 5153051..b6054d5 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java @@ -74,6 +73,7 @@ public class ContainersMonitorImpl extends AbstractService implements private boolean vmemCheckEnabled; private long maxVCoresAllottedForContainers; + private long maxGCoresAllottedForContainers; private static final long UNKNOWN_MEMORY_LIMIT = -1L; private int nodeCpuPercentageForYARN; @@ -125,12 +125,16 @@ public class ContainersMonitorImpl extends AbstractService implements YarnConfiguration.NM_VCORES, YarnConfiguration.DEFAULT_NM_VCORES); + long configuredGCoresForContainers = conf.getLong( + YarnConfiguration.NM_GCORES, + YarnConfiguration.DEFAULT_NM_GCORES); // Setting these irrespective of whether checks are enabled. Required in // the UI. // ///////// Physical memory configuration ////// this.maxPmemAllottedForContainers = configuredPMemForContainers; this.maxVCoresAllottedForContainers = configuredVCoresForContainers; + this.maxGCoresAllottedForContainers = configuredGCoresForContainers; // ///////// Virtual memory configuration ////// float vmemRatio = conf.getFloat(YarnConfiguration.NM_VMEM_PMEM_RATIO, @@ -225,16 +229,18 @@ public class ContainersMonitorImpl extends AbstractService implements private long vmemLimit; private long pmemLimit; private int cpuVcores; + private int gpuGcores; public ProcessTreeInfo(ContainerId containerId, String pid, ResourceCalculatorProcessTree pTree, long vmemLimit, long pmemLimit, - int cpuVcores) { + int cpuVcores, int gpuGcores) { this.containerId = containerId; this.pid = pid; this.pTree = pTree; this.vmemLimit = vmemLimit; this.pmemLimit = pmemLimit; this.cpuVcores = cpuVcores; + this.gpuGcores = gpuGcores; } public ContainerId getContainerId() { @@ -275,6 +281,10 @@ public class ContainersMonitorImpl extends AbstractService implements public int getCpuVcores() { return this.cpuVcores; } + + public int 
getGpuGcores() { + return this.gpuGcores; + } } @@ -419,10 +429,11 @@ public class ContainersMonitorImpl extends AbstractService implements ContainerMetrics usageMetrics = ContainerMetrics .forContainer(containerId, containerMetricsPeriodMs); int cpuVcores = ptInfo.getCpuVcores(); + int gpuGcores = ptInfo.getGpuGcores(); final int vmemLimit = (int) (ptInfo.getVmemLimit() >> 20); final int pmemLimit = (int) (ptInfo.getPmemLimit() >> 20); usageMetrics.recordResourceLimit( - vmemLimit, pmemLimit, cpuVcores); + vmemLimit, pmemLimit, cpuVcores, gpuGcores); usageMetrics.recordProcessId(pId); } } @@ -592,6 +603,10 @@ public class ContainersMonitorImpl extends AbstractService implements return this.maxVCoresAllottedForContainers; } + @Override + public long getGCoresAllocatedForContainers() { + return this.maxGCoresAllottedForContainers; + } /** * Is the total virtual memory check enabled? * @@ -618,7 +633,7 @@ public class ContainersMonitorImpl extends AbstractService implements ProcessTreeInfo processTreeInfo = new ProcessTreeInfo(containerId, null, null, startEvent.getVmemLimit(), startEvent.getPmemLimit(), - startEvent.getCpuVcores()); + startEvent.getCpuVcores(), startEvent.getGpuGcores()); this.containersToBeAdded.put(containerId, processTreeInfo); } break; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/metrics/NodeManagerMetrics.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/metrics/NodeManagerMetrics.java index 3615fee..bf40f10 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/metrics/NodeManagerMetrics.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/metrics/NodeManagerMetrics.java @@ -46,6 +46,9 @@ public class NodeManagerMetrics { @Metric("Current allocated Virtual Cores") MutableGaugeInt allocatedVCores; @Metric MutableGaugeInt availableVCores; + @Metric("Current allocated GPU Cores") + MutableGaugeInt allocatedGCores; + @Metric MutableGaugeInt availableGCores; @Metric("Container launch duration") MutableRate containerLaunchDuration; @@ -103,6 +106,8 @@ public class NodeManagerMetrics { availableGB.set((int)Math.floor(availableMB/1024d)); allocatedVCores.incr(res.getVirtualCores()); availableVCores.decr(res.getVirtualCores()); + allocatedGCores.incr(res.getGpuCores()); + availableGCores.decr(res.getGpuCores()); } public void releaseContainer(Resource res) { @@ -113,12 +118,15 @@ public class NodeManagerMetrics { availableGB.set((int)Math.floor(availableMB/1024d)); allocatedVCores.decr(res.getVirtualCores()); availableVCores.incr(res.getVirtualCores()); + allocatedGCores.decr(res.getGpuCores()); + availableGCores.incr(res.getGpuCores()); } public void addResource(Resource res) { availableMB = availableMB + res.getMemory(); availableGB.incr((int)Math.floor(availableMB/1024d)); availableVCores.incr(res.getVirtualCores()); + availableGCores.incr(res.getGpuCores()); } public void addContainerLaunchDuration(long value) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/NodePage.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/NodePage.java index 92c4187..89889d0 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/NodePage.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/NodePage.java @@ -74,7 +73,9 @@ public class NodePage extends NMView { ._("Pmem enforcement enabled", info.isPmemCheckEnabled()) ._("Total VCores allocated for Containers", - String.valueOf(info.getTotalVCoresAllocated())) + String.valueOf(info.getTotalVCoresAllocated())) + ._("Total GCores allocated for Containers", + String.valueOf(info.getTotalGCoresAllocated())) ._("NodeHealthyStatus", info.getHealthStatus()) ._("LastNodeHealthTime", new Date( diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/NodeInfo.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/NodeInfo.java index 5d67c9e..763a471 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/NodeInfo.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/NodeInfo.java @@ -37,6 +37,7 @@ public class NodeInfo { protected long totalVmemAllocatedContainersMB; protected long totalPmemAllocatedContainersMB; protected long totalVCoresAllocatedContainers; + protected long totalGCoresAllocatedContainers; protected boolean vmemCheckEnabled; protected boolean pmemCheckEnabled; protected long lastNodeUpdateTime; @@ -65,6 +66,8 @@ public class NodeInfo { this.pmemCheckEnabled = resourceView.isPmemCheckEnabled(); this.totalVCoresAllocatedContainers = resourceView .getVCoresAllocatedForContainers(); + this.totalGCoresAllocatedContainers = resourceView + .getGCoresAllocatedForContainers(); this.nodeHealthy = context.getNodeHealthStatus().getIsNodeHealthy(); 
this.lastNodeUpdateTime = context.getNodeHealthStatus() .getLastHealthReportTime(); @@ -131,6 +134,10 @@ public class NodeInfo { return this.totalVCoresAllocatedContainers; } + public long getTotalGCoresAllocated() { + return this.totalGCoresAllocatedContainers; + } + public boolean isVmemCheckEnabled() { return this.vmemCheckEnabled; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java index 71a420e..7385e1f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java @@ -234,7 +234,7 @@ public class TestNodeStatusUpdater { ContainerId.newContainerId(appAttemptID, heartBeatID); ContainerLaunchContext launchContext = recordFactory .newRecordInstance(ContainerLaunchContext.class); - Resource resource = BuilderUtils.newResource(2, 1); + Resource resource = BuilderUtils.newResource(2, 1, 1); long currentTime = System.currentTimeMillis(); String user = "testUser"; ContainerTokenIdentifier containerToken = BuilderUtils @@ -266,7 +266,7 @@ public class TestNodeStatusUpdater { .newRecordInstance(ContainerLaunchContext.class); long currentTime = System.currentTimeMillis(); String user = "testUser"; - Resource resource = BuilderUtils.newResource(3, 1); + Resource resource = BuilderUtils.newResource(3, 1, 2); ContainerTokenIdentifier containerToken = BuilderUtils .newContainerTokenIdentifier(BuilderUtils.newContainerToken( secondContainerID, InetAddress.getByName("localhost") diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainerMetrics.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainerMetrics.java index c628648..27a547c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainerMetrics.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainerMetrics.java @@ -26,7 +26,6 @@ import org.apache.hadoop.yarn.api.records.ContainerId; import org.junit.Test; import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotNull; import static org.mockito.Matchers.any; import static org.mockito.Matchers.anyString; import static org.mockito.Mockito.doReturn; @@ -87,9 +86,10 @@ public class TestContainerMetrics { int anyPmemLimit = 1024; int anyVmemLimit = 2048; int anyVcores = 10; + int anyGcores = 10; String anyProcessId = "1234"; - metrics.recordResourceLimit(anyVmemLimit, anyPmemLimit, anyVcores); + metrics.recordResourceLimit(anyVmemLimit, anyPmemLimit, anyVcores, anyGcores); metrics.recordProcessId(anyProcessId); Thread.sleep(110); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/metrics/TestNodeManagerMetrics.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/metrics/TestNodeManagerMetrics.java index 4dc4648..6bb6d82 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/metrics/TestNodeManagerMetrics.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/metrics/TestNodeManagerMetrics.java @@ -35,9 +35,11 @@ public class TestNodeManagerMetrics { Resource total = Records.newRecord(Resource.class); total.setMemory(8*GiB); total.setVirtualCores(16); + total.setGpuCores(16); Resource resource = Records.newRecord(Resource.class); resource.setMemory(512); //512MiB resource.setVirtualCores(2); + resource.setGpuCores(1); metrics.addResource(total); @@ -73,13 +75,13 @@ public class TestNodeManagerMetrics { // while allocatedGB is expected to be ceiled. // allocatedGB: 3.5GB allocated memory is shown as 4GB // availableGB: 4.5GB available memory is shown as 4GB - checkMetrics(10, 1, 1, 1, 1, 1, 4, 7, 4, 14, 2); + checkMetrics(10, 1, 1, 1, 1, 1, 4, 7, 4, 14, 2, 7, 9); } private void checkMetrics(int launched, int completed, int failed, int killed, int initing, int running, int allocatedGB, int allocatedContainers, int availableGB, int allocatedVCores, - int availableVCores) { + int availableVCores, int allocatedGCores, int availableGCores) { MetricsRecordBuilder rb = getMetrics("NodeManagerMetrics"); assertCounter("ContainersLaunched", launched, rb); assertCounter("ContainersCompleted", completed, rb); @@ -89,9 +91,11 @@ public class TestNodeManagerMetrics { assertGauge("ContainersRunning", running, rb); assertGauge("AllocatedGB", allocatedGB, rb); assertGauge("AllocatedVCores", allocatedVCores, rb); + assertGauge("AllocatedGCores", allocatedGCores, rb); assertGauge("AllocatedContainers", allocatedContainers, rb); assertGauge("AvailableGB", availableGB, rb); assertGauge("AvailableVCores",availableVCores, rb); + assertGauge("AvailableGCores", availableGCores, rb); } } diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServer.java index a7006e0..5d72a3e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServer.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServer.java @@ -95,6 +95,10 @@ public class TestNMWebServer { return 0; } @Override + public long getGCoresAllocatedForContainers() { + return 0; + } + @Override public boolean isVmemCheckEnabled() { return true; } @@ -158,6 +162,10 @@ public class TestNMWebServer { return 0; } @Override + public long getGCoresAllocatedForContainers() { + return 0; + } + @Override public boolean isVmemCheckEnabled() { return true; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServices.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServices.java index 7caad4a..1528c2b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServices.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServices.java @@ -128,6 +128,10 @@ public long getVCoresAllocatedForContainers() { return new Long("4000"); } @Override + public long 
getGCoresAllocatedForContainers() { + return new Long("4000"); + } + @Override public boolean isVmemCheckEnabled() { return true; } @@ -381,6 +385,7 @@ public void verifyNodesXML(NodeList nodes) throws JSONException, Exception { "totalPmemAllocatedContainersMB"), WebServicesTestUtils.getXmlLong(element, "totalVCoresAllocatedContainers"), + WebServicesTestUtils.getXmlLong(element, "totalGCoresAllocatedContainers"), WebServicesTestUtils.getXmlBoolean(element, "vmemCheckEnabled"), WebServicesTestUtils.getXmlBoolean(element, "pmemCheckEnabled"), WebServicesTestUtils.getXmlLong(element, "lastNodeUpdateTime"), @@ -399,11 +404,12 @@ public void verifyNodesXML(NodeList nodes) throws JSONException, Exception { public void verifyNodeInfo(JSONObject json) throws JSONException, Exception { assertEquals("incorrect number of elements", 1, json.length()); JSONObject info = json.getJSONObject("nodeInfo"); - assertEquals("incorrect number of elements", 16, info.length()); + assertEquals("incorrect number of elements", 17, info.length()); verifyNodeInfoGeneric(info.getString("id"), info.getString("healthReport"), info.getLong("totalVmemAllocatedContainersMB"), info.getLong("totalPmemAllocatedContainersMB"), info.getLong("totalVCoresAllocatedContainers"), + info.getLong("totalGCoresAllocatedContainers"), info.getBoolean("vmemCheckEnabled"), info.getBoolean("pmemCheckEnabled"), info.getLong("lastNodeUpdateTime"), info.getBoolean("nodeHealthy"), @@ -417,7 +423,7 @@ public void verifyNodeInfo(JSONObject json) throws JSONException, Exception { public void verifyNodeInfoGeneric(String id, String healthReport, long totalVmemAllocatedContainersMB, long totalPmemAllocatedContainersMB, - long totalVCoresAllocatedContainers, + long totalVCoresAllocatedContainers, long totalGCoresAllocatedContainers, boolean vmemCheckEnabled, boolean pmemCheckEnabled, long lastNodeUpdateTime, Boolean nodeHealthy, String nodeHostName, String hadoopVersionBuiltOn, String hadoopBuildVersion, @@ -433,6 +439,7 @@ 
public void verifyNodeInfoGeneric(String id, String healthReport, totalPmemAllocatedContainersMB); assertEquals("totalVCoresAllocatedContainers incorrect", 4000, totalVCoresAllocatedContainers); + assertEquals("totalGCoresAllocatedContainers incorrect", 4000, totalGCoresAllocatedContainers); assertEquals("vmemCheckEnabled incorrect", true, vmemCheckEnabled); assertEquals("pmemCheckEnabled incorrect", true, pmemCheckEnabled); assertTrue("lastNodeUpdateTime incorrect", lastNodeUpdateTime == nmContext diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServicesApps.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServicesApps.java index 3e7aac8..284e879 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServicesApps.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServicesApps.java @@ -121,6 +117,10 @@ public class TestNMWebServicesApps extends JerseyTestBase { return new Long("4000"); } + @Override + public long getGCoresAllocatedForContainers() { + return new Long("4000"); + } @Override public boolean isVmemCheckEnabled() { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServicesContainers.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServicesContainers.java index ceb1d57..0ff61bc 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServicesContainers.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServicesContainers.java @@ -112,6 +108,11 @@ public class TestNMWebServicesContainers extends JerseyTestBase { } @Override + public long getGCoresAllocatedForContainers() { + return new Long("4000"); + } + + @Override public boolean isVmemCheckEnabled() { return true; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java index 6cdf104..3839ec0 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java @@ -550,7 +549,7 @@ public class ApplicationMasterService extends AbstractService implements for(RMNode rmNode: updatedNodes) { SchedulerNodeReport schedulerNodeReport = rScheduler.getNodeReport(rmNode.getNodeID()); - Resource used = BuilderUtils.newResource(0, 0); + Resource used = BuilderUtils.newResource(0, 0, 0); int numContainers = 0; if (schedulerNodeReport != null) { used = schedulerNodeReport.getUsedResource(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java index 9a2bb24..5033e16 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java @@ -878,7 +877,7 @@ public class ClientRMService extends AbstractService implements private NodeReport createNodeReports(RMNode rmNode) { SchedulerNodeReport schedulerNodeReport = scheduler.getNodeReport(rmNode.getNodeID()); - Resource used = BuilderUtils.newResource(0, 0); + Resource used = BuilderUtils.newResource(0, 0, 0); int numContainers = 0; if (schedulerNodeReport != null) { used = schedulerNodeReport.getUsedResource(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java index 17fbd73..eadaf1d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java @@ -170,6 +169,7 @@ public class RMAppManager implements EventHandler, .add("finalStatus", app.getFinalApplicationStatus()) .add("memorySeconds", metrics.getMemorySeconds()) .add("vcoreSeconds", metrics.getVcoreSeconds()) + .add("gcoreSeconds", metrics.getGcoreSeconds()) .add("preemptedAMContainers", 
metrics.getNumAMContainersPreempted()) .add("preemptedNonAMContainers", metrics.getNumNonAMContainersPreempted()) .add("preemptedResources", metrics.getResourcePreempted()) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMServerUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMServerUtils.java index fd17153..b9cb208 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMServerUtils.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMServerUtils.java @@ -261,8 +261,8 @@ public class RMServerUtils { public static final ApplicationResourceUsageReport DUMMY_APPLICATION_RESOURCE_USAGE_REPORT = BuilderUtils.newApplicationResourceUsageReport(-1, -1, - Resources.createResource(-1, -1), Resources.createResource(-1, -1), - Resources.createResource(-1, -1), 0, 0); + Resources.createResource(-1, -1, -1), Resources.createResource(-1, -1, -1), + Resources.createResource(-1, -1, -1), 0, 0, 0); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceTrackerService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceTrackerService.java index 0de556b..4ab82a2 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceTrackerService.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceTrackerService.java @@ -99,6 +98,7 @@ public class ResourceTrackerService extends AbstractService implements private int minAllocMb; private int minAllocVcores; + private int minAllocGcores; static { resync.setNodeAction(NodeAction.RESYNC); @@ -144,6 +144,9 @@ public class ResourceTrackerService extends AbstractService implements minAllocVcores = conf.getInt( YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES, YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES); + minAllocGcores = conf.getInt( + YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_GCORES, + YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_GCORES); minimumNodeManagerVersion = conf.get( YarnConfiguration.RM_NODEMANAGER_MINIMUM_VERSION, @@ -285,7 +288,8 @@ public class ResourceTrackerService extends AbstractService implements // Check if this node has minimum allocations if (capability.getMemory() < minAllocMb - || capability.getVirtualCores() < minAllocVcores) { + || capability.getVirtualCores() < minAllocVcores + || capability.getGpuCores() < minAllocGcores) { String message = "NodeManager from " + host + " doesn't satisfy minimum allocations, Sending SHUTDOWN" diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/metrics/SystemMetricsPublisher.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/metrics/SystemMetricsPublisher.java index b849b00..a5007ec 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/metrics/SystemMetricsPublisher.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/metrics/SystemMetricsPublisher.java @@ -283,7 +283,9 @@ public class SystemMetricsPublisher extends CompositeService { appMetrics.getVcoreSeconds()); entity.addOtherInfo(ApplicationMetricsConstants.APP_MEM_METRICS, appMetrics.getMemorySeconds()); - + entity.addOtherInfo(ApplicationMetricsConstants.APP_GPU_METRICS, + appMetrics.getGcoreSeconds()); + tEvent.setEventInfo(eventInfo); entity.addEvent(tEvent); putEntity(entity); @@ -382,6 +384,8 @@ public class SystemMetricsPublisher extends CompositeService { event.getAllocatedResource().getMemory()); entityInfo.put(ContainerMetricsConstants.ALLOCATED_VCORE_ENTITY_INFO, event.getAllocatedResource().getVirtualCores()); + entityInfo.put(ContainerMetricsConstants.ALLOCATED_GCORE_ENTITY_INFO, + event.getAllocatedResource().getGpuCores()); entityInfo.put(ContainerMetricsConstants.ALLOCATED_HOST_ENTITY_INFO, event.getAllocatedNode().getHost()); entityInfo.put(ContainerMetricsConstants.ALLOCATED_PORT_ENTITY_INFO, diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/ProportionalCapacityPreemptionPolicy.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/ProportionalCapacityPreemptionPolicy.java index 87a2a00..72e3dfa 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/ProportionalCapacityPreemptionPolicy.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/ProportionalCapacityPreemptionPolicy.java @@ 
-360,7 +358,7 @@ public class ProportionalCapacityPreemptionPolicy implements SchedulingEditPolic // based on ideal assignment computed above and current assignment we derive // how much preemption is required overall - Resource totPreemptionNeeded = Resource.newInstance(0, 0); + Resource totPreemptionNeeded = Resource.newInstance(0, 0, 0); for (TempQueue t:queues) { if (Resources.greaterThan(rc, tot_guarant, t.current, t.idealAssigned)) { Resources.addTo(totPreemptionNeeded, @@ -431,7 +429,7 @@ public class ProportionalCapacityPreemptionPolicy implements SchedulingEditPolic //assign all cluster resources until no more demand, or no resources are left while (!orderedByNeed.isEmpty() && Resources.greaterThan(rc,tot_guarant, unassigned,Resources.none())) { - Resource wQassigned = Resource.newInstance(0, 0); + Resource wQassigned = Resource.newInstance(0, 0, 0); // we compute normalizedGuarantees capacity based on currently active // queues resetCapacity(rc, unassigned, orderedByNeed, ignoreGuarantee); @@ -447,7 +445,7 @@ public class ProportionalCapacityPreemptionPolicy implements SchedulingEditPolic for (Iterator i = underserved.iterator(); i.hasNext();) { TempQueue sub = i.next(); Resource wQavail = Resources.multiplyAndNormalizeUp(rc, - unassigned, sub.normalizedGuarantee, Resource.newInstance(1, 1)); + unassigned, sub.normalizedGuarantee, Resource.newInstance(1, 1, 0)); Resource wQidle = sub.offer(wQavail, rc, tot_guarant); Resource wQdone = Resources.subtract(wQavail, wQidle); @@ -492,7 +490,7 @@ public class ProportionalCapacityPreemptionPolicy implements SchedulingEditPolic */ private void resetCapacity(ResourceCalculator rc, Resource clusterResource, Collection queues, boolean ignoreGuar) { - Resource activeCap = Resource.newInstance(0, 0); + Resource activeCap = Resource.newInstance(0, 0, 0); if (ignoreGuar) { for (TempQueue q : queues) { @@ -530,7 +528,7 @@ public class ProportionalCapacityPreemptionPolicy implements SchedulingEditPolic if 
(qT.preemptionDisabled && qT.leafQueue != null) { if (LOG.isDebugEnabled()) { if (Resources.greaterThan(rc, clusterResource, - qT.toBePreempted, Resource.newInstance(0, 0))) { + qT.toBePreempted, Resource.newInstance(0, 0, 0))) { LOG.debug("Tried to preempt the following " + "resources from non-preemptable queue: " + qT.queueName + " - Resources: " + qT.toBePreempted); @@ -546,7 +544,7 @@ public class ProportionalCapacityPreemptionPolicy implements SchedulingEditPolic // accounts for natural termination of containers Resource resToObtain = Resources.multiply(qT.toBePreempted, naturalTerminationFactor); - Resource skippedAMSize = Resource.newInstance(0, 0); + Resource skippedAMSize = Resource.newInstance(0, 0, 0); // lock the leafqueue while we scan applications and unreserve synchronized (qT.leafQueue) { @@ -751,7 +749,7 @@ public class ProportionalCapacityPreemptionPolicy implements SchedulingEditPolic Resource guaranteed = Resources.multiply(clusterResources, absCap); Resource maxCapacity = Resources.multiply(clusterResources, absMaxCap); - Resource extra = Resource.newInstance(0, 0); + Resource extra = Resource.newInstance(0, 0, 0); if (Resources.greaterThan(rc, clusterResources, current, guaranteed)) { extra = Resources.subtract(current, guaranteed); } @@ -767,10 +765,10 @@ public class ProportionalCapacityPreemptionPolicy implements SchedulingEditPolic } ret.setLeafQueue(l); } else { - Resource pending = Resource.newInstance(0, 0); + Resource pending = Resource.newInstance(0, 0, 0); ret = new TempQueue(root.getQueueName(), current, pending, guaranteed, maxCapacity, false); - Resource childrensPreemptable = Resource.newInstance(0, 0); + Resource childrensPreemptable = Resource.newInstance(0, 0, 0); for (CSQueue c : root.getChildQueues()) { TempQueue subq = cloneQueues(c, clusterResources); Resources.addTo(childrensPreemptable, subq.preemptableExtra); @@ -779,7 +777,7 @@ public class ProportionalCapacityPreemptionPolicy implements SchedulingEditPolic // 
untouchableExtra = max(extra - childrenPreemptable, 0) if (Resources.greaterThanOrEqual( rc, clusterResources, childrensPreemptable, extra)) { - ret.untouchableExtra = Resource.newInstance(0, 0); + ret.untouchableExtra = Resource.newInstance(0, 0, 0); } else { ret.untouchableExtra = Resources.subtractFrom(extra, childrensPreemptable); @@ -837,13 +835,13 @@ public class ProportionalCapacityPreemptionPolicy implements SchedulingEditPolic this.pending = pending; this.guaranteed = guaranteed; this.maxCapacity = maxCapacity; - this.idealAssigned = Resource.newInstance(0, 0); - this.actuallyPreempted = Resource.newInstance(0, 0); - this.toBePreempted = Resource.newInstance(0, 0); + this.idealAssigned = Resource.newInstance(0, 0, 0); + this.actuallyPreempted = Resource.newInstance(0, 0, 0); + this.toBePreempted = Resource.newInstance(0, 0, 0); this.normalizedGuarantee = Float.NaN; this.children = new ArrayList(); - this.untouchableExtra = Resource.newInstance(0, 0); - this.preemptableExtra = Resource.newInstance(0, 0); + this.untouchableExtra = Resource.newInstance(0, 0, 0); + this.preemptableExtra = Resource.newInstance(0, 0, 0); this.preemptionDisabled = preemptionDisabled; } @@ -878,7 +876,7 @@ public class ProportionalCapacityPreemptionPolicy implements SchedulingEditPolic Resource clusterResource) { Resource absMaxCapIdealAssignedDelta = Resources.componentwiseMax( Resources.subtract(maxCapacity, idealAssigned), - Resource.newInstance(0, 0)); + Resource.newInstance(0, 0, 0)); // remain = avail - min(avail, (max - assigned), (current + pending - assigned)) Resource accepted = Resources.min(rc, clusterResource, @@ -921,7 +919,7 @@ public class ProportionalCapacityPreemptionPolicy implements SchedulingEditPolic toBePreempted = Resources.multiply( Resources.subtract(current, idealAssigned), scalingFactor); } else { - toBePreempted = Resource.newInstance(0, 0); + toBePreempted = Resource.newInstance(0, 0, 0); } } @@ -929,16 +927,22 @@ public class 
ProportionalCapacityPreemptionPolicy implements SchedulingEditPolic sb.append(queueName).append(", ") .append(current.getMemory()).append(", ") .append(current.getVirtualCores()).append(", ") + .append(current.getGpuCores()).append(", ") .append(pending.getMemory()).append(", ") .append(pending.getVirtualCores()).append(", ") + .append(pending.getGpuCores()).append(", ") .append(guaranteed.getMemory()).append(", ") .append(guaranteed.getVirtualCores()).append(", ") + .append(guaranteed.getGpuCores()).append(", ") .append(idealAssigned.getMemory()).append(", ") .append(idealAssigned.getVirtualCores()).append(", ") + .append(idealAssigned.getGpuCores()).append(", ") .append(toBePreempted.getMemory()).append(", ") .append(toBePreempted.getVirtualCores() ).append(", ") + .append(toBePreempted.getGpuCores()).append(", ") .append(actuallyPreempted.getMemory()).append(", ") - .append(actuallyPreempted.getVirtualCores()); + .append(actuallyPreempted.getVirtualCores()).append(", ") + .append(actuallyPreempted.getGpuCores()); } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/nodelabels/RMNodeLabelsManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/nodelabels/RMNodeLabelsManager.java index e5abdc9..f4ea150 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/nodelabels/RMNodeLabelsManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/nodelabels/RMNodeLabelsManager.java @@ -53,7 +52,7 @@ public class RMNodeLabelsManager extends CommonNodeLabelsManager { protected Queue() { acccessibleNodeLabels = Collections.newSetFromMap(new 
ConcurrentHashMap()); - resource = Resource.newInstance(0, 0); + resource = Resource.newInstance(0, 0, 0); } } @@ -243,7 +242,7 @@ public class RMNodeLabelsManager extends CommonNodeLabelsManager { } else { // set nm is not running, and its resource = 0 nm.running = false; - nm.resource = Resource.newInstance(0, 0); + nm.resource = Resource.newInstance(0, 0, 0); } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStore.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStore.java index bccde53..7a8df3f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStore.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStore.java @@ -640,7 +639,7 @@ public abstract class RMStateStore extends AbstractService { appAttempt.getMasterContainer(), credentials, appAttempt.getStartTime(), resUsage.getMemorySeconds(), - resUsage.getVcoreSeconds()); + resUsage.getVcoreSeconds(),resUsage.getGcoreSeconds()); dispatcher.getEventHandler().handle( new RMStateStoreAppAttemptEvent(attemptState)); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/records/ApplicationAttemptStateData.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/records/ApplicationAttemptStateData.java index 391783b..eeca77a 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/records/ApplicationAttemptStateData.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/records/ApplicationAttemptStateData.java @@ -40,7 +40,7 @@ public abstract class ApplicationAttemptStateData { Credentials attemptTokens, long startTime, RMAppAttemptState finalState, String finalTrackingUrl, String diagnostics, FinalApplicationStatus amUnregisteredFinalStatus, int exitStatus, - long finishTime, long memorySeconds, long vcoreSeconds) { + long finishTime, long memorySeconds, long vcoreSeconds, long gcoreSeconds) { ApplicationAttemptStateData attemptStateData = Records.newRecord(ApplicationAttemptStateData.class); attemptStateData.setAttemptId(attemptId); @@ -55,16 +55,17 @@ public abstract class ApplicationAttemptStateData { attemptStateData.setFinishTime(finishTime); attemptStateData.setMemorySeconds(memorySeconds); attemptStateData.setVcoreSeconds(vcoreSeconds); + attemptStateData.setGcoreSeconds(gcoreSeconds); return attemptStateData; } public static ApplicationAttemptStateData newInstance( ApplicationAttemptId attemptId, Container masterContainer, Credentials attemptTokens, long startTime, long memorySeconds, - long vcoreSeconds) { + long vcoreSeconds, long gcoreSeconds) { return newInstance(attemptId, masterContainer, attemptTokens, startTime, null, "N/A", "", null, ContainerExitStatus.INVALID, 0, - memorySeconds, vcoreSeconds); + memorySeconds, vcoreSeconds, gcoreSeconds); } @@ -182,4 +183,12 @@ public abstract class ApplicationAttemptStateData { @Public @Unstable public abstract void setVcoreSeconds(long vcoreSeconds); + + @Public + @Unstable + public abstract long getGcoreSeconds(); + + @Public + @Unstable + public abstract void setGcoreSeconds(long gcoreSeconds); } diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/records/impl/pb/ApplicationAttemptStateDataPBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/records/impl/pb/ApplicationAttemptStateDataPBImpl.java index bae3f9c..bb5f146 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/records/impl/pb/ApplicationAttemptStateDataPBImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/records/impl/pb/ApplicationAttemptStateDataPBImpl.java @@ -251,6 +251,12 @@ public class ApplicationAttemptStateDataPBImpl extends } @Override + public long getGcoreSeconds() { + ApplicationAttemptStateDataProtoOrBuilder p = viaProto ? proto : builder; + return p.getGcoreSeconds(); + } + + @Override public void setMemorySeconds(long memorySeconds) { maybeInitBuilder(); builder.setMemorySeconds(memorySeconds); @@ -263,6 +269,12 @@ public class ApplicationAttemptStateDataPBImpl extends } @Override + public void setGcoreSeconds(long gcoreSeconds) { + maybeInitBuilder(); + builder.setGcoreSeconds(gcoreSeconds); + } + + @Override public FinalApplicationStatus getFinalApplicationStatus() { ApplicationAttemptStateDataProtoOrBuilder p = viaProto ? 
proto : builder; if (!p.hasFinalApplicationStatus()) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/AbstractSchedulerPlanFollower.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/AbstractSchedulerPlanFollower.java index ea7f27d..d98e55f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/AbstractSchedulerPlanFollower.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/AbstractSchedulerPlanFollower.java @@ -92,7 +92,7 @@ public abstract class AbstractSchedulerPlanFollower implements PlanFollower { Set currentReservations = plan.getReservationsAtTime(now); Set curReservationNames = new HashSet(); - Resource reservedResources = Resource.newInstance(0, 0); + Resource reservedResources = Resource.newInstance(0, 0, 0); int numRes = getReservedResources(now, currentReservations, curReservationNames, reservedResources); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/CapacityOverTimePolicy.java
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/CapacityOverTimePolicy.java index afba7ea..eef807d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/CapacityOverTimePolicy.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/CapacityOverTimePolicy.java @@ -100,7 +100,7 @@ public class CapacityOverTimePolicy implements SharingPolicy { // define variable that will store integral of resources (need diff class to // avoid overflow issues for long/large allocations) - IntegralResource runningTot = new IntegralResource(0L, 0L); + IntegralResource runningTot = new IntegralResource(0L, 0L, 0L); IntegralResource maxAllowed = new IntegralResource(maxAvgRes); maxAllowed.multiplyBy(validWindow / step); @@ -205,43 +205,52 @@ public class CapacityOverTimePolicy implements SharingPolicy { private static class IntegralResource { long memory; long vcores; + long gcores; public IntegralResource(Resource resource) { this.memory = resource.getMemory(); this.vcores = resource.getVirtualCores(); + this.gcores = resource.getGpuCores(); } - public IntegralResource(long mem, long vcores) { + public IntegralResource(long mem, long vcores, long gcores) { this.memory = mem; this.vcores = vcores; + this.gcores = gcores; } public void add(Resource r) { memory += r.getMemory(); vcores += r.getVirtualCores(); + gcores += r.getGpuCores(); } public void subtract(Resource r) { memory -= r.getMemory(); vcores -= r.getVirtualCores(); + gcores -= r.getGpuCores(); } public void multiplyBy(long window) { memory = memory * window; vcores = vcores * window; + gcores = gcores * window; } public long compareTo(IntegralResource other) { long diff = memory - 
other.memory; if (diff == 0) { diff = vcores - other.vcores; + if (diff == 0) { + diff = gcores - other.gcores; + } } return diff; } @Override public String toString() { - return ""; + return ""; } } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/GreedyReservationAgent.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/GreedyReservationAgent.java index 5a61b94..09176ab 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/GreedyReservationAgent.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/GreedyReservationAgent.java @@ -179,7 +179,7 @@ public class GreedyReservationAgent implements ReservationAgent { // create reservation with above allocations if not null/empty ReservationRequest ZERO_RES = - ReservationRequest.newInstance(Resource.newInstance(0, 0), 0); + ReservationRequest.newInstance(Resource.newInstance(0, 0, 0), 0); long firstStartTime = findEarliestTime(allocations.keySet()); @@ -285,7 +285,7 @@ public class GreedyReservationAgent implements ReservationAgent { // As we run along we will logically remove the previous allocation for // this reservation // if one existed - Resource oldResCap = Resource.newInstance(0, 0); + Resource oldResCap = Resource.newInstance(0, 0, 0); if (oldResAllocation != null) { oldResCap = oldResAllocation.getResourcesAtTime(t); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/InMemoryPlan.java 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/InMemoryPlan.java index ce2e7d7..ec33b4a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/InMemoryPlan.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/InMemoryPlan.java @@ -46,7 +46,7 @@ private static final Logger LOG = LoggerFactory.getLogger(InMemoryPlan.class); - private static final Resource ZERO_RESOURCE = Resource.newInstance(0, 0); + private static final Resource ZERO_RESOURCE = Resource.newInstance(0, 0, 0); private TreeMap> currentReservations = new TreeMap>(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/InMemoryReservationAllocation.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/InMemoryReservationAllocation.java index fc8407b..6b616b8 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/InMemoryReservationAllocation.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/InMemoryReservationAllocation.java @@ -121,7 +121,7 @@ class InMemoryReservationAllocation implements ReservationAllocation { @Override public Resource getResourcesAtTime(long tick) { if (tick < startTime || tick >= endTime) { - return Resource.newInstance(0, 0); + return Resource.newInstance(0, 0, 0); } return 
Resources.clone(resourcesOverTime.getCapacityAtTime(tick)); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/NoOverCommitPolicy.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/NoOverCommitPolicy.java index f87e9dc..1ed3a29 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/NoOverCommitPolicy.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/NoOverCommitPolicy.java @@ -59,7 +59,7 @@ public class NoOverCommitPolicy implements SharingPolicy { for (long t = startTime; t < endTime; t += step) { Resource currExistingAllocTot = plan.getTotalCommittedResources(t); Resource currNewAlloc = reservation.getResourcesAtTime(t); - Resource currOldAlloc = Resource.newInstance(0, 0); + Resource currOldAlloc = Resource.newInstance(0, 0, 0); if (oldReservation != null) { oldReservation.getResourcesAtTime(t); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/RLESparseResourceAllocation.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/RLESparseResourceAllocation.java index 3f6f405..dd89b47 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/RLESparseResourceAllocation.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/RLESparseResourceAllocation.java @@ -46,7 +45,7 @@ import com.google.gson.stream.JsonWriter; public class RLESparseResourceAllocation { private static final int THRESHOLD = 100; - private static final Resource ZERO_RESOURCE = Resource.newInstance(0, 0); + private static final Resource ZERO_RESOURCE = Resource.newInstance(0, 0, 0); private TreeMap cumulativeCapacity = new TreeMap(); @@ -98,7 +97,7 @@ public class RLESparseResourceAllocation { NavigableMap ticks = cumulativeCapacity.headMap(endKey, false); if (ticks != null && !ticks.isEmpty()) { - Resource updatedCapacity = Resource.newInstance(0, 0); + Resource updatedCapacity = Resource.newInstance(0, 0, 0); Entry lowEntry = ticks.floorEntry(startKey); if (lowEntry == null) { // This is the earliest starting interval @@ -155,7 +154,7 @@ public class RLESparseResourceAllocation { List ReservationRequests, Resource clusterResource) { ReservationRequest aggregateReservationRequest = Records.newRecord(ReservationRequest.class); - Resource capacity = Resource.newInstance(0, 0); + Resource capacity = Resource.newInstance(0, 0, 0); for (ReservationRequest ReservationRequest : ReservationRequests) { Resources.addTo(capacity, Resources.multiply( ReservationRequest.getCapability(), @@ -194,7 +193,7 @@ public class RLESparseResourceAllocation { // Decrease all the capacities of overlapping intervals SortedMap overlapSet = ticks.tailMap(startKey); if (overlapSet != null && !overlapSet.isEmpty()) { - Resource updatedCapacity = Resource.newInstance(0, 0); + Resource updatedCapacity = Resource.newInstance(0, 0, 0); long currentKey = -1; for (Iterator> overlapEntries = overlapSet.entrySet().iterator(); overlapEntries.hasNext();) { diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/ReservationInputValidator.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/ReservationInputValidator.java index fb0831a..379e525 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/ReservationInputValidator.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/ReservationInputValidator.java @@ -125,7 +125,7 @@ public class ReservationInputValidator { } // compute minimum duration and max gang size long minDuration = 0; - Resource maxGangSize = Resource.newInstance(0, 0); + Resource maxGangSize = Resource.newInstance(0, 0, 0); ReservationRequestInterpreter type = contract.getReservationRequests().getInterpreter(); for (ReservationRequest rr : resReq) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/SimpleCapacityReplanner.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/SimpleCapacityReplanner.java index b5a6a99..e6c4310 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/SimpleCapacityReplanner.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/SimpleCapacityReplanner.java @@ -48,7 +47,7 @@ public 
class SimpleCapacityReplanner implements Planner { private static final Log LOG = LogFactory .getLog(SimpleCapacityReplanner.class); - private static final Resource ZERO_RESOURCE = Resource.newInstance(0, 0); + private static final Resource ZERO_RESOURCE = Resource.newInstance(0, 0, 0); private final Clock clock; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/resource/ResourceType.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/resource/ResourceType.java index 9dd245b..eae1012 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/resource/ResourceType.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/resource/ResourceType.java @@ -24,5 +24,5 @@ import org.apache.hadoop.classification.InterfaceStability.Evolving; @Private @Evolving public enum ResourceType { - MEMORY, CPU + MEMORY, CPU, GPU } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java index 2d1737a..5460ee3 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java @@ -604,6 +603,7 @@ public class RMAppImpl 
implements RMApp, Recoverable { RMAppMetrics rmAppMetrics = getRMAppMetrics(); appUsageReport.setMemorySeconds(rmAppMetrics.getMemorySeconds()); appUsageReport.setVcoreSeconds(rmAppMetrics.getVcoreSeconds()); + appUsageReport.setGcoreSeconds(rmAppMetrics.getGcoreSeconds()); } if (currentApplicationAttemptId == null) { @@ -1301,11 +1301,12 @@ public class RMAppImpl implements RMApp, Recoverable { @Override public RMAppMetrics getRMAppMetrics() { - Resource resourcePreempted = Resource.newInstance(0, 0); + Resource resourcePreempted = Resource.newInstance(0, 0, 0); int numAMContainerPreempted = 0; int numNonAMContainerPreempted = 0; long memorySeconds = 0; long vcoreSeconds = 0; + long gcoreSeconds = 0; for (RMAppAttempt attempt : attempts.values()) { if (null != attempt) { RMAppAttemptMetrics attemptMetrics = @@ -1321,12 +1322,13 @@ public class RMAppImpl implements RMApp, Recoverable { attempt.getRMAppAttemptMetrics().getAggregateAppResourceUsage(); memorySeconds += resUsage.getMemorySeconds(); vcoreSeconds += resUsage.getVcoreSeconds(); + gcoreSeconds += resUsage.getGcoreSeconds(); } } return new RMAppMetrics(resourcePreempted, numNonAMContainerPreempted, numAMContainerPreempted, - memorySeconds, vcoreSeconds); + memorySeconds, vcoreSeconds, gcoreSeconds); } @Private diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppMetrics.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppMetrics.java index 5091470..b3576dd 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppMetrics.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppMetrics.java @@ -26,15 +26,17 @@ public class RMAppMetrics { final int numAMContainersPreempted; final long memorySeconds; final long vcoreSeconds; + final long gcoreSeconds; public RMAppMetrics(Resource resourcePreempted, int numNonAMContainersPreempted, int numAMContainersPreempted, - long memorySeconds, long vcoreSeconds) { + long memorySeconds, long vcoreSeconds, long gcoreSeconds) { this.resourcePreempted = resourcePreempted; this.numNonAMContainersPreempted = numNonAMContainersPreempted; this.numAMContainersPreempted = numAMContainersPreempted; this.memorySeconds = memorySeconds; this.vcoreSeconds = vcoreSeconds; + this.gcoreSeconds = gcoreSeconds; } public Resource getResourcePreempted() { @@ -56,4 +58,9 @@ public class RMAppMetrics { public long getVcoreSeconds() { return vcoreSeconds; } + + public long getGcoreSeconds() { + return gcoreSeconds; + } + } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/AggregateAppResourceUsage.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/AggregateAppResourceUsage.java index f0c2b34..3623acf 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/AggregateAppResourceUsage.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/AggregateAppResourceUsage.java @@ -24,10 +24,12 @@ import org.apache.hadoop.classification.InterfaceAudience.Private; public class AggregateAppResourceUsage { long 
memorySeconds; long vcoreSeconds; + long gcoreSeconds; - public AggregateAppResourceUsage(long memorySeconds, long vcoreSeconds) { + public AggregateAppResourceUsage(long memorySeconds, long vcoreSeconds, long gcoreSeconds) { this.memorySeconds = memorySeconds; this.vcoreSeconds = vcoreSeconds; + this.gcoreSeconds = gcoreSeconds; } /** @@ -57,4 +59,18 @@ public class AggregateAppResourceUsage { public void setVcoreSeconds(long vcoreSeconds) { this.vcoreSeconds = vcoreSeconds; } + + /** + * @return the gcoreSeconds + */ + public long getGcoreSeconds() { + return gcoreSeconds; + } + + /** + * @param gcoreSeconds the gcoreSeconds to set + */ + public void setGcoreSeconds(long gcoreSeconds) { + this.gcoreSeconds = gcoreSeconds; + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java index 230bbeb..59dbccd 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java @@ -817,6 +814,7 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable { this.attemptMetrics.getAggregateAppResourceUsage(); report.setMemorySeconds(resUsage.getMemorySeconds()); report.setVcoreSeconds(resUsage.getVcoreSeconds()); + report.setGcoreSeconds(resUsage.getGcoreSeconds()); return report; } finally { this.readLock.unlock(); @@ -848,7 +846,7 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable { this.startTime = 
attemptState.getStartTime(); this.finishTime = attemptState.getFinishTime(); this.attemptMetrics.updateAggregateAppResourceUsage( - attemptState.getMemorySeconds(),attemptState.getVcoreSeconds()); + attemptState.getMemorySeconds(),attemptState.getVcoreSeconds(), attemptState.getGcoreSeconds()); } public void transferStateFromPreviousAttempt(RMAppAttempt attempt) { @@ -1165,7 +1163,7 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable { startTime, stateToBeStored, finalTrackingUrl, diags, finalStatus, exitStatus, getFinishTime(), resUsage.getMemorySeconds(), - resUsage.getVcoreSeconds()); + resUsage.getVcoreSeconds(), resUsage.getGcoreSeconds()); LOG.info("Updating application attempt " + applicationAttemptId + " with final state: " + targetedFinalState + ", and exit status: " + exitStatus); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptMetrics.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptMetrics.java index 0a3638b..e101b95 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptMetrics.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptMetrics.java @@ -40,9 +40,9 @@ private ApplicationAttemptId attemptId = null; // preemption info - private Resource resourcePreempted = Resource.newInstance(0, 0); + private Resource resourcePreempted = Resource.newInstance(0, 0, 0); // application headroom - private volatile Resource applicationHeadroom = Resource.newInstance(0, 0); + private volatile Resource applicationHeadroom = 
Resource.newInstance(0, 0, 0); private AtomicInteger numNonAMContainersPreempted = new AtomicInteger(0); private AtomicBoolean isPreempted = new AtomicBoolean(false); @@ -50,6 +50,7 @@ public class RMAppAttemptMetrics { private WriteLock writeLock; private AtomicLong finishedMemorySeconds = new AtomicLong(0); private AtomicLong finishedVcoreSeconds = new AtomicLong(0); + private AtomicLong finishedGcoreSeconds = new AtomicLong(0); private RMContext rmContext; private int[][] localityStatistics = @@ -113,6 +114,7 @@ public class RMAppAttemptMetrics { public AggregateAppResourceUsage getAggregateAppResourceUsage() { long memorySeconds = finishedMemorySeconds.get(); long vcoreSeconds = finishedVcoreSeconds.get(); + long gcoreSeconds = finishedGcoreSeconds.get(); // Only add in the running containers if this is the active attempt. RMAppAttempt currentAttempt = rmContext.getRMApps() @@ -123,15 +125,17 @@ public class RMAppAttemptMetrics { if (appResUsageReport != null) { memorySeconds += appResUsageReport.getMemorySeconds(); vcoreSeconds += appResUsageReport.getVcoreSeconds(); + gcoreSeconds += appResUsageReport.getGcoreSeconds(); } } - return new AggregateAppResourceUsage(memorySeconds, vcoreSeconds); + return new AggregateAppResourceUsage(memorySeconds, vcoreSeconds, gcoreSeconds); } public void updateAggregateAppResourceUsage(long finishedMemorySeconds, - long finishedVcoreSeconds) { + long finishedVcoreSeconds, long finishedGcoreSeconds) { this.finishedMemorySeconds.addAndGet(finishedMemorySeconds); this.finishedVcoreSeconds.addAndGet(finishedVcoreSeconds); + this.finishedGcoreSeconds.addAndGet(finishedGcoreSeconds); } public void incNumAllocatedContainers(NodeType containerType, diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerImpl.java 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerImpl.java index 38a03ae..00b4418 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerImpl.java @@ -526,8 +526,10 @@ public class RMContainerImpl implements RMContainer { * usedMillis / DateUtils.MILLIS_PER_SECOND; long vcoreSeconds = resource.getVirtualCores() * usedMillis / DateUtils.MILLIS_PER_SECOND; + long gcoreSeconds = resource.getGpuCores() + * usedMillis / DateUtils.MILLIS_PER_SECOND; rmAttempt.getRMAppAttemptMetrics() - .updateAggregateAppResourceUsage(memorySeconds,vcoreSeconds); + .updateAggregateAppResourceUsage(memorySeconds,vcoreSeconds, gcoreSeconds); } } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java index e1f94cf..aa717bb 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java @@ -77,13 +77,14 @@ public abstract class AbstractYarnScheduler protected Map nodes = new ConcurrentHashMap(); // Whole capacity of the cluster - 
protected Resource clusterResource = Resource.newInstance(0, 0); + protected Resource clusterResource = Resource.newInstance(0, 0, 0); protected Resource minimumAllocation; private Resource maximumAllocation; private Resource configuredMaximumAllocation; private int maxNodeMemory = -1; private int maxNodeVCores = -1; + private int maxNodeGCores = -1; private final ReadLock maxAllocReadLock; private final WriteLock maxAllocWriteLock; @@ -601,6 +602,12 @@ public abstract class AbstractYarnScheduler maximumAllocation.setVirtualCores(Math.min( configuredMaximumAllocation.getVirtualCores(), maxNodeVCores)); } + int nodeGCores = totalResource.getGpuCores(); + if (nodeGCores > maxNodeGCores) { + maxNodeGCores = nodeGCores; + maximumAllocation.setGpuCores(Math.min( + configuredMaximumAllocation.getGpuCores(), maxNodeGCores)); + } } else { // removed node if (maxNodeMemory == totalResource.getMemory()) { maxNodeMemory = -1; @@ -608,9 +615,12 @@ public abstract class AbstractYarnScheduler if (maxNodeVCores == totalResource.getVirtualCores()) { maxNodeVCores = -1; } + if (maxNodeGCores == totalResource.getGpuCores()) { + maxNodeGCores = -1; + } // We only have to iterate through the nodes if the current max memory // or vcores was equal to the removed node's - if (maxNodeMemory == -1 || maxNodeVCores == -1) { + if (maxNodeMemory == -1 || maxNodeVCores == -1 || maxNodeGCores == -1) { for (Map.Entry nodeEntry : nodes.entrySet()) { int nodeMemory = nodeEntry.getValue().getTotalResource().getMemory(); @@ -622,6 +632,11 @@ public abstract class AbstractYarnScheduler if (nodeVCores > maxNodeVCores) { maxNodeVCores = nodeVCores; } + int nodeGCores = + nodeEntry.getValue().getTotalResource().getGpuCores(); + if (nodeGCores > maxNodeGCores) { + maxNodeGCores = nodeGCores; + } } if (maxNodeMemory == -1) { // no nodes maximumAllocation.setMemory(configuredMaximumAllocation.getMemory()); @@ -635,6 +650,12 @@ public abstract class AbstractYarnScheduler maximumAllocation.setVirtualCores( 
Math.min(configuredMaximumAllocation.getVirtualCores(), maxNodeVCores)); } + if (maxNodeGCores == -1) { // no nodes + maximumAllocation.setGpuCores(configuredMaximumAllocation.getGpuCores()); + } else { + maximumAllocation.setGpuCores( + Math.min(configuredMaximumAllocation.getGpuCores(), maxNodeGCores)); + } } } } finally { @@ -654,7 +675,11 @@ public abstract class AbstractYarnScheduler if (maxNodeVCores != -1) { maxVcores = Math.min(maxVcores, maxNodeVCores); } - maximumAllocation = Resources.createResource(maxMemory, maxVcores); + int maxGcores = newMaxAlloc.getGpuCores(); + if (maxNodeGCores != -1) { + maxGcores = Math.min(maxGcores, maxNodeGCores); + } + maximumAllocation = Resources.createResource(maxMemory, maxVcores, maxGcores); } finally { maxAllocWriteLock.unlock(); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/QueueMetrics.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/QueueMetrics.java index 507b798..dcd4189 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/QueueMetrics.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/QueueMetrics.java @@ -61,16 +59,20 @@ public class QueueMetrics implements MetricsSource { @Metric("Allocated memory in MB") MutableGaugeInt allocatedMB; @Metric("Allocated CPU in virtual cores") MutableGaugeInt allocatedVCores; + @Metric("Allocated GPU in cores") MutableGaugeInt allocatedGCores; @Metric("# of allocated containers") MutableGaugeInt allocatedContainers; @Metric("Aggregate # of allocated containers") MutableCounterLong aggregateContainersAllocated; 
@Metric("Aggregate # of released containers") MutableCounterLong aggregateContainersReleased; @Metric("Available memory in MB") MutableGaugeInt availableMB; @Metric("Available CPU in virtual cores") MutableGaugeInt availableVCores; + @Metric("Available GPU in cores") MutableGaugeInt availableGCores; @Metric("Pending memory allocation in MB") MutableGaugeInt pendingMB; @Metric("Pending CPU allocation in virtual cores") MutableGaugeInt pendingVCores; + @Metric("Pending GPU allocation in cores") MutableGaugeInt pendingGCores; @Metric("# of pending containers") MutableGaugeInt pendingContainers; @Metric("# of reserved memory in MB") MutableGaugeInt reservedMB; @Metric("Reserved CPU in virtual cores") MutableGaugeInt reservedVCores; + @Metric("Reserved GPU in cores") MutableGaugeInt reservedGCores; @Metric("# of reserved containers") MutableGaugeInt reservedContainers; @Metric("# of active users") MutableGaugeInt activeUsers; @Metric("# of active applications") MutableGaugeInt activeApplications; @@ -319,6 +321,7 @@ public class QueueMetrics implements MetricsSource { public void setAvailableResourcesToQueue(Resource limit) { availableMB.set(limit.getMemory()); availableVCores.set(limit.getVirtualCores()); + availableGCores.set(limit.getGpuCores()); } /** @@ -356,6 +359,7 @@ public class QueueMetrics implements MetricsSource { pendingContainers.incr(containers); pendingMB.incr(res.getMemory() * containers); pendingVCores.incr(res.getVirtualCores() * containers); + pendingGCores.incr(res.getGpuCores() * containers); } public void decrPendingResources(String user, int containers, Resource res) { @@ -373,6 +377,7 @@ public class QueueMetrics implements MetricsSource { pendingContainers.decr(containers); pendingMB.decr(res.getMemory() * containers); pendingVCores.decr(res.getVirtualCores() * containers); + pendingGCores.decr(res.getGpuCores() * containers); } public void allocateResources(String user, int containers, Resource res, @@ -381,6 +386,7 @@ public class 
QueueMetrics implements MetricsSource { aggregateContainersAllocated.incr(containers); allocatedMB.incr(res.getMemory() * containers); allocatedVCores.incr(res.getVirtualCores() * containers); + allocatedGCores.incr(res.getGpuCores() * containers); if (decrPending) { _decrPendingResources(containers, res); } @@ -398,6 +404,7 @@ public class QueueMetrics implements MetricsSource { aggregateContainersReleased.incr(containers); allocatedMB.decr(res.getMemory() * containers); allocatedVCores.decr(res.getVirtualCores() * containers); + allocatedGCores.decr(res.getGpuCores() * containers); QueueMetrics userMetrics = getUserMetrics(user); if (userMetrics != null) { userMetrics.releaseResources(user, containers, res); @@ -411,6 +418,7 @@ public class QueueMetrics implements MetricsSource { reservedContainers.incr(); reservedMB.incr(res.getMemory()); reservedVCores.incr(res.getVirtualCores()); + reservedGCores.incr(res.getGpuCores()); QueueMetrics userMetrics = getUserMetrics(user); if (userMetrics != null) { userMetrics.reserveResource(user, res); @@ -424,6 +432,7 @@ public class QueueMetrics implements MetricsSource { reservedContainers.decr(); reservedMB.decr(res.getMemory()); reservedVCores.decr(res.getVirtualCores()); + reservedGCores.decr(res.getGpuCores()); QueueMetrics userMetrics = getUserMetrics(user); if (userMetrics != null) { userMetrics.unreserveResource(user, res); @@ -488,7 +497,7 @@ public class QueueMetrics implements MetricsSource { } public Resource getAllocatedResources() { - return BuilderUtils.newResource(allocatedMB.value(), allocatedVCores.value()); + return BuilderUtils.newResource(allocatedMB.value(), allocatedVCores.value(), allocatedGCores.value()); } public int getAllocatedMB() { @@ -499,6 +508,10 @@ public class QueueMetrics implements MetricsSource { return allocatedVCores.value(); } + public int getAllocatedGpuCores() { + return allocatedGCores.value(); + } + public int getAllocatedContainers() { return allocatedContainers.value(); } @@ 
-511,6 +524,10 @@ public class QueueMetrics implements MetricsSource { return availableVCores.value(); } + public int getAvailableGpuCores() { + return availableGCores.value(); + } + public int getPendingMB() { return pendingMB.value(); } @@ -519,6 +536,10 @@ public class QueueMetrics implements MetricsSource { return pendingVCores.value(); } + public int getPendingGpuCores() { + return pendingGCores.value(); + } + public int getPendingContainers() { return pendingContainers.value(); } @@ -531,6 +552,10 @@ public class QueueMetrics implements MetricsSource { return reservedVCores.value(); } + public int getReservedGpuCores() { + return reservedGCores.value(); + } + public int getReservedContainers() { return reservedContainers.value(); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/ResourceUsage.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/ResourceUsage.java index 0ec5873..f20f10e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/ResourceUsage.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/ResourceUsage.java @@ -72,7 +72,7 @@ public class ResourceUsage { public UsageByLabel(String label) { resArr = new Resource[ResourceType.values().length]; for (int i = 0; i < resArr.length; i++) { - resArr[i] = Resource.newInstance(0, 0); + resArr[i] = Resource.newInstance(0, 0, 0); }; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplicationAttempt.java 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplicationAttempt.java index 9816699..b1e8a11 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplicationAttempt.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplicationAttempt.java @@ -77,6 +77,7 @@ protected long lastMemoryAggregateAllocationUpdateTime = 0; private long lastMemorySeconds = 0; private long lastVcoreSeconds = 0; + private long lastGcoreSeconds = 0; protected final AppSchedulingInfo appSchedulingInfo; protected ApplicationAttemptId attemptId; @@ -87,9 +88,9 @@ private final Multiset reReservations = HashMultiset.create(); - protected final Resource currentReservation = Resource.newInstance(0, 0); - private Resource resourceLimit = Resource.newInstance(0, 0); - protected Resource currentConsumption = Resource.newInstance(0, 0); + protected final Resource currentReservation = Resource.newInstance(0, 0, 0); + private Resource resourceLimit = Resource.newInstance(0, 0, 0); + protected Resource currentConsumption = Resource.newInstance(0, 0, 0); private Resource amResource = Resources.none(); private boolean unmanagedAM = true; private boolean amRunning = false; @@ -531,6 +532,7 @@ synchronized AggregateAppResourceUsage getRunningAggregateAppResourceUsage() { > MEM_AGGREGATE_ALLOCATION_CACHE_MSECS) { long memorySeconds = 0; long vcoreSeconds = 0; + long gcoreSeconds = 0; for (RMContainer rmContainer : this.liveContainers.values()) { long usedMillis = currentTimeMillis - rmContainer.getCreationTime(); Resource resource = rmContainer.getContainer().getResource(); @@ -538,13 +540,15 @@ synchronized AggregateAppResourceUsage 
getRunningAggregateAppResourceUsage() { DateUtils.MILLIS_PER_SECOND; vcoreSeconds += resource.getVirtualCores() * usedMillis / DateUtils.MILLIS_PER_SECOND; + gcoreSeconds += resource.getGpuCores() * usedMillis / DateUtils.MILLIS_PER_SECOND; } lastMemoryAggregateAllocationUpdateTime = currentTimeMillis; lastMemorySeconds = memorySeconds; lastVcoreSeconds = vcoreSeconds; + lastGcoreSeconds = gcoreSeconds; } - return new AggregateAppResourceUsage(lastMemorySeconds, lastVcoreSeconds); + return new AggregateAppResourceUsage(lastMemorySeconds, lastVcoreSeconds, lastGcoreSeconds); } public synchronized ApplicationResourceUsageReport getResourceUsageReport() { @@ -553,7 +557,7 @@ public synchronized ApplicationResourceUsageReport getResourceUsageReport() { reservedContainers.size(), Resources.clone(currentConsumption), Resources.clone(currentReservation), Resources.add(currentConsumption, currentReservation), - resUsage.getMemorySeconds(), resUsage.getVcoreSeconds()); + resUsage.getMemorySeconds(), resUsage.getVcoreSeconds(), resUsage.getGcoreSeconds()); } public synchronized Map getLiveContainersMap() { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerNode.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerNode.java index f03663a..053b90c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerNode.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerNode.java @@ -53,8 +52,8 @@ public abstract class SchedulerNode { private static final Log LOG = LogFactory.getLog(SchedulerNode.class); - 
private Resource availableResource = Resource.newInstance(0, 0); - private Resource usedResource = Resource.newInstance(0, 0); + private Resource availableResource = Resource.newInstance(0, 0, 0); + private Resource usedResource = Resource.newInstance(0, 0, 0); private Resource totalResourceCapability; private RMContainer reservedContainer; private volatile int numContainers; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerUtils.java index 071f293..481f003 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerUtils.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerUtils.java @@ -256,7 +255,7 @@ public class SchedulerUtils { /** * Utility method to validate a resource request, by insuring that the - * requested memory/vcore is non-negative and not greater than max + * requested memory/vcore/gcore is non-negative and not greater than max * * @throws InvalidResourceRequestException when there is invalid request */ @@ -281,6 +280,16 @@ public class SchedulerUtils { + resReq.getCapability().getVirtualCores() + ", maxVirtualCores=" + maximumResource.getVirtualCores()); } + if (resReq.getCapability().getGpuCores() < 0 || + resReq.getCapability().getGpuCores() > + maximumResource.getGpuCores()) { + throw new InvalidResourceRequestException("Invalid resource request" + + ", requested gpu cores < 0" + + ", or requested gpu cores > max configured" + + ", requestedGpuCores=" + + resReq.getCapability().getGpuCores() + + ", 
maxGpuCores=" + maximumResource.getGpuCores()); + } String labelExp = resReq.getNodeLabelExpression(); // we don't allow specify label expression other than resourceName=ANY now diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CSAssignment.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CSAssignment.java index d5b65ba..5078381 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CSAssignment.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CSAssignment.java @@ -51,7 +51,7 @@ public class CSAssignment { } public CSAssignment(boolean skipped) { - this.resource = Resources.createResource(0, 0); + this.resource = Resources.createResource(0, 0, 0); this.type = NodeType.NODE_LOCAL; this.application = null; this.excessReservation = null; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java index 70fe57e..c806355 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java @@ -193,6 +193,24 @@ public class CapacityScheduler extends + "=" + maxVcores + ", min and max should be greater than 0" + ", max should be no smaller than min."); } + + // validate scheduler gcores allocation setting + int minGcores = conf.getInt( + YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_GCORES, + YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_GCORES); + int maxGcores = conf.getInt( + YarnConfiguration.RM_SCHEDULER_MAXIMUM_ALLOCATION_GCORES, + YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_GCORES); + + if (minGcores < 0 || minGcores > maxGcores) { + throw new YarnRuntimeException("Invalid resource scheduler gcores" + + " allocation configuration" + + ", " + YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_GCORES + + "=" + minGcores + + ", " + YarnConfiguration.RM_SCHEDULER_MAXIMUM_ALLOCATION_GCORES + + "=" + maxGcores + ", min and max should be greater than 0" + + ", max should be no smaller than min."); + } } @Override @@ -1614,7 +1632,7 @@ public class CapacityScheduler extends return EnumSet.of(SchedulerResourceTypes.MEMORY); } return EnumSet - .of(SchedulerResourceTypes.MEMORY, SchedulerResourceTypes.CPU); + .of(SchedulerResourceTypes.MEMORY, SchedulerResourceTypes.CPU, SchedulerResourceTypes.GPU); } @Override diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerConfiguration.java index dfbe900..6ff8c64 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerConfiguration.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerConfiguration.java @@ -120,6 +119,10 @@ public class CapacitySchedulerConfiguration extends ReservationSchedulerConfigur "maximum-allocation-vcores"; @Private + public static final String MAXIMUM_ALLOCATION_GCORES = + "maximum-allocation-gcores"; + + @Private public static final int DEFAULT_MAXIMUM_SYSTEM_APPLICATIIONS = 10000; @Private @@ -573,7 +576,10 @@ public class CapacitySchedulerConfiguration extends ReservationSchedulerConfigur int minimumCores = getInt( YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES, YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES); - return Resources.createResource(minimumMemory, minimumCores); + int minimumGCores = getInt( + YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_GCORES, + YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_GCORES); + return Resources.createResource(minimumMemory, minimumCores, minimumGCores); } public Resource getMaximumAllocation() { @@ -583,7 +589,10 @@ public class CapacitySchedulerConfiguration extends ReservationSchedulerConfigur int maximumCores = getInt( YarnConfiguration.RM_SCHEDULER_MAXIMUM_ALLOCATION_VCORES, YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_VCORES); - return Resources.createResource(maximumMemory, maximumCores); + int maximumGCores = getInt( + YarnConfiguration.RM_SCHEDULER_MAXIMUM_ALLOCATION_GCORES, + YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_GCORES); + return Resources.createResource(maximumMemory, maximumCores, maximumGCores); } /** @@ -600,11 +609,15 @@ public class CapacitySchedulerConfiguration extends ReservationSchedulerConfigur 
(int)UNDEFINED); int maxAllocationVcoresPerQueue = getInt( queuePrefix + MAXIMUM_ALLOCATION_VCORES, (int)UNDEFINED); + int maxAllocationGcoresPerQueue = getInt( + queuePrefix + MAXIMUM_ALLOCATION_GCORES, (int)UNDEFINED); if (LOG.isDebugEnabled()) { LOG.debug("max alloc mb per queue for " + queue + " is " + maxAllocationMbPerQueue); LOG.debug("max alloc vcores per queue for " + queue + " is " + maxAllocationVcoresPerQueue); + LOG.debug("max alloc gcores per queue for " + queue + " is " + + maxAllocationGcoresPerQueue); } Resource clusterMax = getMaximumAllocation(); if (maxAllocationMbPerQueue == (int)UNDEFINED) { @@ -615,10 +628,15 @@ public class CapacitySchedulerConfiguration extends ReservationSchedulerConfigur LOG.info("max alloc vcore per queue for " + queue + " is undefined"); maxAllocationVcoresPerQueue = clusterMax.getVirtualCores(); } + if (maxAllocationGcoresPerQueue == (int)UNDEFINED) { + LOG.info("max alloc gcore per queue for " + queue + " is undefined"); + maxAllocationGcoresPerQueue = clusterMax.getGpuCores(); + } Resource result = Resources.createResource(maxAllocationMbPerQueue, - maxAllocationVcoresPerQueue); + maxAllocationVcoresPerQueue, maxAllocationGcoresPerQueue); if (maxAllocationMbPerQueue > clusterMax.getMemory() - || maxAllocationVcoresPerQueue > clusterMax.getVirtualCores()) { + || maxAllocationVcoresPerQueue > clusterMax.getVirtualCores() + || maxAllocationGcoresPerQueue > clusterMax.getGpuCores()) { throw new IllegalArgumentException( "Queue maximum allocation cannot be larger than the cluster setting" + " for queue " + queue diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java index 0228561..58aba1d 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java @@ -467,7 +467,8 @@ public class LeafQueue extends AbstractCSQueue { Resource oldMax = getMaximumAllocation(); Resource newMax = newlyParsedLeafQueue.getMaximumAllocation(); if (newMax.getMemory() < oldMax.getMemory() - || newMax.getVirtualCores() < oldMax.getVirtualCores()) { + || newMax.getVirtualCores() < oldMax.getVirtualCores() + || newMax.getGpuCores() < oldMax.getGpuCores()) { throw new IOException( "Trying to reinitialize " + getQueuePath() @@ -796,7 +797,7 @@ public class LeafQueue extends AbstractCSQueue { } private static final CSAssignment NULL_ASSIGNMENT = - new CSAssignment(Resources.createResource(0, 0), NodeType.NODE_LOCAL); + new CSAssignment(Resources.createResource(0, 0, 0), NodeType.NODE_LOCAL); private static final CSAssignment SKIP_ASSIGNMENT = new CSAssignment(true); @@ -1088,7 +1089,7 @@ public class LeafQueue extends AbstractCSQueue { // with miniscule capacity (< 1 slot) make progress // * If we're running over capacity, then its // (usedResources + required) (which extra resources we are allocating) - Resource queueCapacity = Resource.newInstance(0, 0); + Resource queueCapacity = Resource.newInstance(0, 0, 0); if (requestedLabels != null && !requestedLabels.isEmpty()) { // if we have multiple labels to request, we will choose to use the first // label @@ -1815,7 +1816,7 @@ public class LeafQueue extends AbstractCSQueue { @VisibleForTesting public static class User { ResourceUsage userResourceUsage = new ResourceUsage(); - volatile Resource userResourceLimit = Resource.newInstance(0, 0); + volatile Resource userResourceLimit = Resource.newInstance(0, 0, 0); int 
pendingApplications = 0; int activeApplications = 0; @@ -1925,7 +1926,7 @@ public class LeafQueue extends AbstractCSQueue { // return a single Resource capturing the overal amount of pending resources public synchronized Resource getTotalResourcePending() { - Resource ret = BuilderUtils.newResource(0, 0); + Resource ret = BuilderUtils.newResource(0, 0, 0); for (FiCaSchedulerApp f : activeApplications) { Resources.addTo(ret, f.getTotalPendingRequests()); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/ParentQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/ParentQueue.java index 5ed6bb8..c060568 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/ParentQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/ParentQueue.java @@ -378,7 +378,7 @@ public class ParentQueue extends AbstractCSQueue { public synchronized CSAssignment assignContainers(Resource clusterResource, FiCaSchedulerNode node, ResourceLimits resourceLimits) { CSAssignment assignment = - new CSAssignment(Resources.createResource(0, 0), NodeType.NODE_LOCAL); + new CSAssignment(Resources.createResource(0, 0, 0), NodeType.NODE_LOCAL); Set nodeLabels = node.getLabels(); // if our queue cannot access this node, just return @@ -397,7 +397,7 @@ public class ParentQueue extends AbstractCSQueue { // looking if (!super.canAssignToThisQueue(clusterResource, nodeLabels, resourceLimits, minimumAllocation, Resources.createResource(getMetrics() - .getReservedMB(), getMetrics().getReservedVirtualCores()))) { + 
.getReservedMB(), getMetrics().getReservedVirtualCores(), getMetrics().getReservedGpuCores()))) { break; } @@ -491,7 +491,7 @@ public class ParentQueue extends AbstractCSQueue { private synchronized CSAssignment assignContainersToChildQueues( Resource cluster, FiCaSchedulerNode node, ResourceLimits limits) { CSAssignment assignment = - new CSAssignment(Resources.createResource(0, 0), NodeType.NODE_LOCAL); + new CSAssignment(Resources.createResource(0, 0, 0), NodeType.NODE_LOCAL); printChildQueues(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerApp.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerApp.java index f937d09..995a52e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerApp.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerApp.java @@ -234,7 +234,7 @@ public class FiCaSchedulerApp extends SchedulerApplicationAttempt { } public synchronized Resource getTotalPendingRequests() { - Resource ret = Resource.newInstance(0, 0); + Resource ret = Resource.newInstance(0, 0, 0); for (ResourceRequest rr : appSchedulingInfo.getAllResourceRequests()) { // to avoid double counting we count only "ANY" resource requests if (ResourceRequest.isAnyLocation(rr.getResourceName())){ @@ -268,7 +268,7 @@ public class FiCaSchedulerApp extends SchedulerApplicationAttempt { Set currentContPreemption = Collections.unmodifiableSet( new HashSet(containersToPreempt)); containersToPreempt.clear(); - Resource tot = 
Resource.newInstance(0, 0); + Resource tot = Resource.newInstance(0, 0, 0); for(ContainerId c : currentContPreemption){ Resources.addTo(tot, liveContainers.get(c).getContainer().getResource()); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSAppAttempt.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSAppAttempt.java index 763cc82..bedf5b0 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSAppAttempt.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSAppAttempt.java @@ -73,7 +73,7 @@ public class FSAppAttempt extends SchedulerApplicationAttempt private ResourceWeights resourceWeights; private Resource demand = Resources.createResource(0); private FairScheduler scheduler; - private Resource fairShare = Resources.createResource(0, 0); + private Resource fairShare = Resources.createResource(0, 0, 0); private Resource preemptedResources = Resources.createResource(0); private RMContainerComparator comparator = new RMContainerComparator(); private final Map preemptionMap = new HashMap(); @@ -417,6 +417,7 @@ public class FSAppAttempt extends SchedulerApplicationAttempt public void clearPreemptedResources() { preemptedResources.setMemory(0); preemptedResources.setVirtualCores(0); + preemptedResources.setGpuCores(0); } /** diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSLeafQueue.java 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSLeafQueue.java index 3c97535..5be20b1 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSLeafQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSLeafQueue.java @@ -76,7 +76,7 @@ public class FSLeafQueue extends FSQueue { this.lastTimeAtMinShare = scheduler.getClock().getTime(); this.lastTimeAtFairShareThreshold = scheduler.getClock().getTime(); activeUsersManager = new ActiveUsersManager(getMetrics()); - amResourceUsage = Resource.newInstance(0, 0); + amResourceUsage = Resource.newInstance(0, 0, 0); } public void addApp(FSAppAttempt app, boolean runnable) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSQueue.java index 1562bf6..74ee244 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSQueue.java @@ -41,8 +41,8 @@ @Private @Unstable public abstract class FSQueue implements Queue, Schedulable { - private Resource fairShare = Resources.createResource(0, 0); - private Resource steadyFairShare = Resources.createResource(0, 0); + private Resource fairShare = 
Resources.createResource(0, 0, 0); + private Resource steadyFairShare = Resources.createResource(0, 0, 0); private final String name; protected final FairScheduler scheduler; private final FSQueueMetrics metrics; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSQueueMetrics.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSQueueMetrics.java index 82c422b..0c5124c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSQueueMetrics.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSQueueMetrics.java @@ -33,12 +33,16 @@ public class FSQueueMetrics extends QueueMetrics { @Metric("Fair share of memory in MB") MutableGaugeInt fairShareMB; @Metric("Fair share of CPU in vcores") MutableGaugeInt fairShareVCores; + @Metric("Fair share of GPU in gcores") MutableGaugeInt fairShareGCores; @Metric("Steady fair share of memory in MB") MutableGaugeInt steadyFairShareMB; @Metric("Steady fair share of CPU in vcores") MutableGaugeInt steadyFairShareVCores; + @Metric("Steady fair share of GPU in gcores") MutableGaugeInt steadyFairShareGCores; @Metric("Minimum share of memory in MB") MutableGaugeInt minShareMB; @Metric("Minimum share of CPU in vcores") MutableGaugeInt minShareVCores; + @Metric("Minimum share of GPU in gcores") MutableGaugeInt minShareGCores; @Metric("Maximum share of memory in MB") MutableGaugeInt maxShareMB; @Metric("Maximum share of CPU in vcores") MutableGaugeInt maxShareVCores; + @Metric("Maximum share of GPU in gcores") MutableGaugeInt maxShareGCores; FSQueueMetrics(MetricsSystem 
ms, String queueName, Queue parent, boolean enableUserMetrics, Configuration conf) { @@ -48,6 +52,7 @@ public class FSQueueMetrics extends QueueMetrics { public void setFairShare(Resource resource) { fairShareMB.set(resource.getMemory()); fairShareVCores.set(resource.getVirtualCores()); + fairShareGCores.set(resource.getGpuCores()); } public int getFairShareMB() { @@ -58,9 +63,14 @@ public class FSQueueMetrics extends QueueMetrics { return fairShareVCores.value(); } + public int getFairShareGpuCores() { + return fairShareGCores.value(); + } + public void setSteadyFairShare(Resource resource) { steadyFairShareMB.set(resource.getMemory()); steadyFairShareVCores.set(resource.getVirtualCores()); + steadyFairShareGCores.set(resource.getGpuCores()); } public int getSteadyFairShareMB() { @@ -71,9 +81,14 @@ public class FSQueueMetrics extends QueueMetrics { return steadyFairShareVCores.value(); } + public int getSteadyFairShareGCores() { + return steadyFairShareGCores.value(); + } + public void setMinShare(Resource resource) { minShareMB.set(resource.getMemory()); minShareVCores.set(resource.getVirtualCores()); + minShareGCores.set(resource.getGpuCores()); } public int getMinShareMB() { @@ -83,10 +98,15 @@ public class FSQueueMetrics extends QueueMetrics { public int getMinShareVirtualCores() { return minShareVCores.value(); } - + + public int getMinShareGpuCores() { + return minShareGCores.value(); + } + public void setMaxShare(Resource resource) { maxShareMB.set(resource.getMemory()); maxShareVCores.set(resource.getVirtualCores()); + maxShareGCores.set(resource.getGpuCores()); } public int getMaxShareMB() { @@ -96,6 +116,10 @@ public class FSQueueMetrics extends QueueMetrics { public int getMaxShareVirtualCores() { return maxShareVCores.value(); } + + public int getMaxShareGpuCores() { + return maxShareGCores.value(); + } public synchronized static FSQueueMetrics forQueue(String queueName, Queue parent, diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java index 5fef817..3f4149c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java @@ -236,6 +236,24 @@ public class FairScheduler extends + "=" + maxVcores + ", min should equal greater than 0" + ", max should be no smaller than min."); } + + // validate scheduler gcores allocating setting + int minGcores = conf.getInt( + YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_GCORES, + YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_GCORES); + int maxGcores = conf.getInt( + YarnConfiguration.RM_SCHEDULER_MAXIMUM_ALLOCATION_GCORES, + YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_GCORES); + + if (minGcores < 0 || minGcores > maxGcores) { + throw new YarnRuntimeException("Invalid resource scheduler gcores" + + " allocation configuration" + + ", " + YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_GCORES + + "=" + minGcores + + ", " + YarnConfiguration.RM_SCHEDULER_MAXIMUM_ALLOCATION_GCORES + + "=" + maxGcores + ", min should equal greater than 0" + + ", max should be no smaller than min."); + } } public FairSchedulerConfiguration getConf() { @@ -319,7 +337,8 @@ public class FairScheduler extends " Allocations: " + rootMetrics.getAllocatedResources() + " Availability: " + Resource.newInstance( rootMetrics.getAvailableMB(), - rootMetrics.getAvailableVirtualCores()) + + 
rootMetrics.getAvailableVirtualCores(), + rootMetrics.getAvailableGpuCores()) + " Demand: " + rootQueue.getDemand()); } } @@ -1143,8 +1162,9 @@ public class FairScheduler extends if (preemptionEnabled) { return (preemptionUtilizationThreshold < Math.max( (float) rootMetrics.getAllocatedMB() / clusterResource.getMemory(), - (float) rootMetrics.getAllocatedVirtualCores() / - clusterResource.getVirtualCores())); + Math.max((float) rootMetrics.getAllocatedVirtualCores() / + clusterResource.getVirtualCores(), + /* No GPUs in the cluster: count GPU utilization as 0 instead of 0f/0 = NaN, which Math.max would propagate and which makes this comparison always false. */ clusterResource.getGpuCores() == 0 ? 0f : (float) rootMetrics.getAllocatedGpuCores() / clusterResource.getGpuCores()))); } return false; } @@ -1638,7 +1658,7 @@ public class FairScheduler extends @Override public EnumSet getSchedulingResourceTypes() { return EnumSet - .of(SchedulerResourceTypes.MEMORY, SchedulerResourceTypes.CPU); + .of(SchedulerResourceTypes.MEMORY, SchedulerResourceTypes.CPU, SchedulerResourceTypes.GPU); } @Override diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairSchedulerConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairSchedulerConfiguration.java index e477e6e..e7993b2 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairSchedulerConfiguration.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairSchedulerConfiguration.java @@ -49,7 +47,9 @@ public class FairSchedulerConfiguration extends Configuration { public static final String RM_SCHEDULER_INCREMENT_ALLOCATION_VCORES = YarnConfiguration.YARN_PREFIX + "scheduler.increment-allocation-vcores"; public static final int 
DEFAULT_RM_SCHEDULER_INCREMENT_ALLOCATION_VCORES = 1; - + public static final String RM_SCHEDULER_INCREMENT_ALLOCATION_GCORES = + YarnConfiguration.YARN_PREFIX + "scheduler.increment-allocation-gcores"; + public static final int DEFAULT_RM_SCHEDULER_INCREMENT_ALLOCATION_GCORES = 1; private static final String CONF_PREFIX = "yarn.scheduler.fair."; public static final String ALLOCATION_FILE = CONF_PREFIX + "allocation.file"; @@ -144,7 +144,10 @@ public class FairSchedulerConfiguration extends Configuration { int cpu = getInt( YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES, YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES); - return Resources.createResource(mem, cpu); + int gpu = getInt( + YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_GCORES, + YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_GCORES); + return Resources.createResource(mem, cpu, gpu); } public Resource getMaximumAllocation() { @@ -154,7 +157,10 @@ public class FairSchedulerConfiguration extends Configuration { int cpu = getInt( YarnConfiguration.RM_SCHEDULER_MAXIMUM_ALLOCATION_VCORES, YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_VCORES); - return Resources.createResource(mem, cpu); + int gpu = getInt( + YarnConfiguration.RM_SCHEDULER_MAXIMUM_ALLOCATION_GCORES, + YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_GCORES); + return Resources.createResource(mem, cpu, gpu); } public Resource getIncrementAllocation() { @@ -164,7 +170,10 @@ public class FairSchedulerConfiguration extends Configuration { int incrementCores = getInt( RM_SCHEDULER_INCREMENT_ALLOCATION_VCORES, DEFAULT_RM_SCHEDULER_INCREMENT_ALLOCATION_VCORES); - return Resources.createResource(incrementMemory, incrementCores); + int incrementGCores = getInt( + RM_SCHEDULER_INCREMENT_ALLOCATION_GCORES, + DEFAULT_RM_SCHEDULER_INCREMENT_ALLOCATION_GCORES); + return Resources.createResource(incrementMemory, incrementCores, incrementGCores); } public float getLocalityThresholdNode() { @@ 
-235,7 +244,7 @@ public class FairSchedulerConfiguration extends Configuration { /** * Parses a resource config value of a form like "1024", "1024 mb", - * or "1024 mb, 3 vcores". If no units are given, megabytes are assumed. + * "1024 mb, 3 vcores", or "1024 mb, 3 vcores, 1 gcores". If no units are given, megabytes are assumed. The gcores term is optional and defaults to 0. * * @throws AllocationConfigurationException */ @@ -245,7 +254,8 @@ public class FairSchedulerConfiguration extends Configuration { val = StringUtils.toLowerCase(val); int memory = findResource(val, "mb"); int vcores = findResource(val, "vcores"); - return BuilderUtils.newResource(memory, vcores); + int gcores = findResource(val, "gcores"); + return BuilderUtils.newResource(memory, vcores, gcores); } catch (AllocationConfigurationException ex) { throw ex; } catch (Exception ex) { @@ -262,9 +272,10 @@ public class FairSchedulerConfiguration extends Configuration { throws AllocationConfigurationException { Pattern pattern = Pattern.compile("(\\d+)\\s*" + units); Matcher matcher = pattern.matcher(val); - if (!matcher.find()) { + if (!matcher.find() && !units.equals("gcores")) { throw new AllocationConfigurationException("Missing resource: " + units); + } else { + return matcher.find(0) ? 
Integer.parseInt(matcher.group(1)) : 0; } - return Integer.parseInt(matcher.group(1)); } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/policies/ComputeFairShares.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/policies/ComputeFairShares.java index f4fad32..50d54c0 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/policies/ComputeFairShares.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/policies/ComputeFairShares.java @@ -258,6 +258,8 @@ public class ComputeFairShares { return resource.getMemory(); case CPU: return resource.getVirtualCores(); + case GPU: + return resource.getGpuCores(); default: throw new IllegalArgumentException("Invalid resource"); } @@ -271,6 +273,9 @@ public class ComputeFairShares { case CPU: resource.setVirtualCores(val); break; + case GPU: + resource.setGpuCores(val); + break; default: throw new IllegalArgumentException("Invalid resource"); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/policies/FairSharePolicy.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/policies/FairSharePolicy.java index 918db9d..e580190 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/policies/FairSharePolicy.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/policies/FairSharePolicy.java @@ -121,7 +120,7 @@ public class FairSharePolicy extends SchedulingPolicy { queueFairShare.getMemory() - queueUsage.getMemory(), 0); Resource headroom = Resources.createResource( Math.min(maxAvailable.getMemory(), queueAvailableMemory), - maxAvailable.getVirtualCores()); + maxAvailable.getVirtualCores(), maxAvailable.getGpuCores()); return headroom; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/policies/FifoPolicy.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/policies/FifoPolicy.java index 7d88933..86ad694 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/policies/FifoPolicy.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/policies/FifoPolicy.java @@ -114,7 +113,7 @@ public class FifoPolicy extends SchedulingPolicy { queueFairShare.getMemory() - queueUsage.getMemory(), 0); Resource headroom = Resources.createResource( Math.min(maxAvailable.getMemory(), queueAvailableMemory), - maxAvailable.getVirtualCores()); + maxAvailable.getVirtualCores(), maxAvailable.getGpuCores()); return headroom; } diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java index b8c419c..b194b56 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java @@ -230,7 +229,10 @@ public class FifoScheduler extends YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_MB), conf.getInt( YarnConfiguration.RM_SCHEDULER_MAXIMUM_ALLOCATION_VCORES, - YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_VCORES))); + YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_VCORES), + conf.getInt( + YarnConfiguration.RM_SCHEDULER_MAXIMUM_ALLOCATION_GCORES, + YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_GCORES))); this.usePortForNodeName = conf.getBoolean( YarnConfiguration.RM_SCHEDULER_INCLUDE_PORT_IN_NODE_NAME, YarnConfiguration.DEFAULT_RM_SCHEDULER_USE_PORT_FOR_NODE_NAME); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/MetricsOverviewTable.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/MetricsOverviewTable.java index 7ee2ca4..307c9db 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/MetricsOverviewTable.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/MetricsOverviewTable.java @@ -73,6 +73,9 @@ public class MetricsOverviewTable extends HtmlBlock { th().$class("ui-state-default")._("VCores Used")._(). th().$class("ui-state-default")._("VCores Total")._(). th().$class("ui-state-default")._("VCores Reserved")._(). + th().$class("ui-state-default")._("GCores Used")._(). + th().$class("ui-state-default")._("GCores Total")._(). + th().$class("ui-state-default")._("GCores Reserved")._(). th().$class("ui-state-default")._("Active Nodes")._(). th().$class("ui-state-default")._("Decommissioned Nodes")._(). th().$class("ui-state-default")._("Lost Nodes")._(). @@ -98,6 +101,9 @@ public class MetricsOverviewTable extends HtmlBlock { td(String.valueOf(clusterMetrics.getAllocatedVirtualCores())). td(String.valueOf(clusterMetrics.getTotalVirtualCores())). td(String.valueOf(clusterMetrics.getReservedVirtualCores())). + td(String.valueOf(clusterMetrics.getAllocatedGpuCores())). + td(String.valueOf(clusterMetrics.getTotalGpuCores())). + td(String.valueOf(clusterMetrics.getReservedGpuCores())). td().a(url("nodes"),String.valueOf(clusterMetrics.getActiveNodes()))._(). td().a(url("nodes/decommissioned"),String.valueOf(clusterMetrics.getDecommissionedNodes()))._(). td().a(url("nodes/lost"),String.valueOf(clusterMetrics.getLostNodes()))._(). @@ -127,6 +133,9 @@ public class MetricsOverviewTable extends HtmlBlock { th().$class("ui-state-default")._("VCores Used")._(). th().$class("ui-state-default")._("VCores Pending")._(). th().$class("ui-state-default")._("VCores Reserved")._(). + th().$class("ui-state-default")._("GCores Used")._(). + th().$class("ui-state-default")._("GCores Pending")._(). + th().$class("ui-state-default")._("GCores Reserved")._(). _(). _(). tbody().$class("ui-widget-content"). 
@@ -149,6 +158,9 @@ public class MetricsOverviewTable extends HtmlBlock { td(String.valueOf(userMetrics.getAllocatedVirtualCores())). td(String.valueOf(userMetrics.getPendingVirtualCores())). td(String.valueOf(userMetrics.getReservedVirtualCores())). + td(String.valueOf(userMetrics.getAllocatedGpuCores())). + td(String.valueOf(userMetrics.getPendingGpuCores())). + td(String.valueOf(userMetrics.getReservedGpuCores())). _(). _()._(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/NodesPage.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/NodesPage.java index a2bab0c..cdfedc6 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/NodesPage.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/NodesPage.java @@ -78,6 +74,8 @@ class NodesPage extends RmView { .th(".mem", "Mem Avail") .th(".vcores", "VCores Used") .th(".vcores", "VCores Avail") + .th(".gcores", "GCores Used") + .th(".gcores", "GCores Avail") .th(".nodeManagerVersion", "Version")._()._().tbody(); NodeState stateFilter = null; if (type != null && !type.isEmpty()) { @@ -144,6 +142,8 @@ class NodesPage extends RmView { ._(StringUtils.byteDesc(availableMemory * BYTES_IN_MB))._() .td(String.valueOf(info.getUsedVirtualCores())) .td(String.valueOf(info.getAvailableVirtualCores())) + .td(String.valueOf(info.getUsedGpuCores())) + .td(String.valueOf(info.getAvailableGpuCores())) .td(ni.getNodeManagerVersion())._(); } tbody._()._(); diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMAppAttemptBlock.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMAppAttemptBlock.java index 279c3ea..afaf70b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMAppAttemptBlock.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMAppAttemptBlock.java @@ -100,7 +98,7 @@ public class RMAppAttemptBlock extends AppAttemptBlock{ } private Resource getTotalResource(List requests) { - Resource totalResource = Resource.newInstance(0, 0); + Resource totalResource = Resource.newInstance(0, 0, 0); if (requests == null) { return totalResource; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMAppBlock.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMAppBlock.java index e986cab..f7a87bb 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMAppBlock.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMAppBlock.java @@ -97,9 +96,10 @@ public class RMAppBlock extends AppBlock{ ._("Number of Non-AM Containers Preempted from Current Attempt:", attemptNumNonAMContainerPreempted) ._("Aggregate Resource Allocation:", - String.format("%d 
MB-seconds, %d vcore-seconds", + /* %s rather than %d: each argument is the String "N/A" when appMetrics is null, which %d rejects with IllegalFormatConversionException. */ String.format("%s MB-seconds, %s vcore-seconds, %s gcore-seconds", appMetrics == null ? "N/A" : appMetrics.getMemorySeconds(), - appMetrics == null ? "N/A" : appMetrics.getVcoreSeconds())); + appMetrics == null ? "N/A" : appMetrics.getVcoreSeconds(), + appMetrics == null ? "N/A" : appMetrics.getGcoreSeconds())); pdiv._(); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMWebServices.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMWebServices.java index 937a1ec..2fed00f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMWebServices.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMWebServices.java @@ -1301,9 +1301,15 @@ public class RMWebServices { String msg = "Requested more memory than configured max"; throw new BadRequestException(msg); } + if (newApp.getResource().getgCores() > rm.getConfig().getInt( + YarnConfiguration.RM_SCHEDULER_MAXIMUM_ALLOCATION_GCORES, + YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_GCORES)) { + String msg = "Requested more gcores than configured max"; + throw new BadRequestException(msg); + } Resource r = Resource.newInstance(newApp.getResource().getMemory(), newApp - .getResource().getvCores()); + .getResource().getvCores(), newApp.getResource().getgCores()); return r; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/AppInfo.java 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/AppInfo.java index 79b2248..4d83716 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/AppInfo.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/AppInfo.java @@ -82,13 +80,16 @@ public class AppInfo { protected String amHostHttpAddress; protected int allocatedMB; protected int allocatedVCores; + protected int allocatedGCores; protected int runningContainers; protected long memorySeconds; protected long vcoreSeconds; + protected long gcoreSeconds; // preemption info fields protected int preemptedResourceMB; protected int preemptedResourceVCores; + protected int preemptedResourceGCores; protected int numNonAMContainerPreempted; protected int numAMContainerPreempted; @@ -160,6 +161,7 @@ public class AppInfo { Resource usedResources = resourceReport.getUsedResources(); allocatedMB = usedResources.getMemory(); allocatedVCores = usedResources.getVirtualCores(); + allocatedGCores = usedResources.getGpuCores(); runningContainers = resourceReport.getNumUsedContainers(); } resourceRequests = @@ -178,8 +180,11 @@ public class AppInfo { appMetrics.getNumNonAMContainersPreempted(); preemptedResourceVCores = appMetrics.getResourcePreempted().getVirtualCores(); + preemptedResourceGCores = + appMetrics.getResourcePreempted().getGpuCores(); memorySeconds = appMetrics.getMemorySeconds(); vcoreSeconds = appMetrics.getVcoreSeconds(); + gcoreSeconds = appMetrics.getGcoreSeconds(); } } @@ -286,7 +291,11 @@ public class AppInfo { public int getAllocatedVCores() { return this.allocatedVCores; } - + + public int getAllocatedGCores() { + return this.allocatedGCores; + } + public int getPreemptedMB() { 
return preemptedResourceMB; } @@ -295,6 +304,10 @@ public class AppInfo { return preemptedResourceVCores; } + public int getPreemptedGCores() { + return preemptedResourceGCores; + } + public int getNumNonAMContainersPreempted() { return numNonAMContainerPreempted; } @@ -311,6 +324,10 @@ public class AppInfo { return vcoreSeconds; } + public long getGcoreSeconds() { + return gcoreSeconds; + } + public List getResourceRequests() { return this.resourceRequests; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/ClusterMetricsInfo.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/ClusterMetricsInfo.java index 16a5c01..6ae37c0 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/ClusterMetricsInfo.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/ClusterMetricsInfo.java @@ -45,12 +45,17 @@ protected long availableVirtualCores; protected long allocatedVirtualCores; + protected long reservedGpuCores; + protected long availableGpuCores; + protected long allocatedGpuCores; + protected int containersAllocated; protected int containersReserved; protected int containersPending; protected long totalMB; protected long totalVirtualCores; + protected long totalGpuCores; protected int totalNodes; protected int lostNodes; protected int unhealthyNodes; @@ -81,12 +86,17 @@ public class ClusterMetricsInfo { this.availableVirtualCores = metrics.getAvailableVirtualCores(); this.allocatedVirtualCores = metrics.getAllocatedVirtualCores(); + this.reservedGpuCores = metrics.getReservedGpuCores(); + this.availableGpuCores = 
metrics.getAvailableGpuCores(); + this.allocatedGpuCores = metrics.getAllocatedGpuCores(); + this.containersAllocated = metrics.getAllocatedContainers(); this.containersPending = metrics.getPendingContainers(); this.containersReserved = metrics.getReservedContainers(); this.totalMB = availableMB + allocatedMB; this.totalVirtualCores = availableVirtualCores + allocatedVirtualCores; + this.totalGpuCores = availableGpuCores + allocatedGpuCores; this.activeNodes = clusterMetrics.getNumActiveNMs(); this.lostNodes = clusterMetrics.getNumLostNMs(); this.unhealthyNodes = clusterMetrics.getUnhealthyNMs(); @@ -144,6 +154,18 @@ public class ClusterMetricsInfo { return this.allocatedVirtualCores; } + public long getReservedGpuCores() { + return this.reservedGpuCores; + } + + public long getAvailableGpuCores() { + return this.availableGpuCores; + } + + public long getAllocatedGpuCores() { + return this.allocatedGpuCores; + } + public int getContainersAllocated() { return this.containersAllocated; } @@ -164,6 +186,10 @@ public class ClusterMetricsInfo { return this.totalVirtualCores; } + public long getTotalGpuCores() { + return this.totalGpuCores; + } + public int getTotalNodes() { return this.totalNodes; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/NodeInfo.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/NodeInfo.java index 3104117..99e3e6f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/NodeInfo.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/NodeInfo.java @@ -49,6 +48,8 @@ public class NodeInfo { 
protected long availMemoryMB; protected long usedVirtualCores; protected long availableVirtualCores; + protected long usedGpuCores; + protected long availableGpuCores; protected ArrayList nodeLabels = new ArrayList(); public NodeInfo() { @@ -66,6 +67,8 @@ public class NodeInfo { this.availMemoryMB = report.getAvailableResource().getMemory(); this.usedVirtualCores = report.getUsedResource().getVirtualCores(); this.availableVirtualCores = report.getAvailableResource().getVirtualCores(); + this.usedGpuCores = report.getUsedResource().getGpuCores(); + this.availableGpuCores = report.getAvailableResource().getGpuCores(); } this.id = id.toString(); this.rack = ni.getRackName(); @@ -136,6 +139,14 @@ public class NodeInfo { return this.availableVirtualCores; } + public long getUsedGpuCores() { + return this.usedGpuCores; + } + + public long getAvailableGpuCores() { + return this.availableGpuCores; + } + public ArrayList getNodeLabels() { return this.nodeLabels; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/ResourceInfo.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/ResourceInfo.java index 9510f5f..c1de6fc 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/ResourceInfo.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/ResourceInfo.java @@ -29,6 +29,7 @@ public class ResourceInfo { int memory; int vCores; + int gCores; public ResourceInfo() { } @@ -36,6 +37,7 @@ public class ResourceInfo { public ResourceInfo(Resource res) { memory = res.getMemory(); vCores = res.getVirtualCores(); + gCores = 
res.getGpuCores(); } public int getMemory() { @@ -45,10 +47,14 @@ public class ResourceInfo { public int getvCores() { return vCores; } + + public int getgCores() { + return gCores; + } @Override public String toString() { - return ""; + return ""; } public void setMemory(int memory) { @@ -58,4 +64,8 @@ public class ResourceInfo { public void setvCores(int vCores) { this.vCores = vCores; } + + public void setgCores(int gCores) { + this.gCores = gCores; + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/UserMetricsInfo.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/UserMetricsInfo.java index bfa5bd2..b9e2db2 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/UserMetricsInfo.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/UserMetricsInfo.java @@ -46,6 +45,9 @@ public class UserMetricsInfo { protected long reservedVirtualCores; protected long pendingVirtualCores; protected long allocatedVirtualCores; + protected long reservedGpuCores; + protected long pendingGpuCores; + protected long allocatedGpuCores; @XmlTransient protected boolean userMetricsAvailable; @@ -80,6 +82,10 @@ public class UserMetricsInfo { this.reservedVirtualCores = userMetrics.getReservedVirtualCores(); this.pendingVirtualCores = userMetrics.getPendingVirtualCores(); this.allocatedVirtualCores = userMetrics.getAllocatedVirtualCores(); + + this.reservedGpuCores = userMetrics.getReservedGpuCores(); + this.pendingGpuCores = userMetrics.getPendingGpuCores(); + this.allocatedGpuCores = userMetrics.getAllocatedGpuCores(); } } @@ 
-135,6 +141,18 @@ public class UserMetricsInfo { return this.pendingVirtualCores; } + public long getReservedGpuCores() { + return this.reservedGpuCores; + } + + public long getAllocatedGpuCores() { + return this.allocatedGpuCores; + } + + public long getPendingGpuCores() { + return this.pendingGpuCores; + } + public int getReservedContainers() { return this.reservedContainers; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/proto/yarn_server_resourcemanager_recovery.proto b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/proto/yarn_server_resourcemanager_recovery.proto index 3c8ac34..f2e1fac 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/proto/yarn_server_resourcemanager_recovery.proto +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/proto/yarn_server_resourcemanager_recovery.proto @@ -82,6 +82,7 @@ message ApplicationAttemptStateDataProto { optional int64 memory_seconds = 10; optional int64 vcore_seconds = 11; optional int64 finish_time = 12; + optional int64 gcore_seconds = 13; } message EpochProto { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java index 9f7bd88..c19941a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java @@ -451,18 +451,28 @@ public MockNM registerNode(String nodeIdStr, 
int memory) throws Exception { return nm; } - public MockNM registerNode(String nodeIdStr, int memory, int vCores) + public MockNM registerNode(String nodeIdStr, int memory, int vCores) + throws Exception { + return registerNode(nodeIdStr, memory, vCores, 0); + } + + public MockNM registerNode(String nodeIdStr, int memory, int vCores, int gCores) throws Exception { MockNM nm = - new MockNM(nodeIdStr, memory, vCores, getResourceTrackerService()); + new MockNM(nodeIdStr, memory, vCores, gCores, getResourceTrackerService()); nm.registerNode(); return nm; } - public MockNM registerNode(String nodeIdStr, int memory, int vCores, + public MockNM registerNode(String nodeIdStr, int memory, int vCores, + List runningApplications) throws Exception { + return registerNode(nodeIdStr, memory, vCores, 0, runningApplications); + } + + public MockNM registerNode(String nodeIdStr, int memory, int vCores, int gCores, List runningApplications) throws Exception { MockNM nm = - new MockNM(nodeIdStr, memory, vCores, getResourceTrackerService(), + new MockNM(nodeIdStr, memory, vCores, gCores, getResourceTrackerService(), YarnVersionInfo.getVersion()); nm.registerNode(runningApplications); return nm; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestAppManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestAppManager.java index b2cde9e..cc1835e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestAppManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestAppManager.java @@ -576,7 +570,7 @@ public class TestAppManager{ 
when(app.getState()).thenReturn(RMAppState.RUNNING); when(app.getApplicationType()).thenReturn("MAPREDUCE"); RMAppMetrics metrics = - new RMAppMetrics(Resource.newInstance(1234, 56), 10, 1, 16384, 64); + new RMAppMetrics(Resource.newInstance(1234, 56, 56), 10, 1, 16384, 64, 64); when(app.getRMAppMetrics()).thenReturn(metrics); RMAppManager.ApplicationSummary.SummaryBuilder summary = @@ -592,9 +586,10 @@ public class TestAppManager{ Assert.assertTrue(msg.contains("Multiline" + escaped +"QueueName")); Assert.assertTrue(msg.contains("memorySeconds=16384")); Assert.assertTrue(msg.contains("vcoreSeconds=64")); + Assert.assertTrue(msg.contains("gcoreSeconds=64")); Assert.assertTrue(msg.contains("preemptedAMContainers=1")); Assert.assertTrue(msg.contains("preemptedNonAMContainers=10")); - Assert.assertTrue(msg.contains("preemptedResources=")); + Assert.assertTrue(msg.contains("preemptedResources=")); Assert.assertTrue(msg.contains("applicationType=MAPREDUCE")); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationCleanup.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationCleanup.java index 6e08aeb..6ca15b3 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationCleanup.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationCleanup.java @@ -365,7 +365,7 @@ public class TestApplicationCleanup { // alloc another container on nm2 AllocateResponse allocResponse = am0.allocate(Arrays.asList(ResourceRequest.newInstance( - Priority.newInstance(1), "*", Resource.newInstance(1024, 0), 1)), + Priority.newInstance(1), 
"*", Resource.newInstance(1024, 0, 0), 1)), null); while (null == allocResponse.getAllocatedContainers() || allocResponse.getAllocatedContainers().isEmpty()) { @@ -382,7 +382,7 @@ public class TestApplicationCleanup { nm1.setResourceTrackerService(rm2.getResourceTrackerService()); nm1.registerNode(Arrays.asList(NMContainerStatus.newInstance( ContainerId.newContainerId(am0.getApplicationAttemptId(), 1), - ContainerState.COMPLETE, Resource.newInstance(1024, 1), "", 0, + ContainerState.COMPLETE, Resource.newInstance(1024, 1, 1), "", 0, Priority.newInstance(0), 1234)), Arrays.asList(app0.getApplicationId())); nm2.setResourceTrackerService(rm2.getResourceTrackerService()); nm2.registerNode(Arrays.asList(app0.getApplicationId())); @@ -594,7 +594,7 @@ public class TestApplicationCleanup { ContainerId containerId = ContainerId.newContainerId(appAttemptId, id); NMContainerStatus containerReport = NMContainerStatus.newInstance(containerId, containerState, - Resource.newInstance(memory, 1), "recover container", 0, + Resource.newInstance(memory, 1, 1), "recover container", 0, Priority.newInstance(0), 0); return containerReport; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestClientRMService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestClientRMService.java index d4ac41d..c804e74 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestClientRMService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestClientRMService.java @@ -330,6 +327,7 @@ public class TestClientRMService { report.getApplicationResourceUsageReport(); Assert.assertEquals(10, 
usageReport.getMemorySeconds()); Assert.assertEquals(3, usageReport.getVcoreSeconds()); + Assert.assertEquals(3, usageReport.getGcoreSeconds()); } finally { rmService.close(); } @@ -1190,11 +1188,11 @@ public class TestClientRMService { ApplicationId applicationId3 = getApplicationId(3); YarnConfiguration config = new YarnConfiguration(); apps.put(applicationId1, getRMApp(rmContext, yarnScheduler, applicationId1, - config, "testqueue", 10, 3)); + config, "testqueue", 10, 3, 3)); apps.put(applicationId2, getRMApp(rmContext, yarnScheduler, applicationId2, - config, "a", 20, 2)); + config, "a", 20, 2, 2)); apps.put(applicationId3, getRMApp(rmContext, yarnScheduler, applicationId3, - config, "testqueue", 40, 5)); + config, "testqueue", 40, 5, 5)); return apps; } @@ -1217,7 +1215,7 @@ public class TestClientRMService { private RMAppImpl getRMApp(RMContext rmContext, YarnScheduler yarnScheduler, ApplicationId applicationId3, YarnConfiguration config, String queueName, - final long memorySeconds, final long vcoreSeconds) { + final long memorySeconds, final long vcoreSeconds, final long gcoreSeconds) { ApplicationSubmissionContext asContext = mock(ApplicationSubmissionContext.class); when(asContext.getMaxAppAttempts()).thenReturn(1); @@ -1227,7 +1225,7 @@ public class TestClientRMService { System.currentTimeMillis(), "YARN", null, BuilderUtils.newResourceRequest( RMAppAttemptImpl.AM_CONTAINER_PRIORITY, ResourceRequest.ANY, - Resource.newInstance(1024, 1), 1)){ + Resource.newInstance(1024, 1, 1), 1)){ @Override public ApplicationReport createAndGetApplicationReport( String clientUserName, boolean allowAccess) { @@ -1237,6 +1235,7 @@ public class TestClientRMService { report.getApplicationResourceUsageReport(); usageReport.setMemorySeconds(memorySeconds); usageReport.setVcoreSeconds(vcoreSeconds); + usageReport.setGcoreSeconds(gcoreSeconds); report.setApplicationResourceUsageReport(usageReport); return report; } @@ -1308,7 +1307,7 @@ public class TestClientRMService { 
rm.start(); MockNM nm; try { - nm = rm.registerNode("127.0.0.1:1", 102400, 100); + nm = rm.registerNode("127.0.0.1:1", 102400, 100, 100); // allow plan follower to synchronize Thread.sleep(1050); } catch (Exception e) { @@ -1380,7 +1379,7 @@ public class TestClientRMService { int numContainers, long arrival, long deadline, long duration) { // create a request with a single atomic ask ReservationRequest r = - ReservationRequest.newInstance(Resource.newInstance(1024, 1), + ReservationRequest.newInstance(Resource.newInstance(1024, 1, 1), numContainers, 1, duration); ReservationRequests reqs = ReservationRequests.newInstance(Collections.singletonList(r), diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestContainerResourceUsage.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestContainerResourceUsage.java index fcb48a0..5826a39 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestContainerResourceUsage.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestContainerResourceUsage.java @@ -90,6 +90,11 @@ public class TestContainerResourceUsage { + rmAppMetrics.getVcoreSeconds(), rmAppMetrics.getVcoreSeconds() == 0); + Assert.assertTrue( + "Before app submission, gcore seconds should have been 0 but was " + + rmAppMetrics.getGcoreSeconds(), + rmAppMetrics.getGcoreSeconds() == 0); + RMAppAttempt attempt0 = app0.getCurrentAppAttempt(); nm.nodeHeartbeat(true); @@ -127,7 +132,8 @@ public class TestContainerResourceUsage { ru.getMemorySeconds(), rmAppMetrics.getMemorySeconds()); Assert.assertEquals("Unexpected VcoreSeconds value", ru.getVcoreSeconds(), 
rmAppMetrics.getVcoreSeconds()); - + Assert.assertEquals("Unexpected GcoreSeconds value", + ru.getGcoreSeconds(), rmAppMetrics.getGcoreSeconds()); rm.stop(); } @@ -216,10 +222,12 @@ public class TestContainerResourceUsage { // Check that the container metrics match those from the app usage report. long memorySeconds = 0; long vcoreSeconds = 0; + long gcoreSeconds = 0; for (RMContainer c : rmContainers) { AggregateAppResourceUsage ru = calculateContainerResourceMetrics(c); memorySeconds += ru.getMemorySeconds(); vcoreSeconds += ru.getVcoreSeconds(); + gcoreSeconds += ru.getGcoreSeconds(); } RMAppMetrics metricsBefore = app0.getRMAppMetrics(); @@ -227,6 +235,8 @@ public class TestContainerResourceUsage { memorySeconds, metricsBefore.getMemorySeconds()); Assert.assertEquals("Unexpected VcoreSeconds value", vcoreSeconds, metricsBefore.getVcoreSeconds()); + Assert.assertEquals("Unexpected GcoreSeconds value", + gcoreSeconds, metricsBefore.getGcoreSeconds()); // create new RM to represent RM restart. Load up the state store. MockRM rm1 = new MockRM(conf, memStore); @@ -240,7 +250,8 @@ public class TestContainerResourceUsage { metricsBefore.getVcoreSeconds(), metricsAfter.getVcoreSeconds()); Assert.assertEquals("Memory seconds were not the same after RM Restart", metricsBefore.getMemorySeconds(), metricsAfter.getMemorySeconds()); - + Assert.assertEquals("Gcore seconds were not the same after RM Restart", + metricsBefore.getGcoreSeconds(), metricsAfter.getGcoreSeconds()); rm0.stop(); rm0.close(); rm1.stop(); @@ -312,6 +323,7 @@ public class TestContainerResourceUsage { long memorySeconds = 0; long vcoreSeconds = 0; + long gcoreSeconds = 0; // Calculate container usage metrics for first attempt. 
if (keepRunningContainers) { @@ -321,6 +333,7 @@ public class TestContainerResourceUsage { AggregateAppResourceUsage ru = calculateContainerResourceMetrics(c); memorySeconds += ru.getMemorySeconds(); vcoreSeconds += ru.getVcoreSeconds(); + gcoreSeconds += ru.getGcoreSeconds(); } else { // The remaining container should be RUNNING. Assert.assertTrue("After first attempt failed, remaining container " @@ -335,6 +348,7 @@ public class TestContainerResourceUsage { AggregateAppResourceUsage ru = calculateContainerResourceMetrics(c); memorySeconds += ru.getMemorySeconds(); vcoreSeconds += ru.getVcoreSeconds(); + gcoreSeconds += ru.getGcoreSeconds(); } } @@ -386,6 +400,7 @@ public class TestContainerResourceUsage { AggregateAppResourceUsage ru = calculateContainerResourceMetrics(c); memorySeconds += ru.getMemorySeconds(); vcoreSeconds += ru.getVcoreSeconds(); + gcoreSeconds += ru.getGcoreSeconds(); } RMAppMetrics rmAppMetrics = app.getRMAppMetrics(); @@ -394,7 +409,8 @@ public class TestContainerResourceUsage { memorySeconds, rmAppMetrics.getMemorySeconds()); Assert.assertEquals("Unexpected VcoreSeconds value", vcoreSeconds, rmAppMetrics.getVcoreSeconds()); - + Assert.assertEquals("Unexpected GcoreSeconds value", + gcoreSeconds, rmAppMetrics.getGcoreSeconds()); rm.stop(); return; } @@ -408,6 +424,8 @@ public class TestContainerResourceUsage { * usedMillis / DateUtils.MILLIS_PER_SECOND; long vcoreSeconds = resource.getVirtualCores() * usedMillis / DateUtils.MILLIS_PER_SECOND; - return new AggregateAppResourceUsage(memorySeconds, vcoreSeconds); + long gcoreSeconds = resource.getGpuCores() + * usedMillis / DateUtils.MILLIS_PER_SECOND; + return new AggregateAppResourceUsage(memorySeconds, vcoreSeconds, gcoreSeconds); } } \ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestFifoScheduler.java 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestFifoScheduler.java index b74faeb..502cb5d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestFifoScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestFifoScheduler.java @@ -237,7 +235,7 @@ public class TestFifoScheduler { scheduler.reinitialize(conf, rm.getRMContext()); RMNode node = MockNodes.newNodeInfo(1, - Resources.createResource(1024, 4), 1, "127.0.0.1"); + Resources.createResource(1024, 4, 4), 1, "127.0.0.1"); scheduler.handle(new NodeAddedSchedulerEvent(node)); ApplicationId appId = ApplicationId.newInstance(0, 1); @@ -575,7 +573,7 @@ public class TestFifoScheduler { Map nodeResourceMap = new HashMap(); nodeResourceMap.put(nm1.getNodeId(), - ResourceOption.newInstance(Resource.newInstance(2 * GB, 1), -1)); + ResourceOption.newInstance(Resource.newInstance(2 * GB, 1, 1), -1)); UpdateNodeResourceRequest request = UpdateNodeResourceRequest.newInstance(nodeResourceMap); AdminService as = rm.adminService; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMNodeTransitions.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMNodeTransitions.java index c6da3fd..e34c7b4 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMNodeTransitions.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMNodeTransitions.java @@ -463,7 +458,7 @@ public class TestRMNodeTransitions { private RMNodeImpl getRunningNode(String nmVersion) { NodeId nodeId = BuilderUtils.newNodeId("localhost", 0); - Resource capability = Resource.newInstance(4096, 4); + Resource capability = Resource.newInstance(4096, 4, 4); RMNodeImpl node = new RMNodeImpl(nodeId, rmContext,null, 0, 0, null, capability, nmVersion); node.handle(new RMNodeStartedEvent(node.getNodeID(), null, null)); @@ -497,7 +492,7 @@ public class TestRMNodeTransitions { private RMNodeImpl getRebootedNode() { NodeId nodeId = BuilderUtils.newNodeId("localhost", 0); - Resource capability = Resource.newInstance(4096, 4); + Resource capability = Resource.newInstance(4096, 4, 4); RMNodeImpl node = new RMNodeImpl(nodeId, rmContext,null, 0, 0, null, capability, null); node.handle(new RMNodeStartedEvent(node.getNodeID(), null, null)); @@ -562,11 +557,12 @@ public class TestRMNodeTransitions { assertEquals("Memory resource is not match.", oldCapacity.getMemory(), 4096); assertEquals("CPU resource is not match.", oldCapacity.getVirtualCores(), 4); node.handle(new RMNodeResourceUpdateEvent(node.getNodeID(), - ResourceOption.newInstance(Resource.newInstance(2048, 2), + ResourceOption.newInstance(Resource.newInstance(2048, 2, 2), RMNode.OVER_COMMIT_TIMEOUT_MILLIS_DEFAULT))); Resource newCapacity = node.getTotalCapability(); assertEquals("Memory resource is not match.", newCapacity.getMemory(), 2048); assertEquals("CPU resource is not match.", newCapacity.getVirtualCores(), 2); + assertEquals("GPU resource is not match.", newCapacity.getGpuCores(), 2); Assert.assertEquals(NodeState.RUNNING, node.getState()); Assert.assertNotNull(nodesListManagerEvent); @@ -576,16 +572,17 @@ public class TestRMNodeTransitions { @Test public void testResourceUpdateOnNewNode() { - RMNodeImpl node = 
getNewNode(Resource.newInstance(4096, 4)); + RMNodeImpl node = getNewNode(Resource.newInstance(4096, 4, 4)); Resource oldCapacity = node.getTotalCapability(); assertEquals("Memory resource is not match.", oldCapacity.getMemory(), 4096); assertEquals("CPU resource is not match.", oldCapacity.getVirtualCores(), 4); node.handle(new RMNodeResourceUpdateEvent(node.getNodeID(), - ResourceOption.newInstance(Resource.newInstance(2048, 2), + ResourceOption.newInstance(Resource.newInstance(2048, 2, 2), RMNode.OVER_COMMIT_TIMEOUT_MILLIS_DEFAULT))); Resource newCapacity = node.getTotalCapability(); assertEquals("Memory resource is not match.", newCapacity.getMemory(), 2048); assertEquals("CPU resource is not match.", newCapacity.getVirtualCores(), 2); + assertEquals("GPU resource is not match.", newCapacity.getGpuCores(), 2); Assert.assertEquals(NodeState.NEW, node.getState()); } @@ -597,11 +594,12 @@ public class TestRMNodeTransitions { assertEquals("Memory resource is not match.", oldCapacity.getMemory(), 4096); assertEquals("CPU resource is not match.", oldCapacity.getVirtualCores(), 4); node.handle(new RMNodeResourceUpdateEvent(node.getNodeID(), - ResourceOption.newInstance(Resource.newInstance(2048, 2), + ResourceOption.newInstance(Resource.newInstance(2048, 2, 2), RMNode.OVER_COMMIT_TIMEOUT_MILLIS_DEFAULT))); Resource newCapacity = node.getTotalCapability(); assertEquals("Memory resource is not match.", newCapacity.getMemory(), 2048); assertEquals("CPU resource is not match.", newCapacity.getVirtualCores(), 2); + assertEquals("GPU resource is not match.", newCapacity.getGpuCores(), 2); Assert.assertEquals(NodeState.REBOOTED, node.getState()); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceTrackerService.java 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceTrackerService.java index a904dc0..737f4f5 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceTrackerService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceTrackerService.java @@ -299,7 +297,7 @@ public class TestResourceTrackerService { RegisterNodeManagerRequest req = Records.newRecord( RegisterNodeManagerRequest.class); NodeId nodeId = NodeId.newInstance("host2", 1234); - Resource capability = BuilderUtils.newResource(1024, 1); + Resource capability = BuilderUtils.newResource(1024, 1, 1); req.setResource(capability); req.setNodeId(nodeId); req.setHttpPort(1234); @@ -324,7 +322,7 @@ public class TestResourceTrackerService { RegisterNodeManagerRequest req = Records.newRecord( RegisterNodeManagerRequest.class); NodeId nodeId = NodeId.newInstance("host2", 1234); - Resource capability = BuilderUtils.newResource(1024, 1); + Resource capability = BuilderUtils.newResource(1024, 1, 1); req.setResource(capability); req.setNodeId(nodeId); req.setHttpPort(1234); @@ -383,6 +381,7 @@ public class TestResourceTrackerService { Configuration conf = new Configuration(); conf.set(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, "2048"); conf.set(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES, "4"); + conf.set(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_GCORES, "4"); rm = new MockRM(conf); rm.start(); @@ -393,7 +392,7 @@ public class TestResourceTrackerService { NodeId nodeId = BuilderUtils.newNodeId("host", 1234); req.setNodeId(nodeId); - Resource capability = BuilderUtils.newResource(1024, 1); + Resource capability = BuilderUtils.newResource(1024, 1, 1); 
req.setResource(capability); RegisterNodeManagerResponse response1 = resourceTrackerService.registerNodeManager(req); @@ -401,6 +400,7 @@ public class TestResourceTrackerService { capability.setMemory(2048); capability.setVirtualCores(1); + capability.setGpuCores(1); req.setResource(capability); RegisterNodeManagerResponse response2 = resourceTrackerService.registerNodeManager(req); @@ -408,6 +408,7 @@ public class TestResourceTrackerService { capability.setMemory(1024); capability.setVirtualCores(4); + capability.setGpuCores(4); req.setResource(capability); RegisterNodeManagerResponse response3 = resourceTrackerService.registerNodeManager(req); @@ -415,6 +416,7 @@ public class TestResourceTrackerService { capability.setMemory(2048); capability.setVirtualCores(4); + capability.setGpuCores(4); req.setResource(capability); RegisterNodeManagerResponse response4 = resourceTrackerService.registerNodeManager(req); @@ -513,7 +515,7 @@ public class TestResourceTrackerService { NMContainerStatus.newInstance( ContainerId.newContainerId( ApplicationAttemptId.newInstance(app.getApplicationId(), 2), 1), - ContainerState.COMPLETE, Resource.newInstance(1024, 1), + ContainerState.COMPLETE, Resource.newInstance(1024, 1, 1), "Dummy Completed", 0, Priority.newInstance(10), 1234); rm.getResourceTrackerService().handleNMContainerStatus(report, null); verify(handler, never()).handle((Event) any()); @@ -524,7 +526,7 @@ public class TestResourceTrackerService { currentAttempt.setMasterContainer(null); report = NMContainerStatus.newInstance( ContainerId.newContainerId(currentAttempt.getAppAttemptId(), 0), - ContainerState.COMPLETE, Resource.newInstance(1024, 1), + ContainerState.COMPLETE, Resource.newInstance(1024, 1, 1), "Dummy Completed", 0, Priority.newInstance(10), 1234); rm.getResourceTrackerService().handleNMContainerStatus(report, null); verify(handler, never()).handle((Event)any()); @@ -536,7 +538,7 @@ public class TestResourceTrackerService { report = 
NMContainerStatus.newInstance( ContainerId.newContainerId( ApplicationAttemptId.newInstance(app.getApplicationId(), 2), 1), - ContainerState.COMPLETE, Resource.newInstance(1024, 1), + ContainerState.COMPLETE, Resource.newInstance(1024, 1, 1), "Dummy Completed", 0, Priority.newInstance(10), 1234); try { rm.getResourceTrackerService().handleNMContainerStatus(report, null); @@ -551,7 +553,7 @@ public class TestResourceTrackerService { currentAttempt.setMasterContainer(null); report = NMContainerStatus.newInstance( ContainerId.newContainerId(currentAttempt.getAppAttemptId(), 0), - ContainerState.COMPLETE, Resource.newInstance(1024, 1), + ContainerState.COMPLETE, Resource.newInstance(1024, 1, 1), "Dummy Completed", 0, Priority.newInstance(10), 1234); try { rm.getResourceTrackerService().handleNMContainerStatus(report, null); @@ -628,7 +630,7 @@ public class TestResourceTrackerService { // reconnect of node with changed capability and running applications List runningApps = new ArrayList(); runningApps.add(ApplicationId.newInstance(1, 0)); - nm1 = rm.registerNode("host2:5678", 15360, 2, runningApps); + nm1 = rm.registerNode("host2:5678", 15360, 2, 2, runningApps); dispatcher.await(); response = nm1.nodeHeartbeat(true); dispatcher.await(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/ahs/TestRMApplicationHistoryWriter.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/ahs/TestRMApplicationHistoryWriter.java index f827bf4..9a170be 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/ahs/TestRMApplicationHistoryWriter.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/ahs/TestRMApplicationHistoryWriter.java @@ -168,7 +167,7 @@ public class TestRMApplicationHistoryWriter { when(container.getAllocatedNode()).thenReturn( NodeId.newInstance("test host", -100)); when(container.getAllocatedResource()).thenReturn( - Resource.newInstance(-1, -1)); + Resource.newInstance(-1, -1, -1)); when(container.getAllocatedPriority()).thenReturn(Priority.UNDEFINED); when(container.getCreationTime()).thenReturn(0L); when(container.getFinishTime()).thenReturn(1L); @@ -299,7 +298,7 @@ public class TestRMApplicationHistoryWriter { Assert.assertNotNull(containerHD); Assert.assertEquals(NodeId.newInstance("test host", -100), containerHD.getAssignedNode()); - Assert.assertEquals(Resource.newInstance(-1, -1), + Assert.assertEquals(Resource.newInstance(-1, -1, -1), containerHD.getAllocatedResource()); Assert.assertEquals(Priority.UNDEFINED, containerHD.getPriority()); Assert.assertEquals(0L, container.getCreationTime()); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/MockAsm.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/MockAsm.java index f8d92aa..e1e85c9 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/MockAsm.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/MockAsm.java @@ -178,7 +177,7 @@ public abstract class MockAsm extends MockApps { @Override public RMAppMetrics getRMAppMetrics() { - return new 
RMAppMetrics(Resource.newInstance(0, 0), 0, 0, 0, 0); + return new RMAppMetrics(Resource.newInstance(0, 0, 0), 0, 0, 0, 0, 0); } @Override @@ -283,7 +282,7 @@ public abstract class MockAsm extends MockApps { String clientUserName, boolean allowAccess) { ApplicationResourceUsageReport usageReport = ApplicationResourceUsageReport.newInstance(0, 0, null, null, null, - 0, 0); + 0, 0, 0); ApplicationReport report = ApplicationReport.newInstance( getApplicationId(), appAttemptId, getUser(), getQueue(), getName(), null, 0, null, null, getDiagnostics().toString(), diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/metrics/TestSystemMetricsPublisher.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/metrics/TestSystemMetricsPublisher.java index 7ed3835..bc07386 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/metrics/TestSystemMetricsPublisher.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/metrics/TestSystemMetricsPublisher.java @@ -160,6 +160,10 @@ public class TestSystemMetricsPublisher { app.getRMAppMetrics().getVcoreSeconds(), Long.parseLong(entity.getOtherInfo() .get(ApplicationMetricsConstants.APP_CPU_METRICS).toString())); + Assert.assertEquals( + app.getRMAppMetrics().getGcoreSeconds(), + Long.parseLong(entity.getOtherInfo() + .get(ApplicationMetricsConstants.APP_GPU_METRICS).toString())); } boolean hasCreatedEvent = false; boolean hasFinishedEvent = false; @@ -303,6 +307,10 @@ public class TestSystemMetricsPublisher { entity.getOtherInfo().get( ContainerMetricsConstants.ALLOCATED_VCORE_ENTITY_INFO)); Assert.assertEquals( + 
container.getAllocatedResource().getGpuCores(), + entity.getOtherInfo().get( + ContainerMetricsConstants.ALLOCATED_GCORE_ENTITY_INFO)); + Assert.assertEquals( container.getAllocatedPriority().getPriority(), entity.getOtherInfo().get( ContainerMetricsConstants.ALLOCATED_PRIORITY_ENTITY_INFO)); @@ -351,7 +359,7 @@ public class TestSystemMetricsPublisher { when(app.getFinalApplicationStatus()).thenReturn( FinalApplicationStatus.UNDEFINED); when(app.getRMAppMetrics()).thenReturn( - new RMAppMetrics(null, 0, 0, Integer.MAX_VALUE, Long.MAX_VALUE)); + new RMAppMetrics(null, 0, 0, Integer.MAX_VALUE, Long.MAX_VALUE, Long.MAX_VALUE)); return app; } @@ -378,7 +386,7 @@ public class TestSystemMetricsPublisher { when(container.getAllocatedNode()).thenReturn( NodeId.newInstance("test host", -100)); when(container.getAllocatedResource()).thenReturn( - Resource.newInstance(-1, -1)); + Resource.newInstance(-1, -1, -1)); when(container.getAllocatedPriority()).thenReturn(Priority.UNDEFINED); when(container.getCreationTime()).thenReturn(Integer.MAX_VALUE + 1L); when(container.getFinishTime()).thenReturn(Integer.MAX_VALUE + 2L); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TestProportionalCapacityPreemptionPolicy.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TestProportionalCapacityPreemptionPolicy.java index 8f5237e..14abe17 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TestProportionalCapacityPreemptionPolicy.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TestProportionalCapacityPreemptionPolicy.java @@ -818,7 +804,7 @@ public class TestProportionalCapacityPreemptionPolicy { ProportionalCapacityPreemptionPolicy policy = buildPolicy(qData); // Subtracting Label X resources from cluster resources when(lm.getResourceByLabel(anyString(), any(Resource.class))).thenReturn( - Resources.clone(Resource.newInstance(80, 0))); + Resources.clone(Resource.newInstance(80, 0, 0))); clusterResources.setMemory(100); policy.editSchedule(); @@ -938,7 +924,7 @@ public class TestProportionalCapacityPreemptionPolicy { when(mCS.getRootQueue()).thenReturn(mRoot); clusterResources = - Resource.newInstance(leafAbsCapacities(qData[0], qData[7]), 0); + Resource.newInstance(leafAbsCapacities(qData[0], qData[7]), 0, 0); when(mCS.getClusterResource()).thenReturn(clusterResources); return policy; } @@ -1030,7 +1016,7 @@ public class TestProportionalCapacityPreemptionPolicy { List appAttemptIdList = new ArrayList(); when(lq.getTotalResourcePending()).thenReturn( - Resource.newInstance(pending[i], 0)); + Resource.newInstance(pending[i], 0, 0)); // consider moving where CapacityScheduler::comparator accessible NavigableSet qApps = new TreeSet( new Comparator() { @@ -1073,7 +1059,7 @@ public class TestProportionalCapacityPreemptionPolicy { when(app.getApplicationAttemptId()).thenReturn(appAttId); int cAlloc = 0; - Resource unit = Resource.newInstance(gran, 0); + Resource unit = Resource.newInstance(gran, 0, 0); List cReserved = new ArrayList(); for (int i = 0; i < reserved; i += gran) { cReserved.add(mockContainer(appAttId, cAlloc, unit, priority.CONTAINER diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/nodelabels/TestRMNodeLabelsManager.java 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/nodelabels/TestRMNodeLabelsManager.java index 8a37c24..dbb29a3 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/nodelabels/TestRMNodeLabelsManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/nodelabels/TestRMNodeLabelsManager.java @@ -41,9 +41,9 @@ import com.google.common.collect.ImmutableSet; public class TestRMNodeLabelsManager extends NodeLabelTestBase { - private final Resource EMPTY_RESOURCE = Resource.newInstance(0, 0); - private final Resource SMALL_RESOURCE = Resource.newInstance(100, 0); - private final Resource LARGE_NODE = Resource.newInstance(1000, 0); + private final Resource EMPTY_RESOURCE = Resource.newInstance(0, 0, 0); + private final Resource SMALL_RESOURCE = Resource.newInstance(100, 0, 0); + private final Resource LARGE_NODE = Resource.newInstance(1000, 0, 0); NullRMNodeLabelsManager mgr = null; @@ -207,7 +206,7 @@ public class TestRMNodeLabelsManager extends NodeLabelTestBase { @Test(timeout=5000) public void testGetQueueResource() throws Exception { - Resource clusterResource = Resource.newInstance(9999, 1); + Resource clusterResource = Resource.newInstance(9999, 1, 1); /* * Node->Labels: @@ -490,11 +489,11 @@ public class TestRMNodeLabelsManager extends NodeLabelTestBase { @Test(timeout = 5000) public void testPullRMNodeLabelsInfo() throws IOException { mgr.addToCluserNodeLabels(toSet("x", "y", "z")); - mgr.activateNode(NodeId.newInstance("n1", 1), Resource.newInstance(10, 0)); - mgr.activateNode(NodeId.newInstance("n2", 1), Resource.newInstance(10, 0)); - mgr.activateNode(NodeId.newInstance("n3", 1), Resource.newInstance(10, 0)); - mgr.activateNode(NodeId.newInstance("n4", 1), 
Resource.newInstance(10, 0)); - mgr.activateNode(NodeId.newInstance("n5", 1), Resource.newInstance(10, 0)); + mgr.activateNode(NodeId.newInstance("n1", 1), Resource.newInstance(10, 0, 0)); + mgr.activateNode(NodeId.newInstance("n2", 1), Resource.newInstance(10, 0, 0)); + mgr.activateNode(NodeId.newInstance("n3", 1), Resource.newInstance(10, 0, 0)); + mgr.activateNode(NodeId.newInstance("n4", 1), Resource.newInstance(10, 0, 0)); + mgr.activateNode(NodeId.newInstance("n5", 1), Resource.newInstance(10, 0, 0)); mgr.replaceLabelsOnNode(ImmutableMap.of(toNodeId("n1"), toSet("x"), toNodeId("n2"), toSet("x"), toNodeId("n3"), toSet("y"))); @@ -510,7 +509,7 @@ public class TestRMNodeLabelsManager extends NodeLabelTestBase { @Test(timeout = 5000) public void testLabelsToNodesOnNodeActiveDeactive() throws Exception { // Activate a node without assigning any labels - mgr.activateNode(NodeId.newInstance("n1", 1), Resource.newInstance(10, 0)); + mgr.activateNode(NodeId.newInstance("n1", 1), Resource.newInstance(10, 0, 0)); Assert.assertTrue(mgr.getLabelsToNodes().isEmpty()); assertLabelsToNodesEquals( mgr.getLabelsToNodes(), transposeNodeToLabels(mgr.getNodeLabels())); @@ -524,7 +523,7 @@ public class TestRMNodeLabelsManager extends NodeLabelTestBase { mgr.getLabelsToNodes(), transposeNodeToLabels(mgr.getNodeLabels())); // Activate a node for which host to label mapping exists - mgr.activateNode(NodeId.newInstance("n1", 2), Resource.newInstance(10, 0)); + mgr.activateNode(NodeId.newInstance("n1", 2), Resource.newInstance(10, 0, 0)); // p1 -> n1, n1:1, n1:2 Assert.assertEquals(3, mgr.getLabelsToNodes().get("p1").size()); assertLabelsToNodesEquals( diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStoreTestBase.java 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStoreTestBase.java index bb53f8e..725f37e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStoreTestBase.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStoreTestBase.java @@ -179,7 +172,7 @@ public class RMStateStoreTestBase extends ClientBaseWithFixes{ when(mockAttempt.getRMAppAttemptMetrics()) .thenReturn(mockRmAppAttemptMetrics); when(mockRmAppAttemptMetrics.getAggregateAppResourceUsage()) - .thenReturn(new AggregateAppResourceUsage(0, 0)); + .thenReturn(new AggregateAppResourceUsage(0, 0, 0)); dispatcher.attemptId = attemptId; store.storeNewApplicationAttempt(mockAttempt); waitNotify(dispatcher); @@ -268,7 +261,7 @@ public class RMStateStoreTestBase extends ClientBaseWithFixes{ when(mockRemovedAttempt.getRMAppAttemptMetrics()) .thenReturn(mockRmAppAttemptMetrics); when(mockRmAppAttemptMetrics.getAggregateAppResourceUsage()) - .thenReturn(new AggregateAppResourceUsage(0,0)); + .thenReturn(new AggregateAppResourceUsage(0,0,0)); attempts.put(attemptIdRemoved, mockRemovedAttempt); store.removeApplication(mockRemovedApp); @@ -344,7 +337,7 @@ public class RMStateStoreTestBase extends ClientBaseWithFixes{ oldAttemptState.getStartTime(), RMAppAttemptState.FINISHED, "myTrackingUrl", "attemptDiagnostics", FinalApplicationStatus.SUCCEEDED, 100, - oldAttemptState.getFinishTime(), 0, 0); + oldAttemptState.getFinishTime(), 0, 0, 0); store.updateApplicationAttemptState(newAttemptState); // test updating the state of an app/attempt whose initial state was not @@ -368,7 +361,7 @@ public class RMStateStoreTestBase extends ClientBaseWithFixes{ 
oldAttemptState.getStartTime(), RMAppAttemptState.FINISHED, "myTrackingUrl", "attemptDiagnostics", FinalApplicationStatus.SUCCEEDED, 111, - oldAttemptState.getFinishTime(), 0, 0); + oldAttemptState.getFinishTime(), 0, 0, 0); store.updateApplicationAttemptState(dummyAttempt); // let things settle down diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/TestZKRMStateStore.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/TestZKRMStateStore.java index 991c35f..32ff620 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/TestZKRMStateStore.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/TestZKRMStateStore.java @@ -307,7 +307,7 @@ public class TestZKRMStateStore extends RMStateStoreTestBase { when(mockAttempt.getRMAppAttemptMetrics()) .thenReturn(mockRmAppAttemptMetrics); when(mockRmAppAttemptMetrics.getAggregateAppResourceUsage()) - .thenReturn(new AggregateAppResourceUsage(0,0)); + .thenReturn(new AggregateAppResourceUsage(0,0,0)); store.storeNewApplicationAttempt(mockAttempt); assertEquals("RMStateStore should have been in fenced state", true, store.isFencedState()); @@ -319,7 +319,7 @@ public class TestZKRMStateStore extends RMStateStoreTestBase { store.getCredentialsFromAppAttempt(mockAttempt), startTime, RMAppAttemptState.FINISHED, "testUrl", "test", FinalApplicationStatus.SUCCEEDED, 100, - finishTime, 0, 0); + finishTime, 0, 0, 0); store.updateApplicationAttemptState(newAttemptState); assertEquals("RMStateStore should have been in fenced state", true, store.isFencedState()); diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/ReservationSystemTestUtil.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/ReservationSystemTestUtil.java index bfaf06b..4b7912f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/ReservationSystemTestUtil.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/ReservationSystemTestUtil.java @@ -333,7 +331,7 @@ public class ReservationSystemTestUtil { int par = (rand.nextInt(1000) + 1) * gang; long dur = rand.nextInt(2 * 3600 * 1000); // random duration within 2h ReservationRequest r = - ReservationRequest.newInstance(Resource.newInstance(1024, 1), par, + ReservationRequest.newInstance(Resource.newInstance(1024, 1, 1), par, gang, dur); ReservationRequests reqs = new ReservationRequestsPBImpl(); reqs.setReservationResources(Collections.singletonList(r)); @@ -365,7 +363,7 @@ public class ReservationSystemTestUtil { int par = 100000; // 100k tasks long dur = rand.nextInt(60 * 1000); // 1min tasks ReservationRequest r = - ReservationRequest.newInstance(Resource.newInstance(1024, 1), par, + ReservationRequest.newInstance(Resource.newInstance(1024, 1, 1), par, gang, dur); ReservationRequests reqs = new ReservationRequestsPBImpl(); reqs.setReservationResources(Collections.singletonList(r)); @@ -385,14 +383,14 @@ public class ReservationSystemTestUtil { for (int i = 0; i < alloc.length; i++) { req.put(new ReservationInterval(startTime + i * step, startTime + (i + 1) * step), ReservationRequest.newInstance( - Resource.newInstance(1024, 1), alloc[i])); + 
Resource.newInstance(1024, 1, 1), alloc[i])); } return req; } public static Resource calculateClusterResource(int numContainers) { Resource clusterResource = Resource.newInstance(numContainers * 1024, - numContainers); + numContainers, numContainers); return clusterResource; } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/TestCapacityOverTimePolicy.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/TestCapacityOverTimePolicy.java index 61561e9..1c0e6c5 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/TestCapacityOverTimePolicy.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/TestCapacityOverTimePolicy.java @@ -69,9 +67,9 @@ public class TestCapacityOverTimePolicy { instConstraint = 70; initTime = System.currentTimeMillis(); - minAlloc = Resource.newInstance(1024, 1); + minAlloc = Resource.newInstance(1024, 1, 1); res = new DefaultResourceCalculator(); - maxAlloc = Resource.newInstance(1024 * 8, 8); + maxAlloc = Resource.newInstance(1024 * 8, 8, 8); mAgent = mock(ReservationAgent.class); ReservationSystemTestUtil testUtil = new ReservationSystemTestUtil(); @@ -203,7 +201,7 @@ public class TestCapacityOverTimePolicy { long win = timeWindow / 2 + 100; int cont = (int) Math.ceil(0.5 * totCont); req.put(new ReservationInterval(initTime, initTime + win), - ReservationRequest.newInstance(Resource.newInstance(1024, 1), cont)); + ReservationRequest.newInstance(Resource.newInstance(1024, 1, 1), cont)); assertTrue(plan.toString(), plan.addReservation(new InMemoryReservationAllocation( @@ -219,7 +217,7 
@@ public class TestCapacityOverTimePolicy { long win = 86400000 / 4 + 1; int cont = (int) Math.ceil(0.5 * totCont); req.put(new ReservationInterval(initTime, initTime + win), - ReservationRequest.newInstance(Resource.newInstance(1024, 1), cont)); + ReservationRequest.newInstance(Resource.newInstance(1024, 1, 1), cont)); assertTrue(plan.toString(), plan.addReservation(new InMemoryReservationAllocation( ReservationSystemTestUtil.getNewReservationId(), null, "u1", diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/TestCapacitySchedulerPlanFollower.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/TestCapacitySchedulerPlanFollower.java index b8663f6..08adac2 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/TestCapacitySchedulerPlanFollower.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/TestCapacitySchedulerPlanFollower.java @@ -96,9 +92,9 @@ public class TestCapacitySchedulerPlanFollower extends TestSchedulerPlanFollower when(csContext.getMinimumResourceCapability()).thenReturn(minAlloc); when(csContext.getMaximumResourceCapability()).thenReturn(maxAlloc); when(csContext.getClusterResource()).thenReturn( - Resources.createResource(100 * 16 * GB, 100 * 32)); + Resources.createResource(100 * 16 * GB, 100 * 32, 100 * 32)); when(scheduler.getClusterResource()).thenReturn( - Resources.createResource(125 * GB, 125)); + Resources.createResource(125 * GB, 125, 125)); when(csContext.getResourceCalculator()).thenReturn( new DefaultResourceCalculator()); RMContainerTokenSecretManager 
containerTokenSecretManager = diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/TestGreedyReservationAgent.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/TestGreedyReservationAgent.java index b8cf6c5..a56310c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/TestGreedyReservationAgent.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/TestGreedyReservationAgent.java @@ -51,9 +51,9 @@ ReservationAgent agent; InMemoryPlan plan; - Resource minAlloc = Resource.newInstance(1024, 1); + Resource minAlloc = Resource.newInstance(1024, 1, 1); ResourceCalculator res = new DefaultResourceCalculator(); - Resource maxAlloc = Resource.newInstance(1024 * 8, 8); + Resource maxAlloc = Resource.newInstance(1024 * 8, 8, 8); Random rand = new Random(); long step; @@ -66,7 +64,7 @@ public class TestGreedyReservationAgent { // setting completely loose quotas long timeWindow = 1000000L; - Resource clusterCapacity = Resource.newInstance(100 * 1024, 100); + Resource clusterCapacity = Resource.newInstance(100 * 1024, 100, 100); step = 1000L; ReservationSystemTestUtil testUtil = new ReservationSystemTestUtil(); String reservationQ = testUtil.getFullReservationQueueName(); @@ -98,7 +96,7 @@ public class TestGreedyReservationAgent { rr.setArrival(5 * step); rr.setDeadline(20 * step); ReservationRequest r = ReservationRequest.newInstance( - Resource.newInstance(2048, 2), 10, 5, 10 * step); + Resource.newInstance(2048, 2, 2), 10, 5, 10 * step); ReservationRequests reqs = new ReservationRequestsPBImpl(); 
reqs.setReservationResources(Collections.singletonList(r)); rr.setReservationRequests(reqs); @@ -122,7 +120,7 @@ public class TestGreedyReservationAgent { assertTrue( "Agent-based allocation unexpected", Resources.equals(cs.getResourcesAtTime(i), - Resource.newInstance(2048 * 10, 2 * 10))); + Resource.newInstance(2048 * 10, 2 * 10, 2 * 10))); } } @@ -148,9 +146,9 @@ public class TestGreedyReservationAgent { ReservationRequests reqs = new ReservationRequestsPBImpl(); reqs.setInterpreter(ReservationRequestInterpreter.R_ORDER); ReservationRequest r = ReservationRequest.newInstance( - Resource.newInstance(2048, 2), 10, 1, 10 * step); + Resource.newInstance(2048, 2, 2), 10, 1, 10 * step); ReservationRequest r2 = ReservationRequest.newInstance( - Resource.newInstance(1024, 1), 10, 10, 20 * step); + Resource.newInstance(1024, 1, 1), 10, 10, 20 * step); List list = new ArrayList(); list.add(r); list.add(r2); @@ -204,9 +202,9 @@ public class TestGreedyReservationAgent { ReservationRequests reqs = new ReservationRequestsPBImpl(); reqs.setInterpreter(ReservationRequestInterpreter.R_ORDER_NO_GAP); ReservationRequest r = ReservationRequest.newInstance( - Resource.newInstance(2048, 2), 10, 1, 10); + Resource.newInstance(2048, 2, 2), 10, 1, 10); ReservationRequest r2 = ReservationRequest.newInstance( - Resource.newInstance(1024, 1), 10, 10, 20); + Resource.newInstance(1024, 1, 1), 10, 10, 20); List list = new ArrayList(); list.add(r); list.add(r2); @@ -249,9 +247,9 @@ public class TestGreedyReservationAgent { ReservationRequests reqs = new ReservationRequestsPBImpl(); reqs.setInterpreter(ReservationRequestInterpreter.R_ORDER_NO_GAP); ReservationRequest r = ReservationRequest.newInstance( - Resource.newInstance(2048, 2), 10, 1, 10 * step); + Resource.newInstance(2048, 2, 2), 10, 1, 10 * step); ReservationRequest r2 = ReservationRequest.newInstance( - Resource.newInstance(1024, 1), 10, 10, 20 * step); + Resource.newInstance(1024, 1, 1), 10, 10, 20 * step); List list = new 
ArrayList(); list.add(r); list.add(r2); @@ -296,7 +294,7 @@ public class TestGreedyReservationAgent { ReservationRequests reqs = new ReservationRequestsPBImpl(); reqs.setInterpreter(ReservationRequestInterpreter.R_ALL); ReservationRequest r = ReservationRequest.newInstance( - Resource.newInstance(1024, 1), 200, 10, 10 * step); + Resource.newInstance(1024, 1, 1), 200, 10, 10 * step); List list = new ArrayList(); list.add(r); @@ -337,11 +335,11 @@ public class TestGreedyReservationAgent { ReservationRequests reqs = new ReservationRequestsPBImpl(); reqs.setInterpreter(ReservationRequestInterpreter.R_ANY); ReservationRequest r = ReservationRequest.newInstance( - Resource.newInstance(1024, 1), 5, 5, 10 * step); + Resource.newInstance(1024, 1, 1), 5, 5, 10 * step); ReservationRequest r2 = ReservationRequest.newInstance( - Resource.newInstance(2048, 2), 10, 5, 10 * step); + Resource.newInstance(2048, 2, 2), 10, 5, 10 * step); ReservationRequest r3 = ReservationRequest.newInstance( - Resource.newInstance(1024, 1), 110, 110, 10 * step); + Resource.newInstance(1024, 1, 1), 110, 110, 10 * step); List list = new ArrayList(); list.add(r); @@ -383,10 +381,10 @@ public class TestGreedyReservationAgent { // longer than arrival-deadline ReservationRequest r1 = ReservationRequest.newInstance( - Resource.newInstance(1024, 1), 35, 5, 30); + Resource.newInstance(1024, 1, 1), 35, 5, 30); // above max cluster size ReservationRequest r2 = ReservationRequest.newInstance( - Resource.newInstance(1024, 1), 110, 110, 10); + Resource.newInstance(1024, 1, 1), 110, 110, 10); List list = new ArrayList(); list.add(r1); @@ -426,9 +424,9 @@ public class TestGreedyReservationAgent { ReservationRequests reqs = new ReservationRequestsPBImpl(); reqs.setInterpreter(ReservationRequestInterpreter.R_ALL); ReservationRequest r = ReservationRequest.newInstance( - Resource.newInstance(1024, 1), 5, 5, 10 * step); + Resource.newInstance(1024, 1, 1), 5, 5, 10 * step); ReservationRequest r2 = 
ReservationRequest.newInstance( - Resource.newInstance(2048, 2), 10, 10, 20 * step); + Resource.newInstance(2048, 2, 2), 10, 10, 20 * step); List list = new ArrayList(); list.add(r); @@ -469,9 +467,9 @@ public class TestGreedyReservationAgent { ReservationRequests reqs = new ReservationRequestsPBImpl(); reqs.setInterpreter(ReservationRequestInterpreter.R_ALL); ReservationRequest r = ReservationRequest.newInstance( - Resource.newInstance(1024, 1), 55, 5, 10); + Resource.newInstance(1024, 1, 1), 55, 5, 10); ReservationRequest r2 = ReservationRequest.newInstance( - Resource.newInstance(2048, 2), 55, 5, 20); + Resource.newInstance(2048, 2, 2), 55, 5, 20); List list = new ArrayList(); list.add(r); @@ -535,7 +533,7 @@ public class TestGreedyReservationAgent { for (long i = start; i < end; i++) { res = res && Resources.equals(cs.getResourcesAtTime(i), - Resource.newInstance(mem * containers, cores * containers)); + Resource.newInstance(mem * containers, cores * containers, cores * containers)); } return res; } @@ -543,7 +541,7 @@ public class TestGreedyReservationAgent { public void testStress(int numJobs) throws PlanningException, IOException { long timeWindow = 1000000L; - Resource clusterCapacity = Resource.newInstance(500 * 100 * 1024, 500 * 32); + Resource clusterCapacity = Resource.newInstance(500 * 100 * 1024, 500 * 32, 500 * 32); step = 1000L; ReservationSystemTestUtil testUtil = new ReservationSystemTestUtil(); CapacityScheduler scheduler = testUtil.mockCapacityScheduler(500 * 100); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/TestInMemoryPlan.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/TestInMemoryPlan.java index 91c1962..9f7de58 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/TestInMemoryPlan.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/TestInMemoryPlan.java @@ -61,9 +61,9 @@ public class TestInMemoryPlan { @Before public void setUp() throws PlanningException { resCalc = new DefaultResourceCalculator(); - minAlloc = Resource.newInstance(1024, 1); - maxAlloc = Resource.newInstance(64 * 1024, 20); - totalCapacity = Resource.newInstance(100 * 1024, 100); + minAlloc = Resource.newInstance(1024, 1, 1); + maxAlloc = Resource.newInstance(64 * 1024, 20, 20); + totalCapacity = Resource.newInstance(100 * 1024, 100, 100); clock = mock(Clock.class); queueMetrics = mock(QueueMetrics.class); @@ -111,10 +111,10 @@ public class TestInMemoryPlan { } doAssertions(plan, rAllocation); for (int i = 0; i < alloc.length; i++) { - Assert.assertEquals(Resource.newInstance(1024 * (alloc[i]), (alloc[i])), - plan.getTotalCommittedResources(start + i)); - Assert.assertEquals(Resource.newInstance(1024 * (alloc[i]), (alloc[i])), - plan.getConsumptionForUser(user, start + i)); + Assert.assertEquals(Resource.newInstance(1024 * (alloc[i]), (alloc[i]), (alloc[i])), + plan.getTotalCommittedResources(start + i)); + Assert.assertEquals(Resource.newInstance(1024 * (alloc[i]), (alloc[i]), (alloc[i])), + plan.getConsumptionForUser(user, start + i)); } } @@ -169,10 +169,10 @@ public class TestInMemoryPlan { } doAssertions(plan, rAllocation); for (int i = 0; i < alloc.length; i++) { - Assert.assertEquals(Resource.newInstance(1024 * (alloc[i]), (alloc[i])), - plan.getTotalCommittedResources(start + i)); - Assert.assertEquals(Resource.newInstance(1024 * (alloc[i]), (alloc[i])), - plan.getConsumptionForUser(user, start + i)); + Assert.assertEquals(Resource.newInstance(1024 * (alloc[i]), (alloc[i]), 
(alloc[i])), + plan.getTotalCommittedResources(start + i)); + Assert.assertEquals(Resource.newInstance(1024 * (alloc[i]), (alloc[i]), (alloc[i])), + plan.getConsumptionForUser(user, start + i)); } // Try to add it again @@ -213,10 +213,10 @@ public class TestInMemoryPlan { } doAssertions(plan, rAllocation); for (int i = 0; i < alloc.length; i++) { - Assert.assertEquals(Resource.newInstance(1024 * (alloc[i]), (alloc[i])), - plan.getTotalCommittedResources(start + i)); - Assert.assertEquals(Resource.newInstance(1024 * (alloc[i]), (alloc[i])), - plan.getConsumptionForUser(user, start + i)); + Assert.assertEquals(Resource.newInstance(1024 * (alloc[i]), (alloc[i]), (alloc[i])), + plan.getTotalCommittedResources(start + i)); + Assert.assertEquals(Resource.newInstance(1024 * (alloc[i]), (alloc[i]), (alloc[i])), + plan.getConsumptionForUser(user, start + i)); } // Now update it @@ -237,11 +237,11 @@ public class TestInMemoryPlan { doAssertions(plan, rAllocation); for (int i = 0; i < updatedAlloc.length; i++) { Assert.assertEquals( - Resource.newInstance(1024 * (updatedAlloc[i] + i), updatedAlloc[i] - + i), plan.getTotalCommittedResources(start + i)); + Resource.newInstance(1024 * (updatedAlloc[i] + i), updatedAlloc[i] + + i, updatedAlloc[i] + i), plan.getTotalCommittedResources(start + i)); Assert.assertEquals( - Resource.newInstance(1024 * (updatedAlloc[i] + i), updatedAlloc[i] - + i), plan.getConsumptionForUser(user, start + i)); + Resource.newInstance(1024 * (updatedAlloc[i] + i), updatedAlloc[i] + + i, updatedAlloc[i] + i), plan.getConsumptionForUser(user, start + i)); } } @@ -302,10 +302,10 @@ public class TestInMemoryPlan { doAssertions(plan, rAllocation); for (int i = 0; i < alloc.length; i++) { Assert.assertEquals( - Resource.newInstance(1024 * (alloc[i] + i), (alloc[i] + i)), - plan.getTotalCommittedResources(start + i)); + Resource.newInstance(1024 * (alloc[i] + i), (alloc[i] + i), (alloc[i] + i)), + plan.getTotalCommittedResources(start + i)); 
Assert.assertEquals( - Resource.newInstance(1024 * (alloc[i] + i), (alloc[i] + i)), + Resource.newInstance(1024 * (alloc[i] + i), (alloc[i] + i), (alloc[i] + i)), plan.getConsumptionForUser(user, start + i)); } @@ -317,9 +317,9 @@ public class TestInMemoryPlan { } Assert.assertNull(plan.getReservationById(reservationID)); for (int i = 0; i < alloc.length; i++) { - Assert.assertEquals(Resource.newInstance(0, 0), + Assert.assertEquals(Resource.newInstance(0, 0, 0), plan.getTotalCommittedResources(start + i)); - Assert.assertEquals(Resource.newInstance(0, 0), + Assert.assertEquals(Resource.newInstance(0, 0, 0), plan.getConsumptionForUser(user, start + i)); } } @@ -372,10 +372,10 @@ public class TestInMemoryPlan { doAssertions(plan, rAllocation); for (int i = 0; i < alloc1.length; i++) { Assert.assertEquals( - Resource.newInstance(1024 * (alloc1[i]), (alloc1[i])), + Resource.newInstance(1024 * (alloc1[i]), (alloc1[i]), (alloc1[i])), plan.getTotalCommittedResources(start + i)); Assert.assertEquals( - Resource.newInstance(1024 * (alloc1[i]), (alloc1[i])), + Resource.newInstance(1024 * (alloc1[i]), (alloc1[i]), (alloc1[i])), plan.getConsumptionForUser(user, start + i)); } @@ -402,10 +402,10 @@ public class TestInMemoryPlan { for (int i = 0; i < alloc2.length; i++) { Assert.assertEquals( Resource.newInstance(1024 * (alloc1[i] + alloc2[i] + i), alloc1[i] - + alloc2[i] + i), plan.getTotalCommittedResources(start + i)); + + alloc2[i] + i, alloc1[i] + alloc2[i] + i), plan.getTotalCommittedResources(start + i)); Assert.assertEquals( Resource.newInstance(1024 * (alloc1[i] + alloc2[i] + i), alloc1[i] - + alloc2[i] + i), plan.getConsumptionForUser(user, start + i)); + + alloc2[i] + i, alloc1[i] + alloc2[i] + i), plan.getConsumptionForUser(user, start + i)); } // Now archive completed reservations @@ -422,10 +422,10 @@ public class TestInMemoryPlan { Assert.assertNull(plan.getReservationById(reservationID2)); for (int i = 0; i < alloc1.length; i++) { Assert.assertEquals( - 
Resource.newInstance(1024 * (alloc1[i]), (alloc1[i])), + Resource.newInstance(1024 * (alloc1[i]), (alloc1[i]), (alloc1[i])), plan.getTotalCommittedResources(start + i)); Assert.assertEquals( - Resource.newInstance(1024 * (alloc1[i]), (alloc1[i])), + Resource.newInstance(1024 * (alloc1[i]), (alloc1[i]), (alloc1[i])), plan.getConsumptionForUser(user, start + i)); } when(clock.getTime()).thenReturn(107L); @@ -438,9 +438,9 @@ public class TestInMemoryPlan { } Assert.assertNull(plan.getReservationById(reservationID1)); for (int i = 0; i < alloc1.length; i++) { - Assert.assertEquals(Resource.newInstance(0, 0), + Assert.assertEquals(Resource.newInstance(0, 0, 0), plan.getTotalCommittedResources(start + i)); - Assert.assertEquals(Resource.newInstance(0, 0), + Assert.assertEquals(Resource.newInstance(0, 0, 0), plan.getConsumptionForUser(user, start + i)); } } @@ -484,7 +484,7 @@ public class TestInMemoryPlan { numContainers = alloc[i]; } ReservationRequest rr = - ReservationRequest.newInstance(Resource.newInstance(1024, 1), + ReservationRequest.newInstance(Resource.newInstance(1024, 1, 1), (numContainers)); req.put(new ReservationInterval(startTime + i, startTime + i + 1), rr); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/TestInMemoryReservationAllocation.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/TestInMemoryReservationAllocation.java index 76f39dc..bdec1a8 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/TestInMemoryReservationAllocation.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/TestInMemoryReservationAllocation.java @@ -49,7 +49,7 @@ public class TestInMemoryReservationAllocation { @Before public void setUp() { resCalc = new DefaultResourceCalculator(); - minAlloc = Resource.newInstance(1, 1); + minAlloc = Resource.newInstance(1, 1, 1); } @After @@ -77,7 +77,7 @@ public class TestInMemoryReservationAllocation { doAssertions(rAllocation, reservationID, rDef, allocations, start, alloc); Assert.assertFalse(rAllocation.containsGangs()); for (int i = 0; i < alloc.length; i++) { - Assert.assertEquals(Resource.newInstance(1024 * (alloc[i]), (alloc[i])), + Assert.assertEquals(Resource.newInstance(1024 * (alloc[i]), (alloc[i]), (alloc[i])), rAllocation.getResourcesAtTime(start + i)); } } @@ -100,7 +100,7 @@ public class TestInMemoryReservationAllocation { Assert.assertFalse(rAllocation.containsGangs()); for (int i = 0; i < alloc.length; i++) { Assert.assertEquals( - Resource.newInstance(1024 * (alloc[i] + i), (alloc[i] + i)), + Resource.newInstance(1024 * (alloc[i] + i), (alloc[i] + i), (alloc[i] + i)), rAllocation.getResourcesAtTime(start + i)); } } @@ -123,7 +123,7 @@ public class TestInMemoryReservationAllocation { Assert.assertFalse(rAllocation.containsGangs()); for (int i = 0; i < alloc.length; i++) { Assert.assertEquals( - Resource.newInstance(1024 * (alloc[i] + i), (alloc[i] + i)), + Resource.newInstance(1024 * (alloc[i] + i), (alloc[i] + i), (alloc[i] + i)), rAllocation.getResourcesAtTime(start + i)); } } @@ -164,7 +164,7 @@ public class TestInMemoryReservationAllocation { doAssertions(rAllocation, reservationID, rDef, allocations, start, alloc); Assert.assertTrue(rAllocation.containsGangs()); for (int i = 0; i < alloc.length; i++) { - Assert.assertEquals(Resource.newInstance(1024 * (alloc[i]), (alloc[i])), + Assert.assertEquals(Resource.newInstance(1024 * (alloc[i]), (alloc[i]), 
(alloc[i])), rAllocation.getResourcesAtTime(start + i)); } } @@ -186,7 +186,7 @@ public class TestInMemoryReservationAllocation { long deadline, long duration) { // create a request with a single atomic ask ReservationRequest r = - ReservationRequest.newInstance(Resource.newInstance(1024, 1), 1, 1, + ReservationRequest.newInstance(Resource.newInstance(1024, 1, 1), 1, 1, duration); ReservationDefinition rDef = new ReservationDefinitionPBImpl(); ReservationRequests reqs = new ReservationRequestsPBImpl(); @@ -210,7 +210,7 @@ public class TestInMemoryReservationAllocation { numContainers = alloc[i]; } ReservationRequest rr = - ReservationRequest.newInstance(Resource.newInstance(1024, 1), + ReservationRequest.newInstance(Resource.newInstance(1024, 1, 1), (numContainers)); if (isGang) { rr.setConcurrency(numContainers); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/TestNoOverCommitPolicy.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/TestNoOverCommitPolicy.java index 1e15618..16fa844 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/TestNoOverCommitPolicy.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/TestNoOverCommitPolicy.java @@ -53,9 +53,9 @@ public class TestNoOverCommitPolicy { step = 1000L; initTime = System.currentTimeMillis(); - minAlloc = Resource.newInstance(1024, 1); + minAlloc = Resource.newInstance(1024, 1, 1); res = new DefaultResourceCalculator(); - maxAlloc = Resource.newInstance(1024 * 8, 8); + maxAlloc = Resource.newInstance(1024 * 8, 8, 8); mAgent = 
mock(ReservationAgent.class); ReservationSystemTestUtil testUtil = new ReservationSystemTestUtil(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/TestRLESparseResourceAllocation.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/TestRLESparseResourceAllocation.java index c7301c7..cf29144 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/TestRLESparseResourceAllocation.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/TestRLESparseResourceAllocation.java @@ -40,7 +40,7 @@ public class TestRLESparseResourceAllocation { @Test public void testBlocks() { ResourceCalculator resCalc = new DefaultResourceCalculator(); - Resource minAlloc = Resource.newInstance(1, 1); + Resource minAlloc = Resource.newInstance(1, 1, 1); RLESparseResourceAllocation rleSparseVector = new RLESparseResourceAllocation(resCalc, minAlloc); @@ -53,22 +53,22 @@ public class TestRLESparseResourceAllocation { } LOG.info(rleSparseVector.toString()); Assert.assertFalse(rleSparseVector.isEmpty()); - Assert.assertEquals(Resource.newInstance(0, 0), + Assert.assertEquals(Resource.newInstance(0, 0, 0), rleSparseVector.getCapacityAtTime(99)); - Assert.assertEquals(Resource.newInstance(0, 0), + Assert.assertEquals(Resource.newInstance(0, 0, 0), rleSparseVector.getCapacityAtTime(start + alloc.length + 1)); for (int i = 0; i < alloc.length; i++) { - Assert.assertEquals(Resource.newInstance(1024 * (alloc[i]), (alloc[i])), + Assert.assertEquals(Resource.newInstance(1024 * (alloc[i]), (alloc[i]), (alloc[i])), 
rleSparseVector.getCapacityAtTime(start + i)); } - Assert.assertEquals(Resource.newInstance(0, 0), + Assert.assertEquals(Resource.newInstance(0, 0, 0), rleSparseVector.getCapacityAtTime(start + alloc.length + 2)); for (Entry ip : inputs) { rleSparseVector.removeInterval(ip.getKey(), ip.getValue()); } LOG.info(rleSparseVector.toString()); for (int i = 0; i < alloc.length; i++) { - Assert.assertEquals(Resource.newInstance(0, 0), + Assert.assertEquals(Resource.newInstance(0, 0, 0), rleSparseVector.getCapacityAtTime(start + i)); } Assert.assertTrue(rleSparseVector.isEmpty()); @@ -77,7 +77,7 @@ public class TestRLESparseResourceAllocation { @Test public void testSteps() { ResourceCalculator resCalc = new DefaultResourceCalculator(); - Resource minAlloc = Resource.newInstance(1, 1); + Resource minAlloc = Resource.newInstance(1, 1, 1); RLESparseResourceAllocation rleSparseVector = new RLESparseResourceAllocation(resCalc, minAlloc); @@ -90,23 +90,23 @@ public class TestRLESparseResourceAllocation { } LOG.info(rleSparseVector.toString()); Assert.assertFalse(rleSparseVector.isEmpty()); - Assert.assertEquals(Resource.newInstance(0, 0), + Assert.assertEquals(Resource.newInstance(0, 0, 0), rleSparseVector.getCapacityAtTime(99)); - Assert.assertEquals(Resource.newInstance(0, 0), + Assert.assertEquals(Resource.newInstance(0, 0, 0), rleSparseVector.getCapacityAtTime(start + alloc.length + 1)); for (int i = 0; i < alloc.length; i++) { Assert.assertEquals( - Resource.newInstance(1024 * (alloc[i] + i), (alloc[i] + i)), + Resource.newInstance(1024 * (alloc[i] + i), (alloc[i] + i), (alloc[i] + i)), rleSparseVector.getCapacityAtTime(start + i)); } - Assert.assertEquals(Resource.newInstance(0, 0), + Assert.assertEquals(Resource.newInstance(0, 0, 0), rleSparseVector.getCapacityAtTime(start + alloc.length + 2)); for (Entry ip : inputs) { rleSparseVector.removeInterval(ip.getKey(), ip.getValue()); } LOG.info(rleSparseVector.toString()); for (int i = 0; i < alloc.length; i++) { - 
Assert.assertEquals(Resource.newInstance(0, 0), + Assert.assertEquals(Resource.newInstance(0, 0, 0), rleSparseVector.getCapacityAtTime(start + i)); } Assert.assertTrue(rleSparseVector.isEmpty()); @@ -115,7 +115,7 @@ public class TestRLESparseResourceAllocation { @Test public void testSkyline() { ResourceCalculator resCalc = new DefaultResourceCalculator(); - Resource minAlloc = Resource.newInstance(1, 1); + Resource minAlloc = Resource.newInstance(1, 1, 1); RLESparseResourceAllocation rleSparseVector = new RLESparseResourceAllocation(resCalc, minAlloc); @@ -128,23 +128,23 @@ public class TestRLESparseResourceAllocation { } LOG.info(rleSparseVector.toString()); Assert.assertFalse(rleSparseVector.isEmpty()); - Assert.assertEquals(Resource.newInstance(0, 0), + Assert.assertEquals(Resource.newInstance(0, 0, 0), rleSparseVector.getCapacityAtTime(99)); - Assert.assertEquals(Resource.newInstance(0, 0), + Assert.assertEquals(Resource.newInstance(0, 0, 0), rleSparseVector.getCapacityAtTime(start + alloc.length + 1)); for (int i = 0; i < alloc.length; i++) { Assert.assertEquals( - Resource.newInstance(1024 * (alloc[i] + i), (alloc[i] + i)), + Resource.newInstance(1024 * (alloc[i] + i), (alloc[i] + i), (alloc[i] + i)), rleSparseVector.getCapacityAtTime(start + i)); } - Assert.assertEquals(Resource.newInstance(0, 0), + Assert.assertEquals(Resource.newInstance(0, 0, 0), rleSparseVector.getCapacityAtTime(start + alloc.length + 2)); for (Entry ip : inputs) { rleSparseVector.removeInterval(ip.getKey(), ip.getValue()); } LOG.info(rleSparseVector.toString()); for (int i = 0; i < alloc.length; i++) { - Assert.assertEquals(Resource.newInstance(0, 0), + Assert.assertEquals(Resource.newInstance(0, 0, 0), rleSparseVector.getCapacityAtTime(start + i)); } Assert.assertTrue(rleSparseVector.isEmpty()); @@ -153,13 +153,13 @@ public class TestRLESparseResourceAllocation { @Test public void testZeroAlloaction() { ResourceCalculator resCalc = new DefaultResourceCalculator(); - Resource minAlloc 
= Resource.newInstance(1, 1); + Resource minAlloc = Resource.newInstance(1, 1, 1); RLESparseResourceAllocation rleSparseVector = new RLESparseResourceAllocation(resCalc, minAlloc); rleSparseVector.addInterval(new ReservationInterval(0, Long.MAX_VALUE), - ReservationRequest.newInstance(Resource.newInstance(0, 0), (0))); + ReservationRequest.newInstance(Resource.newInstance(0, 0, 0), (0))); LOG.info(rleSparseVector.toString()); - Assert.assertEquals(Resource.newInstance(0, 0), + Assert.assertEquals(Resource.newInstance(0, 0, 0), rleSparseVector.getCapacityAtTime(new Random().nextLong())); Assert.assertTrue(rleSparseVector.isEmpty()); } @@ -177,7 +177,7 @@ public class TestRLESparseResourceAllocation { } req.put(new ReservationInterval(startTime + i, startTime + i + 1), - ReservationRequest.newInstance(Resource.newInstance(1024, 1), + ReservationRequest.newInstance(Resource.newInstance(1024, 1, 1), (numContainers))); } return req; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/TestReservationInputValidator.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/TestReservationInputValidator.java index 93adf74..d4d1938 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/TestReservationInputValidator.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/TestReservationInputValidator.java @@ -74,7 +74,7 @@ public class TestReservationInputValidator { rrValidator = new ReservationInputValidator(clock); when(clock.getTime()).thenReturn(1L); ResourceCalculator rCalc = new DefaultResourceCalculator(); - Resource 
resource = Resource.newInstance(10240, 10); + Resource resource = Resource.newInstance(10240, 10, 10); when(plan.getResourceCalculator()).thenReturn(rCalc); when(plan.getTotalCapacity()).thenReturn(resource); when(rSystem.getQueueForReservation(any(ReservationId.class))).thenReturn( @@ -248,7 +248,7 @@ public class TestReservationInputValidator { public void testSubmitReservationExceedsGangSize() { ReservationSubmissionRequest request = createSimpleReservationSubmissionRequest(1, 1, 1, 5, 4); - Resource resource = Resource.newInstance(512, 1); + Resource resource = Resource.newInstance(512, 1, 1); when(plan.getTotalCapacity()).thenReturn(resource); Plan plan = null; try { @@ -429,7 +429,7 @@ public class TestReservationInputValidator { public void testUpdateReservationExceedsGangSize() { ReservationUpdateRequest request = createSimpleReservationUpdateRequest(1, 1, 1, 5, 4); - Resource resource = Resource.newInstance(512, 1); + Resource resource = Resource.newInstance(512, 1, 1); when(plan.getTotalCapacity()).thenReturn(resource); Plan plan = null; try { @@ -537,7 +537,7 @@ public class TestReservationInputValidator { rDef.setReservationRequests(reqs); if (numContainers > 0) { ReservationRequest r = - ReservationRequest.newInstance(Resource.newInstance(1024, 1), + ReservationRequest.newInstance(Resource.newInstance(1024, 1, 1), numContainers, 1, duration); reqs.setReservationResources(Collections.singletonList(r)); @@ -562,7 +562,7 @@ public class TestReservationInputValidator { rDef.setReservationRequests(reqs); if (numContainers > 0) { ReservationRequest r = - ReservationRequest.newInstance(Resource.newInstance(1024, 1), + ReservationRequest.newInstance(Resource.newInstance(1024, 1, 1), numContainers, 1, duration); reqs.setReservationResources(Collections.singletonList(r)); diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/TestSchedulerPlanFollowerBase.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/TestSchedulerPlanFollowerBase.java index 50df8fe..a3f14a7 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/TestSchedulerPlanFollowerBase.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/TestSchedulerPlanFollowerBase.java @@ -45,8 +45,8 @@ public abstract class TestSchedulerPlanFollowerBase { protected Clock mClock = null; protected ResourceScheduler scheduler = null; protected ReservationAgent mAgent; - protected Resource minAlloc = Resource.newInstance(GB, 1); - protected Resource maxAlloc = Resource.newInstance(GB * 8, 8); + protected Resource minAlloc = Resource.newInstance(GB, 1, 1); + protected Resource maxAlloc = Resource.newInstance(GB * 8, 8, 8); protected CapacityOverTimePolicy policy = new CapacityOverTimePolicy(); protected Plan plan; private ResourceCalculator res = new DefaultResourceCalculator(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/TestSimpleCapacityReplanner.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/TestSimpleCapacityReplanner.java index 1ca9f2e..9cba032 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/TestSimpleCapacityReplanner.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/TestSimpleCapacityReplanner.java @@ -42,9 +42,9 @@ @Test public void testReplanningPlanCapacityLoss() throws PlanningException { - Resource clusterCapacity = Resource.newInstance(100 * 1024, 10); - Resource minAlloc = Resource.newInstance(1024, 1); - Resource maxAlloc = Resource.newInstance(1024 * 8, 8); + Resource clusterCapacity = Resource.newInstance(100 * 1024, 10, 10); + Resource minAlloc = Resource.newInstance(1024, 1, 1); + Resource maxAlloc = Resource.newInstance(1024 * 8, 8, 8); ResourceCalculator res = new DefaultResourceCalculator(); long step = 1L; @@ -118,7 +116,7 @@ public class TestSimpleCapacityReplanner { minAlloc))); // remove some of the resources (requires replanning) - plan.setTotalCapacity(Resource.newInstance(70 * 1024, 70)); + plan.setTotalCapacity(Resource.newInstance(70 * 1024, 70, 70)); when(clock.getTime()).thenReturn(0L); @@ -152,7 +150,7 @@ public class TestSimpleCapacityReplanner { new TreeMap(); for (int i = 0; i < alloc.length; i++) { req.put(new ReservationInterval(startTime + i, startTime + i + 1), - ReservationRequest.newInstance(Resource.newInstance(1024, 1), + ReservationRequest.newInstance(Resource.newInstance(1024, 1, 1), alloc[i])); } return req; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/resource/TestResourceWeights.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/resource/TestResourceWeights.java index f420b9e..4c2fd00 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/resource/TestResourceWeights.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/resource/TestResourceWeights.java @@ -28,19 +28,25 @@ public class TestResourceWeights { ResourceWeights rw1 = new ResourceWeights(); Assert.assertEquals("Default CPU weight should be 0.0f.", 0.0f, rw1.getWeight(ResourceType.CPU), 0.00001f); + Assert.assertEquals("Default GPU weight should be 0.0f.", 0.0f, + rw1.getWeight(ResourceType.GPU), 0.00001f); Assert.assertEquals("Default memory weight should be 0.0f", 0.0f, rw1.getWeight(ResourceType.MEMORY), 0.00001f); ResourceWeights rw2 = new ResourceWeights(2.0f); Assert.assertEquals("The CPU weight should be 2.0f.", 2.0f, rw2.getWeight(ResourceType.CPU), 0.00001f); + Assert.assertEquals("The GPU weight should be 2.0f.", 2.0f, + rw2.getWeight(ResourceType.GPU), 0.00001f); Assert.assertEquals("The memory weight should be 2.0f", 2.0f, rw2.getWeight(ResourceType.MEMORY), 0.00001f); // set each individually - ResourceWeights rw3 = new ResourceWeights(1.5f, 2.0f); + ResourceWeights rw3 = new ResourceWeights(1.5f, 2.0f, 2.5f); Assert.assertEquals("The CPU weight should be 2.0f", 2.0f, rw3.getWeight(ResourceType.CPU), 0.00001f); + Assert.assertEquals("The GPU weight should be 2.5f", 2.5f, + rw3.getWeight(ResourceType.GPU), 0.00001f); Assert.assertEquals("The memory weight should be 1.5f", 1.5f, rw3.getWeight(ResourceType.MEMORY), 0.00001f); @@ -48,6 +54,9 @@ public class TestResourceWeights { rw3.setWeight(ResourceType.CPU, 2.5f); Assert.assertEquals("The CPU weight should be set to 2.5f.", 2.5f, rw3.getWeight(ResourceType.CPU), 0.00001f); + rw3.setWeight(ResourceType.GPU, 5.0f); + Assert.assertEquals("The GPU weight should be set to 5.0f.", 5.0f, + rw3.getWeight(ResourceType.GPU), 0.00001f); 
rw3.setWeight(ResourceType.MEMORY, 4.0f); Assert.assertEquals("The memory weight should be set to 4.0f.", 4.0f, rw3.getWeight(ResourceType.MEMORY), 0.00001f); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/resource/TestResources.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/resource/TestResources.java index ae98660..3f63f41 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/resource/TestResources.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/resource/TestResources.java @@ -24,20 +24,20 @@ public class TestResources { @Test(timeout=1000) public void testFitsIn() { - assertTrue(fitsIn(createResource(1, 1), createResource(2, 2))); - assertTrue(fitsIn(createResource(2, 2), createResource(2, 2))); - assertFalse(fitsIn(createResource(2, 2), createResource(1, 1))); - assertFalse(fitsIn(createResource(1, 2), createResource(2, 1))); - assertFalse(fitsIn(createResource(2, 1), createResource(1, 2))); + assertTrue(fitsIn(createResource(1, 1, 1), createResource(2, 2, 2))); + assertTrue(fitsIn(createResource(2, 2, 2), createResource(2, 2, 2))); + assertFalse(fitsIn(createResource(2, 2, 2), createResource(1, 1, 1))); + assertFalse(fitsIn(createResource(1, 2, 2), createResource(2, 1, 1))); + assertFalse(fitsIn(createResource(2, 1, 1), createResource(1, 2, 2))); } @Test(timeout=1000) public void testComponentwiseMin() { - assertEquals(createResource(1, 1), - componentwiseMin(createResource(1, 1), createResource(2, 2))); - assertEquals(createResource(1, 1), - componentwiseMin(createResource(2, 2), createResource(1, 1))); - assertEquals(createResource(1, 
1), - componentwiseMin(createResource(1, 2), createResource(2, 1))); + assertEquals(createResource(1, 1, 1), + componentwiseMin(createResource(1, 1, 1), createResource(2, 2, 2))); + assertEquals(createResource(1, 1, 1), + componentwiseMin(createResource(2, 2, 2), createResource(1, 1, 1))); + assertEquals(createResource(1, 1, 1), + componentwiseMin(createResource(1, 2, 2), createResource(2, 1, 1))); } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/TestNMExpiry.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/TestNMExpiry.java index c837450..c18ed8f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/TestNMExpiry.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/TestNMExpiry.java @@ -132,7 +132,7 @@ public class TestNMExpiry { String hostname1 = "localhost1"; String hostname2 = "localhost2"; String hostname3 = "localhost3"; - Resource capability = BuilderUtils.newResource(1024, 1); + Resource capability = BuilderUtils.newResource(1024, 1, 1); RegisterNodeManagerRequest request1 = recordFactory .newRecordInstance(RegisterNodeManagerRequest.class); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/TestNMReconnect.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/TestNMReconnect.java index d16d551..2455c9e 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/TestNMReconnect.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/TestNMReconnect.java @@ -102,7 +102,7 @@ public class TestNMReconnect { @Test public void testReconnect() throws Exception { String hostname1 = "localhost1"; - Resource capability = BuilderUtils.newResource(1024, 1); + Resource capability = BuilderUtils.newResource(1024, 1, 1); RegisterNodeManagerRequest request1 = recordFactory .newRecordInstance(RegisterNodeManagerRequest.class); @@ -121,7 +121,7 @@ public class TestNMReconnect { rmNodeEvents.clear(); resourceTrackerService.registerNodeManager(request1); - capability = BuilderUtils.newResource(1024, 2); + capability = BuilderUtils.newResource(1024, 2, 2); request1.setResource(capability); Assert.assertEquals(RMNodeEventType.RECONNECTED, rmNodeEvents.get(0).getType()); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/TestRMNMRPCResponseId.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/TestRMNMRPCResponseId.java index 4f94695..4927f0d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/TestRMNMRPCResponseId.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/TestRMNMRPCResponseId.java @@ -94,7 +94,7 @@ public class TestRMNMRPCResponseId { @Test public void testRPCResponseId() throws 
IOException, YarnException { String node = "localhost"; - Resource capability = BuilderUtils.newResource(1024, 1); + Resource capability = BuilderUtils.newResource(1024, 1, 1); RegisterNodeManagerRequest request = recordFactory.newRecordInstance(RegisterNodeManagerRequest.class); nodeId = NodeId.newInstance(node, 1234); request.setNodeId(nodeId); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java index 3ecab2a..bc5ac30 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java @@ -299,6 +299,7 @@ public class TestRMAppAttemptTransitions { mock(ApplicationResourceUsageReport.class); when(appResUsgRpt.getMemorySeconds()).thenReturn(0L); when(appResUsgRpt.getVcoreSeconds()).thenReturn(0L); + when(appResUsgRpt.getGcoreSeconds()).thenReturn(0L); when(resourceScheduler .getAppResourceUsageReport((ApplicationAttemptId)Matchers.any())) .thenReturn(appResUsgRpt); @@ -310,7 +311,7 @@ public class TestRMAppAttemptTransitions { final String queue = MockApps.newQueue(); submissionContext = mock(ApplicationSubmissionContext.class); when(submissionContext.getQueue()).thenReturn(queue); - Resource resource = BuilderUtils.newResource(1536, 1); + Resource resource = BuilderUtils.newResource(1536, 1, 1); ContainerLaunchContext amContainerSpec = BuilderUtils.newContainerLaunchContext(null, null, null, 
null, null, null); @@ -634,7 +635,7 @@ public class TestRMAppAttemptTransitions { // Mock the allocation of AM container Container container = mock(Container.class); - Resource resource = BuilderUtils.newResource(2048, 1); + Resource resource = BuilderUtils.newResource(2048, 1, 1); when(container.getId()).thenReturn( BuilderUtils.newContainerId(applicationAttempt.getAppAttemptId(), 1)); when(container.getResource()).thenReturn(resource); @@ -753,6 +754,7 @@ public class TestRMAppAttemptTransitions { mock(ApplicationResourceUsageReport.class); when(appResUsgRpt.getMemorySeconds()).thenReturn(123456L); when(appResUsgRpt.getVcoreSeconds()).thenReturn(55544L); + when(appResUsgRpt.getGcoreSeconds()).thenReturn(55544L); when(scheduler.getAppResourceUsageReport(any(ApplicationAttemptId.class))) .thenReturn(appResUsgRpt); @@ -768,10 +770,12 @@ public class TestRMAppAttemptTransitions { applicationAttempt.getApplicationResourceUsageReport(); Assert.assertEquals(123456L, report.getMemorySeconds()); Assert.assertEquals(55544L, report.getVcoreSeconds()); + Assert.assertEquals(55544L, report.getGcoreSeconds()); // finish app attempt and remove it from scheduler when(appResUsgRpt.getMemorySeconds()).thenReturn(223456L); when(appResUsgRpt.getVcoreSeconds()).thenReturn(75544L); + when(appResUsgRpt.getGcoreSeconds()).thenReturn(75544L); sendAttemptUpdateSavedEvent(applicationAttempt); NodeId anyNodeId = NodeId.newInstance("host", 1234); applicationAttempt.handle(new RMAppAttemptContainerFinishedEvent( @@ -784,6 +788,7 @@ public class TestRMAppAttemptTransitions { report = applicationAttempt.getApplicationResourceUsageReport(); Assert.assertEquals(223456, report.getMemorySeconds()); Assert.assertEquals(75544, report.getVcoreSeconds()); + Assert.assertEquals(75544, report.getGcoreSeconds()); } @Test @@ -1517,7 +1522,7 @@ public class TestRMAppAttemptTransitions { (ResourceRequest) ((List) invocation.getArguments()[1]).get(0); // capacity shouldn't changed - 
assertEquals(Resource.newInstance(3333, 1), rr.getCapability()); + assertEquals(Resource.newInstance(3333, 1, 1), rr.getCapability()); assertEquals("label-expression", rr.getNodeLabelExpression()); // priority, #container, relax-locality will be changed @@ -1537,7 +1542,7 @@ public class TestRMAppAttemptTransitions { new RMAppAttemptImpl(applicationAttempt.getAppAttemptId(), spyRMContext, scheduler, masterService, submissionContext, new Configuration(), true, ResourceRequest.newInstance( - Priority.UNDEFINED, "host1", Resource.newInstance(3333, 1), 3, + Priority.UNDEFINED, "host1", Resource.newInstance(3333, 1, 1), 3, false, "label-expression")); new RMAppAttemptImpl.ScheduleTransition().transition( (RMAppAttemptImpl) applicationAttempt, null); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/TestRMContainerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/TestRMContainerImpl.java index 21aba3b..bf14d7f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/TestRMContainerImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/TestRMContainerImpl.java @@ -88,7 +81,7 @@ public class TestRMContainerImpl { ContainerId containerId = BuilderUtils.newContainerId(appAttemptId, 1); ContainerAllocationExpirer expirer = mock(ContainerAllocationExpirer.class); - Resource resource = BuilderUtils.newResource(512, 1); + Resource resource = BuilderUtils.newResource(512, 1, 1); Priority priority = BuilderUtils.newPriority(5); Container container = BuilderUtils.newContainer(containerId, nodeId, @@ -184,7 +177,7 @@ 
public class TestRMContainerImpl { ContainerId containerId = BuilderUtils.newContainerId(appAttemptId, 1); ContainerAllocationExpirer expirer = mock(ContainerAllocationExpirer.class); - Resource resource = BuilderUtils.newResource(512, 1); + Resource resource = BuilderUtils.newResource(512, 1, 1); Priority priority = BuilderUtils.newPriority(5); Container container = BuilderUtils.newContainer(containerId, nodeId, diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestAbstractYarnScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestAbstractYarnScheduler.java index 48ce822..5f29230 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestAbstractYarnScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestAbstractYarnScheduler.java @@ -185,7 +185,7 @@ public class TestAbstractYarnScheduler extends ParameterizedSchedulerTestBase { Assert.assertEquals(expectedMaxVCores[0], maxVCores); RMNode node1 = MockNodes.newNodeInfo( - 0, Resources.createResource(1024, node1MaxVCores), 1, "127.0.0.2"); + 0, Resources.createResource(1024, node1MaxVCores, node1MaxVCores), 1, "127.0.0.2"); scheduler.handle(new NodeAddedSchedulerEvent(node1)); Assert.assertEquals(1, scheduler.getNumClusterNodes()); maxVCores = scheduler.getMaximumResourceCapability().getVirtualCores(); @@ -197,14 +197,14 @@ public class TestAbstractYarnScheduler extends ParameterizedSchedulerTestBase { Assert.assertEquals(expectedMaxVCores[2], maxVCores); RMNode node2 = MockNodes.newNodeInfo( - 0, Resources.createResource(1024, node2MaxVCores), 
2, "127.0.0.3"); + 0, Resources.createResource(1024, node2MaxVCores, node2MaxVCores), 2, "127.0.0.3"); scheduler.handle(new NodeAddedSchedulerEvent(node2)); Assert.assertEquals(1, scheduler.getNumClusterNodes()); maxVCores = scheduler.getMaximumResourceCapability().getVirtualCores(); Assert.assertEquals(expectedMaxVCores[3], maxVCores); RMNode node3 = MockNodes.newNodeInfo( - 0, Resources.createResource(1024, node3MaxVCores), 3, "127.0.0.4"); + 0, Resources.createResource(1024, node3MaxVCores, node3MaxVCores), 3, "127.0.0.4"); scheduler.handle(new NodeAddedSchedulerEvent(node3)); Assert.assertEquals(2, scheduler.getNumClusterNodes()); maxVCores = scheduler.getMaximumResourceCapability().getVirtualCores(); @@ -224,7 +224,7 @@ public class TestAbstractYarnScheduler extends ParameterizedSchedulerTestBase { final int configuredMaxVCores = 20; final int configuredMaxMemory = 10 * 1024; Resource configuredMaximumResource = Resource.newInstance - (configuredMaxMemory, configuredMaxVCores); + (configuredMaxMemory, configuredMaxVCores, configuredMaxVCores); configureScheduler(); YarnConfiguration conf = getConf(); @@ -242,9 +242,9 @@ public class TestAbstractYarnScheduler extends ParameterizedSchedulerTestBase { AbstractYarnScheduler scheduler = (AbstractYarnScheduler) rm .getResourceScheduler(); - Resource emptyResource = Resource.newInstance(0, 0); - Resource fullResource1 = Resource.newInstance(1024, 5); - Resource fullResource2 = Resource.newInstance(2048, 10); + Resource emptyResource = Resource.newInstance(0, 0, 0); + Resource fullResource1 = Resource.newInstance(1024, 5, 5); + Resource fullResource2 = Resource.newInstance(2048, 10, 10); SchedulerNode mockNode1 = mock(SchedulerNode.class); when(mockNode1.getNodeID()).thenReturn(NodeId.newInstance("foo", 8080)); @@ -285,7 +285,7 @@ public class TestAbstractYarnScheduler extends ParameterizedSchedulerTestBase { final int configuredMaxVCores = 20; final int configuredMaxMemory = 10 * 1024; Resource 
configuredMaximumResource = Resource.newInstance - (configuredMaxMemory, configuredMaxVCores); + (configuredMaxMemory, configuredMaxVCores, configuredMaxVCores); configureScheduler(); YarnConfiguration conf = getConf(); @@ -304,10 +304,10 @@ public class TestAbstractYarnScheduler extends ParameterizedSchedulerTestBase { .getResourceScheduler(); verifyMaximumResourceCapability(configuredMaximumResource, scheduler); - Resource resource1 = Resource.newInstance(2048, 5); - Resource resource2 = Resource.newInstance(4096, 10); - Resource resource3 = Resource.newInstance(512, 1); - Resource resource4 = Resource.newInstance(1024, 2); + Resource resource1 = Resource.newInstance(2048, 5, 5); + Resource resource2 = Resource.newInstance(4096, 10, 10); + Resource resource3 = Resource.newInstance(512, 1, 1); + Resource resource4 = Resource.newInstance(1024, 2, 2); RMNode node1 = MockNodes.newNodeInfo( 0, resource1, 1, "127.0.0.2"); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestQueueMetrics.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestQueueMetrics.java index 8ad71d2..7ed913e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestQueueMetrics.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestQueueMetrics.java @@ -72,20 +70,20 @@ public class TestQueueMetrics { metrics.submitAppAttempt(user); checkApps(queueSource, 1, 1, 0, 0, 0, 0, true); - metrics.setAvailableResourcesToQueue(Resources.createResource(100*GB, 100)); - metrics.incrPendingResources(user, 5, Resources.createResource(3*GB, 3)); + 
metrics.setAvailableResourcesToQueue(Resources.createResource(100*GB, 100, 100)); + metrics.incrPendingResources(user, 5, Resources.createResource(3*GB, 3, 3)); // Available resources is set externally, as it depends on dynamic // configurable cluster/queue resources - checkResources(queueSource, 0, 0, 0, 0, 0, 100*GB, 100, 15*GB, 15, 5, 0, 0, 0); + checkResources(queueSource, 0, 0, 0, 0, 0, 0, 100*GB, 100, 100, 15*GB, 15, 15, 5, 0, 0, 0, 0); metrics.runAppAttempt(app.getApplicationId(), user); checkApps(queueSource, 1, 0, 1, 0, 0, 0, true); - metrics.allocateResources(user, 3, Resources.createResource(2*GB, 2), true); - checkResources(queueSource, 6*GB, 6, 3, 3, 0, 100*GB, 100, 9*GB, 9, 2, 0, 0, 0); + metrics.allocateResources(user, 3, Resources.createResource(2*GB, 2, 2), true); + checkResources(queueSource, 6*GB, 6, 6, 3, 3, 0, 100*GB, 100, 100, 9*GB, 9, 9, 2, 0, 0, 0, 0); - metrics.releaseResources(user, 1, Resources.createResource(2*GB, 2)); - checkResources(queueSource, 4*GB, 4, 2, 3, 1, 100*GB, 100, 9*GB, 9, 2, 0, 0, 0); + metrics.releaseResources(user, 1, Resources.createResource(2*GB, 2, 2)); + checkResources(queueSource, 4*GB, 4, 4, 2, 3, 1, 100*GB, 100, 100, 9*GB, 9, 9, 2, 0, 0, 0, 0); metrics.finishAppAttempt( app.getApplicationId(), app.isPending(), app.getUser()); @@ -169,25 +167,25 @@ public class TestQueueMetrics { checkApps(queueSource, 1, 1, 0, 0, 0, 0, true); checkApps(userSource, 1, 1, 0, 0, 0, 0, true); - metrics.setAvailableResourcesToQueue(Resources.createResource(100*GB, 100)); - metrics.setAvailableResourcesToUser(user, Resources.createResource(10*GB, 10)); - metrics.incrPendingResources(user, 5, Resources.createResource(3*GB, 3)); + metrics.setAvailableResourcesToQueue(Resources.createResource(100*GB, 100, 100)); + metrics.setAvailableResourcesToUser(user, Resources.createResource(10*GB, 10, 10)); + metrics.incrPendingResources(user, 5, Resources.createResource(3*GB, 3, 3)); // Available resources is set externally, as it depends on 
dynamic // configurable cluster/queue resources - checkResources(queueSource, 0, 0, 0, 0, 0, 100*GB, 100, 15*GB, 15, 5, 0, 0, 0); - checkResources(userSource, 0, 0, 0, 0, 0, 10*GB, 10, 15*GB, 15, 5, 0, 0, 0); + checkResources(queueSource, 0, 0, 0, 0, 0, 0, 100*GB, 100, 100, 15*GB, 15, 15, 5, 0, 0, 0, 0); + checkResources(userSource, 0, 0, 0, 0, 0, 0, 10*GB, 10, 10, 15*GB, 15, 15, 5, 0, 0, 0, 0); metrics.runAppAttempt(app.getApplicationId(), user); checkApps(queueSource, 1, 0, 1, 0, 0, 0, true); checkApps(userSource, 1, 0, 1, 0, 0, 0, true); - metrics.allocateResources(user, 3, Resources.createResource(2*GB, 2), true); - checkResources(queueSource, 6*GB, 6, 3, 3, 0, 100*GB, 100, 9*GB, 9, 2, 0, 0, 0); - checkResources(userSource, 6*GB, 6, 3, 3, 0, 10*GB, 10, 9*GB, 9, 2, 0, 0, 0); + metrics.allocateResources(user, 3, Resources.createResource(2*GB, 2, 2), true); + checkResources(queueSource, 6*GB, 6, 6, 3, 3, 0, 100*GB, 100, 100, 9*GB, 9, 9, 2, 0, 0, 0, 0); + checkResources(userSource, 6*GB, 6, 6, 3, 3, 0, 10*GB, 10, 10, 9*GB, 9, 9, 2, 0, 0, 0, 0); - metrics.releaseResources(user, 1, Resources.createResource(2*GB, 2)); - checkResources(queueSource, 4*GB, 4, 2, 3, 1, 100*GB, 100, 9*GB, 9, 2, 0, 0, 0); - checkResources(userSource, 4*GB, 4, 2, 3, 1, 10*GB, 10, 9*GB, 9, 2, 0, 0, 0); + metrics.releaseResources(user, 1, Resources.createResource(2*GB, 2, 2)); + checkResources(queueSource, 4*GB, 4, 4, 2, 3, 1, 100*GB, 100, 100, 9*GB, 9, 9, 2, 0, 0, 0, 0); + checkResources(userSource, 4*GB, 4, 4, 2, 3, 1, 10*GB, 10, 10, 9*GB, 9, 9, 2, 0, 0, 0, 0); metrics.finishAppAttempt( app.getApplicationId(), app.isPending(), app.getUser()); @@ -228,35 +226,35 @@ public class TestQueueMetrics { checkApps(userSource, 1, 1, 0, 0, 0, 0, true); checkApps(parentUserSource, 1, 1, 0, 0, 0, 0, true); - parentMetrics.setAvailableResourcesToQueue(Resources.createResource(100*GB, 100)); - metrics.setAvailableResourcesToQueue(Resources.createResource(100*GB, 100)); - 
parentMetrics.setAvailableResourcesToUser(user, Resources.createResource(10*GB, 10)); - metrics.setAvailableResourcesToUser(user, Resources.createResource(10*GB, 10)); - metrics.incrPendingResources(user, 5, Resources.createResource(3*GB, 3)); - checkResources(queueSource, 0, 0, 0, 0, 0, 100*GB, 100, 15*GB, 15, 5, 0, 0, 0); - checkResources(parentQueueSource, 0, 0, 0, 0, 0, 100*GB, 100, 15*GB, 15, 5, 0, 0, 0); - checkResources(userSource, 0, 0, 0, 0, 0, 10*GB, 10, 15*GB, 15, 5, 0, 0, 0); - checkResources(parentUserSource, 0, 0, 0, 0, 0, 10*GB, 10, 15*GB, 15, 5, 0, 0, 0); + parentMetrics.setAvailableResourcesToQueue(Resources.createResource(100*GB, 100, 100)); + metrics.setAvailableResourcesToQueue(Resources.createResource(100*GB, 100, 100)); + parentMetrics.setAvailableResourcesToUser(user, Resources.createResource(10*GB, 10, 10)); + metrics.setAvailableResourcesToUser(user, Resources.createResource(10*GB, 10, 10)); + metrics.incrPendingResources(user, 5, Resources.createResource(3*GB, 3, 3)); + checkResources(queueSource, 0, 0, 0, 0, 0, 0, 100*GB, 100, 100, 15*GB, 15, 15, 5, 0, 0, 0, 0); + checkResources(parentQueueSource, 0, 0, 0, 0, 0, 0, 100*GB, 100, 100, 15*GB, 15, 15, 5, 0, 0, 0, 0); + checkResources(userSource, 0, 0, 0, 0, 0, 0, 10*GB, 10, 10, 15*GB, 15, 15, 5, 0, 0, 0, 0); + checkResources(parentUserSource, 0, 0, 0, 0, 0, 0, 10*GB, 10, 10, 15*GB, 15, 15, 5, 0, 0, 0, 0); metrics.runAppAttempt(app.getApplicationId(), user); checkApps(queueSource, 1, 0, 1, 0, 0, 0, true); checkApps(userSource, 1, 0, 1, 0, 0, 0, true); - metrics.allocateResources(user, 3, Resources.createResource(2*GB, 2), true); - metrics.reserveResource(user, Resources.createResource(3*GB, 3)); + metrics.allocateResources(user, 3, Resources.createResource(2*GB, 2, 2), true); + metrics.reserveResource(user, Resources.createResource(3*GB, 3, 3)); // Available resources is set externally, as it depends on dynamic // configurable cluster/queue resources - checkResources(queueSource, 6*GB, 6, 3, 
3, 0, 100*GB, 100, 9*GB, 9, 2, 3*GB, 3, 1); - checkResources(parentQueueSource, 6*GB, 6, 3, 3, 0, 100*GB, 100, 9*GB, 9, 2, 3*GB, 3, 1); - checkResources(userSource, 6*GB, 6, 3, 3, 0, 10*GB, 10, 9*GB, 9, 2, 3*GB, 3, 1); - checkResources(parentUserSource, 6*GB, 6, 3, 3, 0, 10*GB, 10, 9*GB, 9, 2, 3*GB, 3, 1); - - metrics.releaseResources(user, 1, Resources.createResource(2*GB, 2)); - metrics.unreserveResource(user, Resources.createResource(3*GB, 3)); - checkResources(queueSource, 4*GB, 4, 2, 3, 1, 100*GB, 100, 9*GB, 9, 2, 0, 0, 0); - checkResources(parentQueueSource, 4*GB, 4, 2, 3, 1, 100*GB, 100, 9*GB, 9, 2, 0, 0, 0); - checkResources(userSource, 4*GB, 4, 2, 3, 1, 10*GB, 10, 9*GB, 9, 2, 0, 0, 0); - checkResources(parentUserSource, 4*GB, 4, 2, 3, 1, 10*GB, 10, 9*GB, 9, 2, 0, 0, 0); + checkResources(queueSource, 6*GB, 6, 6, 3, 3, 0, 100*GB, 100, 100, 9*GB, 9, 9, 2, 3*GB, 3, 3, 1); + checkResources(parentQueueSource, 6*GB, 6, 6, 3, 3, 0, 100*GB, 100, 100, 9*GB, 9, 9, 2, 3*GB, 3, 3, 1); + checkResources(userSource, 6*GB, 6, 6, 3, 3, 0, 10*GB, 10, 10, 9*GB, 9, 9, 2, 3*GB, 3, 3, 1); + checkResources(parentUserSource, 6*GB, 6, 6, 3, 3, 0, 10*GB, 10, 10, 9*GB, 9, 9, 2, 3*GB, 3, 3, 1); + + metrics.releaseResources(user, 1, Resources.createResource(2*GB, 2, 2)); + metrics.unreserveResource(user, Resources.createResource(3*GB, 3, 3)); + checkResources(queueSource, 4*GB, 4, 4, 2, 3, 1, 100*GB, 100, 100, 9*GB, 9, 9, 2, 0, 0, 0, 0); + checkResources(parentQueueSource, 4*GB, 4, 4, 2, 3, 1, 100*GB, 100, 100, 9*GB, 9, 9, 2, 0, 0, 0, 0); + checkResources(userSource, 4*GB, 4, 4, 2, 3, 1, 10*GB, 10, 10, 9*GB, 9, 9, 2, 0, 0, 0, 0); + checkResources(parentUserSource, 4*GB, 4, 4, 2, 3, 1, 10*GB, 10, 10, 9*GB, 9, 9, 2, 0, 0, 0, 0); metrics.finishAppAttempt( app.getApplicationId(), app.isPending(), app.getUser()); @@ -347,23 +345,27 @@ public class TestQueueMetrics { } public static void checkResources(MetricsSource source, int allocatedMB, - int allocatedCores, int allocCtnrs, long 
aggreAllocCtnrs, - long aggreReleasedCtnrs, int availableMB, int availableCores, int pendingMB, - int pendingCores, int pendingCtnrs, int reservedMB, int reservedCores, - int reservedCtnrs) { + int allocatedCores, int allocatedGCores, int allocCtnrs, long aggreAllocCtnrs, + long aggreReleasedCtnrs, int availableMB, int availableCores, int availableGCores, int pendingMB, + int pendingCores, int pendingGCores, int pendingCtnrs, int reservedMB, int reservedCores, + int reservedGCores, int reservedCtnrs) { MetricsRecordBuilder rb = getMetrics(source); assertGauge("AllocatedMB", allocatedMB, rb); assertGauge("AllocatedVCores", allocatedCores, rb); + assertGauge("AllocatedGCores", allocatedGCores, rb); assertGauge("AllocatedContainers", allocCtnrs, rb); assertCounter("AggregateContainersAllocated", aggreAllocCtnrs, rb); assertCounter("AggregateContainersReleased", aggreReleasedCtnrs, rb); assertGauge("AvailableMB", availableMB, rb); assertGauge("AvailableVCores", availableCores, rb); + assertGauge("AvailableGCores", availableGCores, rb); assertGauge("PendingMB", pendingMB, rb); assertGauge("PendingVCores", pendingCores, rb); + assertGauge("PendingGCores", pendingGCores, rb); assertGauge("PendingContainers", pendingCtnrs, rb); assertGauge("ReservedMB", reservedMB, rb); assertGauge("ReservedVCores", reservedCores, rb); + assertGauge("ReservedGCores", reservedGCores, rb); assertGauge("ReservedContainers", reservedCtnrs, rb); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestResourceUsage.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestResourceUsage.java index f0bf892..ba66518 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestResourceUsage.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestResourceUsage.java @@ -112,15 +112,15 @@ public class TestResourceUsage { check(0, 0, res); // Add 1,1 should returns 1,1 - inc(usage, suffix, Resource.newInstance(1, 1), label); + inc(usage, suffix, Resource.newInstance(1, 1, 1), label); check(1, 1, get(usage, suffix, label)); // Set 2,2 - set(usage, suffix, Resource.newInstance(2, 2), label); + set(usage, suffix, Resource.newInstance(2, 2, 2), label); check(2, 2, get(usage, suffix, label)); // dec 2,2 - dec(usage, suffix, Resource.newInstance(2, 2), label); + dec(usage, suffix, Resource.newInstance(2, 2, 2), label); check(0, 0, get(usage, suffix, label)); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestSchedulerApplicationAttempt.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestSchedulerApplicationAttempt.java index c648b83..9eeadd5 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestSchedulerApplicationAttempt.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestSchedulerApplicationAttempt.java @@ -72,7 +73,7 @@ public class TestSchedulerApplicationAttempt { assertEquals(0x30000000001L, app.getNewContainerId()); // Resource request - Resource requestedResource = Resource.newInstance(1536, 2); + 
Resource requestedResource = Resource.newInstance(1536, 2, 2); Priority requestedPriority = Priority.newInstance(2); ResourceRequest request = ResourceRequest.newInstance(requestedPriority, ResourceRequest.ANY, requestedResource, 3); @@ -87,7 +88,7 @@ public class TestSchedulerApplicationAttempt { // Reserved container Priority prio1 = Priority.newInstance(1); - Resource reservedResource = Resource.newInstance(2048, 3); + Resource reservedResource = Resource.newInstance(2048, 3, 3); RMContainer container2 = createReservedRMContainer(appAttId, 1, reservedResource, node.getNodeID(), prio1); Map reservations = new HashMap(); @@ -95,28 +96,31 @@ public class TestSchedulerApplicationAttempt { app.reservedContainers.put(prio1, reservations); oldMetrics.reserveResource(user, reservedResource); - checkQueueMetrics(oldMetrics, 1, 1, 1536, 2, 2048, 3, 3072, 4); - checkQueueMetrics(newMetrics, 0, 0, 0, 0, 0, 0, 0, 0); - checkQueueMetrics(parentMetrics, 1, 1, 1536, 2, 2048, 3, 3072, 4); + checkQueueMetrics(oldMetrics, 1, 1, 1536, 2, 2, 2048, 3, 3, 3072, 4, 4); + checkQueueMetrics(newMetrics, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + checkQueueMetrics(parentMetrics, 1, 1, 1536, 2, 2, 2048, 3, 3, 3072, 4, 4); app.move(newQueue); - checkQueueMetrics(oldMetrics, 0, 0, 0, 0, 0, 0, 0, 0); - checkQueueMetrics(newMetrics, 1, 1, 1536, 2, 2048, 3, 3072, 4); - checkQueueMetrics(parentMetrics, 1, 1, 1536, 2, 2048, 3, 3072, 4); + checkQueueMetrics(oldMetrics, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + checkQueueMetrics(newMetrics, 1, 1, 1536, 2, 2, 2048, 3, 3, 3072, 4, 4); + checkQueueMetrics(parentMetrics, 1, 1, 1536, 2, 2, 2048, 3, 3, 3072, 4, 4); } private void checkQueueMetrics(QueueMetrics metrics, int activeApps, - int runningApps, int allocMb, int allocVcores, int reservedMb, - int reservedVcores, int pendingMb, int pendingVcores) { + int runningApps, int allocMb, int allocVcores, int allocGcores, int reservedMb, + int reservedVcores, int reservedGcores, int pendingMb, int pendingVcores, int 
pendingGcores) { assertEquals(activeApps, metrics.getActiveApps()); assertEquals(runningApps, metrics.getAppsRunning()); assertEquals(allocMb, metrics.getAllocatedMB()); assertEquals(allocVcores, metrics.getAllocatedVirtualCores()); + assertEquals(allocGcores, metrics.getAllocatedGpuCores()); assertEquals(reservedMb, metrics.getReservedMB()); assertEquals(reservedVcores, metrics.getReservedVirtualCores()); + assertEquals(reservedGcores, metrics.getReservedGpuCores()); assertEquals(pendingMb, metrics.getPendingMB()); assertEquals(pendingVcores, metrics.getPendingVirtualCores()); + assertEquals(pendingGcores, metrics.getPendingGpuCores()); } private SchedulerNode createNode() { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestSchedulerUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestSchedulerUtils.java index 479f981..d146b82 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestSchedulerUtils.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestSchedulerUtils.java @@ -100,8 +97,8 @@ public class TestSchedulerUtils { final int minMemory = 1024; final int maxMemory = 8192; - Resource minResource = Resources.createResource(minMemory, 0); - Resource maxResource = Resources.createResource(maxMemory, 0); + Resource minResource = Resources.createResource(minMemory, 0, 0); + Resource maxResource = Resources.createResource(maxMemory, 0, 0); ResourceRequest ask = new ResourceRequestPBImpl(); @@ -142,7 +139,7 @@ public class TestSchedulerUtils { assertEquals(maxMemory, ask.getCapability().getMemory()); 
// max is not a multiple of min - maxResource = Resources.createResource(maxMemory - 10, 0); + maxResource = Resources.createResource(maxMemory - 10, 0, 0); ask.setCapability(Resources.createResource(maxMemory - 100)); // multiple of minMemory > maxMemory, then reduce to maxMemory SchedulerUtils.normalizeRequest(ask, resourceCalculator, null, minResource, @@ -150,7 +147,7 @@ public class TestSchedulerUtils { assertEquals(maxResource.getMemory(), ask.getCapability().getMemory()); // ask is more than max - maxResource = Resources.createResource(maxMemory, 0); + maxResource = Resources.createResource(maxMemory, 0, 0); ask.setCapability(Resources.createResource(maxMemory + 100)); SchedulerUtils.normalizeRequest(ask, resourceCalculator, null, minResource, maxResource); @@ -161,33 +158,35 @@ public class TestSchedulerUtils { public void testNormalizeRequestWithDominantResourceCalculator() { ResourceCalculator resourceCalculator = new DominantResourceCalculator(); - Resource minResource = Resources.createResource(1024, 1); - Resource maxResource = Resources.createResource(10240, 10); - Resource clusterResource = Resources.createResource(10 * 1024, 10); + Resource minResource = Resources.createResource(1024, 1, 0); + Resource maxResource = Resources.createResource(10240, 10, 10); + Resource clusterResource = Resources.createResource(10 * 1024, 10, 10); ResourceRequest ask = new ResourceRequestPBImpl(); - // case negative memory/vcores - ask.setCapability(Resources.createResource(-1024, -1)); + // case negative memory/vcores/gcores + ask.setCapability(Resources.createResource(-1024, -1, -1)); SchedulerUtils.normalizeRequest( ask, resourceCalculator, clusterResource, minResource, maxResource); assertEquals(minResource, ask.getCapability()); - // case zero memory/vcores - ask.setCapability(Resources.createResource(0, 0)); + // case zero memory/vcores/gcores + ask.setCapability(Resources.createResource(0, 0, 0)); SchedulerUtils.normalizeRequest( ask, resourceCalculator, 
clusterResource, minResource, maxResource); assertEquals(minResource, ask.getCapability()); assertEquals(1, ask.getCapability().getVirtualCores()); assertEquals(1024, ask.getCapability().getMemory()); + assertEquals(0, ask.getCapability().getGpuCores()); // case non-zero memory & zero cores - ask.setCapability(Resources.createResource(1536, 0)); + ask.setCapability(Resources.createResource(1536, 0, 0)); SchedulerUtils.normalizeRequest( ask, resourceCalculator, clusterResource, minResource, maxResource); - assertEquals(Resources.createResource(2048, 1), ask.getCapability()); + assertEquals(Resources.createResource(2048, 1, 0), ask.getCapability()); assertEquals(1, ask.getCapability().getVirtualCores()); assertEquals(2048, ask.getCapability().getMemory()); + assertEquals(0, ask.getCapability().getGpuCores()); } @Test (timeout = 30000) @@ -204,7 +203,8 @@ public class TestSchedulerUtils { Resource maxResource = Resources.createResource( YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_MB, - YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_VCORES); + YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_VCORES, + YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_GCORES); // queue has labels, success cases try { @@ -215,7 +215,8 @@ public class TestSchedulerUtils { ImmutableSet.of("x", "y")); Resource resource = Resources.createResource( 0, - YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES); + YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES, + YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_GCORES); ResourceRequest resReq = BuilderUtils.newResourceRequest( mock(Priority.class), ResourceRequest.ANY, resource, 1); resReq.setNodeLabelExpression("x"); @@ -249,7 +250,8 @@ public class TestSchedulerUtils { queueAccessibleNodeLabels.addAll(Arrays.asList("x", "y")); Resource resource = Resources.createResource( 0, - YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES); + 
YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES, + YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_GCORES); ResourceRequest resReq = BuilderUtils.newResourceRequest( mock(Priority.class), ResourceRequest.ANY, resource, 1); resReq.setNodeLabelExpression("x"); @@ -271,7 +273,8 @@ public class TestSchedulerUtils { Resource resource = Resources.createResource( 0, - YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES); + YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES, + YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_GCORES); ResourceRequest resReq = BuilderUtils.newResourceRequest( mock(Priority.class), ResourceRequest.ANY, resource, 1); resReq.setNodeLabelExpression("z"); @@ -295,7 +298,8 @@ public class TestSchedulerUtils { Resource resource = Resources.createResource( 0, - YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES); + YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES, + YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_GCORES); ResourceRequest resReq = BuilderUtils.newResourceRequest( mock(Priority.class), ResourceRequest.ANY, resource, 1); resReq.setNodeLabelExpression("x && y"); @@ -316,7 +320,8 @@ public class TestSchedulerUtils { Resource resource = Resources.createResource( 0, - YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES); + YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES, + YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_GCORES); ResourceRequest resReq = BuilderUtils.newResourceRequest( mock(Priority.class), ResourceRequest.ANY, resource, 1); SchedulerUtils.normalizeAndvalidateRequest(resReq, maxResource, "queue", @@ -344,7 +349,8 @@ public class TestSchedulerUtils { Resource resource = Resources.createResource( 0, - YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES); + YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES, + 
YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_GCORES); ResourceRequest resReq = BuilderUtils.newResourceRequest( mock(Priority.class), ResourceRequest.ANY, resource, 1); resReq.setNodeLabelExpression("x"); @@ -368,7 +374,8 @@ public class TestSchedulerUtils { Resource resource = Resources.createResource( 0, - YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES); + YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES, + YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_GCORES); ResourceRequest resReq = BuilderUtils.newResourceRequest( mock(Priority.class), ResourceRequest.ANY, resource, 1); resReq.setNodeLabelExpression("x"); @@ -398,7 +405,8 @@ public class TestSchedulerUtils { Resource resource = Resources.createResource( 0, - YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES); + YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES, + YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_GCORES); ResourceRequest resReq = BuilderUtils.newResourceRequest( mock(Priority.class), ResourceRequest.ANY, resource, 1); resReq.setNodeLabelExpression("x"); @@ -418,7 +426,8 @@ public class TestSchedulerUtils { Resource resource = Resources.createResource( 0, - YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES); + YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES, + YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_GCORES); ResourceRequest resReq = BuilderUtils.newResourceRequest( mock(Priority.class), "rack", resource, 1); resReq.setNodeLabelExpression("x"); @@ -443,7 +452,8 @@ public class TestSchedulerUtils { Resource resource = Resources.createResource( 0, - YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES); + YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES, + YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_GCORES); ResourceRequest resReq = BuilderUtils.newResourceRequest( mock(Priority.class), "rack", resource, 1); 
resReq.setNodeLabelExpression("x"); @@ -464,13 +474,15 @@ public class TestSchedulerUtils { Resource maxResource = Resources.createResource( YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_MB, - YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_VCORES); + YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_VCORES, + YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_GCORES); // zero memory try { Resource resource = Resources.createResource(0, - YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES); + YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES, + YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_GCORES); ResourceRequest resReq = BuilderUtils.newResourceRequest(mock(Priority.class), ResourceRequest.ANY, resource, 1); @@ -484,7 +496,24 @@ public class TestSchedulerUtils { try { Resource resource = Resources.createResource( - YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_MB, 0); + YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_MB, 0, + YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_GCORES); + ResourceRequest resReq = + BuilderUtils.newResourceRequest(mock(Priority.class), + ResourceRequest.ANY, resource, 1); + SchedulerUtils.normalizeAndvalidateRequest(resReq, maxResource, null, + mockScheduler, rmContext); + } catch (InvalidResourceRequestException e) { + fail("Zero vcores should be accepted"); + } + + // zero gcores + try { + Resource resource = + Resources.createResource( + YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_MB, + YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES, + 0); ResourceRequest resReq = BuilderUtils.newResourceRequest(mock(Priority.class), ResourceRequest.ANY, resource, 1); @@ -499,7 +528,8 @@ public class TestSchedulerUtils { Resource resource = Resources.createResource( YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_MB, - YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES); + 
YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES, + YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_GCORES); ResourceRequest resReq = BuilderUtils.newResourceRequest(mock(Priority.class), ResourceRequest.ANY, resource, 1); @@ -514,7 +544,8 @@ public class TestSchedulerUtils { Resource resource = Resources.createResource( YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_MB, - YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_VCORES); + YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_VCORES, + YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_GCORES); ResourceRequest resReq = BuilderUtils.newResourceRequest(mock(Priority.class), ResourceRequest.ANY, resource, 1); @@ -524,11 +555,28 @@ public class TestSchedulerUtils { fail("Max vcores should not be accepted"); } + // max gcores + try { + Resource resource = + Resources.createResource( + YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_MB, + YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES, + YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_GCORES); + ResourceRequest resReq = + BuilderUtils.newResourceRequest(mock(Priority.class), + ResourceRequest.ANY, resource, 1); + SchedulerUtils.normalizeAndvalidateRequest(resReq, maxResource, null, + mockScheduler, rmContext); + } catch (InvalidResourceRequestException e) { + fail("Max gcores should be accepted"); + } + // negative memory try { Resource resource = Resources.createResource(-1, - YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES); + YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES, + YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_GCORES); ResourceRequest resReq = BuilderUtils.newResourceRequest(mock(Priority.class), ResourceRequest.ANY, resource, 1); @@ -543,7 +591,8 @@ public class TestSchedulerUtils { try { Resource resource = Resources.createResource( - YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_MB, -1); +
YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_MB, -1, + YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_GCORES); ResourceRequest resReq = BuilderUtils.newResourceRequest(mock(Priority.class), ResourceRequest.ANY, resource, 1); @@ -554,12 +603,30 @@ public class TestSchedulerUtils { // expected } + // negative gcores + try { + Resource resource = + Resources.createResource( + YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_MB, + YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES, + -1); + ResourceRequest resReq = + BuilderUtils.newResourceRequest(mock(Priority.class), + ResourceRequest.ANY, resource, 1); + SchedulerUtils.normalizeAndvalidateRequest(resReq, maxResource, null, + mockScheduler, rmContext); + fail("Negative gcores should not be accepted"); + } catch (InvalidResourceRequestException e) { + // expected + } + // more than max memory try { Resource resource = Resources.createResource( YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_MB + 1, - YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES); + YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES, + YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_GCORES); ResourceRequest resReq = BuilderUtils.newResourceRequest(mock(Priority.class), ResourceRequest.ANY, resource, 1); @@ -576,7 +643,8 @@ public class TestSchedulerUtils { Resources .createResource( YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_MB, - YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_VCORES + 1); + YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_VCORES + 1, + YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_GCORES); ResourceRequest resReq = BuilderUtils.newResourceRequest(mock(Priority.class), ResourceRequest.ANY, resource, 1); @@ -586,6 +654,24 @@ public class TestSchedulerUtils { } catch (InvalidResourceRequestException e) { // expected } + + // more than max gcores + try { + Resource resource = + Resources + 
.createResource( + YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_MB, + YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES, + YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_GCORES + 1); + ResourceRequest resReq = + BuilderUtils.newResourceRequest(mock(Priority.class), + ResourceRequest.ANY, resource, 1); + SchedulerUtils.normalizeAndvalidateRequest(resReq, maxResource, null, + mockScheduler, rmContext); + fail("More than max gcores should not be accepted"); + } catch (InvalidResourceRequestException e) { + // expected + } } @Test @@ -705,7 +791,8 @@ public class TestSchedulerUtils { Resource maxResource = Resources.createResource( YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_MB, - YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_VCORES); + YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_VCORES, + YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_GCORES); // queue has labels, success cases try { @@ -716,7 +803,8 @@ public class TestSchedulerUtils { ImmutableSet.of("x", "y")); Resource resource = Resources.createResource( 0, - YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES); + YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES, + YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_GCORES); ResourceRequest resReq = BuilderUtils.newResourceRequest( mock(Priority.class), ResourceRequest.ANY, resource, 1); SchedulerUtils.normalizeAndvalidateRequest(resReq, maxResource, "queue", diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestChildQueueOrder.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestChildQueueOrder.java index 23b31fa..f0f2f4e 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestChildQueueOrder.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestChildQueueOrder.java @@ -88,11 +83,11 @@ public class TestChildQueueOrder { when(csContext.getConf()).thenReturn(conf); when(csContext.getConfiguration()).thenReturn(csConf); when(csContext.getMinimumResourceCapability()).thenReturn( - Resources.createResource(GB, 1)); + Resources.createResource(GB, 1, 1)); when(csContext.getMaximumResourceCapability()).thenReturn( - Resources.createResource(16*GB, 32)); + Resources.createResource(16*GB, 32, 32)); when(csContext.getClusterResource()). - thenReturn(Resources.createResource(100 * 16 * GB, 100 * 32)); + thenReturn(Resources.createResource(100 * 16 * GB, 100 * 32, 100 * 32)); when(csContext.getApplicationComparator()). thenReturn(CapacityScheduler.applicationComparator); when(csContext.getQueueComparator()). 
@@ -105,7 +100,7 @@ public class TestChildQueueOrder { private FiCaSchedulerApp getMockApplication(int appId, String user) { FiCaSchedulerApp application = mock(FiCaSchedulerApp.class); doReturn(user).when(application).getUser(); - doReturn(Resources.createResource(0, 0)).when(application).getHeadroom(); + doReturn(Resources.createResource(0, 0, 0)).when(application).getHeadroom(); return application; } @@ -233,7 +228,7 @@ public class TestChildQueueOrder { final Resource clusterResource = Resources.createResource(numNodes * (memoryPerNode*GB), - numNodes * coresPerNode); + numNodes * coresPerNode, numNodes * coresPerNode); when(csContext.getNumClusterNodes()).thenReturn(numNodes); // Start testing diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestContainerAllocation.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestContainerAllocation.java index cdf1fa4..5f4111e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestContainerAllocation.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestContainerAllocation.java @@ -92,8 +91,8 @@ public class TestContainerAllocation { rm.start(); // Register node1 - MockNM nm1 = rm.registerNode("127.0.0.1:1234", 2 * GB, 4); - MockNM nm2 = rm.registerNode("127.0.0.1:2234", 3 * GB, 4); + MockNM nm1 = rm.registerNode("127.0.0.1:1234", 2 * GB, 4, 4); + MockNM nm2 = rm.registerNode("127.0.0.1:2234", 3 * GB, 4, 4); nm1.nodeHeartbeat(true); nm2.nodeHeartbeat(true); diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestParentQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestParentQueue.java index 7da1c97..65da776 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestParentQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestParentQueue.java @@ -85,11 +76,11 @@ public class TestParentQueue { when(csContext.getConf()).thenReturn(conf); when(csContext.getConfiguration()).thenReturn(csConf); when(csContext.getMinimumResourceCapability()).thenReturn( - Resources.createResource(GB, 1)); + Resources.createResource(GB, 1, 1)); when(csContext.getMaximumResourceCapability()).thenReturn( - Resources.createResource(16*GB, 32)); + Resources.createResource(16*GB, 32, 32)); when(csContext.getClusterResource()). - thenReturn(Resources.createResource(100 * 16 * GB, 100 * 32)); + thenReturn(Resources.createResource(100 * 16 * GB, 100 * 32, 100 * 32)); when(csContext.getApplicationComparator()). thenReturn(CapacityScheduler.applicationComparator); when(csContext.getQueueComparator()). 
@@ -118,7 +109,7 @@ public class TestParentQueue { private FiCaSchedulerApp getMockApplication(int appId, String user) { FiCaSchedulerApp application = mock(FiCaSchedulerApp.class); doReturn(user).when(application).getUser(); - doReturn(Resources.createResource(0, 0)).when(application).getHeadroom(); + doReturn(Resources.createResource(0, 0, 0)).when(application).getHeadroom(); return application; } @@ -221,7 +212,7 @@ public class TestParentQueue { final Resource clusterResource = Resources.createResource(numNodes * (memoryPerNode*GB), - numNodes * coresPerNode); + numNodes * coresPerNode, numNodes * coresPerNode); when(csContext.getNumClusterNodes()).thenReturn(numNodes); // Start testing @@ -443,7 +434,7 @@ public class TestParentQueue { final Resource clusterResource = Resources.createResource(numNodes * (memoryPerNode*GB), - numNodes * coresPerNode); + numNodes * coresPerNode, numNodes * coresPerNode); when(csContext.getNumClusterNodes()).thenReturn(numNodes); // Start testing @@ -616,7 +607,7 @@ public class TestParentQueue { final Resource clusterResource = Resources.createResource(numNodes * (memoryPerNode*GB), - numNodes * coresPerNode); + numNodes * coresPerNode, numNodes * coresPerNode); when(csContext.getNumClusterNodes()).thenReturn(numNodes); // Start testing @@ -685,7 +676,7 @@ public class TestParentQueue { final Resource clusterResource = Resources.createResource(numNodes * (memoryPerNode*GB), - numNodes * coresPerNode); + numNodes * coresPerNode, numNodes * coresPerNode); when(csContext.getNumClusterNodes()).thenReturn(numNodes); // Start testing diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestReservationQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestReservationQueue.java index 
4e6c73d..c41a3ce 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestReservationQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestReservationQueue.java @@ -53,11 +53,11 @@ public class TestReservationQueue { when(csContext.getConfiguration()).thenReturn(csConf); when(csContext.getConf()).thenReturn(conf); when(csContext.getMinimumResourceCapability()).thenReturn( - Resources.createResource(GB, 1)); + Resources.createResource(GB, 1, 1)); when(csContext.getMaximumResourceCapability()).thenReturn( - Resources.createResource(16 * GB, 32)); + Resources.createResource(16 * GB, 32, 32)); when(csContext.getClusterResource()).thenReturn( - Resources.createResource(100 * 16 * GB, 100 * 32)); + Resources.createResource(100 * 16 * GB, 100 * 32, 100 * 32)); when(csContext.getResourceCalculator()).thenReturn(resourceCalculator); RMContext mockRMContext = TestUtils.getMockRMContext(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairSchedulerTestBase.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairSchedulerTestBase.java index 00dbfab..4ce3747 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairSchedulerTestBase.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairSchedulerTestBase.java @@ -97,15 +97,15 @@ public class 
FairSchedulerTestBase { protected ResourceRequest createResourceRequest( int memory, String host, int priority, int numContainers, boolean relaxLocality) { - return createResourceRequest(memory, 1, host, priority, numContainers, + return createResourceRequest(memory, 1, 0, host, priority, numContainers, relaxLocality); } protected ResourceRequest createResourceRequest( - int memory, int vcores, String host, int priority, int numContainers, + int memory, int vcores, int gcores, String host, int priority, int numContainers, boolean relaxLocality) { ResourceRequest request = recordFactory.newRecordInstance(ResourceRequest.class); - request.setCapability(BuilderUtils.newResource(memory, vcores)); + request.setCapability(BuilderUtils.newResource(memory, vcores, gcores)); request.setResourceName(host); request.setNumContainers(numContainers); Priority prio = recordFactory.newRecordInstance(Priority.class); @@ -126,8 +126,8 @@ public class FairSchedulerTestBase { } protected ApplicationAttemptId createSchedulingRequest( - int memory, int vcores, String queueId, String userId) { - return createSchedulingRequest(memory, vcores, queueId, userId, 1); + int memory, int vcores, int gcores, String queueId, String userId) { + return createSchedulingRequest(memory, vcores, gcores, queueId, userId, 1); } protected ApplicationAttemptId createSchedulingRequest( @@ -136,18 +136,18 @@ public class FairSchedulerTestBase { } protected ApplicationAttemptId createSchedulingRequest( - int memory, int vcores, String queueId, String userId, int numContainers) { - return createSchedulingRequest(memory, vcores, queueId, userId, numContainers, 1); + int memory, int vcores, int gcores, String queueId, String userId, int numContainers) { + return createSchedulingRequest(memory, vcores, gcores, queueId, userId, numContainers, 1); } protected ApplicationAttemptId createSchedulingRequest( int memory, String queueId, String userId, int numContainers, int priority) { - return 
createSchedulingRequest(memory, 1, queueId, userId, numContainers, + return createSchedulingRequest(memory, 1, 0, queueId, userId, numContainers, priority); } protected ApplicationAttemptId createSchedulingRequest( - int memory, int vcores, String queueId, String userId, int numContainers, + int memory, int vcores, int gcores, String queueId, String userId, int numContainers, int priority) { ApplicationAttemptId id = createAppAttemptId(this.APP_ID++, this.ATTEMPT_ID++); scheduler.addApplication(id.getApplicationId(), queueId, userId, false); @@ -157,7 +157,7 @@ public class FairSchedulerTestBase { scheduler.addApplicationAttempt(id, false, false); } List ask = new ArrayList(); - ResourceRequest request = createResourceRequest(memory, vcores, ResourceRequest.ANY, + ResourceRequest request = createResourceRequest(memory, vcores, gcores, ResourceRequest.ANY, priority, numContainers, true); ask.add(request); @@ -204,8 +204,8 @@ public class FairSchedulerTestBase { } protected void createSchedulingRequestExistingApplication( - int memory, int vcores, int priority, ApplicationAttemptId attId) { - ResourceRequest request = createResourceRequest(memory, vcores, ResourceRequest.ANY, + int memory, int vcores, int gcores, int priority, ApplicationAttemptId attId) { + ResourceRequest request = createResourceRequest(memory, vcores, gcores, ResourceRequest.ANY, priority, 1, true); createSchedulingRequestExistingApplication(request, attId); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FakeSchedulable.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FakeSchedulable.java index 5a170cf..5725389 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FakeSchedulable.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FakeSchedulable.java @@ -59,14 +59,14 @@ public class FakeSchedulable implements Schedulable { public FakeSchedulable(int minShare, int maxShare, double weight, int fairShare, int usage, long startTime) { - this(Resources.createResource(minShare, 0), Resources.createResource(maxShare, 0), - new ResourceWeights((float)weight), Resources.createResource(fairShare, 0), - Resources.createResource(usage, 0), startTime); + this(Resources.createResource(minShare, 0, 0), Resources.createResource(maxShare, 0, 0), + new ResourceWeights((float)weight), Resources.createResource(fairShare, 0, 0), + Resources.createResource(usage, 0, 0), startTime); } public FakeSchedulable(Resource minShare, ResourceWeights weights) { - this(minShare, Resources.createResource(Integer.MAX_VALUE, Integer.MAX_VALUE), - weights, Resources.createResource(0, 0), Resources.createResource(0, 0), 0); + this(minShare, Resources.createResource(Integer.MAX_VALUE, Integer.MAX_VALUE, Integer.MAX_VALUE), + weights, Resources.createResource(0, 0, 0), Resources.createResource(0, 0, 0), 0); } public FakeSchedulable(Resource minShare, Resource maxShare, diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestAllocationFileLoaderService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestAllocationFileLoaderService.java index 3c166a5..204b7a1 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestAllocationFileLoaderService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestAllocationFileLoaderService.java @@ -163,11 +163,11 @@ public class TestAllocationFileLoaderService { out.println(""); // Give queue A a minimum of 1024 M out.println(""); - out.println("1024mb,0vcores"); + out.println("1024mb,0vcores,0gcores"); out.println(""); // Give queue B a minimum of 2048 M out.println(""); - out.println("2048mb,0vcores"); + out.println("2048mb,0vcores,0gcores"); out.println("alice,bob admins"); out.println("fair"); out.println(""); @@ -233,9 +233,9 @@ public class TestAllocationFileLoaderService { assertEquals(Resources.createResource(0), queueConf.getMinResources("root." + YarnConfiguration.DEFAULT_QUEUE_NAME)); - assertEquals(Resources.createResource(1024, 0), + assertEquals(Resources.createResource(1024, 0, 0), queueConf.getMinResources("root.queueA")); - assertEquals(Resources.createResource(2048, 0), + assertEquals(Resources.createResource(2048, 0, 0), queueConf.getMinResources("root.queueB")); assertEquals(Resources.createResource(0), queueConf.getMinResources("root.queueC")); @@ -356,11 +356,11 @@ public class TestAllocationFileLoaderService { out.println(""); // Give queue A a minimum of 1024 M out.println(""); - out.println("1024mb,0vcores"); + out.println("1024mb,0vcores,0gcores"); out.println(""); // Give queue B a minimum of 2048 M out.println(""); - out.println("2048mb,0vcores"); + out.println("2048mb,0vcores,0gcores"); out.println("alice,bob admins"); out.println(""); // Give queue C no minimum @@ -406,9 +406,9 @@ public class TestAllocationFileLoaderService { assertEquals(Resources.createResource(0), queueConf.getMinResources("root." 
+ YarnConfiguration.DEFAULT_QUEUE_NAME)); - assertEquals(Resources.createResource(1024, 0), + assertEquals(Resources.createResource(1024, 0, 0), queueConf.getMinResources("root.queueA")); - assertEquals(Resources.createResource(2048, 0), + assertEquals(Resources.createResource(2048, 0, 0), queueConf.getMinResources("root.queueB")); assertEquals(Resources.createResource(0), queueConf.getMinResources("root.queueC")); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestComputeFairShares.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestComputeFairShares.java index 9d8dd07..d30c831 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestComputeFairShares.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestComputeFairShares.java @@ -177,18 +176,36 @@ public class TestComputeFairShares { */ @Test public void testCPU() { - scheds.add(new FakeSchedulable(Resources.createResource(0, 20), + scheds.add(new FakeSchedulable(Resources.createResource(0, 20, 0), new ResourceWeights(2.0f))); - scheds.add(new FakeSchedulable(Resources.createResource(0, 0), + scheds.add(new FakeSchedulable(Resources.createResource(0, 0, 0), new ResourceWeights(1.0f))); - scheds.add(new FakeSchedulable(Resources.createResource(0, 5), + scheds.add(new FakeSchedulable(Resources.createResource(0, 5, 0), new ResourceWeights(1.0f))); - scheds.add(new FakeSchedulable(Resources.createResource(0, 15), + scheds.add(new FakeSchedulable(Resources.createResource(0, 15, 0), new ResourceWeights(0.5f))); 
ComputeFairShares.computeShares(scheds, - Resources.createResource(0, 45), ResourceType.CPU); + Resources.createResource(0, 45, 0), ResourceType.CPU); verifyCPUShares(20, 5, 5, 15); } + + /** + * Test that GPU works as well as memory + */ + @Test + public void testGPU() { + scheds.add(new FakeSchedulable(Resources.createResource(0, 0, 20), + new ResourceWeights(2.0f))); + scheds.add(new FakeSchedulable(Resources.createResource(0, 0, 0), + new ResourceWeights(1.0f))); + scheds.add(new FakeSchedulable(Resources.createResource(0, 0, 5), + new ResourceWeights(1.0f))); + scheds.add(new FakeSchedulable(Resources.createResource(0, 0, 15), + new ResourceWeights(0.5f))); + ComputeFairShares.computeShares(scheds, + Resources.createResource(0, 0, 45), ResourceType.GPU); + verifyGPUShares(20, 5, 5, 15); + } /** * Check that a given list of shares have been assigned to this.scheds. @@ -209,4 +226,14 @@ public class TestComputeFairShares { Assert.assertEquals(shares[i], scheds.get(i).getFairShare().getVirtualCores()); } } + + /** + * Check that a given list of shares have been assigned to this.scheds. + */ + private void verifyGPUShares(int... 
shares) { + Assert.assertEquals(scheds.size(), shares.length); + for (int i = 0; i < shares.length; i ++) { + Assert.assertEquals(shares[i], scheds.get(i).getFairShare().getGpuCores()); + } + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestContinuousScheduling.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestContinuousScheduling.java index a72e393..488f885 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestContinuousScheduling.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestContinuousScheduling.java @@ -87,7 +81,7 @@ public class TestContinuousScheduling extends FairSchedulerTestBase { // Add one node String host = "127.0.0.1"; RMNode node1 = MockNodes.newNodeInfo( - 1, Resources.createResource(4096, 4), 1, host); + 1, Resources.createResource(4096, 4, 4), 1, host); NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node1); scheduler.handle(nodeEvent1); NodeUpdateSchedulerEvent nodeUpdateEvent = new NodeUpdateSchedulerEvent(node1); @@ -102,7 +96,7 @@ public class TestContinuousScheduling extends FairSchedulerTestBase { scheduler.addApplication(appAttemptId.getApplicationId(), "queue11", "user11", false); scheduler.addApplicationAttempt(appAttemptId, false, false); List ask = new ArrayList<>(); - ask.add(createResourceRequest(1024, 1, ResourceRequest.ANY, 1, 1, true)); + ask.add(createResourceRequest(1024, 1, 1, ResourceRequest.ANY, 1, 1, true)); scheduler.allocate( appAttemptId, ask, new ArrayList(), null, null); FSAppAttempt app = 
scheduler.getSchedulerApp(appAttemptId); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFSAppAttempt.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFSAppAttempt.java index 43fe186..53aa1f2 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFSAppAttempt.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFSAppAttempt.java @@ -199,17 +199,17 @@ public class TestFSAppAttempt extends FairSchedulerTestBase { final FSLeafQueue mockQueue = Mockito.mock(FSLeafQueue.class); - final Resource queueMaxResources = Resource.newInstance(5 * 1024, 3); - final Resource queueFairShare = Resources.createResource(4096, 2); - final Resource queueUsage = Resource.newInstance(2048, 2); + final Resource queueMaxResources = Resource.newInstance(5 * 1024, 3, 3); + final Resource queueFairShare = Resources.createResource(4096, 2, 2); + final Resource queueUsage = Resource.newInstance(2048, 2, 2); final Resource queueStarvation = Resources.subtract(queueFairShare, queueUsage); final Resource queueMaxResourcesAvailable = Resources.subtract(queueMaxResources, queueUsage); - final Resource clusterResource = Resources.createResource(8192, 8); - final Resource clusterUsage = Resources.createResource(2048, 2); + final Resource clusterResource = Resources.createResource(8192, 8, 8); + final Resource clusterUsage = Resources.createResource(2048, 2, 2); final Resource clusterAvailable = Resources.subtract(clusterResource, clusterUsage); diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFSLeafQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFSLeafQueue.java index 385ea0b..4dad511 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFSLeafQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFSLeafQueue.java @@ -121,7 +118,7 @@ public class TestFSLeafQueue extends FairSchedulerTestBase { // Add one big node (only care about aggregate capacity) RMNode node1 = - MockNodes.newNodeInfo(1, Resources.createResource(4 * 1024, 4), 1, + MockNodes.newNodeInfo(1, Resources.createResource(4 * 1024, 4, 4), 1, "127.0.0.1"); NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node1); scheduler.handle(nodeEvent1); @@ -182,7 +179,7 @@ public class TestFSLeafQueue extends FairSchedulerTestBase { // Add one big node (only care about aggregate capacity) RMNode node1 = - MockNodes.newNodeInfo(1, Resources.createResource(10 * 1024, 10), 1, + MockNodes.newNodeInfo(1, Resources.createResource(10 * 1024, 10, 10), 1, "127.0.0.1"); NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node1); scheduler.handle(nodeEvent1); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairSchedulerConfiguration.java 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairSchedulerConfiguration.java index 82b50a6..ebee5f5 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairSchedulerConfiguration.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairSchedulerConfiguration.java @@ -31,20 +31,22 @@ public class TestFairSchedulerConfiguration { @Test public void testParseResourceConfigValue() throws Exception { - assertEquals(BuilderUtils.newResource(1024, 2), - parseResourceConfigValue("2 vcores, 1024 mb")); - assertEquals(BuilderUtils.newResource(1024, 2), - parseResourceConfigValue("1024 mb, 2 vcores")); - assertEquals(BuilderUtils.newResource(1024, 2), - parseResourceConfigValue("2vcores,1024mb")); - assertEquals(BuilderUtils.newResource(1024, 2), - parseResourceConfigValue("1024mb,2vcores")); - assertEquals(BuilderUtils.newResource(1024, 2), - parseResourceConfigValue("1024 mb, 2 vcores")); - assertEquals(BuilderUtils.newResource(1024, 2), - parseResourceConfigValue("1024 Mb, 2 vCores")); - assertEquals(BuilderUtils.newResource(1024, 2), - parseResourceConfigValue(" 1024 mb, 2 vcores ")); + assertEquals(BuilderUtils.newResource(1024, 2, 4), + parseResourceConfigValue("2 vcores, 1024 mb, 4 gcores")); + assertEquals(BuilderUtils.newResource(1024, 2, 4), + parseResourceConfigValue("1024 mb, 2 vcores, 4 gcores")); + assertEquals(BuilderUtils.newResource(1024, 2, 4), + parseResourceConfigValue("4 gcores, 1024 mb, 2 vcores")); + assertEquals(BuilderUtils.newResource(1024, 2, 4), + parseResourceConfigValue("2vcores,1024mb,4gcores")); + assertEquals(BuilderUtils.newResource(1024, 2, 2), + 
parseResourceConfigValue("1024mb,2vcores,2gcores")); + assertEquals(BuilderUtils.newResource(1024, 2, 2), + parseResourceConfigValue("1024 mb, 2 vcores, 2 gcores")); + assertEquals(BuilderUtils.newResource(1024, 2, 2), + parseResourceConfigValue("1024 Mb, 2 vCores, 2 gCores")); + assertEquals(BuilderUtils.newResource(1024, 2, 2), + parseResourceConfigValue(" 1024 mb, 2 vcores, 2 gcores ")); } @Test(expected = AllocationConfigurationException.class) @@ -61,7 +58,12 @@ public class TestFairSchedulerConfiguration { public void testOnlyCPU() throws Exception { parseResourceConfigValue("1024vcores"); } - + + @Test(expected = AllocationConfigurationException.class) + public void testOnlyGPU() throws Exception { + parseResourceConfigValue("1024gcores"); + } + @Test(expected = AllocationConfigurationException.class) public void testGibberish() throws Exception { parseResourceConfigValue("1o24vc0res"); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairSchedulerFairShare.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairSchedulerFairShare.java index ab8fcbc..a0b60f2 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairSchedulerFairShare.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairSchedulerFairShare.java @@ -59,10 +59,10 @@ public class TestFairSchedulerFairShare extends FairSchedulerTestBase { private void createClusterWithQueuesAndOneNode(int mem, String policy) throws IOException { - createClusterWithQueuesAndOneNode(mem, 0, policy); + 
createClusterWithQueuesAndOneNode(mem, 0, 0, policy); } - private void createClusterWithQueuesAndOneNode(int mem, int vCores, + private void createClusterWithQueuesAndOneNode(int mem, int vCores, int gCores, String policy) throws IOException { PrintWriter out = new PrintWriter(new FileWriter(ALLOC_FILE)); out.println(""); @@ -91,7 +91,7 @@ public class TestFairSchedulerFairShare extends FairSchedulerTestBase { scheduler = (FairScheduler) resourceManager.getResourceScheduler(); RMNode node1 = MockNodes.newNodeInfo(1, - Resources.createResource(mem, vCores), 1, "127.0.0.1"); + Resources.createResource(mem, vCores, gCores), 1, "127.0.0.1"); NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node1); scheduler.handle(nodeEvent1); } @@ -272,7 +272,8 @@ public class TestFairSchedulerFairShare extends FairSchedulerTestBase { throws IOException { int nodeMem = 16 * 1024; int nodeVCores = 10; - createClusterWithQueuesAndOneNode(nodeMem, nodeVCores, "drf"); + int nodeGCores = 10; + createClusterWithQueuesAndOneNode(nodeMem, nodeVCores, nodeGCores, "drf"); // Run apps in childA1,childA2 which are under parentA createSchedulingRequest(2 * 1024, "root.parentA.childA1", "user1"); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairSchedulerPreemption.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairSchedulerPreemption.java index 458b06d..2ff3fe5 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairSchedulerPreemption.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairSchedulerPreemption.java @@ -97,9 +97,9 @@ public class TestFairSchedulerPreemption extends FairSchedulerTestBase { } private void registerNodeAndSubmitApp( - int memory, int vcores, int appContainers, int appMemory) { + int memory, int vcores, int gcores, int appContainers, int appMemory) { RMNode node1 = MockNodes.newNodeInfo( - 1, Resources.createResource(memory, vcores), 1, "node1"); + 1, Resources.createResource(memory, vcores, gcores), 1, "node1"); NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node1); scheduler.handle(nodeEvent1); @@ -143,7 +143,7 @@ public class TestFairSchedulerPreemption extends FairSchedulerTestBase { startResourceManager(0f); // Create node with 4GB memory and 4 vcores - registerNodeAndSubmitApp(4 * 1024, 4, 2, 1024); + registerNodeAndSubmitApp(4 * 1024, 4, 4, 2, 1024); // Verify submitting another request triggers preemption createSchedulingRequest(1024, "queueB", "user1", 1, 1); @@ -159,7 +159,7 @@ public class TestFairSchedulerPreemption extends FairSchedulerTestBase { startResourceManager(0.8f); // Create node with 4GB memory and 4 vcores - registerNodeAndSubmitApp(4 * 1024, 4, 3, 1024); + registerNodeAndSubmitApp(4 * 1024, 4, 4, 3, 1024); // Verify submitting another request doesn't trigger preemption createSchedulingRequest(1024, "queueB", "user1", 1, 1); @@ -175,7 +175,7 @@ public class TestFairSchedulerPreemption extends FairSchedulerTestBase { startResourceManager(0.7f); // Create node with 4GB memory and 4 vcores - registerNodeAndSubmitApp(4 * 1024, 4, 3, 1024); + registerNodeAndSubmitApp(4 * 1024, 4, 4, 3, 1024); // Verify submitting another request triggers preemption createSchedulingRequest(1024, "queueB", "user1", 1, 1); diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/TestFifoScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/TestFifoScheduler.java index b662ef1..f6275fe 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/TestFifoScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/TestFifoScheduler.java @@ -288,7 +288,7 @@ public class TestFifoScheduler { scheduler.start(); scheduler.reinitialize(new Configuration(), rmContext); RMNode node0 = MockNodes.newNodeInfo(1, - Resources.createResource(2048, 4), 1, "127.0.0.1"); + Resources.createResource(2048, 4, 4), 1, "127.0.0.1"); NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node0); scheduler.handle(nodeEvent1); @@ -298,7 +298,7 @@ public class TestFifoScheduler { (Map) method.invoke(scheduler); assertEquals(schedulerNodes.values().size(), 1); - Resource newResource = Resources.createResource(1024, 4); + Resource newResource = Resources.createResource(1024, 4, 4); NodeResourceUpdateSchedulerEvent node0ResourceUpdate = new NodeResourceUpdateSchedulerEvent(node0, ResourceOption.newInstance( @@ -370,14 +370,14 @@ public class TestFifoScheduler { String host_0 = "host_0"; org.apache.hadoop.yarn.server.resourcemanager.NodeManager nm_0 = registerNode(host_0, 1234, 2345, NetworkTopology.DEFAULT_RACK, - Resources.createResource(4 * GB, 1)); + Resources.createResource(4 * GB, 1, 1)); nm_0.heartbeat(); // Register node2 String host_1 = "host_1"; org.apache.hadoop.yarn.server.resourcemanager.NodeManager nm_1 = registerNode(host_1, 1234, 2345, 
NetworkTopology.DEFAULT_RACK, - Resources.createResource(2 * GB, 1)); + Resources.createResource(2 * GB, 1, 1)); nm_1.heartbeat(); // ResourceRequest priorities diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/security/TestDelegationTokenRenewer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/security/TestDelegationTokenRenewer.java index f2c0a30..7f6579a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/security/TestDelegationTokenRenewer.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/security/TestDelegationTokenRenewer.java @@ -844,7 +839,7 @@ public class TestDelegationTokenRenewer { ApplicationSubmissionContext.newInstance( ApplicationId.newInstance(1234121, 0), "BOGUS", "default", Priority.UNDEFINED, amContainer, false, - true, 1, Resource.newInstance(1024, 1), "BOGUS"); + true, 1, Resource.newInstance(1024, 1, 1), "BOGUS"); SubmitApplicationRequest request = SubmitApplicationRequest.newInstance(appSubContext); try { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestAppPage.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestAppPage.java index 8c7b14d..0a68bf9 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestAppPage.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestAppPage.java @@ -64,7 +63,7 @@ public class TestAppPage { when(app.getFinishTime()).thenReturn(0L); when(app.createApplicationState()).thenReturn(YarnApplicationState.FAILED); - RMAppMetrics appMetrics = new RMAppMetrics(Resource.newInstance(0, 0), 0, 0, 0, 0); + RMAppMetrics appMetrics = new RMAppMetrics(Resource.newInstance(0, 0, 0), 0, 0, 0, 0, 0); when(app.getRMAppMetrics()).thenReturn(appMetrics); // initialize RM Context, and create RMApp, without creating RMAppAttempt diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebAppFairScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebAppFairScheduler.java index 06fa0d4..8d17690 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebAppFairScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebAppFairScheduler.java @@ -142,7 +142,7 @@ public class TestRMWebAppFairScheduler { MockRMApp app = new MockRMApp(i, i, state) { @Override public RMAppMetrics getRMAppMetrics() { - return new RMAppMetrics(Resource.newInstance(0, 0), 0, 0, 0, 0); + return new RMAppMetrics(Resource.newInstance(0, 0, 0), 0, 0, 0, 0, 0); } @Override public YarnApplicationState createApplicationState() { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesAppsModification.java 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesAppsModification.java index 8e5e601..b036aa0 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesAppsModification.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesAppsModification.java @@ -780,6 +785,7 @@ public class TestRMWebServicesAppsModification extends JerseyTestBase { appInfo.getContainerLaunchContextInfo().setCredentials(credentials); appInfo.getResource().setMemory(1024); appInfo.getResource().setvCores(1); + appInfo.getResource().setgCores(1); appInfo.setApplicationTags(tags); ClientResponse response = @@ -878,6 +884,7 @@ public class TestRMWebServicesAppsModification extends JerseyTestBase { YarnConfiguration.RM_SCHEDULER_MAXIMUM_ALLOCATION_MB, YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_MB) + 1); appInfo.getResource().setvCores(1); + appInfo.getResource().setgCores(1); response = this.constructWebResource(urlPath).accept(acceptMedia) .entity(appInfo, contentMedia).post(ClientResponse.class); @@ -888,6 +895,10 @@ public class TestRMWebServicesAppsModification extends JerseyTestBase { rm.getConfig().getInt( YarnConfiguration.RM_SCHEDULER_MAXIMUM_ALLOCATION_VCORES, YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_VCORES) + 1); + appInfo.getResource().setgCores( + rm.getConfig().getInt( + YarnConfiguration.RM_SCHEDULER_MAXIMUM_ALLOCATION_GCORES, + YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_GCORES) + 1); appInfo.getResource().setMemory(CONTAINER_MB); response = this.constructWebResource(urlPath).accept(acceptMedia) @@ -923,6 +934,7 @@ public class TestRMWebServicesAppsModification extends 
JerseyTestBase { appInfo.getContainerLaunchContextInfo().setResources(lr); appInfo.getResource().setMemory(1024); appInfo.getResource().setvCores(1); + appInfo.getResource().setgCores(1); String body = "0 151730 103 + 103 application_1326815542473_0002 @@ -1162,6 +1165,7 @@ Response Body: 0 640064 442 + 442 ``` @@ -1360,7 +1364,8 @@ Response Body: "trackingUrl" : "http://host.domain.com:8088/proxy/application_1326821518301_0005/jobhistory/job/job_1326821518301_5_5", "queue" : "a1", "memorySeconds" : 151730, - "vcoreSeconds" : 103 + "vcoreSeconds" : 103, + "gcoreSeconds" : 103 } } ``` @@ -1402,6 +1407,7 @@ Response Body: host.domain.com:8042 151730 103 + 103 ``` diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/resource/ResourceWeights.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/resource/ResourceWeights.java index d6e9e45..540b54c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/resource/ResourceWeights.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/resource/ResourceWeights.java @@ -29,6 +29,12 @@ private float[] weights = new float[ResourceType.values().length]; + public ResourceWeights(float memoryWeight, float cpuWeight, float gpuWeight) { + weights[ResourceType.MEMORY.ordinal()] = memoryWeight; + weights[ResourceType.CPU.ordinal()] = cpuWeight; + weights[ResourceType.GPU.ordinal()] = gpuWeight; + } + public ResourceWeights(float memoryWeight, float cpuWeight) { weights[ResourceType.MEMORY.ordinal()] = memoryWeight; weights[ResourceType.CPU.ordinal()] = cpuWeight; @@ -69,3 +75,4 @@ public String toString() { return sb.toString(); } } 
+ diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationMasterService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationMasterService.java index d1f0ede..9404825 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationMasterService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationMasterService.java @@ -286,10 +286,10 @@ public void testResourceTypes() throws Exception { driver.put(conf, EnumSet.of(SchedulerResourceTypes.MEMORY)); driver.put(testCapacityDRConf, - EnumSet.of(SchedulerResourceTypes.CPU, SchedulerResourceTypes.MEMORY)); + EnumSet.of(SchedulerResourceTypes.CPU, SchedulerResourceTypes.MEMORY, SchedulerResourceTypes.GPU)); driver.put(testCapacityDefConf, EnumSet.of(SchedulerResourceTypes.MEMORY)); driver.put(testFairDefConf, - EnumSet.of(SchedulerResourceTypes.MEMORY, SchedulerResourceTypes.CPU)); + EnumSet.of(SchedulerResourceTypes.MEMORY, SchedulerResourceTypes.CPU, SchedulerResourceTypes.GPU)); for (Map.Entry> entry : driver .entrySet()) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockNM.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockNM.java index c917f79..17e769b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockNM.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockNM.java @@ -50,6 +50,7 @@ private NodeId nodeId; private final int memory; private final int vCores; + private final int gCores; private ResourceTrackerService resourceTracker; private int httpPort = 2; private MasterKey currentContainerTokenMasterKey; @@ -66,13 +67,18 @@ public MockNM(String nodeIdStr, int memory, ResourceTrackerService resourceTrack public MockNM(String nodeIdStr, int memory, int vcores, ResourceTrackerService resourceTracker) { - this(nodeIdStr, memory, vcores, resourceTracker, YarnVersionInfo.getVersion()); + this(nodeIdStr, memory, vcores, 0, resourceTracker); } - public MockNM(String nodeIdStr, int memory, int vcores, + public MockNM(String nodeIdStr, int memory, int vcores, int gcores, ResourceTrackerService resourceTracker) { + this(nodeIdStr, memory, vcores, gcores, resourceTracker, YarnVersionInfo.getVersion()); + } + + public MockNM(String nodeIdStr, int memory, int vcores, int gcores, ResourceTrackerService resourceTracker, String version) { this.memory = memory; this.vCores = vcores; + this.gCores = gcores; this.resourceTracker = resourceTracker; this.version = version; String[] splits = nodeIdStr.split(":"); @@ -119,7 +125,7 @@ public RegisterNodeManagerResponse registerNode( RegisterNodeManagerRequest.class); req.setNodeId(nodeId); req.setHttpPort(httpPort); - Resource resource = BuilderUtils.newResource(memory, vCores); + Resource resource = BuilderUtils.newResource(memory, vCores, gCores); req.setResource(resource); req.setContainerStatuses(containerReports); req.setNMVersion(version); @@ -202,4 +208,8 @@ public int getMemory() { public int getvCores() { return vCores; } + + public int getgCores() { + return gCores; + } } diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestNodesPage.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestNodesPage.java index 4e10a2b..edcff5c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestNodesPage.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestNodesPage.java @@ -48,8 +48,8 @@ // Number of Actual Table Headers for NodesPage.NodesBlock might change in // future. In that case this value should be adjusted to the new value. - final int numberOfThInMetricsTable = 20; - final int numberOfActualTableHeaders = 13; + final int numberOfThInMetricsTable = 23; + final int numberOfActualTableHeaders = 15; private Injector injector; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServices.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServices.java index 298246c..ae5c196 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServices.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServices.java @@ -401,6 +401,10 @@ public void verifyClusterMetricsXML(String xml) throws JSONException, WebServicesTestUtils.getXmlInt(element, 
"availableVirtualCores"), WebServicesTestUtils.getXmlInt(element, "allocatedVirtualCores"), WebServicesTestUtils.getXmlInt(element, "totalVirtualCores"), + WebServicesTestUtils.getXmlInt(element, "reservedGpuCores"), + WebServicesTestUtils.getXmlInt(element, "availableGpuCores"), + WebServicesTestUtils.getXmlInt(element, "allocatedGpuCores"), + WebServicesTestUtils.getXmlInt(element, "totalGpuCores"), WebServicesTestUtils.getXmlInt(element, "containersAllocated"), WebServicesTestUtils.getXmlInt(element, "totalMB"), WebServicesTestUtils.getXmlInt(element, "totalNodes"), @@ -416,13 +420,15 @@ public void verifyClusterMetricsJSON(JSONObject json) throws JSONException, Exception { assertEquals("incorrect number of elements", 1, json.length()); JSONObject clusterinfo = json.getJSONObject("clusterMetrics"); - assertEquals("incorrect number of elements", 23, clusterinfo.length()); + assertEquals("incorrect number of elements", 27, clusterinfo.length()); verifyClusterMetrics( clusterinfo.getInt("appsSubmitted"), clusterinfo.getInt("appsCompleted"), clusterinfo.getInt("reservedMB"), clusterinfo.getInt("availableMB"), clusterinfo.getInt("allocatedMB"), clusterinfo.getInt("reservedVirtualCores"), clusterinfo.getInt("availableVirtualCores"), clusterinfo.getInt("allocatedVirtualCores"), clusterinfo.getInt("totalVirtualCores"), + clusterinfo.getInt("reservedGpuCores"), clusterinfo.getInt("availableGpuCores"), + clusterinfo.getInt("allocatedGpuCores"), clusterinfo.getInt("totalGpuCores"), clusterinfo.getInt("containersAllocated"), clusterinfo.getInt("totalMB"), clusterinfo.getInt("totalNodes"), clusterinfo.getInt("lostNodes"), clusterinfo.getInt("unhealthyNodes"), @@ -434,6 +440,8 @@ public void verifyClusterMetrics(int submittedApps, int completedApps, int reservedMB, int availableMB, int allocMB, int reservedVirtualCores, int availableVirtualCores, int allocVirtualCores, int totalVirtualCores, + int reservedGpuCores, int availableGpuCores, + int allocGpuCores, int 
totalGpuCores, int containersAlloc, int totalMB, int totalNodes, int lostNodes, int unhealthyNodes, int decommissionedNodes, int rebootedNodes, int activeNodes) throws JSONException, Exception { @@ -446,6 +454,8 @@ public void verifyClusterMetrics(int submittedApps, int completedApps, metrics.getAvailableMB() + metrics.getAllocatedMB(); long totalVirtualCoresExpect = metrics.getAvailableVirtualCores() + metrics.getAllocatedVirtualCores(); + long totalGpuCoresExpect = + metrics.getAvailableGpuCores() + metrics.getAllocatedGpuCores(); assertEquals("appsSubmitted doesn't match", metrics.getAppsSubmitted(), submittedApps); assertEquals("appsCompleted doesn't match", @@ -462,6 +472,12 @@ public void verifyClusterMetrics(int submittedApps, int completedApps, metrics.getAvailableVirtualCores(), availableVirtualCores); assertEquals("allocatedVirtualCores doesn't match", totalVirtualCoresExpect, allocVirtualCores); + assertEquals("reservedGpuCores doesn't match", + metrics.getReservedGpuCores(), reservedGpuCores); + assertEquals("availableGpuCores doesn't match", + metrics.getAvailableGpuCores(), availableGpuCores); + assertEquals("allocatedGpuCores doesn't match", + totalGpuCoresExpect, allocGpuCores); assertEquals("containersAllocated doesn't match", 0, containersAlloc); assertEquals("totalMB doesn't match", totalMBExpect, totalMB); assertEquals( @@ -642,3 +658,4 @@ public void testAppsRace() throws Exception { assertTrue(appsInfo.getApps().isEmpty()); } } + diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesApps.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesApps.java index c60a584..9223641 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesApps.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesApps.java @@ -1303,9 +1303,11 @@ public void verifyAppsXML(NodeList nodes, RMApp app) throws JSONException, WebServicesTestUtils.getXmlString(element, "amContainerLogs"), WebServicesTestUtils.getXmlInt(element, "allocatedMB"), WebServicesTestUtils.getXmlInt(element, "allocatedVCores"), + WebServicesTestUtils.getXmlInt(element, "allocatedGCores"), WebServicesTestUtils.getXmlInt(element, "runningContainers"), WebServicesTestUtils.getXmlInt(element, "preemptedResourceMB"), WebServicesTestUtils.getXmlInt(element, "preemptedResourceVCores"), + WebServicesTestUtils.getXmlInt(element, "preemptedResourceGCores"), WebServicesTestUtils.getXmlInt(element, "numNonAMContainerPreempted"), WebServicesTestUtils.getXmlInt(element, "numAMContainerPreempted")); } @@ -1314,7 +1316,7 @@ public void verifyAppsXML(NodeList nodes, RMApp app) throws JSONException, public void verifyAppInfo(JSONObject info, RMApp app) throws JSONException, Exception { - assertEquals("incorrect number of elements", 27, info.length()); + assertEquals("incorrect number of elements", 30, info.length()); verifyAppInfoGeneric(app, info.getString("id"), info.getString("user"), info.getString("name"), info.getString("applicationType"), @@ -1324,10 +1326,10 @@ public void verifyAppInfo(JSONObject info, RMApp app) throws JSONException, info.getLong("clusterId"), info.getLong("startedTime"), info.getLong("finishedTime"), info.getLong("elapsedTime"), info.getString("amHostHttpAddress"), info.getString("amContainerLogs"), - info.getInt("allocatedMB"), info.getInt("allocatedVCores"), + info.getInt("allocatedMB"), info.getInt("allocatedVCores"), 
info.getInt("allocatedGCores"), info.getInt("runningContainers"), info.getInt("preemptedResourceMB"), - info.getInt("preemptedResourceVCores"), + info.getInt("preemptedResourceVCores"), info.getInt("preemptedResourceGCores"), info.getInt("numNonAMContainerPreempted"), info.getInt("numAMContainerPreempted")); } @@ -1337,8 +1339,8 @@ public void verifyAppInfoGeneric(RMApp app, String id, String user, String finalStatus, float progress, String trackingUI, String diagnostics, long clusterId, long startedTime, long finishedTime, long elapsedTime, String amHostHttpAddress, String amContainerLogs, - int allocatedMB, int allocatedVCores, int numContainers, - int preemptedResourceMB, int preemptedResourceVCores, + int allocatedMB, int allocatedVCores, int allocatedGCores, int numContainers, + int preemptedResourceMB, int preemptedResourceVCores, int preemptedResourceGCores, int numNonAMContainerPreempted, int numAMContainerPreempted) throws JSONException, Exception { @@ -1373,6 +1375,7 @@ public void verifyAppInfoGeneric(RMApp app, String id, String user, amContainerLogs.endsWith("/" + app.getUser())); assertEquals("allocatedMB doesn't match", 1024, allocatedMB); assertEquals("allocatedVCores doesn't match", 1, allocatedVCores); + assertEquals("allocatedGCores doesn't match", 0, allocatedGCores); assertEquals("numContainers doesn't match", 1, numContainers); assertEquals("preemptedResourceMB doesn't match", app .getRMAppMetrics().getResourcePreempted().getMemory(), @@ -1380,6 +1383,9 @@ public void verifyAppInfoGeneric(RMApp app, String id, String user, assertEquals("preemptedResourceVCores doesn't match", app .getRMAppMetrics().getResourcePreempted().getVirtualCores(), preemptedResourceVCores); + assertEquals("preemptedResourceGCores doesn't match", app + .getRMAppMetrics().getResourcePreempted().getGpuCores(), + preemptedResourceGCores); assertEquals("numNonAMContainerPreempted doesn't match", app .getRMAppMetrics().getNumNonAMContainersPreempted(), 
numNonAMContainerPreempted); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesCapacitySched.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesCapacitySched.java index eb42679..1cbbbb0 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesCapacitySched.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesCapacitySched.java @@ -579,10 +579,10 @@ public void testPerUserResourcesJSON() throws Exception { @Test public void testResourceInfo() { - Resource res = Resources.createResource(10, 1); + Resource res = Resources.createResource(10, 1, 1); // If we add a new resource (e.g disks), then // CapacitySchedulerPage and these RM WebServices + docs need to be updated // eg. 
ResourceInfo - assertEquals("", res.toString()); + assertEquals("", res.toString()); } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesNodes.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesNodes.java index f507e17..474c611 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesNodes.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesNodes.java @@ -651,13 +651,15 @@ public void verifyNodesXML(NodeList nodes, MockNM nm) throws JSONException, WebServicesTestUtils.getXmlLong(element, "availMemoryMB"), WebServicesTestUtils.getXmlLong(element, "usedVirtualCores"), WebServicesTestUtils.getXmlLong(element, "availableVirtualCores"), + WebServicesTestUtils.getXmlLong(element, "usedGpuCores"), + WebServicesTestUtils.getXmlLong(element, "availableGpuCores"), WebServicesTestUtils.getXmlString(element, "version")); } } public void verifyNodeInfo(JSONObject nodeInfo, MockNM nm) throws JSONException, Exception { - assertEquals("incorrect number of elements", 13, nodeInfo.length()); + assertEquals("incorrect number of elements", 15, nodeInfo.length()); verifyNodeInfoGeneric(nm, nodeInfo.getString("state"), nodeInfo.getString("rack"), @@ -667,6 +669,7 @@ public void verifyNodeInfo(JSONObject nodeInfo, MockNM nm) nodeInfo.getString("healthReport"), nodeInfo.getInt("numContainers"), nodeInfo.getLong("usedMemoryMB"), nodeInfo.getLong("availMemoryMB"), nodeInfo.getLong("usedVirtualCores"), nodeInfo.getLong("availableVirtualCores"), + nodeInfo.getLong("usedGpuCores"), 
nodeInfo.getLong("availableGpuCores"), nodeInfo.getString("version")); } @@ -675,7 +678,7 @@ public void verifyNodeInfoGeneric(MockNM nm, String state, String rack, String id, String nodeHostName, String nodeHTTPAddress, long lastHealthUpdate, String healthReport, int numContainers, long usedMemoryMB, long availMemoryMB, long usedVirtualCores, - long availVirtualCores, String version) + long availVirtualCores, long usedGpuCores, long availGpuCores, String version) throws JSONException, Exception { RMNode node = rm.getRMContext().getRMNodes().get(nm.getNodeId()); @@ -713,7 +716,12 @@ public void verifyNodeInfoGeneric(MockNM nm, String state, String rack, .getUsedResource().getVirtualCores(), usedVirtualCores); assertEquals("availVirtualCores doesn't match: " + availVirtualCores, report .getAvailableResource().getVirtualCores(), availVirtualCores); + assertEquals("usedGpuCores doesn't match: " + usedGpuCores, report + .getUsedResource().getGpuCores(), usedGpuCores); + assertEquals("availGpuCores doesn't match: " + availGpuCores, report + .getAvailableResource().getGpuCores(), availGpuCores); } } } + diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/policies/DominantResourceFairnessPolicy.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/policies/DominantResourceFairnessPolicy.java index 86d503b..0d098a3 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/policies/DominantResourceFairnessPolicy.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/policies/DominantResourceFairnessPolicy.java @@ 
-96,10 +96,13 @@ public Resource getHeadroom(Resource queueFairShare, Resource queueUsage, int queueAvailableCPU = Math.max(queueFairShare.getVirtualCores() - queueUsage .getVirtualCores(), 0); + int queueAvailableGPU = + Math.max(queueFairShare.getGpuCores() - queueUsage.getGpuCores(), 0); Resource headroom = Resources.createResource( Math.min(maxAvailable.getMemory(), queueAvailableMemory), Math.min(maxAvailable.getVirtualCores(), - queueAvailableCPU)); + queueAvailableCPU), + Math.min(maxAvailable.getGpuCores(), queueAvailableGPU)); return headroom; } @@ -174,15 +177,39 @@ void calculateShares(Resource resource, Resource pool, (pool.getMemory() * weights.getWeight(MEMORY))); shares.setWeight(CPU, (float)resource.getVirtualCores() / (pool.getVirtualCores() * weights.getWeight(CPU))); + shares.setWeight(GPU, (float) resource.getGpuCores() / + (pool.getGpuCores() * weights.getWeight(GPU))); // sort order vector by resource share if (resourceOrder != null) { - if (shares.getWeight(MEMORY) > shares.getWeight(CPU)) { - resourceOrder[0] = MEMORY; - resourceOrder[1] = CPU; - } else { + int position = 0; + + resourceOrder[0] = MEMORY; + position ++; + + if (position == 0) { resourceOrder[0] = CPU; - resourceOrder[1] = MEMORY; + } else { + if (shares.getWeight(MEMORY) >= shares.getWeight(CPU)) { + resourceOrder[1] = CPU; + } else { + resourceOrder[0] = CPU; + resourceOrder[1] = MEMORY; + } + } + position ++; + + int startIndex = 0; + while (startIndex < position) { + if (shares.getWeight(GPU) >= + shares.getWeight(resourceOrder[startIndex])) { + break; + } + startIndex ++; } + for (int i = position; i > startIndex; i --) { + resourceOrder[i] = resourceOrder[i-1]; + } + resourceOrder[startIndex] = GPU; } } @@ -199,3 +226,4 @@ private int compareShares(ResourceWeights shares1, ResourceWeights shares2, } } } + diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java index 0e114e1..256655f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java @@ -176,6 +176,20 @@ public void testConfValidation() throws Exception { e.getMessage().startsWith( "Invalid resource scheduler vcores")); } + + conf = new YarnConfiguration(); + conf.setInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_GCORES, 2); + conf.setInt(YarnConfiguration.RM_SCHEDULER_MAXIMUM_ALLOCATION_GCORES, 1); + try { + scheduler.serviceInit(conf); + fail("Exception is expected because the min gcores allocation is" + + " larger than the max gcores allocation."); + } catch (YarnRuntimeException e) { + // Exception is expected. 
+ assertTrue("The thrown exception is not the expected one.", + e.getMessage().startsWith( + "Invalid resource scheduler gcores")); + } } // TESTS @@ -199,6 +213,9 @@ public void testLoadConfigurationOnInitialize() throws IOException { conf.setInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, 512); conf.setInt(FairSchedulerConfiguration.RM_SCHEDULER_INCREMENT_ALLOCATION_MB, 128); + conf.setInt(YarnConfiguration.RM_SCHEDULER_MAXIMUM_ALLOCATION_GCORES, 5); + conf.setInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_GCORES, 1); + conf.setInt(FairSchedulerConfiguration.RM_SCHEDULER_INCREMENT_ALLOCATION_GCORES, 1); scheduler.init(conf); scheduler.start(); scheduler.reinitialize(conf, resourceManager.getRMContext()); @@ -224,16 +241,21 @@ public void testNonMinZeroResourcesSettings() throws IOException { YarnConfiguration conf = new YarnConfiguration(); conf.setInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, 256); conf.setInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES, 1); + conf.setInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_GCORES, 1); conf.setInt( FairSchedulerConfiguration.RM_SCHEDULER_INCREMENT_ALLOCATION_MB, 512); conf.setInt( FairSchedulerConfiguration.RM_SCHEDULER_INCREMENT_ALLOCATION_VCORES, 2); + conf.setInt( + FairSchedulerConfiguration.RM_SCHEDULER_INCREMENT_ALLOCATION_GCORES, 1); scheduler.init(conf); scheduler.reinitialize(conf, null); Assert.assertEquals(256, scheduler.getMinimumResourceCapability().getMemory()); Assert.assertEquals(1, scheduler.getMinimumResourceCapability().getVirtualCores()); + Assert.assertEquals(1, scheduler.getMinimumResourceCapability().getGpuCores()); Assert.assertEquals(512, scheduler.getIncrementResourceCapability().getMemory()); Assert.assertEquals(2, scheduler.getIncrementResourceCapability().getVirtualCores()); + Assert.assertEquals(1, scheduler.getIncrementResourceCapability().getGpuCores()); } @Test @@ -242,16 +264,21 @@ public void testMinZeroResourcesSettings() throws 
IOException { YarnConfiguration conf = new YarnConfiguration(); conf.setInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, 0); conf.setInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES, 0); + conf.setInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_GCORES, 0); conf.setInt( FairSchedulerConfiguration.RM_SCHEDULER_INCREMENT_ALLOCATION_MB, 512); conf.setInt( FairSchedulerConfiguration.RM_SCHEDULER_INCREMENT_ALLOCATION_VCORES, 2); + conf.setInt( + FairSchedulerConfiguration.RM_SCHEDULER_INCREMENT_ALLOCATION_GCORES, 1); scheduler.init(conf); scheduler.reinitialize(conf, null); Assert.assertEquals(0, scheduler.getMinimumResourceCapability().getMemory()); Assert.assertEquals(0, scheduler.getMinimumResourceCapability().getVirtualCores()); + Assert.assertEquals(0, scheduler.getMinimumResourceCapability().getGpuCores()); Assert.assertEquals(512, scheduler.getIncrementResourceCapability().getMemory()); Assert.assertEquals(2, scheduler.getIncrementResourceCapability().getVirtualCores()); + Assert.assertEquals(1, scheduler.getIncrementResourceCapability().getGpuCores()); } @Test @@ -342,7 +369,7 @@ public void testFairShareWithMaxResources() throws IOException { // Add one big node (only care about aggregate capacity) RMNode node1 = - MockNodes.newNodeInfo(1, Resources.createResource(8 * 1024, 8), 1, + MockNodes.newNodeInfo(1, Resources.createResource(8 * 1024, 8, 8), 1, "127.0.0.1"); NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node1); scheduler.handle(nodeEvent1); @@ -386,7 +413,7 @@ public void testFairShareWithZeroWeight() throws IOException { // Add one big node (only care about aggregate capacity) RMNode node1 = - MockNodes.newNodeInfo(1, Resources.createResource(8 * 1024, 8), 1, + MockNodes.newNodeInfo(1, Resources.createResource(8 * 1024, 8, 8), 1, "127.0.0.1"); NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node1); scheduler.handle(nodeEvent1); @@ -417,11 +444,11 @@ public void 
testFairShareWithZeroWeightNoneZeroMinRes() throws IOException { out.println(""); out.println(""); out.println(""); - out.println("1 mb 1 vcores"); + out.println("1 mb 1 vcores 1 gcores"); out.println("0.0"); out.println(""); out.println(""); - out.println("1 mb 1 vcores"); + out.println("1 mb 1 vcores 1 gcores"); out.println("0.0"); out.println(""); out.println(""); @@ -433,7 +460,7 @@ public void testFairShareWithZeroWeightNoneZeroMinRes() throws IOException { // Add one big node (only care about aggregate capacity) RMNode node1 = - MockNodes.newNodeInfo(1, Resources.createResource(8 * 1024, 8), 1, + MockNodes.newNodeInfo(1, Resources.createResource(8 * 1024, 8, 8), 1, "127.0.0.1"); NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node1); scheduler.handle(nodeEvent1); @@ -467,11 +494,11 @@ public void testFairShareWithNoneZeroWeightNoneZeroMinRes() out.println(""); out.println(""); out.println(""); - out.println("1024 mb 1 vcores"); + out.println("1024 mb 1 vcores 1 gcores"); out.println("0.5"); out.println(""); out.println(""); - out.println("1024 mb 1 vcores"); + out.println("1024 mb 1 vcores 1 gcores"); out.println("0.5"); out.println(""); out.println(""); @@ -483,7 +510,7 @@ public void testFairShareWithNoneZeroWeightNoneZeroMinRes() // Add one big node (only care about aggregate capacity) RMNode node1 = - MockNodes.newNodeInfo(1, Resources.createResource(8 * 1024, 8), 1, + MockNodes.newNodeInfo(1, Resources.createResource(8 * 1024, 8, 8), 1, "127.0.0.1"); NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node1); scheduler.handle(nodeEvent1); @@ -529,7 +556,7 @@ public void testQueueInfo() throws IOException { // Add one big node (only care about aggregate capacity) RMNode node1 = - MockNodes.newNodeInfo(1, Resources.createResource(8 * 1024, 8), 1, + MockNodes.newNodeInfo(1, Resources.createResource(8 * 1024, 8, 8), 1, "127.0.0.1"); NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node1); 
scheduler.handle(nodeEvent1); @@ -688,17 +715,17 @@ public void testSimpleContainerAllocation() throws IOException { // Add a node RMNode node1 = MockNodes - .newNodeInfo(1, Resources.createResource(1024, 4), 1, "127.0.0.1"); + .newNodeInfo(1, Resources.createResource(1024, 4, 4), 1, "127.0.0.1"); NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node1); scheduler.handle(nodeEvent1); // Add another node RMNode node2 = - MockNodes.newNodeInfo(1, Resources.createResource(512, 2), 2, "127.0.0.2"); + MockNodes.newNodeInfo(1, Resources.createResource(512, 2, 2), 2, "127.0.0.2"); NodeAddedSchedulerEvent nodeEvent2 = new NodeAddedSchedulerEvent(node2); scheduler.handle(nodeEvent2); - createSchedulingRequest(512, 2, "queue1", "user1", 2); + createSchedulingRequest(512, 2, 2, "queue1", "user1", 2); scheduler.update(); @@ -793,10 +820,10 @@ public void testContainerReservationAttemptExceedingQueueMax() out.println(""); out.println(""); out.println(""); - out.println("2048mb,5vcores"); + out.println("2048mb,5vcores,5gcores"); out.println(""); out.println(""); - out.println("2048mb,10vcores"); + out.println("2048mb,10vcores,10gcores"); out.println(""); out.println(""); out.println(""); @@ -809,7 +836,7 @@ public void testContainerReservationAttemptExceedingQueueMax() // Add a node RMNode node1 = MockNodes - .newNodeInfo(1, Resources.createResource(3072, 5), 1, "127.0.0.1"); + .newNodeInfo(1, Resources.createResource(3072, 5, 5), 1, "127.0.0.1"); NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node1); scheduler.handle(nodeEvent1); @@ -850,10 +877,10 @@ public void testContainerReservationNotExceedingQueueMax() throws Exception { out.println(""); out.println(""); out.println(""); - out.println("3072mb,10vcores"); + out.println("3072mb,10vcores,10gcores"); out.println(""); out.println(""); - out.println("2048mb,10vcores"); + out.println("2048mb,10vcores,10gcores"); out.println(""); out.println(""); out.println(""); @@ -866,7 +893,7 @@ public void 
testContainerReservationNotExceedingQueueMax() throws Exception { // Add a node RMNode node1 = MockNodes - .newNodeInfo(1, Resources.createResource(3072, 5), 1, "127.0.0.1"); + .newNodeInfo(1, Resources.createResource(3072, 5, 5), 1, "127.0.0.1"); NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node1); scheduler.handle(nodeEvent1); @@ -911,10 +938,10 @@ public void testContainerReservationNotExceedingQueueMax() throws Exception { out.println(""); out.println(""); out.println(""); - out.println("2048mb,10vcores"); + out.println("2048mb,10vcores,10gcores"); out.println(""); out.println(""); - out.println("2048mb,10vcores"); + out.println("2048mb,10vcores,10gcores"); out.println(""); out.println(""); out.println(""); @@ -1125,10 +1152,10 @@ public void testFairShareWithMinAlloc() throws Exception { out.println(""); out.println(""); out.println(""); - out.println("1024mb,0vcores"); + out.println("1024mb,0vcores,0gcores"); out.println(""); out.println(""); - out.println("2048mb,0vcores"); + out.println("2048mb,0vcores,0gcores"); out.println(""); out.println(""); out.close(); @@ -1171,7 +1198,7 @@ public void testNestedUserQueue() throws IOException { out.println(""); out.println(""); out.println(""); - out.println("1024mb,0vcores"); + out.println("1024mb,0vcores,0gcores"); out.println(""); out.println(""); out.println(""); @@ -1203,7 +1230,7 @@ public void testFairShareAndWeightsInNestedUserQueueRule() throws Exception { out.println(""); out.println(""); out.println(""); - out.println("1024mb,0vcores"); + out.println("1024mb,0vcores,0gcores"); out.println(""); out.println(""); out.println(""); @@ -1468,15 +1495,15 @@ public void testHierarchicalQueueAllocationFileParsing() throws IOException, SAX out.println(""); out.println(""); out.println(""); - out.println("2048mb,0vcores"); + out.println("2048mb,0vcores,0gcores"); out.println(""); out.println(""); - out.println("2048mb,0vcores"); + out.println("2048mb,0vcores,0gcores"); out.println(""); - 
out.println("2048mb,0vcores"); + out.println("2048mb,0vcores,0gcores"); out.println(""); out.println(""); - out.println("2048mb,0vcores"); + out.println("2048mb,0vcores,0gcores"); out.println(""); out.println(""); out.println(""); @@ -1508,10 +1535,10 @@ public void testConfigureRootQueue() throws Exception { out.println(""); out.println(" drf"); out.println(" "); - out.println(" 1024mb,1vcores"); + out.println(" 1024mb,1vcores,1gcores"); out.println(" "); out.println(" "); - out.println(" 1024mb,4vcores"); + out.println(" 1024mb,4vcores,4gcores"); out.println(" "); out.println(" 100"); out.println(" 120"); @@ -1576,31 +1603,31 @@ public void testChoiceOfPreemptedContainers() throws Exception { // Create two nodes RMNode node1 = - MockNodes.newNodeInfo(1, Resources.createResource(4 * 1024, 4), 1, + MockNodes.newNodeInfo(1, Resources.createResource(4 * 1024, 4, 4), 1, "127.0.0.1"); NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node1); scheduler.handle(nodeEvent1); RMNode node2 = - MockNodes.newNodeInfo(1, Resources.createResource(4 * 1024, 4), 2, + MockNodes.newNodeInfo(1, Resources.createResource(4 * 1024, 4, 4), 2, "127.0.0.2"); NodeAddedSchedulerEvent nodeEvent2 = new NodeAddedSchedulerEvent(node2); scheduler.handle(nodeEvent2); // Queue A and B each request two applications ApplicationAttemptId app1 = - createSchedulingRequest(1 * 1024, 1, "queueA", "user1", 1, 1); - createSchedulingRequestExistingApplication(1 * 1024, 1, 2, app1); + createSchedulingRequest(1 * 1024, 1, 1, "queueA", "user1", 1, 1); + createSchedulingRequestExistingApplication(1 * 1024, 1, 1, 2, app1); ApplicationAttemptId app2 = - createSchedulingRequest(1 * 1024, 1, "queueA", "user1", 1, 3); - createSchedulingRequestExistingApplication(1 * 1024, 1, 4, app2); + createSchedulingRequest(1 * 1024, 1, 1, "queueA", "user1", 1, 3); + createSchedulingRequestExistingApplication(1 * 1024, 1, 1, 4, app2); ApplicationAttemptId app3 = - createSchedulingRequest(1 * 1024, 1, "queueB", 
"user1", 1, 1); - createSchedulingRequestExistingApplication(1 * 1024, 1, 2, app3); + createSchedulingRequest(1 * 1024, 1, 1, "queueB", "user1", 1, 1); + createSchedulingRequestExistingApplication(1 * 1024, 1, 1, 2, app3); ApplicationAttemptId app4 = - createSchedulingRequest(1 * 1024, 1, "queueB", "user1", 1, 3); - createSchedulingRequestExistingApplication(1 * 1024, 1, 4, app4); + createSchedulingRequest(1 * 1024, 1, 1, "queueB", "user1", 1, 3); + createSchedulingRequestExistingApplication(1 * 1024, 1, 1, 4, app4); scheduler.update(); @@ -1623,10 +1650,10 @@ public void testChoiceOfPreemptedContainers() throws Exception { assertEquals(2, scheduler.getSchedulerApp(app4).getLiveContainers().size()); // Now new requests arrive from queueC and default - createSchedulingRequest(1 * 1024, 1, "queueC", "user1", 1, 1); - createSchedulingRequest(1 * 1024, 1, "queueC", "user1", 1, 1); - createSchedulingRequest(1 * 1024, 1, "default", "user1", 1, 1); - createSchedulingRequest(1 * 1024, 1, "default", "user1", 1, 1); + createSchedulingRequest(1 * 1024, 1, 1, "queueC", "user1", 1, 1); + createSchedulingRequest(1 * 1024, 1, 1, "queueC", "user1", 1, 1); + createSchedulingRequest(1 * 1024, 1, 1, "default", "user1", 1, 1); + createSchedulingRequest(1 * 1024, 1, 1, "default", "user1", 1, 1); scheduler.update(); // We should be able to claw back one container from queueA and queueB each. 
@@ -1734,15 +1761,15 @@ public void testPreemptionIsNotDelayedToNextRound() throws Exception { // Add a node of 8G RMNode node1 = MockNodes.newNodeInfo(1, - Resources.createResource(8 * 1024, 8), 1, "127.0.0.1"); + Resources.createResource(8 * 1024, 8, 8), 1, "127.0.0.1"); NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node1); scheduler.handle(nodeEvent1); // Run apps in queueA.A1 and queueB - ApplicationAttemptId app1 = createSchedulingRequest(1 * 1024, 1, + ApplicationAttemptId app1 = createSchedulingRequest(1 * 1024, 1, 1, "queueA.queueA1", "user1", 7, 1); // createSchedulingRequestExistingApplication(1 * 1024, 1, 2, app1); - ApplicationAttemptId app2 = createSchedulingRequest(1 * 1024, 1, "queueB", + ApplicationAttemptId app2 = createSchedulingRequest(1 * 1024, 1, 1, "queueB", "user2", 1, 1); scheduler.update(); @@ -1757,7 +1784,7 @@ public void testPreemptionIsNotDelayedToNextRound() throws Exception { assertEquals(1, scheduler.getSchedulerApp(app2).getLiveContainers().size()); // Now submit an app in queueA.queueA2 - ApplicationAttemptId app3 = createSchedulingRequest(1 * 1024, 1, + ApplicationAttemptId app3 = createSchedulingRequest(1 * 1024, 1, 1, "queueA.queueA2", "user3", 7, 1); scheduler.update(); @@ -1789,23 +1816,23 @@ public void testPreemptionDecision() throws Exception { out.println(""); out.println(""); out.println(""); - out.println("0mb,0vcores"); + out.println("0mb,0vcores,0gcores"); out.println(""); out.println(""); out.println(".25"); - out.println("1024mb,0vcores"); + out.println("1024mb,0vcores,0gcores"); out.println(""); out.println(""); out.println(".25"); - out.println("1024mb,0vcores"); + out.println("1024mb,0vcores,0gcores"); out.println(""); out.println(""); out.println(".25"); - out.println("1024mb,0vcores"); + out.println("1024mb,0vcores,0gcores"); out.println(""); out.println(""); out.println(".25"); - out.println("1024mb,0vcores"); + out.println("1024mb,0vcores,0gcores"); out.println(""); out.println("5"); 
out.println("10"); @@ -1819,19 +1846,19 @@ public void testPreemptionDecision() throws Exception { // Create four nodes RMNode node1 = - MockNodes.newNodeInfo(1, Resources.createResource(2 * 1024, 2), 1, + MockNodes.newNodeInfo(1, Resources.createResource(2 * 1024, 2, 2), 1, "127.0.0.1"); NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node1); scheduler.handle(nodeEvent1); RMNode node2 = - MockNodes.newNodeInfo(1, Resources.createResource(2 * 1024, 2), 2, + MockNodes.newNodeInfo(1, Resources.createResource(2 * 1024, 2, 2), 2, "127.0.0.2"); NodeAddedSchedulerEvent nodeEvent2 = new NodeAddedSchedulerEvent(node2); scheduler.handle(nodeEvent2); RMNode node3 = - MockNodes.newNodeInfo(1, Resources.createResource(2 * 1024, 2), 3, + MockNodes.newNodeInfo(1, Resources.createResource(2 * 1024, 2, 2), 3, "127.0.0.3"); NodeAddedSchedulerEvent nodeEvent3 = new NodeAddedSchedulerEvent(node3); scheduler.handle(nodeEvent3); @@ -1922,28 +1949,28 @@ public void testPreemptionDecisionWithVariousTimeout() throws Exception { out.println(""); out.println(""); out.println(""); - out.println("0mb,0vcores"); + out.println("0mb,0vcores,0gcores"); out.println(""); out.println(""); out.println("1"); - out.println("1024mb,0vcores"); + out.println("1024mb,0vcores,0gcores"); out.println(""); out.println(""); out.println("2"); out.println("10"); out.println("25"); out.println(""); - out.println("1024mb,0vcores"); + out.println("1024mb,0vcores,0gcores"); out.println("5"); out.println(""); out.println(""); - out.println("1024mb,0vcores"); + out.println("1024mb,0vcores,0gcores"); out.println("20"); out.println(""); out.println(""); out.println(""); out.println("1"); - out.println("1024mb,0vcores"); + out.println("1024mb,0vcores,0gcores"); out.println(""); out.print("15"); out.print("30"); @@ -1987,7 +2014,7 @@ public void testPreemptionDecisionWithVariousTimeout() throws Exception { // Create one big node RMNode node1 = - MockNodes.newNodeInfo(1, Resources.createResource(6 * 1024, 
6), 1, + MockNodes.newNodeInfo(1, Resources.createResource(6 * 1024, 6, 6), 1, "127.0.0.1"); NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node1); scheduler.handle(nodeEvent1); @@ -2259,7 +2286,7 @@ public void testUserMaxRunningApps() throws Exception { // Add a node RMNode node1 = MockNodes - .newNodeInfo(1, Resources.createResource(8192, 8), 1, "127.0.0.1"); + .newNodeInfo(1, Resources.createResource(8192, 8, 8), 1, "127.0.0.1"); NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node1); scheduler.handle(nodeEvent1); @@ -2361,7 +2388,7 @@ private void testIncreaseQueueSettingOnTheFlyInternal(String allocBefore, // Add a node RMNode node1 = MockNodes - .newNodeInfo(1, Resources.createResource(8192, 8), 1, "127.0.0.1"); + .newNodeInfo(1, Resources.createResource(8192, 8, 8), 1, "127.0.0.1"); NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node1); scheduler.handle(nodeEvent1); @@ -2504,7 +2531,7 @@ private void testDecreaseQueueSettingOnTheFlyInternal(String allocBefore, // Add a node RMNode node1 = MockNodes - .newNodeInfo(1, Resources.createResource(8192, 8), 1, "127.0.0.1"); + .newNodeInfo(1, Resources.createResource(8192, 8, 8), 1, "127.0.0.1"); NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node1); scheduler.handle(nodeEvent1); @@ -2610,11 +2637,11 @@ public void testReservationWhileMultiplePriorities() throws IOException { // Add a node RMNode node1 = MockNodes - .newNodeInfo(1, Resources.createResource(1024, 4), 1, "127.0.0.1"); + .newNodeInfo(1, Resources.createResource(1024, 4, 4), 1, "127.0.0.1"); NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node1); scheduler.handle(nodeEvent1); - ApplicationAttemptId attId = createSchedulingRequest(1024, 4, "queue1", + ApplicationAttemptId attId = createSchedulingRequest(1024, 4, 4, "queue1", "user1", 1, 2); scheduler.update(); NodeUpdateSchedulerEvent updateEvent = new NodeUpdateSchedulerEvent(node1); @@ -2627,7 +2654,7 @@ public 
void testReservationWhileMultiplePriorities() throws IOException { .getLiveContainers().iterator().next().getContainerId(); // Cause reservation to be created - createSchedulingRequestExistingApplication(1024, 4, 2, attId); + createSchedulingRequestExistingApplication(1024, 4, 4, 2, attId); scheduler.update(); scheduler.handle(updateEvent); @@ -2636,7 +2663,7 @@ public void testReservationWhileMultiplePriorities() throws IOException { assertEquals(0, scheduler.getRootQueueMetrics().getAvailableVirtualCores()); // Create request at higher priority - createSchedulingRequestExistingApplication(1024, 4, 1, attId); + createSchedulingRequestExistingApplication(1024, 4, 4, 1, attId); scheduler.update(); scheduler.handle(updateEvent); @@ -2767,7 +2794,7 @@ public void testFifoWithinQueue() throws Exception { RMNode node1 = MockNodes - .newNodeInfo(1, Resources.createResource(3072, 3), 1, "127.0.0.1"); + .newNodeInfo(1, Resources.createResource(3072, 3, 3), 1, "127.0.0.1"); NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node1); scheduler.handle(nodeEvent1); @@ -2812,7 +2839,7 @@ public void testMaxAssign() throws Exception { scheduler.reinitialize(conf, resourceManager.getRMContext()); RMNode node = - MockNodes.newNodeInfo(1, Resources.createResource(16384, 16), 0, + MockNodes.newNodeInfo(1, Resources.createResource(16384, 16, 16), 0, "127.0.0.1"); NodeAddedSchedulerEvent nodeEvent = new NodeAddedSchedulerEvent(node); NodeUpdateSchedulerEvent updateEvent = new NodeUpdateSchedulerEvent(node); @@ -2847,14 +2874,14 @@ public void testMaxAssignWithZeroMemoryContainers() throws Exception { scheduler.reinitialize(conf, resourceManager.getRMContext()); RMNode node = - MockNodes.newNodeInfo(1, Resources.createResource(16384, 16), 0, + MockNodes.newNodeInfo(1, Resources.createResource(16384, 16, 16), 0, "127.0.0.1"); NodeAddedSchedulerEvent nodeEvent = new NodeAddedSchedulerEvent(node); NodeUpdateSchedulerEvent updateEvent = new NodeUpdateSchedulerEvent(node); 
scheduler.handle(nodeEvent); ApplicationAttemptId attId = - createSchedulingRequest(0, 1, "root.default", "user", 8); + createSchedulingRequest(0, 1, 1, "root.default", "user", 8); FSAppAttempt app = scheduler.getSchedulerApp(attId); // set maxAssign to 2: only 2 containers should be allocated @@ -2899,10 +2926,10 @@ public void testAssignContainer() throws Exception { RMNode node1 = MockNodes - .newNodeInfo(1, Resources.createResource(8192, 8), 1, "127.0.0.1"); + .newNodeInfo(1, Resources.createResource(8192, 8, 8), 1, "127.0.0.1"); RMNode node2 = MockNodes - .newNodeInfo(1, Resources.createResource(8192, 8), 2, "127.0.0.2"); + .newNodeInfo(1, Resources.createResource(8192, 8, 8), 2, "127.0.0.2"); NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node1); NodeAddedSchedulerEvent nodeEvent2 = new NodeAddedSchedulerEvent(node2); @@ -3030,9 +3057,9 @@ public void testRemoveNodeUpdatesRootQueueMetrics() throws IOException { scheduler.reinitialize(conf, resourceManager.getRMContext()); assertEquals(0, scheduler.getRootQueueMetrics().getAvailableMB()); - assertEquals(0, scheduler.getRootQueueMetrics().getAvailableVirtualCores()); + assertEquals(0, scheduler.getRootQueueMetrics().getAvailableVirtualCores()); - RMNode node1 = MockNodes.newNodeInfo(1, Resources.createResource(1024, 4), 1, + RMNode node1 = MockNodes.newNodeInfo(1, Resources.createResource(1024, 4, 4), 1, "127.0.0.1"); NodeAddedSchedulerEvent addEvent = new NodeAddedSchedulerEvent(node1); scheduler.handle(addEvent); @@ -3195,12 +3222,12 @@ public void testNoMoreCpuOnNode() throws IOException { scheduler.start(); scheduler.reinitialize(conf, resourceManager.getRMContext()); - RMNode node1 = MockNodes.newNodeInfo(1, Resources.createResource(2048, 1), + RMNode node1 = MockNodes.newNodeInfo(1, Resources.createResource(2048, 1, 1), 1, "127.0.0.1"); NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node1); scheduler.handle(nodeEvent1); - ApplicationAttemptId attId = 
createSchedulingRequest(1024, 1, "default", + ApplicationAttemptId attId = createSchedulingRequest(1024, 1, 1, "default", "user1", 2); FSAppAttempt app = scheduler.getSchedulerApp(attId); scheduler.update(); @@ -3218,14 +3245,14 @@ public void testBasicDRFAssignment() throws Exception { scheduler.start(); scheduler.reinitialize(conf, resourceManager.getRMContext()); - RMNode node = MockNodes.newNodeInfo(1, BuilderUtils.newResource(8192, 5)); + RMNode node = MockNodes.newNodeInfo(1, BuilderUtils.newResource(8192, 5, 5)); NodeAddedSchedulerEvent nodeEvent = new NodeAddedSchedulerEvent(node); scheduler.handle(nodeEvent); - ApplicationAttemptId appAttId1 = createSchedulingRequest(2048, 1, "queue1", + ApplicationAttemptId appAttId1 = createSchedulingRequest(2048, 1, 1, "queue1", "user1", 2); FSAppAttempt app1 = scheduler.getSchedulerApp(appAttId1); - ApplicationAttemptId appAttId2 = createSchedulingRequest(1024, 2, "queue1", + ApplicationAttemptId appAttId2 = createSchedulingRequest(1024, 2, 2, "queue1", "user1", 2); FSAppAttempt app2 = scheduler.getSchedulerApp(appAttId2); @@ -3260,18 +3287,18 @@ public void testBasicDRFWithQueues() throws Exception { scheduler.start(); scheduler.reinitialize(conf, resourceManager.getRMContext()); - RMNode node = MockNodes.newNodeInfo(1, BuilderUtils.newResource(8192, 7), + RMNode node = MockNodes.newNodeInfo(1, BuilderUtils.newResource(8192, 7, 7), 1, "127.0.0.1"); NodeAddedSchedulerEvent nodeEvent = new NodeAddedSchedulerEvent(node); scheduler.handle(nodeEvent); - ApplicationAttemptId appAttId1 = createSchedulingRequest(3072, 1, "queue1", + ApplicationAttemptId appAttId1 = createSchedulingRequest(3072, 1, 1, "queue1", "user1", 2); FSAppAttempt app1 = scheduler.getSchedulerApp(appAttId1); - ApplicationAttemptId appAttId2 = createSchedulingRequest(2048, 2, "queue1", + ApplicationAttemptId appAttId2 = createSchedulingRequest(2048, 2, 2, "queue1", "user1", 2); FSAppAttempt app2 = scheduler.getSchedulerApp(appAttId2); - 
ApplicationAttemptId appAttId3 = createSchedulingRequest(1024, 2, "queue2", + ApplicationAttemptId appAttId3 = createSchedulingRequest(1024, 2, 2, "queue2", "user1", 2); FSAppAttempt app3 = scheduler.getSchedulerApp(appAttId3); @@ -3298,24 +3325,24 @@ public void testDRFHierarchicalQueues() throws Exception { scheduler.start(); scheduler.reinitialize(conf, resourceManager.getRMContext()); - RMNode node = MockNodes.newNodeInfo(1, BuilderUtils.newResource(12288, 12), + RMNode node = MockNodes.newNodeInfo(1, BuilderUtils.newResource(12288, 12, 12), 1, "127.0.0.1"); NodeAddedSchedulerEvent nodeEvent = new NodeAddedSchedulerEvent(node); scheduler.handle(nodeEvent); - ApplicationAttemptId appAttId1 = createSchedulingRequest(3074, 1, "queue1.subqueue1", + ApplicationAttemptId appAttId1 = createSchedulingRequest(3074, 1, 1, "queue1.subqueue1", "user1", 2); Thread.sleep(3); // so that start times will be different FSAppAttempt app1 = scheduler.getSchedulerApp(appAttId1); - ApplicationAttemptId appAttId2 = createSchedulingRequest(1024, 3, "queue1.subqueue1", + ApplicationAttemptId appAttId2 = createSchedulingRequest(1024, 3, 3, "queue1.subqueue1", "user1", 2); Thread.sleep(3); // so that start times will be different FSAppAttempt app2 = scheduler.getSchedulerApp(appAttId2); - ApplicationAttemptId appAttId3 = createSchedulingRequest(2048, 2, "queue1.subqueue2", + ApplicationAttemptId appAttId3 = createSchedulingRequest(2048, 2, 2, "queue1.subqueue2", "user1", 2); Thread.sleep(3); // so that start times will be different FSAppAttempt app3 = scheduler.getSchedulerApp(appAttId3); - ApplicationAttemptId appAttId4 = createSchedulingRequest(1024, 2, "queue2", + ApplicationAttemptId appAttId4 = createSchedulingRequest(1024, 2, 2, "queue2", "user1", 2); Thread.sleep(3); // so that start times will be different FSAppAttempt app4 = scheduler.getSchedulerApp(appAttId4); @@ -3495,7 +3522,7 @@ public void testQueueMaxAMShare() throws Exception { scheduler.reinitialize(conf, 
resourceManager.getRMContext()); RMNode node = - MockNodes.newNodeInfo(1, Resources.createResource(20480, 20), + MockNodes.newNodeInfo(1, Resources.createResource(20480, 20, 20), 0, "127.0.0.1"); NodeAddedSchedulerEvent nodeEvent = new NodeAddedSchedulerEvent(node); NodeUpdateSchedulerEvent updateEvent = new NodeUpdateSchedulerEvent(node); @@ -3510,14 +3537,14 @@ public void testQueueMaxAMShare() throws Exception { scheduler.update(); scheduler.handle(updateEvent); - Resource amResource1 = Resource.newInstance(1024, 1); - Resource amResource2 = Resource.newInstance(2048, 2); - Resource amResource3 = Resource.newInstance(1860, 2); + Resource amResource1 = Resource.newInstance(1024, 1, 1); + Resource amResource2 = Resource.newInstance(2048, 2, 2); + Resource amResource3 = Resource.newInstance(1860, 2, 2); int amPriority = RMAppAttemptImpl.AM_CONTAINER_PRIORITY.getPriority(); // Exceeds no limits ApplicationAttemptId attId1 = createAppAttemptId(1, 1); createApplicationWithAMResource(attId1, "queue1", "user1", amResource1); - createSchedulingRequestExistingApplication(1024, 1, amPriority, attId1); + createSchedulingRequestExistingApplication(1024, 1, 1, amPriority, attId1); FSAppAttempt app1 = scheduler.getSchedulerApp(attId1); scheduler.update(); scheduler.handle(updateEvent); @@ -3531,7 +3558,7 @@ public void testQueueMaxAMShare() throws Exception { // Exceeds no limits ApplicationAttemptId attId2 = createAppAttemptId(2, 1); createApplicationWithAMResource(attId2, "queue1", "user1", amResource1); - createSchedulingRequestExistingApplication(1024, 1, amPriority, attId2); + createSchedulingRequestExistingApplication(1024, 1, 1, amPriority, attId2); FSAppAttempt app2 = scheduler.getSchedulerApp(attId2); scheduler.update(); scheduler.handle(updateEvent); @@ -3545,7 +3572,7 @@ public void testQueueMaxAMShare() throws Exception { // Exceeds queue limit ApplicationAttemptId attId3 = createAppAttemptId(3, 1); createApplicationWithAMResource(attId3, "queue1", "user1", 
amResource1); - createSchedulingRequestExistingApplication(1024, 1, amPriority, attId3); + createSchedulingRequestExistingApplication(1024, 1, 1, amPriority, attId3); FSAppAttempt app3 = scheduler.getSchedulerApp(attId3); scheduler.update(); scheduler.handle(updateEvent); @@ -3581,7 +3608,7 @@ public void testQueueMaxAMShare() throws Exception { // Exceeds queue limit ApplicationAttemptId attId4 = createAppAttemptId(4, 1); createApplicationWithAMResource(attId4, "queue1", "user1", amResource2); - createSchedulingRequestExistingApplication(2048, 2, amPriority, attId4); + createSchedulingRequestExistingApplication(2048, 2, 2, amPriority, attId4); FSAppAttempt app4 = scheduler.getSchedulerApp(attId4); scheduler.update(); scheduler.handle(updateEvent); @@ -3595,7 +3622,7 @@ public void testQueueMaxAMShare() throws Exception { // Exceeds queue limit ApplicationAttemptId attId5 = createAppAttemptId(5, 1); createApplicationWithAMResource(attId5, "queue1", "user1", amResource2); - createSchedulingRequestExistingApplication(2048, 2, amPriority, attId5); + createSchedulingRequestExistingApplication(2048, 2, 2, amPriority, attId5); FSAppAttempt app5 = scheduler.getSchedulerApp(attId5); scheduler.update(); scheduler.handle(updateEvent); @@ -3638,7 +3665,7 @@ public void testQueueMaxAMShare() throws Exception { // Check amResource normalization ApplicationAttemptId attId6 = createAppAttemptId(6, 1); createApplicationWithAMResource(attId6, "queue1", "user1", amResource3); - createSchedulingRequestExistingApplication(1860, 2, amPriority, attId6); + createSchedulingRequestExistingApplication(1860, 2, 2, amPriority, attId6); FSAppAttempt app6 = scheduler.getSchedulerApp(attId6); scheduler.update(); scheduler.handle(updateEvent); @@ -3687,7 +3714,7 @@ public void testQueueMaxAMShareDefault() throws Exception { scheduler.reinitialize(conf, resourceManager.getRMContext()); RMNode node = - MockNodes.newNodeInfo(1, Resources.createResource(8192, 20), + MockNodes.newNodeInfo(1, 
Resources.createResource(8192, 20, 20), 0, "127.0.0.1"); NodeAddedSchedulerEvent nodeEvent = new NodeAddedSchedulerEvent(node); NodeUpdateSchedulerEvent updateEvent = new NodeUpdateSchedulerEvent(node); @@ -3723,14 +3750,14 @@ public void testQueueMaxAMShareDefault() throws Exception { scheduler.handle(updateEvent); } - Resource amResource1 = Resource.newInstance(1024, 1); + Resource amResource1 = Resource.newInstance(1024, 1, 1); int amPriority = RMAppAttemptImpl.AM_CONTAINER_PRIORITY.getPriority(); // The fair share is 2048 MB, and the default maxAMShare is 0.5f, // so the AM is accepted. ApplicationAttemptId attId1 = createAppAttemptId(1, 1); createApplicationWithAMResource(attId1, "queue1", "test1", amResource1); - createSchedulingRequestExistingApplication(1024, 1, amPriority, attId1); + createSchedulingRequestExistingApplication(1024, 1, 1, amPriority, attId1); FSAppAttempt app1 = scheduler.getSchedulerApp(attId1); scheduler.update(); scheduler.handle(updateEvent); @@ -3745,7 +3772,7 @@ public void testQueueMaxAMShareDefault() throws Exception { // so the AM is not accepted. 
ApplicationAttemptId attId2 = createAppAttemptId(2, 1); createApplicationWithAMResource(attId2, "queue2", "test1", amResource1); - createSchedulingRequestExistingApplication(1024, 1, amPriority, attId2); + createSchedulingRequestExistingApplication(1024, 1, 1, amPriority, attId2); FSAppAttempt app2 = scheduler.getSchedulerApp(attId2); scheduler.update(); scheduler.handle(updateEvent); @@ -3849,12 +3876,12 @@ public void testContinuousScheduling() throws Exception { // Add two nodes RMNode node1 = - MockNodes.newNodeInfo(1, Resources.createResource(8 * 1024, 8), 1, + MockNodes.newNodeInfo(1, Resources.createResource(8 * 1024, 8, 8), 1, "127.0.0.1"); NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node1); scheduler.handle(nodeEvent1); RMNode node2 = - MockNodes.newNodeInfo(1, Resources.createResource(8 * 1024, 8), 2, + MockNodes.newNodeInfo(1, Resources.createResource(8 * 1024, 8, 8), 2, "127.0.0.2"); NodeAddedSchedulerEvent nodeEvent2 = new NodeAddedSchedulerEvent(node2); scheduler.handle(nodeEvent2); @@ -3862,6 +3889,7 @@ public void testContinuousScheduling() throws Exception { // available resource Assert.assertEquals(scheduler.getClusterResource().getMemory(), 16 * 1024); Assert.assertEquals(scheduler.getClusterResource().getVirtualCores(), 16); + Assert.assertEquals(scheduler.getClusterResource().getGpuCores(), 16); // send application request ApplicationAttemptId appAttemptId = @@ -3872,7 +3900,7 @@ public void testContinuousScheduling() throws Exception { scheduler.addApplicationAttempt(appAttemptId, false, false); List ask = new ArrayList(); ResourceRequest request = - createResourceRequest(1024, 1, ResourceRequest.ANY, 1, 1, true); + createResourceRequest(1024, 1, 1, ResourceRequest.ANY, 1, 1, true); ask.add(request); scheduler.allocate(appAttemptId, ask, new ArrayList(), null, null); @@ -3887,20 +3915,22 @@ public void testContinuousScheduling() throws Exception { // check consumption Assert.assertEquals(1024, 
app.getCurrentConsumption().getMemory()); Assert.assertEquals(1, app.getCurrentConsumption().getVirtualCores()); + Assert.assertEquals(1, app.getCurrentConsumption().getGpuCores()); // another request request = - createResourceRequest(1024, 1, ResourceRequest.ANY, 2, 1, true); + createResourceRequest(1024, 1, 1, ResourceRequest.ANY, 2, 1, true); ask.clear(); ask.add(request); scheduler.allocate(appAttemptId, ask, new ArrayList(), null, null); // Wait until app gets resources while (app.getCurrentConsumption() - .equals(Resources.createResource(1024, 1))) { } + .equals(Resources.createResource(1024, 1, 1))) { } Assert.assertEquals(2048, app.getCurrentConsumption().getMemory()); Assert.assertEquals(2, app.getCurrentConsumption().getVirtualCores()); + Assert.assertEquals(2, app.getCurrentConsumption().getGpuCores()); // 2 containers should be assigned to 2 nodes Set nodes = new HashSet(); @@ -3921,12 +3951,12 @@ public void testContinuousSchedulingWithNodeRemoved() throws Exception { // Add two nodes RMNode node1 = - MockNodes.newNodeInfo(1, Resources.createResource(8 * 1024, 8), 1, + MockNodes.newNodeInfo(1, Resources.createResource(8 * 1024, 8, 8), 1, "127.0.0.1"); NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node1); scheduler.handle(nodeEvent1); RMNode node2 = - MockNodes.newNodeInfo(1, Resources.createResource(8 * 1024, 8), 2, + MockNodes.newNodeInfo(1, Resources.createResource(8 * 1024, 8, 8), 2, "127.0.0.2"); NodeAddedSchedulerEvent nodeEvent2 = new NodeAddedSchedulerEvent(node2); scheduler.handle(nodeEvent2); @@ -4007,7 +4037,7 @@ public void testSchedulingOnRemovedNode() throws Exception { List ask1 = new ArrayList<>(); ResourceRequest request1 = - createResourceRequest(1024, 8, ResourceRequest.ANY, 1, 1, true); + createResourceRequest(1024, 8, 8, ResourceRequest.ANY, 1, 1, true); ask1.add(request1); scheduler.allocate(id11, ask1, new ArrayList(), null, @@ -4015,7 +4045,7 @@ public void testSchedulingOnRemovedNode() throws Exception { 
String hostName = "127.0.0.1"; RMNode node1 = MockNodes.newNodeInfo(1, - Resources.createResource(8 * 1024, 8), 1, hostName); + Resources.createResource(8 * 1024, 8, 8), 1, hostName); NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node1); scheduler.handle(nodeEvent1); @@ -4081,17 +4111,17 @@ public void testRecoverRequestAfterPreemption() throws Exception { // Create Node and raised Node Added event RMNode node = MockNodes.newNodeInfo(1, - Resources.createResource(16 * 1024, 4), 0, host); + Resources.createResource(16 * 1024, 4, 4), 0, host); NodeAddedSchedulerEvent nodeEvent = new NodeAddedSchedulerEvent(node); scheduler.handle(nodeEvent); // Create 3 container requests and place it in ask List ask = new ArrayList(); - ResourceRequest nodeLocalRequest = createResourceRequest(GB, 1, host, + ResourceRequest nodeLocalRequest = createResourceRequest(GB, 1, 1, host, priority.getPriority(), 1, true); - ResourceRequest rackLocalRequest = createResourceRequest(GB, 1, + ResourceRequest rackLocalRequest = createResourceRequest(GB, 1, 1, node.getRackName(), priority.getPriority(), 1, true); - ResourceRequest offRackRequest = createResourceRequest(GB, 1, + ResourceRequest offRackRequest = createResourceRequest(GB, 1, 1, ResourceRequest.ANY, priority.getPriority(), 1, true); ask.add(nodeLocalRequest); ask.add(rackLocalRequest); @@ -4156,7 +4186,7 @@ public void testBlacklistNodes() throws Exception { final int GB = 1024; String host = "127.0.0.1"; RMNode node = - MockNodes.newNodeInfo(1, Resources.createResource(16 * GB, 16), + MockNodes.newNodeInfo(1, Resources.createResource(16 * GB, 16, 16), 0, host); NodeAddedSchedulerEvent nodeEvent = new NodeAddedSchedulerEvent(node); NodeUpdateSchedulerEvent updateEvent = new NodeUpdateSchedulerEvent(node); @@ -4208,11 +4238,11 @@ public void testGetAppsInQueue() throws Exception { scheduler.reinitialize(conf, resourceManager.getRMContext()); ApplicationAttemptId appAttId1 = - createSchedulingRequest(1024, 1, 
"queue1.subqueue1", "user1"); + createSchedulingRequest(1024, 1, 1, "queue1.subqueue1", "user1"); ApplicationAttemptId appAttId2 = - createSchedulingRequest(1024, 1, "queue1.subqueue2", "user1"); + createSchedulingRequest(1024, 1, 1, "queue1.subqueue2", "user1"); ApplicationAttemptId appAttId3 = - createSchedulingRequest(1024, 1, "default", "user1"); + createSchedulingRequest(1024, 1, 1, "default", "user1"); List apps = scheduler.getAppsInQueue("queue1.subqueue1"); @@ -4259,7 +4289,7 @@ public void testMoveRunnableApp() throws Exception { FSLeafQueue targetQueue = queueMgr.getLeafQueue("queue2", true); ApplicationAttemptId appAttId = - createSchedulingRequest(1024, 1, "queue1", "user1", 3); + createSchedulingRequest(1024, 1, 0, "queue1", "user1", 3); ApplicationId appId = appAttId.getApplicationId(); RMNode node = MockNodes.newNodeInfo(1, Resources.createResource(1024)); NodeAddedSchedulerEvent nodeEvent = new NodeAddedSchedulerEvent(node); @@ -4267,24 +4297,24 @@ public void testMoveRunnableApp() throws Exception { scheduler.handle(nodeEvent); scheduler.handle(updateEvent); - assertEquals(Resource.newInstance(1024, 1), oldQueue.getResourceUsage()); + assertEquals(Resource.newInstance(1024, 1, 0), oldQueue.getResourceUsage()); scheduler.update(); - assertEquals(Resource.newInstance(3072, 3), oldQueue.getDemand()); + assertEquals(Resource.newInstance(3072, 3, 0), oldQueue.getDemand()); scheduler.moveApplication(appId, "queue2"); FSAppAttempt app = scheduler.getSchedulerApp(appAttId); assertSame(targetQueue, app.getQueue()); assertFalse(oldQueue.isRunnableApp(app)); assertTrue(targetQueue.isRunnableApp(app)); - assertEquals(Resource.newInstance(0, 0), oldQueue.getResourceUsage()); - assertEquals(Resource.newInstance(1024, 1), targetQueue.getResourceUsage()); + assertEquals(Resource.newInstance(0, 0, 0), oldQueue.getResourceUsage()); + assertEquals(Resource.newInstance(1024, 1, 0), targetQueue.getResourceUsage()); assertEquals(0, oldQueue.getNumRunnableApps()); 
assertEquals(1, targetQueue.getNumRunnableApps()); assertEquals(1, queueMgr.getRootQueue().getNumRunnableApps()); scheduler.update(); - assertEquals(Resource.newInstance(0, 0), oldQueue.getDemand()); - assertEquals(Resource.newInstance(3072, 3), targetQueue.getDemand()); + assertEquals(Resource.newInstance(0, 0, 0), oldQueue.getDemand()); + assertEquals(Resource.newInstance(3072, 3, 0), targetQueue.getDemand()); } @Test @@ -4300,7 +4330,7 @@ public void testMoveNonRunnableApp() throws Exception { scheduler.getAllocationConfiguration().queueMaxApps.put("root.queue2", 0); ApplicationAttemptId appAttId = - createSchedulingRequest(1024, 1, "queue1", "user1", 3); + createSchedulingRequest(1024, 1, 1, "queue1", "user1", 3); assertEquals(0, oldQueue.getNumRunnableApps()); scheduler.moveApplication(appAttId.getApplicationId(), "queue2"); @@ -4321,7 +4351,7 @@ public void testMoveMakesAppRunnable() throws Exception { scheduler.getAllocationConfiguration().queueMaxApps.put("root.queue1", 0); ApplicationAttemptId appAttId = - createSchedulingRequest(1024, 1, "queue1", "user1", 3); + createSchedulingRequest(1024, 1, 1, "queue1", "user1", 3); FSAppAttempt app = scheduler.getSchedulerApp(appAttId); assertTrue(oldQueue.isNonRunnableApp(app)); @@ -4345,7 +4375,7 @@ public void testMoveWouldViolateMaxAppsConstraints() throws Exception { scheduler.getAllocationConfiguration().queueMaxApps.put("root.queue2", 0); ApplicationAttemptId appAttId = - createSchedulingRequest(1024, 1, "queue1", "user1", 3); + createSchedulingRequest(1024, 1, 1, "queue1", "user1", 3); scheduler.moveApplication(appAttId.getApplicationId(), "queue2"); } @@ -4360,18 +4390,18 @@ public void testMoveWouldViolateMaxResourcesConstraints() throws Exception { FSLeafQueue oldQueue = queueMgr.getLeafQueue("queue1", true); queueMgr.getLeafQueue("queue2", true); scheduler.getAllocationConfiguration().maxQueueResources.put("root.queue2", - Resource.newInstance(1024, 1)); + Resource.newInstance(1024, 1, 1)); 
ApplicationAttemptId appAttId = - createSchedulingRequest(1024, 1, "queue1", "user1", 3); - RMNode node = MockNodes.newNodeInfo(1, Resources.createResource(2048, 2)); + createSchedulingRequest(1024, 1, 1, "queue1", "user1", 3); + RMNode node = MockNodes.newNodeInfo(1, Resources.createResource(2048, 2, 2)); NodeAddedSchedulerEvent nodeEvent = new NodeAddedSchedulerEvent(node); NodeUpdateSchedulerEvent updateEvent = new NodeUpdateSchedulerEvent(node); scheduler.handle(nodeEvent); scheduler.handle(updateEvent); scheduler.handle(updateEvent); - assertEquals(Resource.newInstance(2048, 2), oldQueue.getResourceUsage()); + assertEquals(Resource.newInstance(2048, 2, 2), oldQueue.getResourceUsage()); scheduler.moveApplication(appAttId.getApplicationId(), "queue2"); } @@ -4384,7 +4414,7 @@ public void testMoveToNonexistentQueue() throws Exception { scheduler.getQueueManager().getLeafQueue("queue1", true); ApplicationAttemptId appAttId = - createSchedulingRequest(1024, 1, "queue1", "user1", 3); + createSchedulingRequest(1024, 1, 1, "queue1", "user1", 3); scheduler.moveApplication(appAttId.getApplicationId(), "queue2"); } @@ -4489,3 +4519,4 @@ public void testPerfMetricsInited() { collector.getRecords().size()); } } + diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/policies/TestDominantResourceFairnessPolicy.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/policies/TestDominantResourceFairnessPolicy.java index a5c20c1..0e4ca11 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/policies/TestDominantResourceFairnessPolicy.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/policies/TestDominantResourceFairnessPolicy.java @@ -31,35 +31,53 @@ import org.apache.hadoop.yarn.util.resource.Resources; import org.junit.Test; +import java.util.HashSet; +import java.util.Set; + /** * comparator.compare(sched1, sched2) < 0 means that sched1 should get a * container before sched2 */ public class TestDominantResourceFairnessPolicy { + private Comparator createComparator(Resource capacity) { + Set enabledResourceTypes = new HashSet(); + for (ResourceType type : ResourceType.values()) { + enabledResourceTypes.add(type); + } + return createComparator(capacity, enabledResourceTypes); + } + + private Comparator createComparator(Resource capacity, + Set enabledResourceTypes) { + DominantResourceFairnessPolicy policy = new DominantResourceFairnessPolicy(); + policy.initialize(capacity); + return policy.getComparator(); + } + private Comparator createComparator(int clusterMem, int clusterCpu) { DominantResourceFairnessPolicy policy = new DominantResourceFairnessPolicy(); policy.initialize(BuilderUtils.newResource(clusterMem, clusterCpu)); return policy.getComparator(); } - + private Schedulable createSchedulable(int memUsage, int cpuUsage) { return createSchedulable(memUsage, cpuUsage, ResourceWeights.NEUTRAL, 0, 0); } - + private Schedulable createSchedulable(int memUsage, int cpuUsage, int minMemShare, int minCpuShare) { return createSchedulable(memUsage, cpuUsage, ResourceWeights.NEUTRAL, minMemShare, minCpuShare); } - + private Schedulable createSchedulable(int memUsage, int cpuUsage, ResourceWeights weights) { return createSchedulable(memUsage, cpuUsage, weights, 0, 0); } - + private Schedulable createSchedulable(int memUsage, int cpuUsage, ResourceWeights weights, int minMemShare, int minCpuShare) { Resource usage = BuilderUtils.newResource(memUsage, cpuUsage); @@ -68,28 +86,28 @@ private 
Schedulable createSchedulable(int memUsage, int cpuUsage, Resources.createResource(Integer.MAX_VALUE, Integer.MAX_VALUE), weights, Resources.none(), usage, 0l); } - + @Test public void testSameDominantResource() { assertTrue(createComparator(8000, 4).compare( createSchedulable(1000, 1), createSchedulable(2000, 1)) < 0); } - + @Test public void testDifferentDominantResource() { assertTrue(createComparator(8000, 8).compare( createSchedulable(4000, 3), createSchedulable(2000, 5)) < 0); } - + @Test public void testOneIsNeedy() { assertTrue(createComparator(8000, 8).compare( createSchedulable(2000, 5, 0, 6), createSchedulable(4000, 3, 0, 0)) < 0); } - + @Test public void testBothAreNeedy() { assertTrue(createComparator(8000, 100).compare( @@ -103,7 +121,7 @@ public void testBothAreNeedy() { // dominant min share is 4/5 createSchedulable(4000, 3, 5000, 4)) < 0); } - + @Test public void testEvenWeightsSameDominantResource() { assertTrue(createComparator(8000, 8).compare( @@ -113,7 +131,7 @@ public void testEvenWeightsSameDominantResource() { createSchedulable(1000, 3, new ResourceWeights(2.0f)), createSchedulable(1000, 2)) < 0); } - + @Test public void testEvenWeightsDifferentDominantResource() { assertTrue(createComparator(8000, 8).compare( @@ -123,7 +141,7 @@ public void testEvenWeightsDifferentDominantResource() { createSchedulable(3000, 1, new ResourceWeights(2.0f)), createSchedulable(1000, 2)) < 0); } - + @Test public void testUnevenWeightsSameDominantResource() { assertTrue(createComparator(8000, 8).compare( @@ -133,7 +151,7 @@ public void testUnevenWeightsSameDominantResource() { createSchedulable(1000, 3, new ResourceWeights(1.0f, 2.0f)), createSchedulable(1000, 2)) < 0); } - + @Test public void testUnevenWeightsDifferentDominantResource() { assertTrue(createComparator(8000, 8).compare( @@ -146,18 +164,20 @@ public void testUnevenWeightsDifferentDominantResource() { @Test public void testCalculateShares() { - Resource used = Resources.createResource(10, 5); - 
Resource capacity = Resources.createResource(100, 10); - ResourceType[] resourceOrder = new ResourceType[2]; + Resource used = Resources.createResource(10, 5, 8); + Resource capacity = Resources.createResource(100, 10, 20); + ResourceType[] resourceOrder = new ResourceType[3]; ResourceWeights shares = new ResourceWeights(); - DominantResourceFairnessPolicy.DominantResourceFairnessComparator comparator = - new DominantResourceFairnessPolicy.DominantResourceFairnessComparator(); - comparator.calculateShares(used, capacity, shares, resourceOrder, - ResourceWeights.NEUTRAL); + ((DominantResourceFairnessPolicy.DominantResourceFairnessComparator) + createComparator(capacity)).calculateShares( + used, capacity, shares, resourceOrder, ResourceWeights.NEUTRAL); assertEquals(.1, shares.getWeight(ResourceType.MEMORY), .00001); assertEquals(.5, shares.getWeight(ResourceType.CPU), .00001); + assertEquals(.4, shares.getWeight(ResourceType.GPU), .00001); assertEquals(ResourceType.CPU, resourceOrder[0]); - assertEquals(ResourceType.MEMORY, resourceOrder[1]); + assertEquals(ResourceType.GPU, resourceOrder[1]); + assertEquals(ResourceType.MEMORY, resourceOrder[2]); } } +