diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/QueueMetrics.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/QueueMetrics.java index 4e364f7..5280575 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/QueueMetrics.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/QueueMetrics.java @@ -61,7 +61,11 @@ @Metric("# of apps failed") MutableCounterInt appsFailed; @Metric("Allocated memory in MB") MutableGaugeLong allocatedMB; + @Metric("Aggregate size of allocated memory in MB") + private MutableCounterLong aggregateAllocatedMB; @Metric("Allocated CPU in virtual cores") MutableGaugeInt allocatedVCores; + @Metric("Aggregate size of allocated vcores") + private MutableCounterLong aggregateAllocatedVCores; @Metric("# of allocated containers") MutableGaugeInt allocatedContainers; @Metric("Aggregate # of allocated containers") MutableCounterLong aggregateContainersAllocated; @Metric("Aggregate # of allocated node-local containers") @@ -409,7 +413,10 @@ public void allocateResources(String user, int containers, Resource res, aggregateContainersAllocated.incr(containers); allocatedMB.incr(res.getMemorySize() * containers); + aggregateAllocatedMB.incr(res.getMemorySize() * containers); allocatedVCores.incr(res.getVirtualCores() * containers); + aggregateAllocatedVCores.incr(res.getVirtualCores() * + Math.max(containers, 1)); if (decrPending) { _decrPendingResources(containers, res); } @@ -640,4 +647,12 @@ public long getAggregateOffSwitchContainersAllocated() { public long getAggegatedReleasedContainers() { return aggregateContainersReleased.value(); } + + public long getAggregateAllocatedMB() { + return aggregateAllocatedMB.value(); + } + + public long getAggregateAllocatedVCores() { + return aggregateAllocatedVCores.value(); + } } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestQueueMetrics.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestQueueMetrics.java index cb1104b..0610b90 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestQueueMetrics.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestQueueMetrics.java @@ -76,23 +76,23 @@ public void setUp() { metrics.incrPendingResources(user, 5, Resources.createResource(3*GB, 3)); // Available resources is set externally, as it depends on dynamic // configurable cluster/queue resources - checkResources(queueSource, 0, 0, 0, 0, 0, 100*GB, 100, 15*GB, 15, 5, 0, 0, 0); + checkResources(queueSource, 0, 0, 0, 0, 0, 0, 0, 100*GB, 100, 15*GB, 15, 5, 0, 0, 0); metrics.runAppAttempt(app.getApplicationId(), user); checkApps(queueSource, 1, 0, 1, 0, 0, 0, true); metrics.allocateResources(user, 3, Resources.createResource(2*GB, 2), true); - checkResources(queueSource, 6*GB, 6, 3, 3, 0, 100*GB, 100, 9*GB, 9, 2, 0, 0, 0); + checkResources(queueSource, 6*GB, 6, 3, 3, 6*GB, 6, 0, 100*GB, 100, 9*GB, 9, 2, 0, 0, 0); metrics.releaseResources(user, 1, Resources.createResource(2*GB, 2)); - checkResources(queueSource, 4*GB, 4, 2, 3, 1, 100*GB, 100, 9*GB, 9, 2, 0, 0, 0); + checkResources(queueSource, 4*GB, 4, 2, 3, 6*GB, 6, 1, 100*GB, 100, 9*GB, 9, 2, 0, 0, 0); metrics.incrPendingResources(user, 0, Resources.createResource(2 * GB, 2)); - checkResources(queueSource, 4 * GB, 4, 2, 3, 1, 100 * GB, 100, 9 * GB, 9, 2, + checkResources(queueSource, 4 * GB, 4, 2, 3, 6*GB, 6, 1, 100 * GB, 100, 9 * GB, 9, 2, 0, 0, 0); metrics.decrPendingResources(user, 0, Resources.createResource(2 * GB, 2)); - checkResources(queueSource, 4 * GB, 4, 2, 3, 1, 100 * GB, 100, 9 * GB, 9, 2, + checkResources(queueSource, 4 * GB, 4, 2, 3, 6*GB, 6, 1, 100 * GB, 100, 9 * GB, 9, 2, 0, 0, 0); metrics.finishAppAttempt( @@ -182,20 +182,20 @@ public void testQueueAppMetricsForMultipleFailures() { metrics.incrPendingResources(user, 5, Resources.createResource(3*GB, 3)); // Available resources is set externally, as it depends on dynamic // configurable cluster/queue resources - checkResources(queueSource, 0, 0, 0, 0, 0, 100*GB, 100, 15*GB, 15, 5, 0, 0, 0); - checkResources(userSource, 0, 0, 0, 0, 0, 10*GB, 10, 15*GB, 15, 5, 0, 0, 0); + checkResources(queueSource, 0, 0, 0, 0, 0, 0, 0, 100*GB, 100, 15*GB, 15, 5, 0, 0, 0); + checkResources(userSource, 0, 0, 0, 0, 0, 0, 0, 10*GB, 10, 15*GB, 15, 5, 0, 0, 0); metrics.runAppAttempt(app.getApplicationId(), user); checkApps(queueSource, 1, 0, 1, 0, 0, 0, true); checkApps(userSource, 1, 0, 1, 0, 0, 0, true); metrics.allocateResources(user, 3, Resources.createResource(2*GB, 2), true); - checkResources(queueSource, 6*GB, 6, 3, 3, 0, 100*GB, 100, 9*GB, 9, 2, 0, 0, 0); - checkResources(userSource, 6*GB, 6, 3, 3, 0, 10*GB, 10, 9*GB, 9, 2, 0, 0, 0); + checkResources(queueSource, 6*GB, 6, 3, 3, 6*GB, 6, 0, 100*GB, 100, 9*GB, 9, 2, 0, 0, 0); + checkResources(userSource, 6*GB, 6, 3, 3, 6*GB, 6, 0, 10*GB, 10, 9*GB, 9, 2, 0, 0, 0); metrics.releaseResources(user, 1, Resources.createResource(2*GB, 2)); - checkResources(queueSource, 4*GB, 4, 2, 3, 1, 100*GB, 100, 9*GB, 9, 2, 0, 0, 0); - checkResources(userSource, 4*GB, 4, 2, 3, 1, 10*GB, 10, 9*GB, 9, 2, 0, 0, 0); + checkResources(queueSource, 4*GB, 4, 2, 3, 6*GB, 6, 1, 100*GB, 100, 9*GB, 9, 2, 0, 0, 0); + checkResources(userSource, 4*GB, 4, 2, 3, 6*GB, 6, 1, 10*GB, 10, 9*GB, 9, 2, 0, 0, 0); metrics.finishAppAttempt( app.getApplicationId(), app.isPending(), app.getUser()); @@ -288,10 +288,10 @@ public void testQueueAppMetricsForMultipleFailures() { parentMetrics.setAvailableResourcesToUser(user, Resources.createResource(10*GB, 10)); metrics.setAvailableResourcesToUser(user, Resources.createResource(10*GB, 10)); metrics.incrPendingResources(user, 5, Resources.createResource(3*GB, 3)); - checkResources(queueSource, 0, 0, 0, 0, 0, 100*GB, 100, 15*GB, 15, 5, 0, 0, 0); - checkResources(parentQueueSource, 0, 0, 0, 0, 0, 100*GB, 100, 15*GB, 15, 5, 0, 0, 0); - checkResources(userSource, 0, 0, 0, 0, 0, 10*GB, 10, 15*GB, 15, 5, 0, 0, 0); - checkResources(parentUserSource, 0, 0, 0, 0, 0, 10*GB, 10, 15*GB, 15, 5, 0, 0, 0); + checkResources(queueSource, 0, 0, 0, 0, 0, 0, 0, 100*GB, 100, 15*GB, 15, 5, 0, 0, 0); + checkResources(parentQueueSource, 0, 0, 0, 0, 0, 0, 0, 100*GB, 100, 15*GB, 15, 5, 0, 0, 0); + checkResources(userSource, 0, 0, 0, 0, 0, 0, 0, 10*GB, 10, 15*GB, 15, 5, 0, 0, 0); + checkResources(parentUserSource, 0, 0, 0, 0, 0, 0, 0, 10*GB, 10, 15*GB, 15, 5, 0, 0, 0); metrics.runAppAttempt(app.getApplicationId(), user); checkApps(queueSource, 1, 0, 1, 0, 0, 0, true); @@ -301,17 +301,17 @@ public void testQueueAppMetricsForMultipleFailures() { metrics.reserveResource(user, Resources.createResource(3*GB, 3)); // Available resources is set externally, as it depends on dynamic // configurable cluster/queue resources - checkResources(queueSource, 6*GB, 6, 3, 3, 0, 100*GB, 100, 9*GB, 9, 2, 3*GB, 3, 1); - checkResources(parentQueueSource, 6*GB, 6, 3, 3, 0, 100*GB, 100, 9*GB, 9, 2, 3*GB, 3, 1); - checkResources(userSource, 6*GB, 6, 3, 3, 0, 10*GB, 10, 9*GB, 9, 2, 3*GB, 3, 1); - checkResources(parentUserSource, 6*GB, 6, 3, 3, 0, 10*GB, 10, 9*GB, 9, 2, 3*GB, 3, 1); + checkResources(queueSource, 6*GB, 6, 3, 3, 6*GB, 6, 0, 100*GB, 100, 9*GB, 9, 2, 3*GB, 3, 1); + checkResources(parentQueueSource, 6*GB, 6, 3, 3, 6*GB, 6, 0, 100*GB, 100, 9*GB, 9, 2, 3*GB, 3, 1); + checkResources(userSource, 6*GB, 6, 3, 3, 6*GB, 6, 0, 10*GB, 10, 9*GB, 9, 2, 3*GB, 3, 1); + checkResources(parentUserSource, 6*GB, 6, 3, 3, 6*GB, 6, 0, 10*GB, 10, 9*GB, 9, 2, 3*GB, 3, 1); metrics.releaseResources(user, 1, Resources.createResource(2*GB, 2)); metrics.unreserveResource(user, Resources.createResource(3*GB, 3)); - checkResources(queueSource, 4*GB, 4, 2, 3, 1, 100*GB, 100, 9*GB, 9, 2, 0, 0, 0); - checkResources(parentQueueSource, 4*GB, 4, 2, 3, 1, 100*GB, 100, 9*GB, 9, 2, 0, 0, 0); - checkResources(userSource, 4*GB, 4, 2, 3, 1, 10*GB, 10, 9*GB, 9, 2, 0, 0, 0); - checkResources(parentUserSource, 4*GB, 4, 2, 3, 1, 10*GB, 10, 9*GB, 9, 2, 0, 0, 0); + checkResources(queueSource, 4*GB, 4, 2, 3, 6*GB, 6, 1, 100*GB, 100, 9*GB, 9, 2, 0, 0, 0); + checkResources(parentQueueSource, 4*GB, 4, 2, 3, 6*GB, 6, 1, 100*GB, 100, 9*GB, 9, 2, 0, 0, 0); + checkResources(userSource, 4*GB, 4, 2, 3, 6*GB, 6, 1, 10*GB, 10, 9*GB, 9, 2, 0, 0, 0); + checkResources(parentUserSource, 4*GB, 4, 2, 3, 6*GB, 6, 1, 10*GB, 10, 9*GB, 9, 2, 0, 0, 0); metrics.finishAppAttempt( app.getApplicationId(), app.isPending(), app.getUser()); @@ -402,13 +402,15 @@ public static void checkApps(MetricsSource source, int submitted, int pending, } public static void checkResources(MetricsSource source, long allocatedMB, - int allocatedCores, int allocCtnrs, long aggreAllocCtnrs, - long aggreReleasedCtnrs, long availableMB, int availableCores, long pendingMB, - int pendingCores, int pendingCtnrs, long reservedMB, int reservedCores, - int reservedCtnrs) { + int allocatedCores, int allocCtnrs, long aggreAllocCtnrs, long aggreAllocMB, + long aggreAllocCores, long aggreReleasedCtnrs, long availableMB, + int availableCores, long pendingMB, int pendingCores, int pendingCtnrs, + long reservedMB, int reservedCores, int reservedCtnrs) { MetricsRecordBuilder rb = getMetrics(source); assertGauge("AllocatedMB", allocatedMB, rb); + assertCounter("AggregateAllocatedMB", aggreAllocMB, rb); assertGauge("AllocatedVCores", allocatedCores, rb); + assertCounter("AggregateAllocatedVCores", aggreAllocCores, rb); assertGauge("AllocatedContainers", allocCtnrs, rb); assertCounter("AggregateContainersAllocated", aggreAllocCtnrs, rb); assertCounter("AggregateContainersReleased", aggreReleasedCtnrs, rb);