diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/QueueMetrics.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/QueueMetrics.java index d94b621..2e671b1 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/QueueMetrics.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/QueueMetrics.java @@ -61,7 +61,11 @@ @Metric("# of apps failed") MutableCounterInt appsFailed; @Metric("Allocated memory in MB") MutableGaugeInt allocatedMB; + @Metric("Aggregate size of allocated memory in MB") + private MutableCounterLong aggregateAllocatedMB; @Metric("Allocated CPU in virtual cores") MutableGaugeInt allocatedVCores; + @Metric("Aggregate size of allocated vcores") private MutableCounterLong + aggregateAllocatedVCores; @Metric("# of allocated containers") MutableGaugeInt allocatedContainers; @Metric("Aggregate # of allocated containers") MutableCounterLong aggregateContainersAllocated; @Metric("Aggregate # of released containers") MutableCounterLong aggregateContainersReleased; @@ -386,7 +390,10 @@ public void allocateResources(String user, int containers, Resource res, aggregateContainersAllocated.incr(containers); allocatedMB.incr(res.getMemory() * Math.max(containers, 1)); + aggregateAllocatedMB.incr(res.getMemory() * Math.max(containers, 1)); allocatedVCores.incr(res.getVirtualCores() * Math.max(containers, 1)); + aggregateAllocatedVCores.incr(res.getVirtualCores() * + Math.max(containers, 1)); if (decrPending) { _decrPendingResources(containers, res); } @@ -565,4 +572,12 @@ public long getAggregateAllocatedContainers() { public long getAggegatedReleasedContainers() { return aggregateContainersReleased.value(); } + + public long getAggregateAllocatedMB() { + return aggregateAllocatedMB.value(); + } + + public long getAggregateAllocatedVCores() { + return aggregateAllocatedVCores.value(); + } } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestQueueMetrics.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestQueueMetrics.java index 8ad71d2..9078d19 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestQueueMetrics.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestQueueMetrics.java @@ -76,16 +76,19 @@ public void setUp() { metrics.incrPendingResources(user, 5, Resources.createResource(3*GB, 3)); // Available resources is set externally, as it depends on dynamic // configurable cluster/queue resources - checkResources(queueSource, 0, 0, 0, 0, 0, 100*GB, 100, 15*GB, 15, 5, 0, 0, 0); + checkResources(queueSource, 0, 0, 0, 0, 0, 0, 0, 100*GB, 100, 15*GB, + 15, 5, 0, 0, 0); metrics.runAppAttempt(app.getApplicationId(), user); checkApps(queueSource, 1, 0, 1, 0, 0, 0, true); metrics.allocateResources(user, 3, Resources.createResource(2*GB, 2), true); - checkResources(queueSource, 6*GB, 6, 3, 3, 0, 100*GB, 100, 9*GB, 9, 2, 0, 0, 0); + checkResources(queueSource, 6*GB, 6, 3, 6*GB, 6, 3, 0, 100*GB, 100, 9*GB, + 9, 2, 0, 0, 0); metrics.releaseResources(user, 1, Resources.createResource(2*GB, 2)); - checkResources(queueSource, 4*GB, 4, 2, 3, 1, 100*GB, 100, 9*GB, 9, 2, 0, 0, 0); + checkResources(queueSource, 4*GB, 4, 2, 6*GB, 6, 3, 1, 100*GB, 100, 9*GB, + 9, 2, 0, 0, 0); metrics.finishAppAttempt( app.getApplicationId(), app.isPending(), app.getUser()); @@ -174,20 +177,26 @@ public void testQueueAppMetricsForMultipleFailures() { metrics.incrPendingResources(user, 5, Resources.createResource(3*GB, 3)); // Available resources is set externally, as it depends on dynamic // configurable cluster/queue resources - checkResources(queueSource, 0, 0, 0, 0, 0, 100*GB, 100, 15*GB, 15, 5, 0, 0, 0); - checkResources(userSource, 0, 0, 0, 0, 0, 10*GB, 10, 15*GB, 15, 5, 0, 0, 0); + checkResources(queueSource, 0, 0, 0, 0, 0, 0, 0, 100*GB, 100, + 15*GB, 15, 5, 0, 0, 0); + checkResources(userSource, 0, 0, 0, 0, 0, 0, 0, 10*GB, 10, 15*GB, 15, + 5, 0, 0, 0); metrics.runAppAttempt(app.getApplicationId(), user); checkApps(queueSource, 1, 0, 1, 0, 0, 0, true); checkApps(userSource, 1, 0, 1, 0, 0, 0, true); metrics.allocateResources(user, 3, Resources.createResource(2*GB, 2), true); - checkResources(queueSource, 6*GB, 6, 3, 3, 0, 100*GB, 100, 9*GB, 9, 2, 0, 0, 0); - checkResources(userSource, 6*GB, 6, 3, 3, 0, 10*GB, 10, 9*GB, 9, 2, 0, 0, 0); + checkResources(queueSource, 6*GB, 6, 3, 6*GB, 6, 3, 0, 100*GB, 100, + 9*GB, 9, 2, 0, 0, 0); + checkResources(userSource, 6*GB, 6, 3, 6*GB, 6, 3, 0, 10*GB, 10, 9*GB, + 9, 2, 0, 0, 0); metrics.releaseResources(user, 1, Resources.createResource(2*GB, 2)); - checkResources(queueSource, 4*GB, 4, 2, 3, 1, 100*GB, 100, 9*GB, 9, 2, 0, 0, 0); - checkResources(userSource, 4*GB, 4, 2, 3, 1, 10*GB, 10, 9*GB, 9, 2, 0, 0, 0); + checkResources(queueSource, 4*GB, 4, 2, 6*GB, 6, 3, 1, 100*GB, 100, + 9*GB, 9, 2, 0, 0, 0); + checkResources(userSource, 4*GB, 4, 2, 6*GB, 6, 3, 1, 10*GB, 10, + 9*GB, 9, 2, 0, 0, 0); metrics.finishAppAttempt( app.getApplicationId(), app.isPending(), app.getUser()); @@ -233,10 +242,14 @@ public void testQueueAppMetricsForMultipleFailures() { parentMetrics.setAvailableResourcesToUser(user, Resources.createResource(10*GB, 10)); metrics.setAvailableResourcesToUser(user, Resources.createResource(10*GB, 10)); metrics.incrPendingResources(user, 5, Resources.createResource(3*GB, 3)); - checkResources(queueSource, 0, 0, 0, 0, 0, 100*GB, 100, 15*GB, 15, 5, 0, 0, 0); - checkResources(parentQueueSource, 0, 0, 0, 0, 0, 100*GB, 100, 15*GB, 15, 5, 0, 0, 0); - checkResources(userSource, 0, 0, 0, 0, 0, 10*GB, 10, 15*GB, 15, 5, 0, 0, 0); - checkResources(parentUserSource, 0, 0, 0, 0, 0, 10*GB, 10, 15*GB, 15, 5, 0, 0, 0); + checkResources(queueSource, 0, 0, 0, 0, 0, 0, 0, 100*GB, 100, + 15*GB, 15, 5, 0, 0, 0); + checkResources(parentQueueSource, 0, 0, 0, 0, 0, 0, 0, 100*GB, 100, + 15*GB, 15, 5, 0, 0, 0); + checkResources(userSource, 0, 0, 0, 0, 0, 0, 0, 10*GB, 10, + 15*GB, 15, 5, 0, 0, 0); + checkResources(parentUserSource, 0, 0, 0, 0, 0, 0, 0, 10*GB, + 10, 15*GB, 15, 5, 0, 0, 0); metrics.runAppAttempt(app.getApplicationId(), user); checkApps(queueSource, 1, 0, 1, 0, 0, 0, true); @@ -246,17 +259,25 @@ public void testQueueAppMetricsForMultipleFailures() { metrics.reserveResource(user, Resources.createResource(3*GB, 3)); // Available resources is set externally, as it depends on dynamic // configurable cluster/queue resources - checkResources(queueSource, 6*GB, 6, 3, 3, 0, 100*GB, 100, 9*GB, 9, 2, 3*GB, 3, 1); - checkResources(parentQueueSource, 6*GB, 6, 3, 3, 0, 100*GB, 100, 9*GB, 9, 2, 3*GB, 3, 1); - checkResources(userSource, 6*GB, 6, 3, 3, 0, 10*GB, 10, 9*GB, 9, 2, 3*GB, 3, 1); - checkResources(parentUserSource, 6*GB, 6, 3, 3, 0, 10*GB, 10, 9*GB, 9, 2, 3*GB, 3, 1); + checkResources(queueSource, 6*GB, 6, 3, 6*GB, 6, 3, 0, 100*GB, + 100, 9*GB, 9, 2, 3*GB, 3, 1); + checkResources(parentQueueSource, 6*GB, 6, 3, 6*GB, 6, 3, 0, + 100*GB, 100, 9*GB, 9, 2, 3*GB, 3, 1); + checkResources(userSource, 6*GB, 6, 3, 6*GB, 6, 3, 0, 10*GB, + 10, 9*GB, 9, 2, 3*GB, 3, 1); + checkResources(parentUserSource, 6*GB, 6, 3, 6*GB, 6, 3, 0, + 10*GB, 10, 9*GB, 9, 2, 3*GB, 3, 1); metrics.releaseResources(user, 1, Resources.createResource(2*GB, 2)); metrics.unreserveResource(user, Resources.createResource(3*GB, 3)); - checkResources(queueSource, 4*GB, 4, 2, 3, 1, 100*GB, 100, 9*GB, 9, 2, 0, 0, 0); - checkResources(parentQueueSource, 4*GB, 4, 2, 3, 1, 100*GB, 100, 9*GB, 9, 2, 0, 0, 0); - checkResources(userSource, 4*GB, 4, 2, 3, 1, 10*GB, 10, 9*GB, 9, 2, 0, 0, 0); - checkResources(parentUserSource, 4*GB, 4, 2, 3, 1, 10*GB, 10, 9*GB, 9, 2, 0, 0, 0); + checkResources(queueSource, 4*GB, 4, 2, 6*GB, 6, 3, 1, 100*GB, 100, + 9*GB, 9, 2, 0, 0, 0); + checkResources(parentQueueSource, 4*GB, 4, 2, 6*GB, 6, 3, 1, 100*GB, + 100, 9*GB, 9, 2, 0, 0, 0); + checkResources(userSource, 4*GB, 4, 2, 6*GB, 6, 3, 1, 10*GB, 10, 9*GB, + 9, 2, 0, 0, 0); + checkResources(parentUserSource, 4*GB, 4, 2, 6*GB, 6, 3, 1, 10*GB, 10, + 9*GB, 9, 2, 0, 0, 0); metrics.finishAppAttempt( app.getApplicationId(), app.isPending(), app.getUser()); @@ -347,13 +368,16 @@ public static void checkApps(MetricsSource source, int submitted, int pending, } public static void checkResources(MetricsSource source, int allocatedMB, - int allocatedCores, int allocCtnrs, long aggreAllocCtnrs, - long aggreReleasedCtnrs, int availableMB, int availableCores, int pendingMB, + int allocatedCores, int allocCtnrs, long aggreAllocMB, + long aggreAllocCores, long aggreAllocCtnrs, long aggreReleasedCtnrs, + int availableMB, int availableCores, int pendingMB, int pendingCores, int pendingCtnrs, int reservedMB, int reservedCores, int reservedCtnrs) { MetricsRecordBuilder rb = getMetrics(source); assertGauge("AllocatedMB", allocatedMB, rb); + assertCounter("AggregateAllocatedMB", aggreAllocMB, rb); assertGauge("AllocatedVCores", allocatedCores, rb); + assertCounter("AggregateAllocatedVCores", aggreAllocCores, rb); assertGauge("AllocatedContainers", allocCtnrs, rb); assertCounter("AggregateContainersAllocated", aggreAllocCtnrs, rb); assertCounter("AggregateContainersReleased", aggreReleasedCtnrs, rb);