diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/AbstractCSQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/AbstractCSQueue.java index 550c6aa..d89855a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/AbstractCSQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/AbstractCSQueue.java @@ -43,7 +43,6 @@ import org.apache.hadoop.yarn.security.YarnAuthorizationProvider; import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.NodeType; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceLimits; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceUsage; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerUtils; @@ -67,7 +66,7 @@ final Resource minimumAllocation; Resource maximumAllocation; QueueState state; - final QueueMetrics metrics; + final CSQueueMetrics metrics; protected final PrivilegedEntity queueEntity; final ResourceCalculator resourceCalculator; @@ -100,10 +99,10 @@ public AbstractCSQueue(CapacitySchedulerContext cs, this.resourceCalculator = cs.getResourceCalculator(); // must be called after parent and queueName is set - this.metrics = old != null ? old.getMetrics() : - QueueMetrics.forQueue(getQueuePath(), parent, - cs.getConfiguration().getEnableUserMetrics(), - cs.getConf()); + this.metrics = + old != null ? (CSQueueMetrics) old.getMetrics() : CSQueueMetrics + .forQueue(getQueuePath(), parent, cs.getConfiguration() + .getEnableUserMetrics(), cs.getConf()); this.csContext = cs; this.minimumAllocation = csContext.getMinimumResourceCapability(); @@ -171,7 +170,7 @@ public synchronized QueueState getState() { } @Override - public QueueMetrics getMetrics() { + public CSQueueMetrics getMetrics() { return metrics; } @@ -333,6 +332,13 @@ public QueueStatistics getQueueStatistics() { stats.setAllocatedVCores(getMetrics().getAllocatedVirtualCores()); stats.setPendingVCores(getMetrics().getPendingVirtualCores()); stats.setReservedVCores(getMetrics().getReservedVirtualCores()); + stats.setReservedVCores(getMetrics().getAMResourceLimitMB()); + stats.setReservedVCores(getMetrics().getAMResourceLimitVCores()); + stats.setReservedVCores(getMetrics().getUsedAMResourceMB()); + stats.setReservedVCores(getMetrics().getUsedAMResourceVCores()); + stats.setReservedVCores(getMetrics().getUserAMResourceLimitMB()); + stats.setReservedVCores(getMetrics().getUserAMResourceLimitVCores()); + return stats; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CSQueueMetrics.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CSQueueMetrics.java new file mode 100644 index 0000000..2b76e05 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CSQueueMetrics.java @@ -0,0 +1,111 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.metrics2.MetricsSystem; +import org.apache.hadoop.metrics2.annotation.Metric; +import org.apache.hadoop.metrics2.annotation.Metrics; +import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; +import org.apache.hadoop.metrics2.lib.MutableGaugeInt; +import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Queue; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics; + +@Metrics(context = "yarn") +public class CSQueueMetrics extends QueueMetrics { + + @Metric("AM memory limit in MB") MutableGaugeInt AMResourceLimitMB; + @Metric("AM CPU limit in virtual cores") MutableGaugeInt AMResourceLimitVCores; + @Metric("Used AM memory limit in MB") MutableGaugeInt usedAMResourceMB; + @Metric("Used AM CPU limit in virtual cores") MutableGaugeInt usedAMResourceVCores; + @Metric("User AM memory limit in MB") MutableGaugeInt userAMResourceLimitMB; + @Metric("User AM CPU limit in virtual cores") MutableGaugeInt userAMResourceLimitVCores; + + CSQueueMetrics(MetricsSystem ms, String queueName, Queue parent, + boolean enableUserMetrics, Configuration conf) { + super(ms, queueName, parent, enableUserMetrics, conf); + } + + public int getAMResourceLimitMB() { + return AMResourceLimitMB.value(); + } + + public int getUsedAMResourceMB() { + return usedAMResourceMB.value(); + } + + public int getUserAMResourceLimitMB() { + return userAMResourceLimitMB.value(); + } + + public int getAMResourceLimitVCores() { + return AMResourceLimitVCores.value(); + } + + public int getUsedAMResourceVCores() { + return usedAMResourceVCores.value(); + } + + public int getUserAMResourceLimitVCores() { + return userAMResourceLimitVCores.value(); + } + + public void setAMResouceLimit(Resource res) { + AMResourceLimitMB.set(res.getMemory()); + AMResourceLimitVCores.set(res.getVirtualCores()); + } + + public void setUserAMResouceLimit(Resource res) { + userAMResourceLimitMB.set(res.getMemory()); + userAMResourceLimitVCores.set(res.getVirtualCores()); + } + + public void incAMUsed(Resource res) { + usedAMResourceMB.incr(res.getMemory()); + usedAMResourceVCores.incr(res.getVirtualCores()); + } + + public void decAMUsed(Resource res) { + usedAMResourceMB.decr(res.getMemory()); + usedAMResourceVCores.decr(res.getVirtualCores()); + } + + public synchronized static CSQueueMetrics forQueue(String queueName, + Queue parent, boolean enableUserMetrics, Configuration conf) { + MetricsSystem ms = DefaultMetricsSystem.instance(); + QueueMetrics metrics = queueMetrics.get(queueName); + if (metrics == null) { + metrics = + new CSQueueMetrics(ms, queueName, parent, enableUserMetrics, conf) + .tag(QUEUE_INFO, queueName); + + // Register with the MetricsSystems + if (ms != null) { + metrics = + ms.register(sourceName(queueName).toString(), "Metrics for queue: " + + queueName, metrics); + } + queueMetrics.put(queueName, metrics); + } + + return (CSQueueMetrics) metrics; + } + +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java index f860574..35c5130 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java @@ -140,6 +140,10 @@ public LeafQueue(CapacitySchedulerContext cs, this.activeApplications = new TreeSet(applicationComparator); setupQueueConfigs(cs.getClusterResource()); + + // update static CSQueueMetrics + this.metrics.setAMResouceLimit(getAMResourceLimit()); + this.metrics.setUserAMResouceLimit(getUserAMResourceLimit()); } protected synchronized void setupQueueConfigs(Resource clusterResource) @@ -640,6 +644,7 @@ private synchronized void activateApplications() { activeApplications.add(application); queueUsage.incAMUsed(application.getAMResource()); user.getResourceUsage().incAMUsed(application.getAMResource()); + metrics.incAMUsed(application.getAMResource()); i.remove(); LOG.info("Application " + application.getApplicationId() + " from user: " + application.getUser() + @@ -692,6 +697,7 @@ public synchronized void removeApplicationAttempt( } else { queueUsage.decAMUsed(application.getAMResource()); user.getResourceUsage().decAMUsed(application.getAMResource()); + metrics.decAMUsed(application.getAMResource()); } applicationAttemptMap.remove(application.getApplicationAttemptId()); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestApplicationLimits.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestApplicationLimits.java index a41fdfa..4ba83b9 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestApplicationLimits.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestApplicationLimits.java @@ -277,9 +277,22 @@ public void testLimitsComputation() throws Exception { " UserAMResourceLimit=" + queue.getUserAMResourceLimit()); - assertEquals(queue.getAMResourceLimit(), Resource.newInstance(160*GB, 1)); - assertEquals(queue.getUserAMResourceLimit(), - Resource.newInstance(80*GB, 1)); + Resource amResourceLimit = Resource.newInstance(160 * GB, 1); + Resource userAMResourceLimit = Resource.newInstance(80 * GB, 1); + + assertEquals(queue.getAMResourceLimit(), amResourceLimit); + assertEquals(queue.getUserAMResourceLimit(), userAMResourceLimit); + + // Assert in metrics + assertEquals(queue.getMetrics().getAMResourceLimitMB(), + amResourceLimit.getMemory()); + assertEquals(queue.getMetrics().getAMResourceLimitVCores(), + amResourceLimit.getVirtualCores()); + + assertEquals(queue.getMetrics().getUserAMResourceLimitMB(), + userAMResourceLimit.getMemory()); + assertEquals(queue.getMetrics().getUserAMResourceLimitVCores(), + userAMResourceLimit.getVirtualCores()); assertEquals( (int)(clusterResource.getMemory() * queue.getAbsoluteCapacity()), diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java index 0a19604..cbd3b04 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java @@ -415,10 +415,16 @@ public void testAppAttemptMetrics() throws Exception { .getMockApplicationAttemptId(0, 2); FiCaSchedulerApp app_1 = new FiCaSchedulerApp(appAttemptId_1, user_0, a, null, spyRMContext); + app_1.setAMResource(Resource.newInstance(100, 1)); a.submitApplicationAttempt(app_1, user_0); // same user assertEquals(1, a.getMetrics().getAppsSubmitted()); assertEquals(1, a.getMetrics().getAppsPending()); + assertEquals(1, a.getUser(user_0).getActiveApplications()); + assertEquals(app_1.getAMResource().getMemory(), a.getMetrics() + .getUsedAMResourceMB()); + assertEquals(app_1.getAMResource().getVirtualCores(), a.getMetrics() + .getUsedAMResourceVCores()); event = new AppAttemptRemovedSchedulerEvent(appAttemptId_0, RMAppAttemptState.FINISHED, false); -- 1.9.2.msysgit.0