diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java index fe7379a5abe..581bc4c37d7 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java @@ -347,6 +347,11 @@ public SchedulerHealth getSchedulerHealth() { return this.schedulerHealth; } + @VisibleForTesting + public void setSchedulerHealth(SchedulerHealth schedulerHealth) { + this.schedulerHealth = schedulerHealth; + } + protected void setLastNodeUpdateTime(long time) { this.lastNodeUpdateTime = time; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSAppAttempt.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSAppAttempt.java index d6fb544ee6b..bc52ad00a07 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSAppAttempt.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSAppAttempt.java @@ -28,6 +28,7 @@ import java.util.Map; import java.util.Set; +import 
com.google.common.annotations.VisibleForTesting; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.classification.InterfaceAudience.Private; @@ -172,6 +173,12 @@ void containerCompleted(RMContainer rmContainer, this.attemptResourceUsage.decUsed(containerResource); getQueue().decUsedResource(containerResource); + scheduler.getSchedulerHealth().updateRelease( + scheduler.getClock().getTime(), rmContainer.getNodeId(), + rmContainer.getContainerId(), rmContainer.getQueueName() + ); + scheduler.getSchedulerHealth().updateSchedulerReleaseCounts(1); + // Clear resource utilization metrics cache. lastMemoryAggregateAllocationUpdateTime = -1; } finally { @@ -463,6 +470,12 @@ public RMContainer allocate(NodeType type, FSSchedulerNode node, // Update resource requests related to "request" and store in RMContainer ((RMContainerImpl) rmContainer).setContainerRequest(containerRequest); + // Update scheduler health metrics for container allocation + scheduler.getSchedulerHealth().updateAllocation( + scheduler.getClock().getTime(), rmContainer.getNodeId(), + rmContainer.getContainerId(), queue.getQueueName()); + scheduler.getSchedulerHealth().updateSchedulerAllocationCounts(1); + // Inform the container rmContainer.handle( new RMContainerEvent(container.getId(), RMContainerEventType.START)); @@ -687,8 +700,9 @@ public synchronized void recoverContainer(SchedulerNode node, * in {@link FSSchedulerNode}.. 
* return whether reservation was possible with the current threshold limits */ - private boolean reserve(Resource perAllocationResource, FSSchedulerNode node, - Container reservedContainer, NodeType type, + @VisibleForTesting + protected boolean reserve(Resource perAllocationResource, + FSSchedulerNode node, Container reservedContainer, NodeType type, SchedulerRequestKey schedulerKey) { RMContainer nodeReservedContainer = node.getReservedContainer(); @@ -708,11 +722,22 @@ private boolean reserve(Resource perAllocationResource, FSSchedulerNode node, super.reserve(node, schedulerKey, null, reservedContainer); node.reserveResource(this, schedulerKey, rmContainer); setReservation(node); + scheduler.getSchedulerHealth().updateReservation( + scheduler.getClock().getTime(), node.getNodeID(), + reservedContainer.getId(), getQueue().getName() + ); + scheduler.getSchedulerHealth().updateSchedulerReservationCounts(1); + } else { RMContainer rmContainer = node.getReservedContainer(); super.reserve(node, schedulerKey, rmContainer, reservedContainer); node.reserveResource(this, schedulerKey, rmContainer); setReservation(node); + scheduler.getSchedulerHealth().updateReservation( + scheduler.getClock().getTime(), node.getNodeID(), + reservedContainer.getId(), getQueue().getName() + ); + scheduler.getSchedulerHealth().updateSchedulerReservationCounts(1); } return true; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSPreemptionThread.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSPreemptionThread.java index e664725af31..c86b973f03a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSPreemptionThread.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSPreemptionThread.java @@ -273,6 +273,12 @@ public void run() { ContainerStatus status = SchedulerUtils.createPreemptedContainerStatus( container.getContainerId(), SchedulerUtils.PREEMPTED_CONTAINER); + // Updates scheduler health metrics for preemption + scheduler.getSchedulerHealth().updatePreemption( + scheduler.getClock().getTime(), container.getNodeId(), + container.getContainerId(), container.getQueueName()); + scheduler.getSchedulerHealth().updateSchedulerPreemptionCounts(1); + LOG.info("Killing container " + container); scheduler.completedContainer( container, status, RMContainerEventType.KILL); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java index 151a7ab0867..b1f5c4c440e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java @@ -730,6 +730,8 @@ protected void completedContainerInternal( + " released container " + container.getId() + " on node: " + (node == null ? 
nodeID : node) + " with event: " + event); } + schedulerHealth.updateRelease(getClock().getTime(), nodeID, + container.getId(), application.getQueue().getName()); } finally { writeLock.unlock(); } @@ -1112,6 +1114,7 @@ void attemptScheduling(FSSchedulerNode node) { boolean validReservation = false; if (reservedAppSchedulable != null) { validReservation = reservedAppSchedulable.assignReservedContainer(node); + schedulerHealth.updateSchedulerFulfilledReservationCounts(1); } if (!validReservation) { // No reservation, schedule at queue which is farthest below fair share @@ -1135,6 +1138,10 @@ void attemptScheduling(FSSchedulerNode node) { break; } } + + // TODO: Reserved resource + schedulerHealth.updateSchedulerRunDetails(getClock().getTime(), + assignedResource, Resources.none()); } updateRootQueueMetrics(); } finally { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/FairSchedulerPage.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/FairSchedulerPage.java index 7f31defa066..e4de6c8c555 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/FairSchedulerPage.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/FairSchedulerPage.java @@ -21,14 +21,18 @@ import static org.apache.hadoop.yarn.util.StringHelper.join; import java.util.Collection; +import java.util.HashMap; +import java.util.Map; import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerHealth; import 
org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.FairSchedulerInfo; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.FairSchedulerLeafQueueInfo; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.FairSchedulerQueueInfo; import org.apache.hadoop.yarn.server.webapp.WebPageUtils; +import org.apache.hadoop.yarn.util.Times; import org.apache.hadoop.yarn.webapp.ResponseInfo; import org.apache.hadoop.yarn.webapp.SubView; import org.apache.hadoop.yarn.webapp.hamlet2.Hamlet; @@ -171,6 +175,117 @@ public void render(Block html) { ul.__(); } } + + /** + * Scheduler health block + */ + public static class HealthBlock extends HtmlBlock { + + private final FairScheduler fs; + + @Inject + HealthBlock(ResourceManager rm) { + fs = (FairScheduler) rm.getResourceScheduler(); + } + + @Override + public void render(HtmlBlock.Block html) { + SchedulerHealth healthInfo = fs.getSchedulerHealth(); + UL