Index: hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFSAppAttempt.java IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8 =================================================================== --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFSAppAttempt.java (date 1540538396000) +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFSAppAttempt.java (date 1542962249000) @@ -22,7 +22,7 @@ import java.util.List; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.api.records.*; import org.apache.hadoop.yarn.server.resourcemanager.MockNodes; import org.apache.hadoop.yarn.server.resourcemanager.MockRM; import static org.junit.Assert.assertEquals; @@ -31,13 +31,15 @@ import static org.junit.Assert.assertTrue; import static org.mockito.Mockito.doReturn; import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.when; -import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; -import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.server.resourcemanager.RMContext; +import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer; import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.NodeType; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerHealth; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.PendingAsk; import org.apache.hadoop.yarn.server.scheduler.SchedulerRequestKey; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity @@ -49,6 +51,7 @@ import org.apache.hadoop.yarn.util.ControlledClock; import org.apache.hadoop.yarn.util.resource.Resources; +import org.junit.Assert; import org.junit.Before; import org.junit.Test; import org.mockito.Mockito; @@ -161,7 +164,6 @@ // manually set back to node local schedulerApp.resetAllowedLocalityLevel(prio, NodeType.NODE_LOCAL); - schedulerApp.resetSchedulingOpportunities(prio, clock.getTime()); assertEquals(NodeType.NODE_LOCAL, schedulerApp.getAllowedLocalityLevelByTime(prio, nodeLocalityDelayMs, rackLocalityDelayMs, clock.getTime())); @@ -341,6 +343,37 @@ assertEquals(clusterResource, spyApp.getHeadroom()); } + @Test + public void testHealthMetric() { + //SETUP + FSLeafQueue queue = Mockito.mock(FSLeafQueue.class); + FSQueueMetrics queueMetrics = Mockito.mock(FSQueueMetrics.class); + when(queue.getMetrics()).thenReturn(queueMetrics); + + ApplicationAttemptId applicationAttemptId = createAppAttemptId(1, 1); + RMContext rmContext = resourceManager.getRMContext(); + FSAppAttempt schedulerApp = + new FSAppAttempt(scheduler, applicationAttemptId, "user1", queue, + null, rmContext); + scheduler.setSchedulerHealth(new SchedulerHealth()); + Resource perAllocationResource = Resource.newInstance(1024,1); + + FSSchedulerNode node = Mockito.mock(FSSchedulerNode.class); + RMNode rmNode = Mockito.mock(RMNode.class); + Mockito.when(rmNode.getNodeID()).thenReturn(NodeId.newInstance("host", + 1234)); + Mockito.when(node.getRMNode()).thenReturn(rmNode); + SchedulerRequestKey schedulerKey = Mockito.mock(SchedulerRequestKey.class); + + //ACT + schedulerApp.reserve(perAllocationResource, node, null, + NodeType.NODE_LOCAL, schedulerKey); + + //ASSERT + Assert.assertEquals(Long.valueOf(1), + scheduler.getSchedulerHealth().getReservationCount()); + } + private static long min(long value1, long value2, long value3) { return Math.min(Math.min(value1, value2), value3); } Index: hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSPreemptionThread.java IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8 =================================================================== --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSPreemptionThread.java (date 1540538396000) +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSPreemptionThread.java (date 1542120675000) @@ -253,6 +253,12 @@ ContainerStatus status = SchedulerUtils.createPreemptedContainerStatus( container.getContainerId(), SchedulerUtils.PREEMPTED_CONTAINER); + // Updates scheduler health metrcis for preemption + scheduler.getSchedulerHealth().updatePreemption( + scheduler.getClock().getTime(), container.getNodeId(), + container.getContainerId(), container.getQueueName()); + scheduler.getSchedulerHealth().updateSchedulerPreemptionCounts(1); + LOG.info("Killing container " + container); scheduler.completedContainer( container, status, RMContainerEventType.KILL); Index: hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSAppAttempt.java IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8 =================================================================== --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSAppAttempt.java (date 1540538396000) +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSAppAttempt.java (date 1542617415000) @@ -28,6 +28,7 @@ import java.util.Map; import java.util.Set; +import com.google.common.annotations.VisibleForTesting; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience.Private; @@ -171,6 +172,9 @@ this.attemptResourceUsage.decUsed(containerResource); getQueue().decUsedResource(containerResource); + scheduler.getSchedulerHealth().updateRelease(scheduler.getClock().getTime(), rmContainer.getNodeId(), rmContainer.getContainerId(), rmContainer.getQueueName()); + scheduler.getSchedulerHealth().updateSchedulerReleaseCounts(1); + // Clear resource utilization metrics cache. lastMemoryAggregateAllocationUpdateTime = -1; } finally { @@ -468,6 +472,12 @@ // Update resource requests related to "request" and store in RMContainer ((RMContainerImpl) rmContainer).setContainerRequest(containerRequest); + // Update scheduler health metrics for container allocation + scheduler.getSchedulerHealth().updateAllocation( + scheduler.getClock().getTime(), rmContainer.getNodeId(), + rmContainer.getContainerId(), queue.getQueueName()); + scheduler.getSchedulerHealth().updateSchedulerAllocationCounts(1); + // Inform the container rmContainer.handle( new RMContainerEvent(container.getId(), RMContainerEventType.START)); @@ -693,7 +703,8 @@ * in {@link FSSchedulerNode}.. * return whether reservation was possible with the current threshold limits */ - private boolean reserve(Resource perAllocationResource, FSSchedulerNode node, + @VisibleForTesting + protected boolean reserve(Resource perAllocationResource, FSSchedulerNode node, Container reservedContainer, NodeType type, SchedulerRequestKey schedulerKey) { @@ -714,11 +725,22 @@ super.reserve(node, schedulerKey, null, reservedContainer); node.reserveResource(this, schedulerKey, rmContainer); setReservation(node); + scheduler.getSchedulerHealth().updateReservation( + scheduler.getClock().getTime(), node.getNodeID(), + reservedContainer.getId(), getQueue().getName() + ); + scheduler.getSchedulerHealth().updateSchedulerReservationCounts(1); + } else { RMContainer rmContainer = node.getReservedContainer(); super.reserve(node, schedulerKey, rmContainer, reservedContainer); node.reserveResource(this, schedulerKey, rmContainer); setReservation(node); + scheduler.getSchedulerHealth().updateReservation( + scheduler.getClock().getTime(), node.getNodeID(), + reservedContainer.getId(), getQueue().getName() + ); + scheduler.getSchedulerHealth().updateSchedulerReservationCounts(1); } return true; } Index: hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8 =================================================================== --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java (date 1540538396000) +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java (date 1542617720000) @@ -332,6 +332,11 @@ return this.schedulerHealth; } + @VisibleForTesting + public void setSchedulerHealth(SchedulerHealth schedulerHealth) { + this.schedulerHealth = schedulerHealth; + } + protected void setLastNodeUpdateTime(long time) { this.lastNodeUpdateTime = time; } Index: hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8 =================================================================== --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java (date 1540538396000) +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java (date 1542120675000) @@ -28,6 +28,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.authorize.AccessControlList; +import org.apache.hadoop.util.Time; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.Container; @@ -43,6 +44,7 @@ import org.apache.hadoop.yarn.api.records.ResourceOption; import org.apache.hadoop.yarn.api.records.ResourceRequest; import org.apache.hadoop.yarn.api.records.SchedulingRequest; +import org.apache.hadoop.yarn.api.records.impl.pb.NodeIdPBImpl; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.exceptions .SchedulerInvalidResoureRequestException; @@ -78,6 +80,8 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplication; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerUtils; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerUtils.MaxResourceValidationResult; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CSAssignment; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.AssignmentInformation; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.QueueEntitlement; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAddedSchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptAddedSchedulerEvent; @@ -764,6 +768,8 @@ + " released container " + container.getId() + " on node: " + (node == null ? nodeID : node) + " with event: " + event); } + schedulerHealth.updateRelease(getClock().getTime(), nodeID, + container.getId(), application.getQueue().getName()); } finally { writeLock.unlock(); } @@ -1146,6 +1152,7 @@ boolean validReservation = false; if (reservedAppSchedulable != null) { validReservation = reservedAppSchedulable.assignReservedContainer(node); + schedulerHealth.updateSchedulerFulfilledReservationCounts(1); } if (!validReservation) { // No reservation, schedule at queue which is farthest below fair share @@ -1171,6 +1178,10 @@ break; } } + + // TODO: Reserved resource + schedulerHealth.updateSchedulerRunDetails(getClock().getTime(), + assignedResource, Resources.none()); } updateRootQueueMetrics(); } finally { Index: hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/FairSchedulerPage.java IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8 =================================================================== --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/FairSchedulerPage.java (date 1540538396000) +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/FairSchedulerPage.java (date 1542120675000) @@ -21,14 +21,18 @@ import static org.apache.hadoop.yarn.util.StringHelper.join; import java.util.Collection; +import java.util.HashMap; +import java.util.Map; import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerHealth; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.FairSchedulerInfo; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.FairSchedulerLeafQueueInfo; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.FairSchedulerQueueInfo; import org.apache.hadoop.yarn.server.webapp.WebPageUtils; +import org.apache.hadoop.yarn.util.Times; import org.apache.hadoop.yarn.webapp.ResponseInfo; import org.apache.hadoop.yarn.webapp.SubView; import org.apache.hadoop.yarn.webapp.hamlet2.Hamlet; @@ -171,6 +175,106 @@ ul.__(); } } + + public static class HealthBlock extends HtmlBlock { + + final FairScheduler fs; + + @Inject + HealthBlock(ResourceManager rm) { + fs = (FairScheduler) rm.getResourceScheduler(); + } + + @Override + public void render(HtmlBlock.Block html) { + SchedulerHealth healthInfo = fs.getSchedulerHealth(); + UL