From 3e1f6ea4d1b3bd99848ea47dbbe0a76fbe87d6ec Mon Sep 17 00:00:00 2001 From: Sunil G Date: Fri, 27 Jul 2018 14:16:54 +0530 Subject: [PATCH] YARN-7494 --- .../resourcemanager/RMActiveServiceContext.java | 16 +++ .../yarn/server/resourcemanager/RMContext.java | 8 +- .../yarn/server/resourcemanager/RMContextImpl.java | 14 +- .../server/resourcemanager/ResourceManager.java | 12 ++ .../scheduler/AppSchedulingInfo.java | 11 +- .../scheduler/ClusterNodeTracker.java | 61 ++++++++ .../server/resourcemanager/scheduler/Queue.java | 6 + .../scheduler/activities/ActivitiesLogger.java | 28 ++-- .../scheduler/activities/ActivitiesManager.java | 8 +- .../scheduler/capacity/AbstractCSQueue.java | 16 ++- .../scheduler/capacity/CapacityScheduler.java | 72 ++++++++-- .../capacity/CapacitySchedulerConfiguration.java | 107 ++++++++++++++ .../scheduler/capacity/LeafQueue.java | 49 ++++--- .../scheduler/capacity/ParentQueue.java | 4 +- .../allocator/RegularContainerAllocator.java | 35 ++--- .../common/ApplicationSchedulingConfig.java | 6 + .../scheduler/common/fica/FiCaSchedulerApp.java | 11 ++ .../scheduler/fair/FSLeafQueue.java | 5 + .../scheduler/fair/FSParentQueue.java | 6 + .../scheduler/fifo/FifoScheduler.java | 6 + .../placement/DefaultMultiNodeLookupPolicy.java | 80 +++++++++++ .../placement/LocalityAppPlacementAllocator.java | 32 ++++- .../scheduler/placement/MultiNodeLookupPolicy.java | 67 +++++++++ .../scheduler/placement/MultiNodePolicySpec.java | 48 +++++++ .../scheduler/placement/MultiNodeSorter.java | 149 +++++++++++++++++++ .../placement/MultiNodeSortingManager.java | 98 +++++++++++++ .../ResourceUsageBasedMultiNodeLookupPolicy.java | 85 +++++++++++ .../reservation/ReservationSystemTestUtil.java | 3 + .../capacity/CapacitySchedulerTestBase.java | 13 ++ .../scheduler/capacity/TestCapacityScheduler.java | 23 +-- .../capacity/TestCapacitySchedulerMultiNodes.java | 158 +++++++++++++++++++++ .../TestCapacitySchedulerNodeLabelUpdate.java | 72 ++++++++++ 32 files changed, 1214 insertions(+), 95 deletions(-) create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/placement/DefaultMultiNodeLookupPolicy.java create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/placement/MultiNodeLookupPolicy.java create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/placement/MultiNodePolicySpec.java create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/placement/MultiNodeSorter.java create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/placement/MultiNodeSortingManager.java create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/placement/ResourceUsageBasedMultiNodeLookupPolicy.java create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacitySchedulerMultiNodes.java diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMActiveServiceContext.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMActiveServiceContext.java index 66065e33bae..8fb0de63fdf 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMActiveServiceContext.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMActiveServiceContext.java @@ -43,9 +43,11 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.ContainerAllocationExpirer; import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.constraint.AllocationTagsManager; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.constraint.PlacementConstraintManager; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.distributed.QueueLimitCalculator; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.placement.MultiNodeSortingManager; import org.apache.hadoop.yarn.server.resourcemanager.security.AMRMTokenSecretManager; import org.apache.hadoop.yarn.server.resourcemanager.security.ClientToAMTokenSecretManagerInRM; import org.apache.hadoop.yarn.server.resourcemanager.security.DelegationTokenRenewer; @@ -113,6 +115,7 @@ private AllocationTagsManager allocationTagsManager; private PlacementConstraintManager placementConstraintManager; private ResourceProfilesManager resourceProfilesManager; + private MultiNodeSortingManager multiNodeSortingManager; public RMActiveServiceContext() { queuePlacementManager = new PlacementManager(); @@ -441,6 +444,19 @@ public void setRMDelegatedNodeLabelsUpdater( rmDelegatedNodeLabelsUpdater = nodeLablesUpdater; } + @Private + @Unstable + public MultiNodeSortingManager getMultiNodeSortingManager() { + return multiNodeSortingManager; + } + + @Private + @Unstable + public void setMultiNodeSortingManager( + MultiNodeSortingManager multiNodeSortingManager) { + this.multiNodeSortingManager = multiNodeSortingManager; + } + @Private @Unstable public void setSchedulerRecoveryStartAndWaitTime(long waitTime) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContext.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContext.java index eb91a311a3a..a30ff76a6ea 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContext.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContext.java @@ -42,10 +42,11 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.ContainerAllocationExpirer; import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode; import 
org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler; - +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.constraint.PlacementConstraintManager; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.constraint.AllocationTagsManager; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.distributed.QueueLimitCalculator; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.placement.MultiNodeSortingManager; import org.apache.hadoop.yarn.server.resourcemanager.security.AMRMTokenSecretManager; import org.apache.hadoop.yarn.server.resourcemanager.security.ClientToAMTokenSecretManagerInRM; import org.apache.hadoop.yarn.server.resourcemanager.security.DelegationTokenRenewer; @@ -177,4 +178,9 @@ void setRMDelegatedNodeLabelsUpdater( void setPlacementConstraintManager( PlacementConstraintManager placementConstraintManager); + + MultiNodeSortingManager getMultiNodeSortingManager(); + + void setMultiNodeSortingManager( + MultiNodeSortingManager multiNodeSortingManager); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContextImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContextImpl.java index 84e0f6f6b58..cb1d56f34fb 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContextImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContextImpl.java @@ -48,10 +48,11 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.ContainerAllocationExpirer; import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler; - +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.constraint.AllocationTagsManager; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.constraint.PlacementConstraintManager; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.distributed.QueueLimitCalculator; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.placement.MultiNodeSortingManager; import org.apache.hadoop.yarn.server.resourcemanager.security.AMRMTokenSecretManager; import org.apache.hadoop.yarn.server.resourcemanager.security.ClientToAMTokenSecretManagerInRM; import org.apache.hadoop.yarn.server.resourcemanager.security.DelegationTokenRenewer; @@ -538,6 +539,17 @@ public void setRMDelegatedNodeLabelsUpdater( delegatedNodeLabelsUpdater); } + @Override + public MultiNodeSortingManager getMultiNodeSortingManager() { + return activeServiceContext.getMultiNodeSortingManager(); + } + + @Override + public void setMultiNodeSortingManager( + MultiNodeSortingManager multiNodeSortingManager) { + activeServiceContext.setMultiNodeSortingManager(multiNodeSortingManager); + } + public void setSchedulerRecoveryStartAndWaitTime(long waitTime) { activeServiceContext.setSchedulerRecoveryStartAndWaitTime(waitTime); } diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java index 0b7e87cc0b1..f4abe65df31 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java @@ -95,11 +95,13 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeEventType; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.constraint.AllocationTagsManager; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.constraint.MemoryPlacementConstraintManager; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.constraint.PlacementConstraintManagerService; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEventType; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.placement.MultiNodeSortingManager; import org.apache.hadoop.yarn.server.resourcemanager.security.DelegationTokenRenewer; import org.apache.hadoop.yarn.server.resourcemanager.security.QueueACLsManager; import org.apache.hadoop.yarn.server.resourcemanager.timelineservice.RMTimelineCollectorManager; @@ -538,6 +540,10 @@ private FederationStateStoreService createFederationStateStoreService() { return new FederationStateStoreService(rmContext); } + protected MultiNodeSortingManager createMultiNodeSortingManager() { + return new MultiNodeSortingManager(); + } + protected SystemMetricsPublisher createSystemMetricsPublisher() { List publishers = new ArrayList(); @@ -657,6 +663,12 @@ protected void serviceInit(Configuration configuration) throws Exception { resourceProfilesManager.init(conf); rmContext.setResourceProfilesManager(resourceProfilesManager); + MultiNodeSortingManager multiNodeSortingManager = + createMultiNodeSortingManager(); + multiNodeSortingManager.setRMContext(rmContext); + addService(multiNodeSortingManager); + rmContext.setMultiNodeSortingManager(multiNodeSortingManager); + RMDelegatedNodeLabelsUpdater delegatedNodeLabelsUpdater = createRMDelegatedNodeLabelsUpdater(); if (delegatedNodeLabelsUpdater != null) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AppSchedulingInfo.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AppSchedulingInfo.java index 8074f069698..8d001f8f668 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AppSchedulingInfo.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AppSchedulingInfo.java @@ -93,7 +93,7 @@ private final ReentrantReadWriteLock.WriteLock writeLock; public final ContainerUpdateContext updateContext; - public final Map applicationSchedulingEnvs = new HashMap<>(); + private final Map applicationSchedulingEnvs = new HashMap<>(); private final RMContext rmContext; public AppSchedulingInfo(ApplicationAttemptId appAttemptId, String user, @@ -765,4 +765,13 @@ public boolean precheckNode(SchedulerRequestKey schedulerKey, this.readLock.unlock(); } } + + /** + * Get scheduling envs configured for this application. + * + * @return a map of applicationSchedulingEnvs + */ + public Map getApplicationSchedulingEnvs() { + return applicationSchedulingEnvs; + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/ClusterNodeTracker.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/ClusterNodeTracker.java index 66d88108932..32e174ad934 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/ClusterNodeTracker.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/ClusterNodeTracker.java @@ -37,6 +37,7 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReadWriteLock; import java.util.concurrent.locks.ReentrantReadWriteLock; @@ -57,6 +58,7 @@ private HashMap nodes = new HashMap<>(); private Map nodeNameToNodeMap = new HashMap<>(); private Map> nodesPerRack = new HashMap<>(); + private Map> nodesPerLabel = new HashMap<>(); private Resource clusterCapacity = Resources.createResource(0, 0); private volatile Resource staleClusterCapacity = @@ -80,6 +82,16 @@ public void addNode(N node) { nodes.put(node.getNodeID(), node); nodeNameToNodeMap.put(node.getNodeName(), node); + List nodes = nodesPerLabel.get(node.getPartition()); + + if (nodes == null) { + nodes = new ArrayList(); + } + nodes.add(node); + + // Update new set of nodes for given partition. + nodesPerLabel.put(node.getPartition(), nodes); + // Update nodes per rack as well String rackName = node.getRackName(); List nodesList = nodesPerRack.get(rackName); @@ -174,6 +186,16 @@ public N removeNode(NodeId nodeId) { } } + List nodes = nodesPerLabel.get(node.getPartition()); + nodes.remove(node); + + // Update new set of nodes for given partition. + if (nodes.isEmpty()) { + nodesPerLabel.remove(node.getPartition()); + } else { + nodesPerLabel.put(node.getPartition(), nodes); + } + // Update cluster capacity Resources.subtractFrom(clusterCapacity, node.getTotalResource()); staleClusterCapacity = Resources.clone(clusterCapacity); @@ -420,4 +442,43 @@ private void updateMaxResources(SchedulerNode node, boolean add) { } return retNodes; } + + /** + * update cached nodes per partition on a node label change event. 
+ * @param partition nodeLabel + * @param nodeIds List of Node IDs + */ + public void updateNodesPerPartition(String partition, Set nodeIds) { + writeLock.lock(); + try { + // Clear all entries. + nodesPerLabel.remove(partition); + + List nodes = new ArrayList(); + for (NodeId nodeId : nodeIds) { + N n = getNode(nodeId); + if (n != null) { + nodes.add(n); + } + } + + // Update new set of nodes for given partition. + nodesPerLabel.put(partition, nodes); + } finally { + writeLock.unlock(); + } + } + + public List getNodesPerPartition(String partition) { + List nodesPerPartition = null; + readLock.lock(); + try { + if (nodesPerLabel.containsKey(partition)) { + nodesPerPartition = new ArrayList(nodesPerLabel.get(partition)); + } + } finally { + readLock.unlock(); + } + return nodesPerPartition; + } } \ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/Queue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/Queue.java index d166e5fc568..f1dd0d8813b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/Queue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/Queue.java @@ -138,4 +138,10 @@ public void recoverContainer(Resource clusterResource, * reserved resource asked */ public void decReservedResource(String partition, Resource reservedRes); + + /** + * Get Multi Node scheduling policy name. 
+ * @return policy name + */ + String getMultiNodeSortingPolicyName(); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/ActivitiesLogger.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/ActivitiesLogger.java index 0c351b671a3..3d610a14ffa 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/ActivitiesLogger.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/ActivitiesLogger.java @@ -21,13 +21,13 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.yarn.api.records.ApplicationId; -import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode; /** * Utility for logging scheduler activities @@ -63,7 +63,7 @@ public static void recordRejectedAppActivityFromLeafQueue( SchedulerApplicationAttempt application, Priority priority, String diagnostic) { String type = "app"; - if (activitiesManager == null) { + if (node == null || activitiesManager == null) { return; } if (activitiesManager.shouldRecordThisNode(node.getNodeID())) { @@ -84,18 +84,18 @@ public static void recordAppActivityWithoutAllocation( ActivitiesManager activitiesManager, SchedulerNode node, SchedulerApplicationAttempt application, Priority priority, String diagnostic, ActivityState appState) { - if (activitiesManager == null) { + if (node == null || activitiesManager == null) { return; } if (activitiesManager.shouldRecordThisNode(node.getNodeID())) { String type = "container"; // Add application-container activity into specific node allocation. - activitiesManager.addSchedulingActivityForNode(node.getNodeID(), + activitiesManager.addSchedulingActivityForNode(node, application.getApplicationId().toString(), null, priority.toString(), ActivityState.SKIPPED, diagnostic, type); type = "app"; // Add queue-application activity into specific node allocation. - activitiesManager.addSchedulingActivityForNode(node.getNodeID(), + activitiesManager.addSchedulingActivityForNode(node, application.getQueueName(), application.getApplicationId().toString(), application.getPriority().toString(), ActivityState.SKIPPED, @@ -121,20 +121,20 @@ public static void recordAppActivityWithAllocation( ActivitiesManager activitiesManager, SchedulerNode node, SchedulerApplicationAttempt application, RMContainer updatedContainer, ActivityState activityState) { - if (activitiesManager == null) { + if (node == null || activitiesManager == null) { return; } if (activitiesManager.shouldRecordThisNode(node.getNodeID())) { String type = "container"; // Add application-container activity into specific node allocation. 
- activitiesManager.addSchedulingActivityForNode(node.getNodeID(), + activitiesManager.addSchedulingActivityForNode(node, application.getApplicationId().toString(), updatedContainer.getContainer().toString(), updatedContainer.getContainer().getPriority().toString(), activityState, ActivityDiagnosticConstant.EMPTY, type); type = "app"; // Add queue-application activity into specific node allocation. - activitiesManager.addSchedulingActivityForNode(node.getNodeID(), + activitiesManager.addSchedulingActivityForNode(node, application.getQueueName(), application.getApplicationId().toString(), application.getPriority().toString(), ActivityState.ACCEPTED, @@ -157,12 +157,12 @@ public static void recordAppActivityWithAllocation( * update. */ public static void startAppAllocationRecording( - ActivitiesManager activitiesManager, NodeId nodeId, long currentTime, + ActivitiesManager activitiesManager, FiCaSchedulerNode node, long currentTime, SchedulerApplicationAttempt application) { - if (activitiesManager == null) { + if (node == null || activitiesManager == null) { return; } - activitiesManager.startAppAllocationRecording(nodeId, currentTime, + activitiesManager.startAppAllocationRecording(node.getNodeID(), currentTime, application); } @@ -208,7 +208,7 @@ public static void finishSkippedAppAllocationRecording( public static void recordQueueActivity(ActivitiesManager activitiesManager, SchedulerNode node, String parentQueueName, String queueName, ActivityState state, String diagnostic) { - if (activitiesManager == null) { + if (node == null || activitiesManager == null) { return; } if (activitiesManager.shouldRecordThisNode(node.getNodeID())) { @@ -240,7 +240,7 @@ public static void finishSkippedNodeAllocation( public static void finishAllocatedNodeAllocation( ActivitiesManager activitiesManager, SchedulerNode node, ContainerId containerId, AllocationState containerState) { - if (activitiesManager == null) { + if (node == null || activitiesManager == null) { return; } if (activitiesManager.shouldRecordThisNode(node.getNodeID())) { @@ -277,7 +277,7 @@ private static void recordActivity(ActivitiesManager activitiesManager, SchedulerNode node, String parentName, String childName, Priority priority, ActivityState state, String diagnostic, String type) { - activitiesManager.addSchedulingActivityForNode(node.getNodeID(), parentName, + activitiesManager.addSchedulingActivityForNode(node, parentName, childName, priority != null ? 
priority.toString() : null, state, diagnostic, type); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/ActivitiesManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/ActivitiesManager.java index af73ae381a4..91bb3b0726f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/ActivitiesManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/ActivitiesManager.java @@ -27,6 +27,7 @@ import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.server.resourcemanager.RMContext; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ActivitiesInfo; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.AppActivitiesInfo; import org.apache.hadoop.yarn.util.SystemClock; @@ -187,11 +188,12 @@ void startAppAllocationRecording(NodeId nodeID, long currTS, } // Add queue, application or container activity into specific node allocation. - void addSchedulingActivityForNode(NodeId nodeID, String parentName, + void addSchedulingActivityForNode(SchedulerNode node, String parentName, String childName, String priority, ActivityState state, String diagnostic, String type) { - if (shouldRecordThisNode(nodeID)) { - NodeAllocation nodeAllocation = getCurrentNodeAllocation(nodeID); + if (shouldRecordThisNode(node.getNodeID())) { + NodeAllocation nodeAllocation = getCurrentNodeAllocation( + node.getNodeID()); nodeAllocation.addAllocationActivity(parentName, childName, priority, state, diagnostic, type); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/AbstractCSQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/AbstractCSQueue.java index 9c3e98f0e71..2c9f9a37255 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/AbstractCSQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/AbstractCSQueue.java @@ -92,7 +92,8 @@ Set resourceTypes; final RMNodeLabelsManager labelManager; String defaultLabelExpression; - + private String multiNodeSortingPolicyName = null; + Map acls = new HashMap(); volatile boolean reservationsContinueLooking; @@ -414,6 +415,10 @@ protected void setupQueueConfigs(Resource clusterResource, this.priority = configuration.getQueuePriority( getQueuePath()); + // Update multi-node sorting algorithm for scheduling as configured. 
+ setMultiNodeSortingPolicyName( + configuration.getMultiNodesSortingAlgorithmPolicy(getQueuePath())); + this.userWeights = getUserWeightsFromHierarchy(configuration); } finally { writeLock.unlock(); @@ -1259,4 +1264,13 @@ public void recoverDrainingState() { this.writeLock.unlock(); } } + + @Override + public String getMultiNodeSortingPolicyName() { + return this.multiNodeSortingPolicyName; + } + + public void setMultiNodeSortingPolicyName(String policyName) { + this.multiNodeSortingPolicyName = policyName; + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java index 37f56deb119..e1a71b6b2f7 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java @@ -22,6 +22,7 @@ import java.util.ArrayList; import java.util.Collection; import java.util.EnumSet; +import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; @@ -251,6 +252,7 @@ public Configuration getConf() { private ResourceCommitterService resourceCommitterService; private RMNodeLabelsManager labelManager; private AppPriorityACLsManager appPriorityACLManager; + private boolean multiNodePlacementEnabled; private static boolean printedVerboseLoggingForAsyncScheduling = false; @@ -391,6 +393,14 @@ void initScheduler(Configuration configuration) throws // Setup how many containers we can allocate for each round offswitchPerHeartbeatLimit = this.conf.getOffSwitchPerHeartbeatLimit(); + // Register CS specific multi-node policies to common MultiNodeManager. + multiNodePlacementEnabled = this.conf.getMultiNodePlacementEnabled(); + if(rmContext.getMultiNodeSortingManager() != null) { + rmContext.getMultiNodeSortingManager().registerMultiNodePolicyNames( + multiNodePlacementEnabled, + this.conf.getMultiNodePlacementPolicies()); + } + LOG.info("Initialized CapacityScheduler with " + "calculator=" + getResourceCalculator().getClass() + ", " + "minimumAllocation=<" + getMinimumResourceCapability() + ">, " + "maximumAllocation=<" @@ -1372,18 +1382,23 @@ private void updateSchedulerHealth(long now, NodeId nodeId, assignment.getAssignmentInformation().getAllocationDetails(); List reservations = assignment.getAssignmentInformation().getReservationDetails(); + // Get nodeId from allocated container if incoming argument is null. + NodeId updatedNodeid = (nodeId == null) + ? 
allocations.get(allocations.size() - 1).rmContainer.getNodeId() + : nodeId; + if (!allocations.isEmpty()) { ContainerId allocatedContainerId = allocations.get(allocations.size() - 1).containerId; String allocatedQueue = allocations.get(allocations.size() - 1).queue; - schedulerHealth.updateAllocation(now, nodeId, allocatedContainerId, + schedulerHealth.updateAllocation(now, updatedNodeid, allocatedContainerId, allocatedQueue); } if (!reservations.isEmpty()) { ContainerId reservedContainerId = reservations.get(reservations.size() - 1).containerId; String reservedQueue = reservations.get(reservations.size() - 1).queue; - schedulerHealth.updateReservation(now, nodeId, reservedContainerId, + schedulerHealth.updateReservation(now, updatedNodeid, reservedContainerId, reservedQueue); } schedulerHealth.updateSchedulerReservationCounts(assignment @@ -1420,6 +1435,23 @@ private boolean canAllocateMore(CSAssignment assignment, int offswitchCount, || assignedContainers < maxAssignPerHeartbeat); } + private CandidateNodeSet getCandidateNodeSet( + FiCaSchedulerNode node) { + CandidateNodeSet candidates = null; + candidates = new SimpleCandidateNodeSet<>(node); + if (multiNodePlacementEnabled) { + Map nodesByPartition = new HashMap<>(); + List nodes = nodeTracker + .getNodesPerPartition(node.getPartition()); + if (nodes != null && !nodes.isEmpty()) { + nodes.forEach(n -> nodesByPartition.put(n.getNodeID(), n)); + candidates = new SimpleCandidateNodeSet( + nodesByPartition, node.getPartition()); + } + } + return candidates; + } + /** * We need to make sure when doing allocation, Node should be existed * And we will construct a {@link CandidateNodeSet} before proceeding @@ -1431,8 +1463,8 @@ private void allocateContainersToNode(NodeId nodeId, int offswitchCount = 0; int assignedContainers = 0; - CandidateNodeSet candidates = - new SimpleCandidateNodeSet<>(node); + CandidateNodeSet candidates = getCandidateNodeSet( + node); CSAssignment assignment = allocateContainersToNode(candidates, withNodeHeartbeat); // Only check if we can allocate more container on the same node when @@ -1598,10 +1630,13 @@ private CSAssignment allocateOrReserveNewContainers( if (Resources.greaterThan(calculator, getClusterResource(), assignment.getResource(), Resources.none())) { + FiCaSchedulerNode node = CandidateNodeSetUtils.getSingleNode(candidates); + NodeId nodeId = null; + if (node != null) { + nodeId = node.getNodeID(); + } if (withNodeHeartbeat) { - updateSchedulerHealth(lastNodeUpdateTime, - CandidateNodeSetUtils.getSingleNode(candidates).getNodeID(), - assignment); + updateSchedulerHealth(lastNodeUpdateTime, nodeId, assignment); } return assignment; } @@ -1680,7 +1715,7 @@ CSAssignment allocateContainersToNode( // We have two different logics to handle allocation on single node / multi // nodes. CSAssignment assignment; - if (null != node) { + if (!multiNodePlacementEnabled) { assignment = allocateContainerOnSingleNode(candidates, node, withNodeHeartbeat); } else{ @@ -1868,12 +1903,21 @@ private void updateNodeLabelsAndQueueResource( NodeLabelsUpdateSchedulerEvent labelUpdateEvent) { try { writeLock.lock(); + Set updateLabels = new HashSet(); for (Entry> entry : labelUpdateEvent .getUpdatedNodeToLabels().entrySet()) { NodeId id = entry.getKey(); Set labels = entry.getValue(); + FiCaSchedulerNode node = nodeTracker.getNode(id); + + if (node != null) { + // Update old partition to list. 
+ updateLabels.add(node.getPartition()); + } updateLabelsOnNode(id, labels); + updateLabels.addAll(labels); } + refreshLabelToNodeCache(updateLabels); Resource clusterResource = getClusterResource(); getRootQueue().updateClusterResource(clusterResource, new ResourceLimits(clusterResource)); @@ -1882,6 +1926,18 @@ private void updateNodeLabelsAndQueueResource( } } + private void refreshLabelToNodeCache(Set updateLabels) { + Map> labelMapping = labelManager + .getLabelsToNodes(updateLabels); + for (String label : updateLabels) { + Set nodes = labelMapping.get(label); + if (nodes == null) { + continue; + } + nodeTracker.updateNodesPerPartition(label, nodes); + } + } + private void addNode(RMNode nodeManager) { try { writeLock.lock(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerConfiguration.java index e8de096c6a4..844ea7a2f99 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerConfiguration.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerConfiguration.java @@ -45,6 +45,8 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.AppPriorityACLConfigurationParser.AppPriorityACLKeyType; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.policy.PriorityUtilizationQueueOrderingPolicy; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.policy.QueueOrderingPolicy; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.placement.MultiNodeLookupPolicy; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.placement.MultiNodePolicySpec; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.policy.FairOrderingPolicy; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.policy.FifoOrderingPolicy; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.policy.OrderingPolicy; @@ -2129,4 +2131,109 @@ private void updateResourceValuesFromConfig(Set resourceTypes, break; } } + + @Private public static final String MULTI_NODE_SORTING_POLICIES = + PREFIX + "multi-node-sorting.policies"; + + @Private public static final String MULTI_NODE_SORTING_POLICY_NAME = + PREFIX + "multi-node-sorting.policy"; + + /** + * resource usage based node sorting algorithm. 
+ */ + public static final String RESOURCE_USAGE_BASED_NODE_SORTING_POLICY = "resource-usage"; + public static final String DEFAULT_NODE_SORTING_POLICY = "default"; + public static final String DEFAULT_NODE_SORTING_POLICY_NAME = "org.apache.hadoop.yarn.server.resourcemanager.scheduler.placement.DefaultMultiNodeLookupPolicy"; + public static final long DEFAULT_MULTI_NODE_SORTING_INTERVAL = 1000L; + + @Private + public static final String MULTI_NODE_PLACEMENT_ENABLED = PREFIX + + "multi-node-placement-enabled"; + + @Private + public static final boolean DEFAULT_MULTI_NODE_PLACEMENT_ENABLED = false; + + public String getMultiNodesSortingAlgorithmPolicy( + String queue) { + + String policyName = get( + getQueuePrefix(queue) + "multi-node-sorting.policy"); + + if (policyName == null) { + policyName = get(MULTI_NODE_SORTING_POLICY_NAME); + } + + // If node sorting policy is not configured in queue and in cluster level, + // it is been assumed that this queue is not enabled with multi-node lookup. + if (policyName == null || policyName.isEmpty()) { + return null; + } + + String policyClassName = get(MULTI_NODE_SORTING_POLICY_NAME + DOT + + policyName.trim() + DOT + "class"); + + if (policyClassName == null || policyClassName.isEmpty()) { + throw new YarnRuntimeException( + policyName.trim() + " Class is not configured or not an instance of " + + MultiNodeLookupPolicy.class.getCanonicalName()); + } + + return normalizePolicyName(policyClassName.trim()); + } + + public boolean getMultiNodePlacementEnabled() { + return getBoolean(MULTI_NODE_PLACEMENT_ENABLED, + DEFAULT_MULTI_NODE_PLACEMENT_ENABLED); + } + + public Set getMultiNodePlacementPolicies() { + String[] policies = getTrimmedStrings(MULTI_NODE_SORTING_POLICIES); + + // In other cases, split the accessibleLabelStr by "," + Set set = new HashSet(); + for (String str : policies) { + if (!str.trim().isEmpty()) { + String policyClassName = get( + MULTI_NODE_SORTING_POLICY_NAME + DOT + str.trim() + DOT + "class"); + if (str.trim().equals(DEFAULT_NODE_SORTING_POLICY)) { + policyClassName = get( + MULTI_NODE_SORTING_POLICY_NAME + DOT + str.trim() + DOT + "class", + DEFAULT_NODE_SORTING_POLICY_NAME); + } + + // This check is needed as default class name is loaded only for + // DEFAULT_NODE_SORTING_POLICY. + if (policyClassName == null) { + throw new YarnRuntimeException( + str.trim() + " Class is not configured or not an instance of " + + MultiNodeLookupPolicy.class.getCanonicalName()); + } + policyClassName = normalizePolicyName(policyClassName.trim()); + long timeout = getLong(MULTI_NODE_SORTING_POLICY_NAME + DOT + str.trim() + + DOT + "sorting-interval.ms", DEFAULT_MULTI_NODE_SORTING_INTERVAL); + set.add(new MultiNodePolicySpec(policyClassName, timeout)); + } + } + + return Collections.unmodifiableSet(set); + } + + private String normalizePolicyName(String policyName) { + + // Ensure that custom node sorting algorithm class is valid. 
+ try { + Class nodeSortingPolicyClazz = getClassByName(policyName); + if (MultiNodeLookupPolicy.class + .isAssignableFrom(nodeSortingPolicyClazz)) { + return policyName; + } else { + throw new YarnRuntimeException( + "Class: " + policyName + " not instance of " + + MultiNodeLookupPolicy.class.getCanonicalName()); + } + } catch (ClassNotFoundException e) { + throw new YarnRuntimeException( + "Could not instantiate " + "NodesSortingPolicy: " + policyName, e); + } + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java index 366bad0a4f2..ffe862fc618 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java @@ -53,10 +53,6 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerEventType; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerState; - -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceLimits; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceUsage; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.*; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.activities.ActivityDiagnosticConstant; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.activities.ActivitiesLogger; @@ -1036,23 +1032,24 @@ private void setPreemptionAllowed(ResourceLimits limits, String nodePartition) { private CSAssignment allocateFromReservedContainer(Resource clusterResource, CandidateNodeSet candidates, ResourceLimits currentResourceLimits, SchedulingMode schedulingMode) { - FiCaSchedulerNode node = CandidateNodeSetUtils.getSingleNode(candidates); - if (null == node) { - return null; - } - - RMContainer reservedContainer = node.getReservedContainer(); - if (reservedContainer != null) { - FiCaSchedulerApp application = getApplication( - reservedContainer.getApplicationAttemptId()); - - if (null != application) { - ActivitiesLogger.APP.startAppAllocationRecording(activitiesManager, - node.getNodeID(), SystemClock.getInstance().getTime(), application); - CSAssignment assignment = application.assignContainers(clusterResource, - candidates, currentResourceLimits, schedulingMode, - reservedContainer); - return assignment; + // Considering multi-node scheduling, its better to iterate through + // all candidates and stop once we get atleast one good node to allocate + // where reservation was made earlier. In normal case, there is only one + // node and hence there wont be any impact after this change. 
+ for (FiCaSchedulerNode node : candidates.getAllNodes().values()) { + RMContainer reservedContainer = node.getReservedContainer(); + if (reservedContainer != null) { + FiCaSchedulerApp application = getApplication( + reservedContainer.getApplicationAttemptId()); + + if (null != application) { + ActivitiesLogger.APP.startAppAllocationRecording(activitiesManager, + node, SystemClock.getInstance().getTime(), application); + CSAssignment assignment = application.assignContainers( + clusterResource, candidates, currentResourceLimits, + schedulingMode, reservedContainer); + return assignment; + } } } @@ -1114,13 +1111,14 @@ public CSAssignment assignContainers(Resource clusterResource, FiCaSchedulerApp application = assignmentIterator.next(); ActivitiesLogger.APP.startAppAllocationRecording(activitiesManager, - node.getNodeID(), SystemClock.getInstance().getTime(), application); + node, SystemClock.getInstance().getTime(), application); // Check queue max-capacity limit Resource appReserved = application.getCurrentReservation(); if (needAssignToQueueCheck) { - if (!super.canAssignToThisQueue(clusterResource, node.getPartition(), - currentResourceLimits, appReserved, schedulingMode)) { + if (!super.canAssignToThisQueue(clusterResource, + candidates.getPartition(), currentResourceLimits, appReserved, + schedulingMode)) { ActivitiesLogger.APP.recordRejectedAppActivityFromLeafQueue( activitiesManager, node, application, application.getPriority(), ActivityDiagnosticConstant.QUEUE_MAX_CAPACITY_LIMIT); @@ -1155,7 +1153,8 @@ public CSAssignment assignContainers(Resource clusterResource, userAssignable = false; } else { userAssignable = canAssignToUser(clusterResource, application.getUser(), - userLimit, application, node.getPartition(), currentResourceLimits); + userLimit, application, candidates.getPartition(), + currentResourceLimits); if (!userAssignable && Resources.fitsIn(cul.reservation, appReserved)) { cul.canAssign = false; cul.reservation = appReserved; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/ParentQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/ParentQueue.java index 2363b8809e8..80549ca5c16 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/ParentQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/ParentQueue.java @@ -553,8 +553,8 @@ public CSAssignment assignContainers(Resource clusterResource, ActivitiesLogger.QUEUE.recordQueueActivity(activitiesManager, node, getParentName(), getQueueName(), ActivityState.REJECTED, - ActivityDiagnosticConstant.NOT_ABLE_TO_ACCESS_PARTITION + node - .getPartition()); + ActivityDiagnosticConstant.NOT_ABLE_TO_ACCESS_PARTITION + + candidates.getPartition()); if (rootQueue) { ActivitiesLogger.NODE.finishSkippedNodeAllocation(activitiesManager, node); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/RegularContainerAllocator.java 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/RegularContainerAllocator.java index a843002cdb5..3e337ef492d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/RegularContainerAllocator.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/RegularContainerAllocator.java @@ -96,11 +96,10 @@ private boolean checkHeadroom(Resource clusterResource, * headroom, etc. */ private ContainerAllocation preCheckForNodeCandidateSet( - Resource clusterResource, CandidateNodeSet candidates, + Resource clusterResource, FiCaSchedulerNode node, SchedulingMode schedulingMode, ResourceLimits resourceLimits, SchedulerRequestKey schedulerKey) { Priority priority = schedulerKey.getPriority(); - FiCaSchedulerNode node = CandidateNodeSetUtils.getSingleNode(candidates); PendingAsk offswitchPendingAsk = application.getPendingAsk(schedulerKey, ResourceRequest.ANY); @@ -164,7 +163,7 @@ private ContainerAllocation preCheckForNodeCandidateSet( } if (!checkHeadroom(clusterResource, resourceLimits, required, - candidates.getPartition())) { + node.getPartition())) { if (LOG.isDebugEnabled()) { LOG.debug("cannot allocate required resource=" + required + " because of headroom"); @@ -801,20 +800,6 @@ private ContainerAllocation allocate(Resource clusterResource, // Do checks before determining which node to allocate // Directly return if this check fails. ContainerAllocation result; - if (reservedContainer == null) { - result = preCheckForNodeCandidateSet(clusterResource, candidates, - schedulingMode, resourceLimits, schedulerKey); - if (null != result) { - return result; - } - } else { - // pre-check when allocating reserved container - if (application.getOutstandingAsksCount(schedulerKey) == 0) { - // Release - return new ContainerAllocation(reservedContainer, null, - AllocationState.QUEUE_SKIPPED); - } - } AppPlacementAllocator schedulingPS = application.getAppSchedulingInfo().getAppPlacementAllocator( @@ -833,6 +818,22 @@ private ContainerAllocation allocate(Resource clusterResource, while (iter.hasNext()) { FiCaSchedulerNode node = iter.next(); + if (reservedContainer == null) { + result = preCheckForNodeCandidateSet(clusterResource, node, + schedulingMode, resourceLimits, schedulerKey); + if (null != result) { + continue; + } + } else { + // pre-check when allocating reserved container + if (application.getOutstandingAsksCount(schedulerKey) == 0) { + // Release + result = new ContainerAllocation(reservedContainer, null, + AllocationState.QUEUE_SKIPPED); + continue; + } + } + result = tryAllocateOnNode(clusterResource, node, schedulingMode, resourceLimits, schedulerKey, reservedContainer); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/ApplicationSchedulingConfig.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/ApplicationSchedulingConfig.java index 1bd37431c15..0dd46d346c0 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/ApplicationSchedulingConfig.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/ApplicationSchedulingConfig.java @@ -17,9 +17,11 @@ */ package org.apache.hadoop.yarn.server.resourcemanager.scheduler.common; + import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.placement.AppPlacementAllocator; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.placement.LocalityAppPlacementAllocator; + /** * This class will keep all Scheduling env's names which will help in * placement calculations. @@ -32,4 +34,8 @@ @InterfaceAudience.Private public static final Class DEFAULT_APPLICATION_PLACEMENT_TYPE_CLASS = LocalityAppPlacementAllocator.class; + + @InterfaceAudience.Private + public static final String ENV_MULTI_NODE_SORTING_POLICY_CLASS = + "MULTI_NODE_SORTING_POLICY_CLASS"; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerApp.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerApp.java index 9810e98c3e8..38ac40fd34d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerApp.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerApp.java @@ -74,6 +74,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.SchedulingMode; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.allocator.AbstractContainerAllocator; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.allocator.ContainerAllocator; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.ApplicationSchedulingConfig; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.ContainerAllocationProposal; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.ContainerRequest; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.PendingAsk; @@ -169,6 +170,16 @@ public FiCaSchedulerApp(ApplicationAttemptId applicationAttemptId, rc = scheduler.getResourceCalculator(); } + // Update multi-node sorting algorithm to scheduler envs + if (rmApp != null) { + if (!appSchedulingInfo.getApplicationSchedulingEnvs().containsKey( + ApplicationSchedulingConfig.ENV_MULTI_NODE_SORTING_POLICY_CLASS) + && queue.getMultiNodeSortingPolicyName() != null) { + appSchedulingInfo.getApplicationSchedulingEnvs().put( + ApplicationSchedulingConfig.ENV_MULTI_NODE_SORTING_POLICY_CLASS, + queue.getMultiNodeSortingPolicyName()); + } + } containerAllocator = new ContainerAllocator(this, rc, rmContext, activitiesManager); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSLeafQueue.java 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSLeafQueue.java index cbc74d25345..eb8ff70246e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSLeafQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSLeafQueue.java @@ -641,4 +641,9 @@ public void addAssignedApp(ApplicationId applicationId) { writeLock.unlock(); } } + + @Override + public String getMultiNodeSortingPolicyName() { + return null; + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSParentQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSParentQueue.java index d5df549b282..db4ea0a9361 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSParentQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSParentQueue.java @@ -296,4 +296,10 @@ protected void dumpStateInternal(StringBuilder sb) { child.dumpStateInternal(sb); } } + + @Override + public String getMultiNodeSortingPolicyName() { + // To be implemented when multi-node look up is added in Fair Scheduler. 
+ return null; + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java index 8396db54ad8..f1e8b06ed52 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java @@ -231,6 +231,12 @@ public void decReservedResource(String partition, Resource reservedRes) { // TODO add implementation for FIFO scheduler } + + @Override + public String getMultiNodeSortingPolicyName() { + // TODO add implementation for FIFO scheduler + return null; + } }; public FifoScheduler() { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/placement/DefaultMultiNodeLookupPolicy.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/placement/DefaultMultiNodeLookupPolicy.java new file mode 100644 index 00000000000..772a24183e5 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/placement/DefaultMultiNodeLookupPolicy.java @@ -0,0 +1,80 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.server.resourcemanager.scheduler.placement; + +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode; + +import java.util.*; +import java.util.concurrent.ConcurrentSkipListSet; + +/** + *

+ * This class has the following functionality: + * + *

+ * DefaultMultiNodeLookupPolicy holds a list of nodes sorted by allocated + * resource, refreshed from the given candidate node set on every lookup. + *

+ */ +public class DefaultMultiNodeLookupPolicy + implements MultiNodeLookupPolicy { + + private Map> nodesPerPartition = new HashMap<>(); + protected Comparator comparator; + + public DefaultMultiNodeLookupPolicy() { + this.comparator = new Comparator() { + @Override + public int compare(N o1, N o2) { + return o2.getAllocatedResource().compareTo(o1.getAllocatedResource()); + } + }; + } + + @Override + public synchronized Iterator getPreferredNodeIterator(Collection nodes, + String partition) { + addAndRefreshNodesSet((Collection) nodes, partition); + return getNodesPerPartition(partition).iterator(); + } + + @Override + public synchronized void addAndRefreshNodesSet(Collection nodes, + String partition) { + Set nodeList = null; + if ((nodeList = nodesPerPartition.get(partition)) == null) { + nodeList = new ConcurrentSkipListSet(comparator); + this.nodesPerPartition.put(partition, nodeList); + } + + // Clear existing entries first. + nodeList.clear(); + + // Add fresh set of nodes for re-ordering. + nodeList.addAll(nodes); + } + + @Override + public synchronized Set getNodesPerPartition(String partition) { + if (!this.nodesPerPartition.containsKey(partition)) { + return new HashSet<>(); + } + return Collections.unmodifiableSet(this.nodesPerPartition.get(partition)); + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/placement/LocalityAppPlacementAllocator.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/placement/LocalityAppPlacementAllocator.java index e1239a9db34..1e901673b60 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/placement/LocalityAppPlacementAllocator.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/placement/LocalityAppPlacementAllocator.java @@ -24,11 +24,14 @@ import org.apache.hadoop.yarn.api.records.ResourceRequest; import org.apache.hadoop.yarn.api.records.SchedulingRequest; import org.apache.hadoop.yarn.exceptions.SchedulerInvalidResoureRequestException; +import org.apache.hadoop.yarn.server.resourcemanager.RMContext; import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.AppSchedulingInfo; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.NodeType; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.SchedulingMode; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.ApplicationSchedulingConfig; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.ContainerRequest; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.PendingAsk; import org.apache.hadoop.yarn.server.scheduler.SchedulerRequestKey; @@ -55,6 +58,7 @@ new ConcurrentHashMap<>(); private volatile String primaryRequestedPartition = RMNodeLabelsManager.NO_LABEL; + private MultiNodeLookupPolicy nodeLookupPolicy = null; private final ReentrantReadWriteLock.ReadLock readLock; private final 
ReentrantReadWriteLock.WriteLock writeLock; @@ -65,6 +69,26 @@ public LocalityAppPlacementAllocator() { writeLock = lock.writeLock(); } + @SuppressWarnings("unchecked") + @Override + public void initialize(AppSchedulingInfo appSchedulingInfo, + SchedulerRequestKey schedulerRequestKey, RMContext rmContext) { + super.initialize(appSchedulingInfo, schedulerRequestKey, rmContext); + String multiNodePolicyName = appSchedulingInfo + .getApplicationSchedulingEnvs().get( + ApplicationSchedulingConfig.ENV_MULTI_NODE_SORTING_POLICY_CLASS); + if (multiNodePolicyName != null && !multiNodePolicyName.isEmpty()) { + nodeLookupPolicy = (MultiNodeLookupPolicy) rmContext + .getMultiNodeSortingManager().getMultiNodePolicy(multiNodePolicyName) + .getMultiNodeLookupPolicy(); + if(LOG.isDebugEnabled()) { + LOG.debug( + "nodeLookupPolicy used for " + appSchedulingInfo.getApplicationId() + + " is " + multiNodePolicyName); + } + } + } + @Override @SuppressWarnings("unchecked") public Iterator getPreferredNodeIterator( @@ -74,11 +98,15 @@ public LocalityAppPlacementAllocator() { // in. N singleNode = CandidateNodeSetUtils.getSingleNode(candidateNodeSet); - if (null != singleNode) { + if ( singleNode != null || nodeLookupPolicy == null) { return IteratorUtils.singletonIterator(singleNode); } - return IteratorUtils.emptyIterator(); + // singleNode will be null if Multi-node placement lookup is enabled, and + // hence could consider sorting policies. + return nodeLookupPolicy.getPreferredNodeIterator( + candidateNodeSet.getAllNodes().values(), + candidateNodeSet.getPartition()); } private boolean hasRequestLabelChanged(ResourceRequest requestOne, diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/placement/MultiNodeLookupPolicy.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/placement/MultiNodeLookupPolicy.java new file mode 100644 index 00000000000..25c792f1ff9 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/placement/MultiNodeLookupPolicy.java @@ -0,0 +1,67 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.server.resourcemanager.scheduler.placement; + +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode; + +import java.util.Collection; +import java.util.Iterator; +import java.util.Set; + +/** + *

+ * This interface has the following functionality: + *

+ * Provide an interface for MultiNodeLookupPolicy so that different placement + * allocators can choose nodes based on need. + *
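+ * Implementations are created through reflection by MultiNodeSorter, and a
+ * single instance can be consulted by many applications at once, so they are
+ * expected to be thread-safe; the implementations in this patch synchronize
+ * their methods and keep nodes in a ConcurrentSkipListSet.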

+ */ +public interface MultiNodeLookupPolicy { + /** + * Get iterator of preferred node depends on requirement and/or availability + * + * @param partition + * node label + * @param nodes + * List of Nodes + * + * @return iterator of preferred node + */ + Iterator getPreferredNodeIterator(Collection nodes, String partition); + + /** + * Refresh working nodes set for re-ordering based on the algorithm selected. + * + * @param nodes + * a collection working nm's. + */ + void addAndRefreshNodesSet(Collection nodes, String partition); + + /** + * Get sorted nodes per partition. + * + * @param partition + * node label + * + * @return collection of sorted nodes + */ + Set getNodesPerPartition(String partition); + +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/placement/MultiNodePolicySpec.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/placement/MultiNodePolicySpec.java new file mode 100644 index 00000000000..9582aad89ec --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/placement/MultiNodePolicySpec.java @@ -0,0 +1,48 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.yarn.server.resourcemanager.scheduler.placement; + +/** + * MultiNodePolicySpec contains policyName and timeout. 
+ */ +public class MultiNodePolicySpec { + + private String policyName; + private long sortingInterval; + + public MultiNodePolicySpec(String policyName, long timeout) { + this.setSortingInterval(timeout); + this.setPolicyName(policyName); + } + + public long getSortingInterval() { + return sortingInterval; + } + + public void setSortingInterval(long timeout) { + this.sortingInterval = timeout; + } + + public String getPolicyName() { + return policyName; + } + + public void setPolicyName(String policyName) { + this.policyName = policyName; + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/placement/MultiNodeSorter.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/placement/MultiNodeSorter.java new file mode 100644 index 00000000000..043e5a26044 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/placement/MultiNodeSorter.java @@ -0,0 +1,149 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.yarn.server.resourcemanager.scheduler.placement; + +import java.util.Collection; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.Executors; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.ScheduledFuture; +import java.util.concurrent.ThreadFactory; +import java.util.concurrent.TimeUnit; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.service.AbstractService; +import org.apache.hadoop.util.ReflectionUtils; +import org.apache.hadoop.yarn.api.records.NodeId; +import org.apache.hadoop.yarn.exceptions.YarnException; +import org.apache.hadoop.yarn.server.resourcemanager.RMContext; +import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.AbstractYarnScheduler; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode; + +import com.google.common.annotations.VisibleForTesting; + +public class MultiNodeSorter extends AbstractService { + + private MultiNodeLookupPolicy multiNodePolicy; + private static final Log LOG = LogFactory.getLog(MultiNodeSorter.class); + + // ScheduledExecutorService which schedules the PreemptionChecker to run + // periodically. 
+ private ScheduledExecutorService ses; + private ScheduledFuture handler; + private volatile boolean stopped; + private RMContext rmContext; + private MultiNodePolicySpec policySpec; + + public MultiNodeSorter(RMContext rmContext, + MultiNodePolicySpec policy) { + super("MultiNodeLookupPolicy"); + this.rmContext = rmContext; + this.policySpec = policy; + } + + @VisibleForTesting + public synchronized MultiNodeLookupPolicy getMultiNodeLookupPolicy() { + return multiNodePolicy; + } + + public void serviceInit(Configuration conf) throws Exception { + LOG.info("Initializing MultiNodeSorter=" + policySpec.getPolicyName()); + initPolicy(policySpec.getPolicyName()); + super.serviceInit(conf); + } + + @SuppressWarnings("unchecked") + void initPolicy(String policyName) throws YarnException { + Class policyClass; + try { + policyClass = Class.forName(policyName); + } catch (ClassNotFoundException e) { + throw new YarnException( + "Invalid policy name:" + policyName + e.getMessage()); + } + this.multiNodePolicy = (MultiNodeLookupPolicy) ReflectionUtils + .newInstance(policyClass, null); + } + + @Override + public void serviceStart() throws Exception { + LOG.info("Starting SchedulingMonitor=" + getName()); + assert !stopped : "starting when already stopped"; + ses = Executors.newSingleThreadScheduledExecutor(new ThreadFactory() { + public Thread newThread(Runnable r) { + Thread t = new Thread(r); + t.setName(getName()); + return t; + } + }); + handler = ses.scheduleAtFixedRate(new SortingThread(), + 0, policySpec.getSortingInterval(), TimeUnit.MILLISECONDS); + super.serviceStart(); + } + + @Override + public void serviceStop() throws Exception { + stopped = true; + if (handler != null) { + LOG.info("Stop " + getName()); + handler.cancel(true); + ses.shutdown(); + } + super.serviceStop(); + } + + @SuppressWarnings("unchecked") + @VisibleForTesting + public void reSortClusterNodes() { + Set nodeLabels = new HashSet<>(); + nodeLabels + .addAll(rmContext.getNodeLabelManager().getClusterNodeLabelNames()); + nodeLabels.add(RMNodeLabelsManager.NO_LABEL); + for (String label : nodeLabels) { + Map nodesByPartition = new HashMap<>(); + List nodes = ((AbstractYarnScheduler) rmContext + .getScheduler()).getNodeTracker().getNodesPerPartition(label); + if (nodes != null && !nodes.isEmpty()) { + nodes.forEach(n -> nodesByPartition.put(n.getNodeID(), n)); + multiNodePolicy.addAndRefreshNodesSet( + (Collection) nodesByPartition.values(), label); + } + } + } + + private class SortingThread implements Runnable { + @Override + public void run() { + try { + reSortClusterNodes(); + } catch (Throwable t) { + // The preemption monitor does not alter structures nor do structures + // persist across invocations. Therefore, log, skip, and retry. 
+ LOG.error("Exception raised while executing multinode" + + " sorter, skip this run..., exception=", t); + } + } + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/placement/MultiNodeSortingManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/placement/MultiNodeSortingManager.java new file mode 100644 index 00000000000..e7eae062922 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/placement/MultiNodeSortingManager.java @@ -0,0 +1,98 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.yarn.server.resourcemanager.scheduler.placement; + +import java.util.HashSet; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.service.AbstractService; +import org.apache.hadoop.yarn.server.resourcemanager.RMContext; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode; + + +public class MultiNodeSortingManager extends AbstractService { + + private static final Log LOG = LogFactory.getLog(MultiNodeSortingManager.class); + + private volatile boolean stopped; + private RMContext rmContext; + private Map> runningMultiNodeSorters; + private Set policySpecs = new HashSet(); + private Configuration conf; + private boolean multiNodePlacementEnabled; + + public MultiNodeSortingManager() { + super("MultiNodeSortingManager"); + this.runningMultiNodeSorters = new ConcurrentHashMap<>(); + } + + @Override + public void serviceInit(Configuration conf) throws Exception { + LOG.info("Initializing MultiNodeSortingManager=" + getName()); + super.serviceInit(conf); + this.conf = conf; + } + + @Override + public void serviceStart() throws Exception { + LOG.info("Starting NodeSortingService=" + getName()); + assert !stopped : "starting when already stopped"; + createAllPolicies(); + super.serviceStart(); + } + + @Override + public void serviceStop() throws Exception { + for (MultiNodeSorter sorter : runningMultiNodeSorters.values()) { + sorter.stop(); + } + super.serviceStop(); + } + + private void createAllPolicies() { + if (!multiNodePlacementEnabled) { + return; + } + for (MultiNodePolicySpec policy : policySpecs) { + MultiNodeSorter mon = new MultiNodeSorter(rmContext, policy); + mon.init(conf); + mon.start(); + 
runningMultiNodeSorters.put(policy.getPolicyName(), mon); + } + } + + public MultiNodeSorter getMultiNodePolicy(String name) { + return runningMultiNodeSorters.get(name); + } + + public void setRMContext(RMContext rmContext) { + this.rmContext = rmContext; + } + + public void registerMultiNodePolicyNames( + boolean multiNodePlacementEnabled, + Set multiNodePlacementPolicies) { + this.policySpecs.addAll(multiNodePlacementPolicies); + this.multiNodePlacementEnabled = multiNodePlacementEnabled; + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/placement/ResourceUsageBasedMultiNodeLookupPolicy.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/placement/ResourceUsageBasedMultiNodeLookupPolicy.java new file mode 100644 index 00000000000..8b74b4172a4 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/placement/ResourceUsageBasedMultiNodeLookupPolicy.java @@ -0,0 +1,85 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.server.resourcemanager.scheduler.placement; + +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode; + +import java.util.*; +import java.util.concurrent.ConcurrentSkipListSet; + +/** + *

+ * This class has the following functionality: + * + *

+ * ResourceUsageBasedMultiNodeLookupPolicy holds a list of nodes sorted by + * their resource usage at a given point in time. + *
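+ * Unlike DefaultMultiNodeLookupPolicy, this policy does not re-sort from the
+ * incoming candidate set on each lookup; getPreferredNodeIterator serves the
+ * snapshot that the MultiNodeSorter thread refreshes at the configured
+ * sorting interval.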

+ */ +public class ResourceUsageBasedMultiNodeLookupPolicy + implements MultiNodeLookupPolicy { + + private Map> nodesPerPartition = new HashMap<>(); + protected Comparator comparator; + + public ResourceUsageBasedMultiNodeLookupPolicy() { + this.comparator = new Comparator() { + @Override + public int compare(N o1, N o2) { + int allocatedDiff = o1.getAllocatedResource() + .compareTo(o2.getAllocatedResource()); + if (allocatedDiff == 0) { + return o1.getNodeID().compareTo(o2.getNodeID()); + } + return allocatedDiff; + } + }; + } + + @Override + public synchronized Iterator getPreferredNodeIterator(Collection nodes, + String partition) { + // Incoming nodes are skipped as multi-node sorter will populate sorted nodes + // ready for pulling everytime. + return getNodesPerPartition(partition).iterator(); + } + + @Override + public synchronized void addAndRefreshNodesSet(Collection nodes, String partition) { + Set nodeList = null; + if ((nodeList = nodesPerPartition.get(partition)) == null) { + nodeList = new ConcurrentSkipListSet(comparator); + this.nodesPerPartition.put(partition, nodeList); + } + + // Clear existing entries first. + nodeList.clear(); + + // Add fresh set of nodes for re-ordering. + nodeList.addAll(nodes); + } + + @Override + public synchronized Set getNodesPerPartition(String partition) { + if (!this.nodesPerPartition.containsKey(partition)) { + return new HashSet<>(); + } + return Collections.unmodifiableSet(this.nodesPerPartition.get(partition)); + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/ReservationSystemTestUtil.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/ReservationSystemTestUtil.java index eef86a44990..09d3327263b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/ReservationSystemTestUtil.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/ReservationSystemTestUtil.java @@ -56,6 +56,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacitySchedulerConfiguration; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.placement.MultiNodeSortingManager; import org.apache.hadoop.yarn.server.resourcemanager.security.ClientToAMTokenSecretManagerInRM; import org.apache.hadoop.yarn.server.resourcemanager.security.NMTokenSecretManagerInRM; import org.apache.hadoop.yarn.server.resourcemanager.security.RMContainerTokenSecretManager; @@ -295,6 +296,8 @@ public Resource answer(InvocationOnMock invocation) throws Throwable { }); mockRmContext.setNodeLabelManager(nlm); + mockRmContext + .setMultiNodeSortingManager(mock(MultiNodeSortingManager.class)); return mockRmContext; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerTestBase.java 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerTestBase.java index 5cea3a2f1ab..60e25ed83ac 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerTestBase.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerTestBase.java @@ -21,6 +21,7 @@ import com.google.common.collect.Sets; import org.apache.hadoop.yarn.server.resourcemanager.MockRM; import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler; import org.junit.Assert; import java.util.Set; @@ -76,4 +77,16 @@ protected void checkPendingResourceGreaterThanZero(MockRM rm, String queueName, .getPending(label == null ? RMNodeLabelsManager.NO_LABEL : label) .getMemorySize() > 0); } + + protected void waitforNMRegistered(ResourceScheduler scheduler, int nodecount, + int timesec) throws InterruptedException { + long start = System.currentTimeMillis(); + while (System.currentTimeMillis() - start < timesec * 1000) { + if (scheduler.getNumClusterNodes() < nodecount) { + Thread.sleep(100); + } else { + break; + } + } + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java index 8d948b57ba4..8556c681ccc 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java @@ -30,13 +30,7 @@ import java.io.IOException; import java.net.InetSocketAddress; import java.security.PrivilegedAction; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; +import java.util.*; import java.util.concurrent.BrokenBarrierException; import java.util.concurrent.CyclicBarrier; @@ -148,6 +142,8 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeRemovedSchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEvent; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.placement.MultiNodeSorter; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.placement.MultiNodeSortingManager; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.placement.SimpleCandidateNodeSet; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.policy.FairOrderingPolicy; import org.apache.hadoop.yarn.server.resourcemanager.security.ClientToAMTokenSecretManagerInRM; @@ -172,7 +168,6 @@ 
import com.google.common.base.Supplier; import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; -import com.google.common.collect.Sets; import org.mockito.invocation.InvocationOnMock; import org.mockito.stubbing.Answer; @@ -4871,18 +4866,6 @@ private CapacityScheduler setUpCSQueue(long maxLifetime, return cs; } - private void waitforNMRegistered(ResourceScheduler scheduler, int nodecount, - int timesec) throws InterruptedException { - long start = System.currentTimeMillis(); - while (System.currentTimeMillis() - start < timesec * 1000) { - if (scheduler.getNumClusterNodes() < nodecount) { - Thread.sleep(100); - } else { - break; - } - } - } - @Test (timeout = 60000) public void testClearRequestsBeforeApplyTheProposal() throws Exception { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacitySchedulerMultiNodes.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacitySchedulerMultiNodes.java new file mode 100644 index 00000000000..d91d4bcce5f --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacitySchedulerMultiNodes.java @@ -0,0 +1,158 @@ +/** +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*/ + +package org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity; + +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.Set; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.yarn.api.records.NodeId; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.server.resourcemanager.MockAM; +import org.apache.hadoop.yarn.server.resourcemanager.MockNM; +import org.apache.hadoop.yarn.server.resourcemanager.MockRM; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNodeReport; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.placement.MultiNodeSorter; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.placement.MultiNodeSortingManager; +import org.apache.hadoop.yarn.util.resource.DominantResourceCalculator; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +public class TestCapacitySchedulerMultiNodes extends CapacitySchedulerTestBase { + + private static final Log LOG = LogFactory + .getLog(TestCapacitySchedulerMultiNodes.class); + private CapacitySchedulerConfiguration conf; + private static final String resPolicyClass = "org.apache.hadoop.yarn.server.resourcemanager.scheduler.placement.ResourceUsageBasedMultiNodeLookupPolicy"; + + @Before + public void setUp() { + CapacitySchedulerConfiguration config = new CapacitySchedulerConfiguration(); + config.set(CapacitySchedulerConfiguration.RESOURCE_CALCULATOR_CLASS, + DominantResourceCalculator.class.getName()); + conf = new CapacitySchedulerConfiguration(config); + conf.setClass(YarnConfiguration.RM_SCHEDULER, CapacityScheduler.class, + ResourceScheduler.class); + conf.set(CapacitySchedulerConfiguration.MULTI_NODE_SORTING_POLICIES, + "resource-based"); + conf.set(CapacitySchedulerConfiguration.MULTI_NODE_SORTING_POLICY_NAME, + "resource-based"); + String policyName = CapacitySchedulerConfiguration.MULTI_NODE_SORTING_POLICY_NAME + + ".resource-based" + ".class"; + conf.set(policyName, resPolicyClass); + conf.setBoolean(CapacitySchedulerConfiguration.MULTI_NODE_PLACEMENT_ENABLED, + true); + conf.setInt("yarn.scheduler.minimum-allocation-mb", 512); + conf.setInt("yarn.scheduler.minimum-allocation-vcores", 1); + } + + @Test + public void testMultiNodeSorterForScheduling() throws Exception { + MockRM rm = new MockRM(conf); + rm.start(); + rm.registerNode("127.0.0.1:1234", 10 * GB); + rm.registerNode("127.0.0.1:1235", 10 * GB); + rm.registerNode("127.0.0.1:1236", 10 * GB); + rm.registerNode("127.0.0.1:1237", 10 * GB); + ResourceScheduler scheduler = rm.getRMContext().getScheduler(); + waitforNMRegistered(scheduler, 4, 5); + MultiNodeSortingManager mns = rm.getRMContext() + .getMultiNodeSortingManager(); + MultiNodeSorter sorter = mns + .getMultiNodePolicy(resPolicyClass); + sorter.reSortClusterNodes(); + Set nodes = sorter.getMultiNodeLookupPolicy() + .getNodesPerPartition(""); + Assert.assertEquals(4, nodes.size()); + rm.stop(); + } + + @Test + public void testMultiNodeSorterForSchedulingWithOrdering() throws Exception { + MockRM rm = new MockRM(conf); + rm.start(); + MockNM nm1 = rm.registerNode("127.0.0.1:1234", 10 * GB, 10); + MockNM nm2 = rm.registerNode("127.0.0.2:1235", 10 * GB, 10); + MockNM 
nm3 = rm.registerNode("127.0.0.3:1236", 10 * GB, 10); + MockNM nm4 = rm.registerNode("127.0.0.4:1237", 10 * GB, 10); + ResourceScheduler scheduler = rm.getRMContext().getScheduler(); + waitforNMRegistered(scheduler, 4, 5); + + MultiNodeSortingManager mns = rm.getRMContext() + .getMultiNodeSortingManager(); + MultiNodeSorter sorter = mns + .getMultiNodePolicy(resPolicyClass); + sorter.reSortClusterNodes(); + + Set nodes = sorter.getMultiNodeLookupPolicy() + .getNodesPerPartition(""); + Assert.assertEquals(4, nodes.size()); + + RMApp app1 = rm.submitApp(2048, "app-1", "user1", null, "default"); + MockAM am1 = MockRM.launchAndRegisterAM(app1, rm, nm1); + SchedulerNodeReport report_nm1 = + rm.getResourceScheduler().getNodeReport(nm1.getNodeId()); + + // check node report + Assert.assertEquals(2 * GB, report_nm1.getUsedResource().getMemorySize()); + Assert.assertEquals(8 * GB, + report_nm1.getAvailableResource().getMemorySize()); + + // Ideally thread will invoke this, but thread operates every 1sec. Hence forcefully recompute nodes.. + sorter.reSortClusterNodes(); + + RMApp app2 = rm.submitApp(1024, "app-2", "user2", null, "default"); + MockAM am2 = MockRM.launchAndRegisterAM(app2, rm, nm2); + SchedulerNodeReport report_nm2 = + rm.getResourceScheduler().getNodeReport(nm2.getNodeId()); + + // check node report + Assert.assertEquals(1 * GB, report_nm2.getUsedResource().getMemorySize()); + Assert.assertEquals(9 * GB, + report_nm2.getAvailableResource().getMemorySize()); + + // Ideally thread will invoke this, but thread operates every 1sec. Hence forcefully recompute nodes.. + sorter.reSortClusterNodes(); + + // Node1 and Node2 are now having used resources. Hence ensure these 2 comes + // latter in the list. + nodes = sorter.getMultiNodeLookupPolicy() + .getNodesPerPartition(""); + List currentNodes = new ArrayList<>(); + currentNodes.add(nm3.getNodeId()); + currentNodes.add(nm4.getNodeId()); + currentNodes.add(nm2.getNodeId()); + currentNodes.add(nm1.getNodeId()); + Iterator it = nodes.iterator(); + SchedulerNode current; + int i = 0; + while (it.hasNext()) { + current = it.next(); + Assert.assertEquals(current.getNodeID(), currentNodes.get(i++)); + } + rm.stop(); + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacitySchedulerNodeLabelUpdate.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacitySchedulerNodeLabelUpdate.java index b4ebd15ccde..4cbfcf02b24 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacitySchedulerNodeLabelUpdate.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacitySchedulerNodeLabelUpdate.java @@ -22,6 +22,7 @@ import java.util.ArrayList; import java.util.HashSet; +import java.util.List; import java.util.Map; import java.util.Set; @@ -45,6 +46,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerEventType; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerState; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler; +import 
org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeLabelsUpdateSchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ResourceInfo; @@ -817,4 +819,74 @@ private long waitForResourceUpdate(MockRM rm, String queuename, long memory, } return memorySize; } + + private long waitForNodeLabelSchedulerEventUpdate(MockRM rm, String partition, + long expectedNodeCount, long timeout) throws InterruptedException { + long start = System.currentTimeMillis(); + long size = 0; + while (System.currentTimeMillis() - start < timeout) { + CapacityScheduler scheduler = (CapacityScheduler) rm + .getResourceScheduler(); + size = scheduler.getNodeTracker().getNodesPerPartition(partition).size(); + if (size == expectedNodeCount) { + return size; + } + Thread.sleep(100); + } + return size; + } + + @Test + public void testNodeCountBasedOnNodeLabelsFromClusterNodeTracker() + throws Exception { + // set node -> label + mgr.addToCluserNodeLabelsWithDefaultExclusivity( + ImmutableSet.of("x", "y", "z")); + + // set mapping: + // h1 -> x + // h2 -> y + mgr.addLabelsToNode( + ImmutableMap.of(NodeId.newInstance("h1", 1234), toSet("x"))); + mgr.addLabelsToNode( + ImmutableMap.of(NodeId.newInstance("h2", 1234), toSet("x"))); + + // inject node label manager + MockRM rm = new MockRM(getConfigurationWithQueueLabels(conf)) { + @Override + public RMNodeLabelsManager createNodeLabelManager() { + return mgr; + } + }; + + rm.getRMContext().setNodeLabelManager(mgr); + rm.start(); + MockNM nm1 = rm.registerNode("h1:1234", 8000); + rm.registerNode("h2:1234", 8000); + rm.registerNode("h3:1234", 8000); + + CapacityScheduler cs = (CapacityScheduler) rm.getResourceScheduler(); + + // Ensure that cluster node tracker is updated with correct set of node + // after Node registration. + Assert.assertEquals(2, + cs.getNodeTracker().getNodesPerPartition("x").size()); + Assert.assertEquals(1, cs.getNodeTracker().getNodesPerPartition("").size()); + + rm.unRegisterNode(nm1); + rm.registerNode("h4:1234", 8000); + + // Ensure that cluster node tracker is updated with correct set of node + // after new Node registration and old node label change. + Assert.assertEquals(1, + cs.getNodeTracker().getNodesPerPartition("x").size()); + Assert.assertEquals(2, cs.getNodeTracker().getNodesPerPartition("").size()); + + mgr.replaceLabelsOnNode( + ImmutableMap.of(NodeId.newInstance("h2", 1234), toSet(""))); + + // Last node with label x is replaced by CLI or REST. + Assert.assertEquals(0, + waitForNodeLabelSchedulerEventUpdate(rm, "x", 0, 3000L)); + } } -- 2.15.2 (Apple Git-101.1)
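Reviewer convenience: the knobs this patch adds are easiest to see in one place. The sketch below simply mirrors TestCapacitySchedulerMultiNodes#setUp using the CapacitySchedulerConfiguration constants introduced above; the wrapper class and method names in the sketch are illustrative only and are not part of the patch.

import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacitySchedulerConfiguration;

/** Illustrative helper, not part of the patch: collects the multi-node knobs. */
public final class MultiNodePlacementSetup {

  private MultiNodePlacementSetup() {
  }

  /** Builds a CapacityScheduler configuration with multi-node placement on. */
  public static CapacitySchedulerConfiguration newConf() {
    CapacitySchedulerConfiguration conf = new CapacitySchedulerConfiguration();
    conf.setClass(YarnConfiguration.RM_SCHEDULER, CapacityScheduler.class,
        ResourceScheduler.class);
    // Register one sorting policy, named "resource-based", and point it at the
    // usage-based lookup policy added by this patch.
    conf.set(CapacitySchedulerConfiguration.MULTI_NODE_SORTING_POLICIES,
        "resource-based");
    conf.set(CapacitySchedulerConfiguration.MULTI_NODE_SORTING_POLICY_NAME,
        "resource-based");
    conf.set(CapacitySchedulerConfiguration.MULTI_NODE_SORTING_POLICY_NAME
        + ".resource-based.class",
        "org.apache.hadoop.yarn.server.resourcemanager.scheduler.placement."
            + "ResourceUsageBasedMultiNodeLookupPolicy");
    // Enable the multi-node placement path so LocalityAppPlacementAllocator
    // consults the sorted node set instead of a single candidate node.
    conf.setBoolean(
        CapacitySchedulerConfiguration.MULTI_NODE_PLACEMENT_ENABLED, true);
    return conf;
  }
}

Per-queue selection of a named policy is exposed through Queue#getMultiNodeSortingPolicyName(); the Fair and FIFO schedulers stub it to null for now, so only the Capacity Scheduler path is wired up by this patch.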