diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/ActivitiesLogger.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/ActivitiesLogger.java index e698d1a4d00..06ec152e16a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/ActivitiesLogger.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/ActivitiesLogger.java @@ -30,6 +30,8 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode; import org.apache.hadoop.yarn.server.scheduler.SchedulerRequestKey; +import java.util.function.Supplier; + /** * Utility for logging scheduler activities */ @@ -52,9 +54,9 @@ public static void recordSkippedAppActivityWithoutAllocation( ActivitiesManager activitiesManager, SchedulerNode node, SchedulerApplicationAttempt application, SchedulerRequestKey requestKey, - String diagnostic) { + String diagnostic, ActivityLevel level) { recordAppActivityWithoutAllocation(activitiesManager, node, application, - requestKey, diagnostic, ActivityState.SKIPPED); + requestKey, diagnostic, ActivityState.SKIPPED, level); } /* @@ -72,7 +74,7 @@ public static void recordRejectedAppActivityFromLeafQueue( if (activitiesManager.shouldRecordThisNode(nodeId)) { recordActivity(activitiesManager, nodeId, application.getQueueName(), application.getApplicationId().toString(), priority, - ActivityState.REJECTED, diagnostic, "app"); + ActivityState.REJECTED, diagnostic, ActivityLevel.APP); } finishSkippedAppAllocationRecording(activitiesManager, application.getApplicationId(), ActivityState.REJECTED, diagnostic); @@ -87,48 +89,52 @@ public static void recordAppActivityWithoutAllocation( ActivitiesManager activitiesManager, SchedulerNode node, SchedulerApplicationAttempt application, SchedulerRequestKey schedulerKey, - String diagnostic, ActivityState appState) { + String diagnostic, ActivityState appState, ActivityLevel level) { if (activitiesManager == null) { return; } NodeId nodeId = getRecordingNodeId(activitiesManager, node); if (activitiesManager.shouldRecordThisNode(nodeId)) { - if (schedulerKey != null) { - String allocationRequestId = + String requestName = null, priority = null, allocationRequestId = null; + if (level == ActivityLevel.NODE || level == ActivityLevel.REQUEST) { + if (schedulerKey == null) { + LOG.warn("Request key should not be null at " + level + " level."); + return; + } + allocationRequestId = String.valueOf(schedulerKey.getAllocationRequestId()); - String priorityStr = getPriorityStr(schedulerKey); - String requestName = getRequestName(priorityStr, allocationRequestId); - String type = "container"; - // Add application-container activity into specific node allocation. - activitiesManager.addSchedulingActivityForNode(nodeId, - requestName, null, - priorityStr, appState, diagnostic, type, - null); - type = "request"; - // Add application-container activity into specific node allocation. - activitiesManager.addSchedulingActivityForNode(nodeId, - application.getApplicationId().toString(), requestName, - priorityStr, appState, - ActivityDiagnosticConstant.EMPTY, type, allocationRequestId); + priority = getPriorityStr(schedulerKey); + requestName = getRequestName(priority, allocationRequestId); + } + switch (level) { + case NODE: + recordSchedulerActivityAtNodeLevel(activitiesManager, application, + requestName, priority, allocationRequestId, null, nodeId, + appState, diagnostic); + break; + case REQUEST: + recordSchedulerActivityAtRequestLevel(activitiesManager, application, + requestName, priority, allocationRequestId, nodeId, appState, + diagnostic); + break; + case APP: + recordSchedulerActivityAtAppLevel(activitiesManager, application, + nodeId, appState, diagnostic); + break; + default: + LOG.warn("Doesn't handle app activities at " + level + " level."); + break; } - // Add queue-application activity into specific node allocation. - activitiesManager.addSchedulingActivityForNode(nodeId, - application.getQueueName(), - application.getApplicationId().toString(), - application.getPriority().toString(), appState, - schedulerKey != null ? ActivityDiagnosticConstant.EMPTY : - diagnostic, "app", null); } // Add application-container activity into specific application allocation // Under this condition, it fails to allocate a container to this // application, so containerId is null. if (activitiesManager.shouldRecordThisApp( application.getApplicationId())) { - String type = "container"; activitiesManager.addSchedulingActivityForApp( application.getApplicationId(), null, getPriorityStr(schedulerKey), appState, - diagnostic, type, nodeId, + diagnostic, level, nodeId, schedulerKey == null ? null : String.valueOf(schedulerKey.getAllocationRequestId())); } @@ -156,43 +162,61 @@ public static void recordAppActivityWithAllocation( .valueOf(updatedContainer.getContainer().getAllocationRequestId()); String requestName = getRequestName(containerPriorityStr, allocationRequestId); - String type = "container"; - - // Add application-container activity into specific node allocation. - activitiesManager.addSchedulingActivityForNode(nodeId, - requestName, - updatedContainer.getContainer().toString(), - containerPriorityStr, - activityState, ActivityDiagnosticConstant.EMPTY, type, null); - type = "request"; - // Add application-container activity into specific node allocation. - activitiesManager.addSchedulingActivityForNode(nodeId, - application.getApplicationId().toString(), - requestName, containerPriorityStr, - activityState, ActivityDiagnosticConstant.EMPTY, type, - allocationRequestId); - type = "app"; - // Add queue-application activity into specific node allocation. - activitiesManager.addSchedulingActivityForNode(nodeId, - application.getQueueName(), - application.getApplicationId().toString(), - application.getPriority().toString(), ActivityState.ACCEPTED, - ActivityDiagnosticConstant.EMPTY, type, null); + // Add node,request,app level activities into scheduler activities. + recordSchedulerActivityAtNodeLevel(activitiesManager, application, + requestName, containerPriorityStr, allocationRequestId, + updatedContainer.getContainer().toString(), nodeId, activityState, + ActivityDiagnosticConstant.EMPTY); } // Add application-container activity into specific application allocation if (activitiesManager.shouldRecordThisApp( application.getApplicationId())) { - String type = "container"; activitiesManager.addSchedulingActivityForApp( application.getApplicationId(), updatedContainer.getContainerId(), updatedContainer.getContainer().getPriority().toString(), - activityState, ActivityDiagnosticConstant.EMPTY, type, nodeId, + activityState, ActivityDiagnosticConstant.EMPTY, + ActivityLevel.NODE, nodeId, String.valueOf( updatedContainer.getContainer().getAllocationRequestId())); } } + private static void recordSchedulerActivityAtNodeLevel( + ActivitiesManager activitiesManager, SchedulerApplicationAttempt app, + String requestName, String priority, String allocationRequestId, + String containerId, NodeId nodeId, ActivityState state, + String diagnostic) { + activitiesManager + .addSchedulingActivityForNode(nodeId, requestName, containerId, null, + state, diagnostic, ActivityLevel.NODE, null); + // Record request level activity additionally. + recordSchedulerActivityAtRequestLevel(activitiesManager, app, requestName, + priority, allocationRequestId, nodeId, state, + ActivityDiagnosticConstant.EMPTY); + } + + private static void recordSchedulerActivityAtRequestLevel( + ActivitiesManager activitiesManager, SchedulerApplicationAttempt app, + String requestName, String priority, String allocationRequestId, + NodeId nodeId, ActivityState state, String diagnostic) { + activitiesManager.addSchedulingActivityForNode(nodeId, + app.getApplicationId().toString(), requestName, priority, + state, diagnostic, ActivityLevel.REQUEST, + allocationRequestId); + // Record app level activity additionally. + recordSchedulerActivityAtAppLevel(activitiesManager, app, nodeId, state, + ActivityDiagnosticConstant.EMPTY); + } + + private static void recordSchedulerActivityAtAppLevel( + ActivitiesManager activitiesManager, SchedulerApplicationAttempt app, + NodeId nodeId, ActivityState state, String diagnostic) { + activitiesManager.addSchedulingActivityForNode(nodeId, app.getQueueName(), + app.getApplicationId().toString(), app.getPriority().toString(), + state, diagnostic, ActivityLevel.APP, null); + } + /* * Invoked when scheduler starts to look at this application within one node * update. @@ -252,13 +276,20 @@ public static void finishSkippedAppAllocationRecording( public static void recordQueueActivity(ActivitiesManager activitiesManager, SchedulerNode node, String parentQueueName, String queueName, ActivityState state, String diagnostic) { + recordQueueActivity(activitiesManager, node, parentQueueName, queueName, + state, () -> diagnostic); + } + + public static void recordQueueActivity(ActivitiesManager activitiesManager, + SchedulerNode node, String parentQueueName, String queueName, + ActivityState state, Supplier diagnosticSupplier) { if (activitiesManager == null) { return; } NodeId nodeId = getRecordingNodeId(activitiesManager, node); if (activitiesManager.shouldRecordThisNode(nodeId)) { recordActivity(activitiesManager, nodeId, parentQueueName, queueName, - null, state, diagnostic, null); + null, state, diagnosticSupplier.get(), ActivityLevel.QUEUE); } } } @@ -299,11 +330,11 @@ public static void finishAllocatedNodeAllocation( * Invoked when node heartbeat finishes */ public static void finishNodeUpdateRecording( - ActivitiesManager activitiesManager, NodeId nodeID) { + ActivitiesManager activitiesManager, NodeId nodeID, String partition) { if (activitiesManager == null) { return; } - activitiesManager.finishNodeUpdateRecording(nodeID); + activitiesManager.finishNodeUpdateRecording(nodeID, partition); } /* @@ -320,11 +351,11 @@ public static void startNodeUpdateRecording( // Add queue, application or container activity into specific node allocation. private static void recordActivity(ActivitiesManager activitiesManager, - NodeId nodeId, String parentName, String childName, - Priority priority, ActivityState state, String diagnostic, String type) { + NodeId nodeId, String parentName, String childName, Priority priority, + ActivityState state, String diagnostic, ActivityLevel level) { activitiesManager.addSchedulingActivityForNode(nodeId, parentName, childName, priority != null ? priority.toString() : null, state, - diagnostic, type, null); + diagnostic, level, null); } private static NodeId getRecordingNodeId(ActivitiesManager activitiesManager, diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/ActivitiesManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/ActivitiesManager.java index 4149ac1565d..7dd1182b6c2 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/ActivitiesManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/ActivitiesManager.java @@ -341,11 +341,11 @@ void startAppAllocationRecording(NodeId nodeID, long currTS, // Add queue, application or container activity into specific node allocation. void addSchedulingActivityForNode(NodeId nodeId, String parentName, String childName, String priority, ActivityState state, String diagnostic, - String type, String allocationRequestId) { + ActivityLevel level, String allocationRequestId) { if (shouldRecordThisNode(nodeId)) { NodeAllocation nodeAllocation = getCurrentNodeAllocation(nodeId); nodeAllocation.addAllocationActivity(parentName, childName, priority, - state, diagnostic, type, nodeId, allocationRequestId); + state, diagnostic, level, nodeId, allocationRequestId); } } @@ -353,13 +353,13 @@ void addSchedulingActivityForNode(NodeId nodeId, String parentName, // allocation. void addSchedulingActivityForApp(ApplicationId applicationId, ContainerId containerId, String priority, ActivityState state, - String diagnostic, String type, NodeId nodeId, + String diagnostic, ActivityLevel level, NodeId nodeId, String allocationRequestId) { if (shouldRecordThisApp(applicationId)) { AppAllocation appAllocation = appsAllocation.get().get(applicationId); appAllocation.addAppAllocationActivity(containerId == null ? "Container-Id-Not-Assigned" : - containerId.toString(), priority, state, diagnostic, type, nodeId, + containerId.toString(), priority, state, diagnostic, level, nodeId, allocationRequestId); } } @@ -407,7 +407,7 @@ void finishAppAllocationRecording(ApplicationId applicationId, } } - void finishNodeUpdateRecording(NodeId nodeID) { + void finishNodeUpdateRecording(NodeId nodeID, String partition) { List value = recordingNodesAllocation.get().get(nodeID); long timeStamp = SystemClock.getInstance().getTime(); @@ -417,6 +417,7 @@ void finishNodeUpdateRecording(NodeId nodeID) { for (NodeAllocation allocation : lastAvailableNodeActivities) { allocation.transformToTree(); allocation.setTimeStamp(timeStamp); + allocation.setPartition(partition); } if (recordNextAvailableNode) { recordNextAvailableNode = false; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/ActivitiesUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/ActivitiesUtils.java index 4cdaac8e914..e7e4ad480f8 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/ActivitiesUtils.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/ActivitiesUtils.java @@ -40,11 +40,11 @@ private ActivitiesUtils(){} } if (groupBy == RMWSConsts.ActivitiesGroupBy.DIAGNOSTIC) { Map>> groupingResults = - activityNodes.stream().collect(Collectors - .groupingBy(ActivityNode::getState, Collectors + activityNodes.stream() + .filter(e -> e.getNodeId() != null) + .collect(Collectors.groupingBy(ActivityNode::getState, Collectors .groupingBy(ActivityNode::getShortDiagnostic, - Collectors.mapping(e -> e.getNodeId() == null ? - "" : + Collectors.mapping(e -> e.getNodeId() == null ? "" : e.getNodeId().toString(), Collectors.toList())))); return groupingResults.entrySet().stream().flatMap( stateMap -> stateMap.getValue().entrySet().stream().map( @@ -53,8 +53,8 @@ private ActivitiesUtils(){} diagMap.getValue()))) .collect(Collectors.toList()); } else { - return activityNodes.stream().map( - e -> new ActivityNodeInfo(e.getName(), e.getState(), + return activityNodes.stream().filter(e -> e.getNodeId() != null) + .map(e -> new ActivityNodeInfo(e.getName(), e.getState(), e.getDiagnostic(), e.getNodeId())).collect(Collectors.toList()); } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/ActivityDiagnosticConstant.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/ActivityDiagnosticConstant.java index d3d4d9b4d33..bfd6ed7cd10 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/ActivityDiagnosticConstant.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/ActivityDiagnosticConstant.java @@ -26,51 +26,82 @@ // In order not to show "diagnostic" line in frontend, // we set the value to null. public final static String EMPTY = null; - public final static String NOT_ABLE_TO_ACCESS_PARTITION = - "Not able to access partition"; + + /* + * Initial check diagnostics + */ + public final static String INIT_CHECK_SINGLE_NODE_REMOVED = "Initial check: " + + "single placement node has been removed from scheduler"; + public final static String INIT_CHECK_SINGLE_NODE_RESOURCE_INSUFFICIENT = + "Initial check: single placement node resource is insufficient " + + "for minimum allocation"; + public final static String INIT_CHECK_PARTITION_RESOURCE_INSUFFICIENT = + "Initial check: insufficient resource in partition"; + + /* + * Queue level diagnostics + */ + public final static String QUEUE_NOT_ABLE_TO_ACCESS_PARTITION = + "Queue is not able to access partition"; + public final static String QUEUE_HIT_MAX_CAPACITY_LIMIT = + "Queue hits max-capacity limit"; + public final static String QUEUE_HIT_USER_MAX_CAPACITY_LIMIT = + "Queue hits user max-capacity limit"; + public final static String QUEUE_DO_NOT_HAVE_ENOUGH_HEADROOM = + "Queue does not have enough headroom for inner highest-priority request"; + public final static String QUEUE_DO_NOT_NEED_MORE_RESOURCE = "Queue does not need more resource"; - public final static String QUEUE_MAX_CAPACITY_LIMIT = - "Hit queue max-capacity limit"; - public final static String USER_CAPACITY_MAXIMUM_LIMIT = - "Hit user capacity maximum limit"; - public final static String SKIP_BLACK_LISTED_NODE = "Skip black listed node"; - public final static String PRIORITY_SKIPPED = "Priority skipped"; - public final static String PRIORITY_SKIPPED_BECAUSE_NULL_ANY_REQUEST = - "Priority skipped because off-switch request is null"; - public final static String SKIP_PRIORITY_BECAUSE_OF_RELAX_LOCALITY = - "Priority skipped because of relax locality is not allowed"; - public final static String SKIP_IN_IGNORE_EXCLUSIVITY_MODE = - "Skipping assigning to Node in Ignore Exclusivity mode"; - public final static String DO_NOT_NEED_ALLOCATIONATTEMPTINFOS = - "Doesn't need containers based on reservation algo!"; - public final static String QUEUE_SKIPPED_HEADROOM = - "Queue skipped because of headroom"; - public final static String NON_PARTITIONED_PARTITION_FIRST = - "Non-partitioned resource request should be scheduled to " - + "non-partitioned partition first"; - public final static String SKIP_NODE_LOCAL_REQUEST = - "Skip node-local request"; - public final static String SKIP_RACK_LOCAL_REQUEST = - "Skip rack-local request"; - public final static String SKIP_OFF_SWITCH_REQUEST = - "Skip offswitch request"; - public final static String REQUEST_CAN_NOT_ACCESS_NODE_LABEL = - "Resource request can not access the label"; - public final static String NOT_SUFFICIENT_RESOURCE = - "Node does not have sufficient resource for request"; - public final static String LOCALITY_SKIPPED = "Locality skipped"; - public final static String FAIL_TO_ALLOCATE = "Fail to allocate"; - public final static String COULD_NOT_GET_CONTAINER = - "Couldn't get container for allocation"; + public final static String QUEUE_SKIPPED_TO_RESPECT_FIFO = "Queue skipped " + + "following applications in the queue to respect FIFO of applications"; + public final static String QUEUE_SKIPPED_BECAUSE_SINGLE_NODE_RESERVED = + "Queue skipped because single placement node has been reserved"; + public final static String + QUEUE_SKIPPED_BECAUSE_SINGLE_NODE_RESOURCE_INSUFFICIENT = + "Queue skipped because single placement node resource is insufficient"; + + /* + * Application level diagnostics + */ + public final static String APPLICATION_FAIL_TO_ALLOCATE = + "Application fails to allocate"; + public final static String APPLICATION_COULD_NOT_GET_CONTAINER = + "Application couldn't get container for allocation"; + public final static String APPLICATION_DO_NOT_NEED_RESOURCE = "Application does not need more resource"; - public final static String APPLICATION_PRIORITY_DO_NOT_NEED_RESOURCE = - "Application priority does not need more resource"; - public final static String SKIPPED_ALL_PRIORITIES = - "All priorities are skipped of the app"; - public final static String RESPECT_FIFO = "To respect FIFO of applications, " - + "skipped following applications in the queue"; + + /* + * Request level diagnostics + */ + public final static String REQUEST_SKIPPED = "Request skipped"; + public final static String REQUEST_SKIPPED_BECAUSE_NULL_ANY_REQUEST = + "Request skipped because off-switch request is null"; + public final static String REQUEST_SKIPPED_IN_IGNORE_EXCLUSIVITY_MODE = + "Request skipped in Ignore Exclusivity mode for AM allocation"; + public final static String REQUEST_SKIPPED_BECAUSE_OF_RESERVATION = + "Request skipped based on reservation algo"; + public final static String + REQUEST_SKIPPED_BECAUSE_NON_PARTITIONED_PARTITION_FIRST = + "Request skipped because non-partitioned resource request should be " + + "scheduled to non-partitioned partition first"; + public final static String REQUEST_DO_NOT_NEED_RESOURCE = + "Request does not need more resource"; + + /* + * Node level diagnostics + */ + public final static String NODE_SKIPPED_FOR_NODE_LOCAL_REQUEST = + "Node skipped for node-local request"; + public final static String NODE_SKIPPED_FOR_RACK_LOCAL_REQUEST = + "Node skipped for rack-local request"; + public final static String NODE_SKIPPED_FOR_OFF_SWITCH_REQUEST = + "Node skipped for off-switch request"; + public final static String NODE_DO_NOT_HAVE_SUFFICIENT_RESOURCE = + "Node does not have sufficient resource for request"; + public final static String NODE_IS_BLACKLISTED = "Node is blacklisted"; + public final static String NODE_SKIPPED_BECAUSE_OF_RELAX_LOCALITY = + "Node skipped because relax locality is not allowed"; public final static String NODE_DO_NOT_MATCH_PARTITION_OR_PLACEMENT_CONSTRAINTS = "Node does not match partition or placement constraints"; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/ActivityLevel.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/ActivityLevel.java new file mode 100644 index 00000000000..ab08d5f0780 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/ActivityLevel.java @@ -0,0 +1,29 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.server.resourcemanager.scheduler.activities; + +/** + * Collection of activity operation levels. + */ +public enum ActivityLevel { + QUEUE, + APP, + REQUEST, + NODE +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/ActivityNode.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/ActivityNode.java index e658d2fbefd..103ef85dd07 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/ActivityNode.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/ActivityNode.java @@ -42,26 +42,26 @@ private List childNode; public ActivityNode(String activityNodeName, String parentName, - String priority, ActivityState state, String diagnostic, String type) { - this(activityNodeName, parentName, priority, state, diagnostic, type, null, - null); - } - - public ActivityNode(String activityNodeName, String parentName, - String priority, ActivityState state, String diagnostic, String type, - NodeId nodeId, String allocationRequestId) { + String priority, ActivityState state, String diagnostic, + ActivityLevel level, NodeId nodeId, String allocationRequestId) { this.activityNodeName = activityNodeName; this.parentName = parentName; - if (type != null) { - if (type.equals("app")) { + if (level != null) { + switch (level) { + case APP: this.appPriority = priority; - } else if (type.equals("request")) { + break; + case REQUEST: this.requestPriority = priority; this.allocationRequestId = allocationRequestId; - } else if (type.equals("container")) { + break; + case NODE: this.requestPriority = priority; this.allocationRequestId = allocationRequestId; this.nodeId = nodeId; + break; + default: + break; } } this.state = state; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/AllocationActivity.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/AllocationActivity.java index a71ec6f5613..fb832a7c32a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/AllocationActivity.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/AllocationActivity.java @@ -36,23 +36,31 @@ private String diagnostic = null; private NodeId nodeId; private String allocationRequestId; + private ActivityLevel level; private static final Logger LOG = LoggerFactory.getLogger(AllocationActivity.class); public AllocationActivity(String parentName, String queueName, - String priority, ActivityState state, String diagnostic, String type, - NodeId nodeId, String allocationRequestId) { + String priority, ActivityState state, String diagnostic, + ActivityLevel level, NodeId nodeId, String allocationRequestId) { this.childName = queueName; this.parentName = parentName; - if (type != null) { - if (type.equals("app")) { + if (level != null) { + this.level = level; + switch (level) { + case APP: this.appPriority = priority; - } else if (type.equals("request")) { + break; + case REQUEST: this.requestPriority = priority; this.allocationRequestId = allocationRequestId; - } else if (type.equals("container")) { + break; + case NODE: this.nodeId = nodeId; + break; + default: + break; } } this.state = state; @@ -60,21 +68,11 @@ public AllocationActivity(String parentName, String queueName, } public ActivityNode createTreeNode() { - if (appPriority != null) { - return new ActivityNode(this.childName, this.parentName, this.appPriority, - this.state, this.diagnostic, "app"); - } else if (requestPriority != null) { - return new ActivityNode(this.childName, this.parentName, - this.requestPriority, this.state, this.diagnostic, "request", null, - allocationRequestId); - } else if (nodeId != null) { - return new ActivityNode(this.childName, this.parentName, - this.requestPriority, this.state, this.diagnostic, "container", - this.nodeId, null); - } else { - return new ActivityNode(this.childName, this.parentName, null, this.state, - this.diagnostic, null); - } + return new ActivityNode(this.childName, this.parentName, + this.level == ActivityLevel.APP ? + this.appPriority : this.requestPriority, + this.state, this.diagnostic, this.level, + this.nodeId, this.allocationRequestId); } public String getName() { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/AppAllocation.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/AppAllocation.java index e226b50fb77..1e63c3e72e6 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/AppAllocation.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/AppAllocation.java @@ -60,10 +60,10 @@ public void updateAppContainerStateAndTime(ContainerId containerId, } public void addAppAllocationActivity(String containerId, String priority, - ActivityState state, String diagnose, String type, NodeId nId, + ActivityState state, String diagnose, ActivityLevel level, NodeId nId, String allocationRequestId) { ActivityNode container = new ActivityNode(containerId, null, priority, - state, diagnose, type, nId, allocationRequestId); + state, diagnose, level, nId, allocationRequestId); this.allocationAttempts.add(container); if (state == ActivityState.REJECTED) { this.appState = ActivityState.SKIPPED; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/NodeAllocation.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/NodeAllocation.java index df1d00c77e2..c502e4911f7 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/NodeAllocation.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/NodeAllocation.java @@ -43,6 +43,7 @@ private ContainerId containerId = null; private AllocationState containerState = AllocationState.DEFAULT; private List allocationOperations; + private String partition; private ActivityNode root = null; @@ -55,10 +56,10 @@ public NodeAllocation(NodeId nodeId) { } public void addAllocationActivity(String parentName, String childName, - String priority, ActivityState state, String diagnostic, String type, - NodeId nId, String allocationRequestId) { + String priority, ActivityState state, String diagnostic, + ActivityLevel level, NodeId nId, String allocationRequestId) { AllocationActivity allocate = new AllocationActivity(parentName, childName, - priority, state, diagnostic, type, nId, allocationRequestId); + priority, state, diagnostic, level, nId, allocationRequestId); this.allocationOperations.add(allocate); } @@ -138,4 +139,12 @@ public ActivityNode getRoot() { public NodeId getNodeId() { return nodeId; } + + public String getPartition() { + return partition; + } + + public void setPartition(String partition) { + this.partition = partition; + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java index e59abee6b51..a247f3d8907 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java @@ -1518,6 +1518,11 @@ private CSAssignment allocateContainerOnSingleNode( if (getNode(node.getNodeID()) != node) { LOG.error("Trying to schedule on a removed node, please double check, " + "nodeId=" + node.getNodeID()); + ActivitiesLogger.QUEUE.recordQueueActivity(activitiesManager, node, + "", getRootQueue().getQueueName(), ActivityState.REJECTED, + ActivityDiagnosticConstant.INIT_CHECK_SINGLE_NODE_REMOVED); + ActivitiesLogger.NODE.finishSkippedNodeAllocation(activitiesManager, + node); return null; } @@ -1527,12 +1532,9 @@ private CSAssignment allocateContainerOnSingleNode( RMContainer reservedContainer = node.getReservedContainer(); if (reservedContainer != null) { allocateFromReservedContainer(node, withNodeHeartbeat, reservedContainer); - } - - // Do not schedule if there are any reservations to fulfill on the node - if (node.getReservedContainer() != null) { + // Do not schedule if there are any reservations to fulfill on the node LOG.debug("Skipping scheduling since node {} is reserved by" - + " application {}", node.getNodeID(), node.getReservedContainer(). + + " application {}", node.getNodeID(), reservedContainer. getContainerId().getApplicationAttemptId()); return null; } @@ -1543,8 +1545,14 @@ private CSAssignment allocateContainerOnSingleNode( if (calculator.computeAvailableContainers(Resources .add(node.getUnallocatedResource(), node.getTotalKillableResources()), minimumAllocation) <= 0) { - LOG.debug("This node or node partition doesn't have available or" + - " preemptible resource"); + LOG.debug("This node " + node.getNodeID() + " doesn't have sufficient " + + "available or preemptible resource for minimum allocation"); + ActivitiesLogger.QUEUE.recordQueueActivity(activitiesManager, node, + "", getRootQueue().getQueueName(), ActivityState.REJECTED, + ActivityDiagnosticConstant. + INIT_CHECK_SINGLE_NODE_RESOURCE_INSUFFICIENT); + ActivitiesLogger.NODE.finishSkippedNodeAllocation(activitiesManager, + node); return null; } @@ -1594,7 +1602,13 @@ private void allocateFromReservedContainer(FiCaSchedulerNode node, ActivitiesLogger.NODE.finishAllocatedNodeAllocation(activitiesManager, node, reservedContainer.getContainerId(), AllocationState.ALLOCATED_FROM_RESERVED); - } else{ + } else if (assignment.getAssignmentInformation().getNumReservations() > 0) { + ActivitiesLogger.QUEUE.recordQueueActivity(activitiesManager, node, + queue.getParent().getQueueName(), queue.getQueueName(), + ActivityState.RE_RESERVED, ActivityDiagnosticConstant.EMPTY); + ActivitiesLogger.NODE.finishAllocatedNodeAllocation(activitiesManager, + node, reservedContainer.getContainerId(), AllocationState.RESERVED); + } else { ActivitiesLogger.QUEUE.recordQueueActivity(activitiesManager, node, queue.getParent().getQueueName(), queue.getQueueName(), ActivityState.SKIPPED, ActivityDiagnosticConstant.EMPTY); @@ -1685,12 +1699,14 @@ private CSAssignment allocateContainersOnMultiNodes( allocateFromReservedContainer(node, false, reservedContainer); } } - LOG.debug("This node or this node partition doesn't have available or " - + "killable resource"); + LOG.debug("This partition '{}' doesn't have available or " + + "killable resource", candidates.getPartition()); ActivitiesLogger.QUEUE.recordQueueActivity(activitiesManager, null, "", getRootQueue().getQueueName(), ActivityState.REJECTED, - ActivityDiagnosticConstant.NOT_ABLE_TO_ACCESS_PARTITION + " " - + candidates.getPartition()); + ActivityDiagnosticConstant. + INIT_CHECK_PARTITION_RESOURCE_INSUFFICIENT); + ActivitiesLogger.NODE + .finishSkippedNodeAllocation(activitiesManager, null); return null; } @@ -1721,13 +1737,13 @@ CSAssignment allocateContainersToNode( assignment = allocateContainerOnSingleNode(candidates, node, withNodeHeartbeat); ActivitiesLogger.NODE.finishNodeUpdateRecording(activitiesManager, - node.getNodeID()); + node.getNodeID(), candidates.getPartition()); } else{ ActivitiesLogger.NODE.startNodeUpdateRecording(activitiesManager, ActivitiesManager.EMPTY_NODE_ID); assignment = allocateContainersOnMultiNodes(candidates); ActivitiesLogger.NODE.finishNodeUpdateRecording(activitiesManager, - ActivitiesManager.EMPTY_NODE_ID); + ActivitiesManager.EMPTY_NODE_ID, candidates.getPartition()); } if (assignment != null && assignment.getAssignmentInformation() != null diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java index a178f9e9a0b..755d7abd333 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java @@ -1074,8 +1074,7 @@ public CSAssignment assignContainers(Resource clusterResource, && !accessibleToPartition(candidates.getPartition())) { ActivitiesLogger.QUEUE.recordQueueActivity(activitiesManager, node, getParent().getQueueName(), getQueueName(), ActivityState.REJECTED, - ActivityDiagnosticConstant.NOT_ABLE_TO_ACCESS_PARTITION + " " - + candidates.getPartition()); + ActivityDiagnosticConstant.QUEUE_NOT_ABLE_TO_ACCESS_PARTITION); return CSAssignment.NULL_ASSIGNMENT; } @@ -1113,7 +1112,7 @@ public CSAssignment assignContainers(Resource clusterResource, schedulingMode)) { ActivitiesLogger.APP.recordRejectedAppActivityFromLeafQueue( activitiesManager, node, application, application.getPriority(), - ActivityDiagnosticConstant.QUEUE_MAX_CAPACITY_LIMIT); + ActivityDiagnosticConstant.QUEUE_HIT_MAX_CAPACITY_LIMIT); ActivitiesLogger.QUEUE.recordQueueActivity(activitiesManager, node, getParent().getQueueName(), getQueueName(), ActivityState.SKIPPED, ActivityDiagnosticConstant.EMPTY); @@ -1157,7 +1156,7 @@ public CSAssignment assignContainers(Resource clusterResource, "User capacity has reached its maximum limit."); ActivitiesLogger.APP.recordRejectedAppActivityFromLeafQueue( activitiesManager, node, application, application.getPriority(), - ActivityDiagnosticConstant.USER_CAPACITY_MAXIMUM_LIMIT); + ActivityDiagnosticConstant.QUEUE_HIT_USER_MAX_CAPACITY_LIMIT); continue; } @@ -1189,15 +1188,16 @@ public CSAssignment assignContainers(Resource clusterResource, } else if (assignment.getSkippedType() == CSAssignment.SkippedType.QUEUE_LIMIT) { ActivitiesLogger.QUEUE.recordQueueActivity(activitiesManager, node, - getParent().getQueueName(), getQueueName(), ActivityState.SKIPPED, - ActivityDiagnosticConstant.QUEUE_SKIPPED_HEADROOM); + getParent().getQueueName(), getQueueName(), ActivityState.REJECTED, + () -> ActivityDiagnosticConstant.QUEUE_DO_NOT_HAVE_ENOUGH_HEADROOM + + " from " + application.getApplicationId()); return assignment; } else{ // If we don't allocate anything, and it is not skipped by application, // we will return to respect FIFO of applications ActivitiesLogger.QUEUE.recordQueueActivity(activitiesManager, node, getParent().getQueueName(), getQueueName(), ActivityState.SKIPPED, - ActivityDiagnosticConstant.RESPECT_FIFO); + ActivityDiagnosticConstant.QUEUE_SKIPPED_TO_RESPECT_FIFO); ActivitiesLogger.APP.finishSkippedAppAllocationRecording( activitiesManager, application.getApplicationId(), ActivityState.SKIPPED, ActivityDiagnosticConstant.EMPTY); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/ParentQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/ParentQueue.java index c56369c3e1c..b15bc57f745 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/ParentQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/ParentQueue.java @@ -559,8 +559,7 @@ public CSAssignment assignContainers(Resource clusterResource, ActivitiesLogger.QUEUE.recordQueueActivity(activitiesManager, node, getParentName(), getQueueName(), ActivityState.REJECTED, - ActivityDiagnosticConstant.NOT_ABLE_TO_ACCESS_PARTITION - + candidates.getPartition()); + ActivityDiagnosticConstant.QUEUE_NOT_ABLE_TO_ACCESS_PARTITION); if (rootQueue) { ActivitiesLogger.NODE.finishSkippedNodeAllocation(activitiesManager, node); @@ -613,8 +612,8 @@ public CSAssignment assignContainers(Resource clusterResource, getMetrics().getReservedVirtualCores()), schedulingMode)) { ActivitiesLogger.QUEUE.recordQueueActivity(activitiesManager, node, - getParentName(), getQueueName(), ActivityState.SKIPPED, - ActivityDiagnosticConstant.QUEUE_MAX_CAPACITY_LIMIT); + getParentName(), getQueueName(), ActivityState.REJECTED, + ActivityDiagnosticConstant.QUEUE_HIT_MAX_CAPACITY_LIMIT); if (rootQueue) { ActivitiesLogger.NODE.finishSkippedNodeAllocation(activitiesManager, node); @@ -648,22 +647,13 @@ public CSAssignment assignContainers(Resource clusterResource, assignedToChild.getAssignmentInformation().getReservationDetails() != null && !assignedToChild.getAssignmentInformation() .getReservationDetails().isEmpty(); - if (node != null && !isReserved) { - if (rootQueue) { - ActivitiesLogger.NODE.finishAllocatedNodeAllocation( - activitiesManager, node, - assignedToChild.getAssignmentInformation() - .getFirstAllocatedOrReservedContainerId(), - AllocationState.ALLOCATED); - } - } else{ - if (rootQueue) { - ActivitiesLogger.NODE.finishAllocatedNodeAllocation( - activitiesManager, node, - assignedToChild.getAssignmentInformation() - .getFirstAllocatedOrReservedContainerId(), - AllocationState.RESERVED); - } + if (rootQueue) { + ActivitiesLogger.NODE.finishAllocatedNodeAllocation( + activitiesManager, node, + assignedToChild.getAssignmentInformation() + .getFirstAllocatedOrReservedContainerId(), + isReserved ? + AllocationState.RESERVED : AllocationState.ALLOCATED); } // Track resource utilization in this pass of the scheduler @@ -735,10 +725,24 @@ private boolean canAssign(Resource clusterResource, FiCaSchedulerNode node) { // Two conditions need to meet when trying to allocate: // 1) Node doesn't have reserved container // 2) Node's available-resource + killable-resource should > 0 - return node.getReservedContainer() == null && Resources.greaterThanOrEqual( - resourceCalculator, clusterResource, Resources + boolean accept = node.getReservedContainer() == null && Resources + .greaterThanOrEqual(resourceCalculator, clusterResource, Resources .add(node.getUnallocatedResource(), node.getTotalKillableResources()), minimumAllocation); + if (!accept) { + ActivitiesLogger.QUEUE.recordQueueActivity(activitiesManager, node, + getParentName(), getQueueName(), ActivityState.REJECTED, + () -> node.getReservedContainer() != null ? + ActivityDiagnosticConstant. + QUEUE_SKIPPED_BECAUSE_SINGLE_NODE_RESERVED : + ActivityDiagnosticConstant. + QUEUE_SKIPPED_BECAUSE_SINGLE_NODE_RESOURCE_INSUFFICIENT); + if (rootQueue) { + ActivitiesLogger.NODE.finishSkippedNodeAllocation(activitiesManager, + node); + } + } + return accept; } private ResourceLimits getResourceLimitsOfChild(CSQueue child, diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/RegularContainerAllocator.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/RegularContainerAllocator.java index 2643fd0b7a1..2e752be3bfc 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/RegularContainerAllocator.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/RegularContainerAllocator.java @@ -24,6 +24,7 @@ import java.util.Optional; import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.activities.ActivityLevel; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.activities.DiagnosticsCollector; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -107,7 +108,8 @@ private ContainerAllocation preCheckForNodeCandidateSet( if (offswitchPendingAsk.getCount() <= 0) { ActivitiesLogger.APP.recordSkippedAppActivityWithoutAllocation( activitiesManager, node, application, schedulerKey, - ActivityDiagnosticConstant.PRIORITY_SKIPPED_BECAUSE_NULL_ANY_REQUEST); + ActivityDiagnosticConstant.REQUEST_SKIPPED_BECAUSE_NULL_ANY_REQUEST, + ActivityLevel.REQUEST); return ContainerAllocation.PRIORITY_SKIPPED; } @@ -118,7 +120,8 @@ private ContainerAllocation preCheckForNodeCandidateSet( if (application.getOutstandingAsksCount(schedulerKey) <= 0) { ActivitiesLogger.APP.recordSkippedAppActivityWithoutAllocation( activitiesManager, node, application, schedulerKey, - ActivityDiagnosticConstant.APPLICATION_PRIORITY_DO_NOT_NEED_RESOURCE); + ActivityDiagnosticConstant.REQUEST_DO_NOT_NEED_RESOURCE, + ActivityLevel.REQUEST); return ContainerAllocation.PRIORITY_SKIPPED; } @@ -133,7 +136,9 @@ private ContainerAllocation preCheckForNodeCandidateSet( "Skipping assigning to Node in Ignore Exclusivity mode. "); ActivitiesLogger.APP.recordSkippedAppActivityWithoutAllocation( activitiesManager, node, application, schedulerKey, - ActivityDiagnosticConstant.SKIP_IN_IGNORE_EXCLUSIVITY_MODE); + ActivityDiagnosticConstant. + REQUEST_SKIPPED_IN_IGNORE_EXCLUSIVITY_MODE, + ActivityLevel.REQUEST); return ContainerAllocation.APP_SKIPPED; } } @@ -148,7 +153,8 @@ private ContainerAllocation preCheckForNodeCandidateSet( activitiesManager, node, application, schedulerKey, ActivityDiagnosticConstant. NODE_DO_NOT_MATCH_PARTITION_OR_PLACEMENT_CONSTRAINTS - + ActivitiesManager.getDiagnostics(dcOpt)); + + ActivitiesManager.getDiagnostics(dcOpt), + ActivityLevel.NODE); return ContainerAllocation.PRIORITY_SKIPPED; } @@ -157,7 +163,8 @@ private ContainerAllocation preCheckForNodeCandidateSet( LOG.debug("doesn't need containers based on reservation algo!"); ActivitiesLogger.APP.recordSkippedAppActivityWithoutAllocation( activitiesManager, node, application, schedulerKey, - ActivityDiagnosticConstant.DO_NOT_NEED_ALLOCATIONATTEMPTINFOS); + ActivityDiagnosticConstant.REQUEST_SKIPPED_BECAUSE_OF_RESERVATION, + ActivityLevel.REQUEST); return ContainerAllocation.PRIORITY_SKIPPED; } } @@ -166,9 +173,11 @@ private ContainerAllocation preCheckForNodeCandidateSet( node.getPartition())) { LOG.debug("cannot allocate required resource={} because of headroom", required); - ActivitiesLogger.APP.recordSkippedAppActivityWithoutAllocation( + ActivitiesLogger.APP.recordAppActivityWithoutAllocation( activitiesManager, node, application, schedulerKey, - ActivityDiagnosticConstant.QUEUE_SKIPPED_HEADROOM); + ActivityDiagnosticConstant.QUEUE_DO_NOT_HAVE_ENOUGH_HEADROOM, + ActivityState.REJECTED, + ActivityLevel.REQUEST); return ContainerAllocation.QUEUE_SKIPPED; } @@ -183,7 +192,8 @@ private ContainerAllocation preCheckForNodeCandidateSet( // thread. ActivitiesLogger.APP.recordSkippedAppActivityWithoutAllocation( activitiesManager, node, application, schedulerKey, - ActivityDiagnosticConstant.PRIORITY_SKIPPED_BECAUSE_NULL_ANY_REQUEST); + ActivityDiagnosticConstant.REQUEST_SKIPPED_BECAUSE_NULL_ANY_REQUEST, + ActivityLevel.REQUEST); return ContainerAllocation.PRIORITY_SKIPPED; } String requestPartition = @@ -213,7 +223,9 @@ private ContainerAllocation preCheckForNodeCandidateSet( } ActivitiesLogger.APP.recordSkippedAppActivityWithoutAllocation( activitiesManager, node, application, schedulerKey, - ActivityDiagnosticConstant.NON_PARTITIONED_PARTITION_FIRST); + ActivityDiagnosticConstant. + REQUEST_SKIPPED_BECAUSE_NON_PARTITIONED_PARTITION_FIRST, + ActivityLevel.REQUEST); return ContainerAllocation.APP_SKIPPED; } } @@ -228,7 +240,8 @@ private ContainerAllocation checkIfNodeBlackListed(FiCaSchedulerNode node, CSAMContainerLaunchDiagnosticsConstants.SKIP_AM_ALLOCATION_IN_BLACK_LISTED_NODE); ActivitiesLogger.APP.recordSkippedAppActivityWithoutAllocation( activitiesManager, node, application, schedulerKey, - ActivityDiagnosticConstant.SKIP_BLACK_LISTED_NODE); + ActivityDiagnosticConstant.NODE_IS_BLACKLISTED, + ActivityLevel.NODE); return ContainerAllocation.APP_SKIPPED; } @@ -368,7 +381,8 @@ private ContainerAllocation assignNodeLocalContainers( // Skip node-local request, go to rack-local request ActivitiesLogger.APP.recordSkippedAppActivityWithoutAllocation( activitiesManager, node, application, schedulerKey, - ActivityDiagnosticConstant.SKIP_NODE_LOCAL_REQUEST); + ActivityDiagnosticConstant.NODE_SKIPPED_FOR_NODE_LOCAL_REQUEST, + ActivityLevel.NODE); return ContainerAllocation.LOCALITY_SKIPPED; } @@ -386,7 +400,8 @@ private ContainerAllocation assignRackLocalContainers( // Skip rack-local request, go to off-switch request ActivitiesLogger.APP.recordSkippedAppActivityWithoutAllocation( activitiesManager, node, application, schedulerKey, - ActivityDiagnosticConstant.SKIP_RACK_LOCAL_REQUEST); + ActivityDiagnosticConstant.NODE_SKIPPED_FOR_RACK_LOCAL_REQUEST, + ActivityLevel.NODE); return ContainerAllocation.LOCALITY_SKIPPED; } @@ -405,7 +420,8 @@ private ContainerAllocation assignOffSwitchContainers( CSAMContainerLaunchDiagnosticsConstants.SKIP_AM_ALLOCATION_DUE_TO_LOCALITY); ActivitiesLogger.APP.recordSkippedAppActivityWithoutAllocation( activitiesManager, node, application, schedulerKey, - ActivityDiagnosticConstant.SKIP_OFF_SWITCH_REQUEST); + ActivityDiagnosticConstant.NODE_SKIPPED_FOR_OFF_SWITCH_REQUEST, + ActivityLevel.NODE); return ContainerAllocation.APP_SKIPPED; } @@ -439,7 +455,8 @@ private ContainerAllocation assignContainersOnNode(Resource clusterResource, if (!appInfo.canDelayTo(schedulerKey, node.getRackName())) { ActivitiesLogger.APP.recordSkippedAppActivityWithoutAllocation( activitiesManager, node, application, schedulerKey, - ActivityDiagnosticConstant.SKIP_PRIORITY_BECAUSE_OF_RELAX_LOCALITY); + ActivityDiagnosticConstant.NODE_SKIPPED_BECAUSE_OF_RELAX_LOCALITY, + ActivityLevel.NODE); return ContainerAllocation.PRIORITY_SKIPPED; } @@ -465,7 +482,8 @@ private ContainerAllocation assignContainersOnNode(Resource clusterResource, if (!appInfo.canDelayTo(schedulerKey, ResourceRequest.ANY)) { ActivitiesLogger.APP.recordSkippedAppActivityWithoutAllocation( activitiesManager, node, application, schedulerKey, - ActivityDiagnosticConstant.SKIP_PRIORITY_BECAUSE_OF_RELAX_LOCALITY); + ActivityDiagnosticConstant.NODE_SKIPPED_BECAUSE_OF_RELAX_LOCALITY, + ActivityLevel.NODE); return ContainerAllocation.PRIORITY_SKIPPED; } @@ -489,7 +507,8 @@ private ContainerAllocation assignContainersOnNode(Resource clusterResource, } ActivitiesLogger.APP.recordSkippedAppActivityWithoutAllocation( activitiesManager, node, application, schedulerKey, - ActivityDiagnosticConstant.PRIORITY_SKIPPED); + ActivityDiagnosticConstant.REQUEST_SKIPPED, + ActivityLevel.NODE); return ContainerAllocation.PRIORITY_SKIPPED; } @@ -516,8 +535,9 @@ private ContainerAllocation assignContainer(Resource clusterResource, // Skip this locality request ActivitiesLogger.APP.recordSkippedAppActivityWithoutAllocation( activitiesManager, node, application, schedulerKey, - ActivityDiagnosticConstant.NOT_SUFFICIENT_RESOURCE - + getResourceDiagnostics(capability, totalResource)); + ActivityDiagnosticConstant.NODE_DO_NOT_HAVE_SUFFICIENT_RESOURCE + + getResourceDiagnostics(capability, totalResource), + ActivityLevel.NODE); return ContainerAllocation.LOCALITY_SKIPPED; } @@ -597,7 +617,8 @@ private ContainerAllocation assignContainer(Resource clusterResource, ActivitiesLogger.APP.recordSkippedAppActivityWithoutAllocation( activitiesManager, node, application, schedulerKey, ActivityDiagnosticConstant. - NODE_CAN_NOT_FIND_CONTAINER_TO_BE_UNRESERVED_WHEN_NEEDED); + NODE_CAN_NOT_FIND_CONTAINER_TO_BE_UNRESERVED_WHEN_NEEDED, + ActivityLevel.NODE); return ContainerAllocation.LOCALITY_SKIPPED; } } @@ -622,18 +643,20 @@ private ContainerAllocation assignContainer(Resource clusterResource, // Skip the locality request ActivitiesLogger.APP.recordSkippedAppActivityWithoutAllocation( activitiesManager, node, application, schedulerKey, - ActivityDiagnosticConstant.NOT_SUFFICIENT_RESOURCE - + getResourceDiagnostics(capability, availableForDC)); + ActivityDiagnosticConstant.NODE_DO_NOT_HAVE_SUFFICIENT_RESOURCE + + getResourceDiagnostics(capability, availableForDC), + ActivityLevel.NODE); return ContainerAllocation.LOCALITY_SKIPPED; } } ActivitiesLogger.APP.recordAppActivityWithoutAllocation( activitiesManager, node, application, schedulerKey, - ActivityDiagnosticConstant.NOT_SUFFICIENT_RESOURCE + ActivityDiagnosticConstant.NODE_DO_NOT_HAVE_SUFFICIENT_RESOURCE + getResourceDiagnostics(capability, availableForDC), rmContainer == null ? - ActivityState.RESERVED : ActivityState.RE_RESERVED); + ActivityState.RESERVED : ActivityState.RE_RESERVED, + ActivityLevel.NODE); ContainerAllocation result = new ContainerAllocation(null, pendingAsk.getPerAllocationResource(), AllocationState.RESERVED); result.containerNodeType = type; @@ -643,8 +666,9 @@ private ContainerAllocation assignContainer(Resource clusterResource, // Skip the locality request ActivitiesLogger.APP.recordSkippedAppActivityWithoutAllocation( activitiesManager, node, application, schedulerKey, - ActivityDiagnosticConstant.NOT_SUFFICIENT_RESOURCE - + getResourceDiagnostics(capability, availableForDC)); + ActivityDiagnosticConstant.NODE_DO_NOT_HAVE_SUFFICIENT_RESOURCE + + getResourceDiagnostics(capability, availableForDC), + ActivityLevel.NODE); return ContainerAllocation.LOCALITY_SKIPPED; } } @@ -719,7 +743,8 @@ private ContainerAllocation handleNewContainerAllocation( null, AllocationState.APP_SKIPPED); ActivitiesLogger.APP.recordAppActivityWithoutAllocation(activitiesManager, node, application, schedulerKey, - ActivityDiagnosticConstant.FAIL_TO_ALLOCATE, ActivityState.REJECTED); + ActivityDiagnosticConstant.APPLICATION_FAIL_TO_ALLOCATE, + ActivityState.REJECTED, ActivityLevel.APP); return ret; } @@ -741,8 +766,8 @@ ContainerAllocation doAllocation(ContainerAllocation allocationResult, LOG.warn("Couldn't get container for allocation!"); ActivitiesLogger.APP.recordAppActivityWithoutAllocation(activitiesManager, node, application, schedulerKey, - ActivityDiagnosticConstant.COULD_NOT_GET_CONTAINER, - ActivityState.REJECTED); + ActivityDiagnosticConstant.APPLICATION_COULD_NOT_GET_CONTAINER, + ActivityState.REJECTED, ActivityLevel.APP); return ContainerAllocation.APP_SKIPPED; } @@ -765,8 +790,8 @@ ContainerAllocation doAllocation(ContainerAllocation allocationResult, .recordAppActivityWithoutAllocation(activitiesManager, node, application, schedulerKey, ActivityDiagnosticConstant. - PRIORITY_SKIPPED_BECAUSE_NULL_ANY_REQUEST, - ActivityState.REJECTED); + REQUEST_SKIPPED_BECAUSE_NULL_ANY_REQUEST, + ActivityState.REJECTED, ActivityLevel.REQUEST); return ContainerAllocation.PRIORITY_SKIPPED; } updatedContainer = new RMContainerImpl(container, schedulerKey, @@ -827,8 +852,8 @@ private ContainerAllocation allocate(Resource clusterResource, if (schedulingPS == null) { ActivitiesLogger.APP.recordSkippedAppActivityWithoutAllocation( activitiesManager, null, application, schedulerKey, - ActivityDiagnosticConstant. - APPLICATION_PRIORITY_DO_NOT_NEED_RESOURCE); + ActivityDiagnosticConstant.REQUEST_SKIPPED_BECAUSE_NULL_ANY_REQUEST, + ActivityLevel.REQUEST); return new ContainerAllocation(reservedContainer, null, AllocationState.PRIORITY_SKIPPED); } @@ -888,7 +913,8 @@ public CSAssignment assignContainers(Resource clusterResource, } ActivitiesLogger.APP.recordSkippedAppActivityWithoutAllocation( activitiesManager, node, application, null, - ActivityDiagnosticConstant.APPLICATION_DO_NOT_NEED_RESOURCE); + ActivityDiagnosticConstant.APPLICATION_DO_NOT_NEED_RESOURCE, + ActivityLevel.APP); return CSAssignment.SKIP_ASSIGNMENT; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/ActivityNodeInfo.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/ActivityNodeInfo.java index b6e0a533b23..a70ba693588 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/ActivityNodeInfo.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/ActivityNodeInfo.java @@ -38,13 +38,13 @@ @XmlRootElement @XmlAccessorType(XmlAccessType.FIELD) public class ActivityNodeInfo { - protected String name; // The name for activity node - protected String appPriority; - protected String requestPriority; - protected String allocationState; - protected String diagnostic; - private String nodeId; + private String name; // The name for activity node + private String appPriority; + private String requestPriority; private String allocationRequestId; + private String activityState; + private String diagnostic; + private String nodeId; // Used for groups of activities private String count; @@ -55,17 +55,17 @@ ActivityNodeInfo() { } - public ActivityNodeInfo(String name, ActivityState allocationState, + public ActivityNodeInfo(String name, ActivityState activityState, String diagnostic, NodeId nId) { this.name = name; - this.allocationState = allocationState.name(); + this.activityState = activityState.name(); this.diagnostic = diagnostic; setNodeId(nId); } - public ActivityNodeInfo(ActivityState groupAllocationState, + public ActivityNodeInfo(ActivityState groupActivityState, String groupDiagnostic, List groupNodeIds) { - this.allocationState = groupAllocationState.name(); + this.activityState = groupActivityState.name(); this.diagnostic = groupDiagnostic; this.count = String.valueOf(groupNodeIds.size()); this.nodeIds = groupNodeIds; @@ -76,7 +76,7 @@ public ActivityNodeInfo(ActivityState groupAllocationState, this.name = node.getName(); setPriority(node); setNodeId(node.getNodeId()); - this.allocationState = node.getState().name(); + this.activityState = node.getState().name(); this.diagnostic = node.getDiagnostic(); this.requestPriority = node.getRequestPriority(); this.allocationRequestId = node.getAllocationRequestId(); @@ -128,4 +128,24 @@ public String getCount() { public List getChildren() { return children; } + + public String getActivityState() { + return activityState; + } + + public String getName() { + return name; + } + + public String getAppPriority() { + return appPriority; + } + + public String getRequestPriority() { + return requestPriority; + } + + public String getDiagnostic() { + return diagnostic; + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/AppAllocationInfo.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/AppAllocationInfo.java index 6ae1f9a819b..e057ff0b50d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/AppAllocationInfo.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/AppAllocationInfo.java @@ -38,27 +38,27 @@ @XmlAccessorType(XmlAccessType.FIELD) public class AppAllocationInfo { private String nodeId; - private String queueName; - private String appPriority; private long timestamp; private String dateTime; - private String allocationState; + private String queueName; + private String appPriority; + private String activityState; private String diagnostic; - private List requestAllocation; + private List requestAllocations; AppAllocationInfo() { } AppAllocationInfo(AppAllocation allocation, RMWSConsts.ActivitiesGroupBy groupBy) { - this.requestAllocation = new ArrayList<>(); + this.requestAllocations = new ArrayList<>(); this.nodeId = allocation.getNodeId(); this.queueName = allocation.getQueueName(); this.appPriority = allocation.getPriority() == null ? null : allocation.getPriority().toString(); this.timestamp = allocation.getTime(); this.dateTime = new Date(allocation.getTime()).toString(); - this.allocationState = allocation.getAppState().name(); + this.activityState = allocation.getAppState().name(); this.diagnostic = allocation.getDiagnostic(); Map> requestToActivityNodes = allocation.getAllocationAttempts().stream().collect(Collectors @@ -68,7 +68,7 @@ .values()) { AppRequestAllocationInfo requestAllocationInfo = new AppRequestAllocationInfo(requestActivityNodes, groupBy); - this.requestAllocation.add(requestAllocationInfo); + this.requestAllocations.add(requestAllocationInfo); } } @@ -92,12 +92,12 @@ public String getDateTime() { return dateTime; } - public String getAllocationState() { - return allocationState; + public String getActivityState() { + return activityState; } - public List getRequestAllocation() { - return requestAllocation; + public List getRequestAllocations() { + return requestAllocations; } public String getDiagnostic() { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/AppRequestAllocationInfo.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/AppRequestAllocationInfo.java index 09251283aeb..e05677d5cad 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/AppRequestAllocationInfo.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/AppRequestAllocationInfo.java @@ -36,8 +36,9 @@ public class AppRequestAllocationInfo { private String requestPriority; private String allocationRequestId; - private String allocationState; - private List allocationAttempt; + private String activityState; + private String diagnostic; + private List allocationAttempts; AppRequestAllocationInfo() { } @@ -47,8 +48,12 @@ ActivityNode lastActivityNode = Iterables.getLast(activityNodes); this.requestPriority = lastActivityNode.getRequestPriority(); this.allocationRequestId = lastActivityNode.getAllocationRequestId(); - this.allocationState = lastActivityNode.getState().name(); - this.allocationAttempt = ActivitiesUtils + this.activityState = lastActivityNode.getState().name(); + if (lastActivityNode.isRequestType() + && lastActivityNode.getDiagnostic() != null) { + this.diagnostic = lastActivityNode.getDiagnostic(); + } + this.allocationAttempts = ActivitiesUtils .getRequestActivityNodeInfos(activityNodes, groupBy); } @@ -60,11 +65,15 @@ public String getAllocationRequestId() { return allocationRequestId; } - public String getAllocationState() { - return allocationState; + public String getActivityState() { + return activityState; } - public List getAllocationAttempt() { - return allocationAttempt; + public List getAllocationAttempts() { + return allocationAttempts; + } + + public String getDiagnostic() { + return diagnostic; } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/NodeAllocationInfo.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/NodeAllocationInfo.java index 71c576d74c4..0239ccc8e0f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/NodeAllocationInfo.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/NodeAllocationInfo.java @@ -33,6 +33,7 @@ @XmlRootElement @XmlAccessorType(XmlAccessType.FIELD) public class NodeAllocationInfo { + private String partition; protected String allocatedContainerId; protected String finalAllocationState; protected ActivityNodeInfo root = null; @@ -45,10 +46,13 @@ NodeAllocationInfo(NodeAllocation allocation, RMWSConsts.ActivitiesGroupBy groupBy) { + this.partition = allocation.getPartition(); this.allocatedContainerId = allocation.getContainerId(); this.finalAllocationState = allocation.getFinalAllocationState().name(); - root = new ActivityNodeInfo(allocation.getRoot(), groupBy); + } + public String getPartition() { + return partition; } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/TestActivitiesManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/TestActivitiesManager.java index 2bf6b23ed70..92111e46a0b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/TestActivitiesManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/TestActivitiesManager.java @@ -145,10 +145,10 @@ public void testRecordingDifferentNodeActivitiesInMultiThreads() .recordAppActivityWithoutAllocation(activitiesManager, node, randomApp, new SchedulerRequestKey(Priority.newInstance(0), 0, null), - ActivityDiagnosticConstant.FAIL_TO_ALLOCATE, - ActivityState.REJECTED); + ActivityDiagnosticConstant.NODE_IS_BLACKLISTED, + ActivityState.REJECTED, ActivityLevel.NODE); ActivitiesLogger.NODE - .finishNodeUpdateRecording(activitiesManager, node.getNodeID()); + .finishNodeUpdateRecording(activitiesManager, node.getNodeID(), ""); return null; }; futures.add(threadPoolExecutor.submit(task)); @@ -190,10 +190,10 @@ public void testRecordingSchedulerActivitiesForMultiNodesInMultiThreads() .recordAppActivityWithoutAllocation(activitiesManager, node, randomApp, new SchedulerRequestKey(Priority.newInstance(0), 0, null), - ActivityDiagnosticConstant.FAIL_TO_ALLOCATE, - ActivityState.REJECTED); + ActivityDiagnosticConstant.NODE_IS_BLACKLISTED, + ActivityState.REJECTED, ActivityLevel.NODE); ActivitiesLogger.NODE.finishNodeUpdateRecording(activitiesManager, - ActivitiesManager.EMPTY_NODE_ID); + ActivitiesManager.EMPTY_NODE_ID, ""); return null; }; futures.add(threadPoolExecutor.submit(task)); @@ -231,13 +231,13 @@ public void testRecordingAppActivitiesInMultiThreads() .recordAppActivityWithoutAllocation(activitiesManager, node, randomApp, new SchedulerRequestKey(Priority.newInstance(0), 0, null), - ActivityDiagnosticConstant.FAIL_TO_ALLOCATE, - ActivityState.REJECTED); + ActivityDiagnosticConstant.NODE_IS_BLACKLISTED, + ActivityState.REJECTED, ActivityLevel.NODE); } ActivitiesLogger.APP - .finishAllocatedAppAllocationRecording(activitiesManager, - randomApp.getApplicationId(), null, ActivityState.SKIPPED, - ActivityDiagnosticConstant.SKIPPED_ALL_PRIORITIES); + .finishSkippedAppAllocationRecording(activitiesManager, + randomApp.getApplicationId(), ActivityState.SKIPPED, + ActivityDiagnosticConstant.EMPTY); return null; }; futures.add(threadPoolExecutor.submit(task)); @@ -280,12 +280,12 @@ public void testAppActivitiesTTL() throws Exception { ActivitiesLogger.APP .recordAppActivityWithoutAllocation(newActivitiesManager, node, app, new SchedulerRequestKey(Priority.newInstance(0), 0, null), - ActivityDiagnosticConstant.FAIL_TO_ALLOCATE, - ActivityState.REJECTED); + ActivityDiagnosticConstant.NODE_IS_BLACKLISTED, + ActivityState.REJECTED, ActivityLevel.NODE); ActivitiesLogger.APP - .finishAllocatedAppAllocationRecording(newActivitiesManager, - app.getApplicationId(), null, ActivityState.SKIPPED, - ActivityDiagnosticConstant.SKIPPED_ALL_PRIORITIES); + .finishSkippedAppAllocationRecording(newActivitiesManager, + app.getApplicationId(), ActivityState.SKIPPED, + ActivityDiagnosticConstant.EMPTY); } AppActivitiesInfo appActivitiesInfo = newActivitiesManager .getAppActivitiesInfo(app.getApplicationId(), null, null, null, -1, @@ -319,13 +319,13 @@ public void testAppActivitiesPerformance() { activitiesManager .addSchedulingActivityForApp(app.getApplicationId(), null, "0", ActivityState.SKIPPED, - ActivityDiagnosticConstant.FAIL_TO_ALLOCATE, "container", - nodeId, "0"); + ActivityDiagnosticConstant.NODE_IS_BLACKLISTED, + ActivityLevel.NODE, nodeId, "0"); } ActivitiesLogger.APP - .finishAllocatedAppAllocationRecording(activitiesManager, - app.getApplicationId(), null, ActivityState.SKIPPED, - ActivityDiagnosticConstant.SKIPPED_ALL_PRIORITIES); + .finishSkippedAppAllocationRecording(activitiesManager, + app.getApplicationId(), ActivityState.SKIPPED, + ActivityDiagnosticConstant.EMPTY); } // It often take a longer time for the first query, ignore this distraction @@ -341,11 +341,11 @@ public void testAppActivitiesPerformance() { Assert.assertEquals(numActivities, appActivitiesInfo.getAllocations().size()); Assert.assertEquals(1, - appActivitiesInfo.getAllocations().get(0).getRequestAllocation() + appActivitiesInfo.getAllocations().get(0).getRequestAllocations() .size()); Assert.assertEquals(numNodes, - appActivitiesInfo.getAllocations().get(0).getRequestAllocation() - .get(0).getAllocationAttempt().size()); + appActivitiesInfo.getAllocations().get(0).getRequestAllocations() + .get(0).getAllocationAttempts().size()); return null; }; testManyTimes("Getting normal app activities", normalSupplier, @@ -359,14 +359,14 @@ public void testAppActivitiesPerformance() { Assert.assertEquals(numActivities, appActivitiesInfo.getAllocations().size()); Assert.assertEquals(1, - appActivitiesInfo.getAllocations().get(0).getRequestAllocation() + appActivitiesInfo.getAllocations().get(0).getRequestAllocations() .size()); Assert.assertEquals(1, - appActivitiesInfo.getAllocations().get(0).getRequestAllocation() - .get(0).getAllocationAttempt().size()); + appActivitiesInfo.getAllocations().get(0).getRequestAllocations() + .get(0).getAllocationAttempts().size()); Assert.assertEquals(numNodes, - appActivitiesInfo.getAllocations().get(0).getRequestAllocation() - .get(0).getAllocationAttempt().get(0).getNodeIds().size()); + appActivitiesInfo.getAllocations().get(0).getRequestAllocations() + .get(0).getAllocationAttempts().get(0).getNodeIds().size()); return null; }; testManyTimes("Getting aggregated app activities", aggregatedSupplier, @@ -379,14 +379,14 @@ public void testAppActivitiesPerformance() { RMWSConsts.ActivitiesGroupBy.DIAGNOSTIC, -1, true, 100); Assert.assertEquals(1, appActivitiesInfo.getAllocations().size()); Assert.assertEquals(1, - appActivitiesInfo.getAllocations().get(0).getRequestAllocation() + appActivitiesInfo.getAllocations().get(0).getRequestAllocations() .size()); Assert.assertEquals(1, - appActivitiesInfo.getAllocations().get(0).getRequestAllocation() - .get(0).getAllocationAttempt().size()); + appActivitiesInfo.getAllocations().get(0).getRequestAllocations() + .get(0).getAllocationAttempts().size()); Assert.assertEquals(numNodes, - appActivitiesInfo.getAllocations().get(0).getRequestAllocation() - .get(0).getAllocationAttempt().get(0).getNodeIds().size()); + appActivitiesInfo.getAllocations().get(0).getRequestAllocations() + .get(0).getAllocationAttempts().get(0).getNodeIds().size()); return null; }; testManyTimes("Getting summarized app activities", summarizedSupplier, diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/ActivitiesTestUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/ActivitiesTestUtils.java index 666e5fe9a5d..e862d6c285b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/ActivitiesTestUtils.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/ActivitiesTestUtils.java @@ -18,6 +18,7 @@ package org.apache.hadoop.yarn.server.resourcemanager.webapp; +import com.google.common.collect.Lists; import com.sun.jersey.api.client.ClientResponse; import com.sun.jersey.api.client.WebResource; import org.apache.hadoop.http.JettyUtils; @@ -43,6 +44,7 @@ import java.util.function.Predicate; import java.util.regex.Matcher; import java.util.regex.Pattern; +import java.util.stream.Collectors; import static org.junit.Assert.assertEquals; @@ -52,7 +54,7 @@ public final class ActivitiesTestUtils { public static final String INSUFFICIENT_RESOURCE_DIAGNOSTIC_PREFIX = - ActivityDiagnosticConstant.NOT_SUFFICIENT_RESOURCE + ActivityDiagnosticConstant.NODE_DO_NOT_HAVE_SUFFICIENT_RESOURCE + ", " + GenericDiagnosticsCollector.RESOURCE_DIAGNOSTICS_PREFIX; public static final String UNMATCHED_PARTITION_OR_PC_DIAGNOSTIC_PREFIX = @@ -60,12 +62,49 @@ NODE_DO_NOT_MATCH_PARTITION_OR_PLACEMENT_CONSTRAINTS + ", " + GenericDiagnosticsCollector.PLACEMENT_CONSTRAINT_DIAGNOSTICS_PREFIX; + /* + * Field names in response of scheduler/app activities. + */ + public static final String FN_ACT_ALLOCATIONS = "allocations"; + + public static final String FN_ACT_DIAGNOSTIC = "diagnostic"; + + public static final String FN_ACT_ACTIVITY_STATE = "activityState"; + + public static final String FN_ACT_FINAL_ALLOCATION_STATE = + "finalAllocationState"; + + public static final String FN_ACT_NODE_ID = "nodeId"; + + public static final String FN_ACT_NODE_IDS = "nodeIds"; + + public static final String FN_ACT_COUNT = "count"; + + public static final String FN_ACT_APP_PRIORITY = "appPriority"; + + public static final String FN_ACT_REQUEST_PRIORITY = "requestPriority"; + + public static final String FN_ACT_ALLOCATION_REQUEST_ID = + "allocationRequestId"; + + public static final String FN_APP_ACT_REQUESTS = "requestAllocations"; + + public static final String FN_APP_ACT_ALLOCATION_ATTEMPTS = + "allocationAttempts"; + + public static final String FN_SCHEDULER_ACT_NAME = "name"; + + public static final String FN_SCHEDULER_ACT_ROOT = "root"; + + public static final String FN_SCHEDULER_ACT_CHILDREN = "children"; + private ActivitiesTestUtils(){} public static List findInAllocations(JSONObject allocationObj, Predicate p) throws JSONException { List target = new ArrayList<>(); - recursiveFindObj(allocationObj.getJSONObject("root"), p, target); + recursiveFindObj(allocationObj.getJSONObject(FN_SCHEDULER_ACT_ROOT), p, + target); return target; } @@ -74,14 +113,14 @@ private static void recursiveFindObj(JSONObject obj, Predicate p, if (p.test(obj)) { target.add(obj); } - if (obj.has("children")) { - JSONArray childrenObjs = obj.optJSONArray("children"); + if (obj.has(FN_SCHEDULER_ACT_CHILDREN)) { + JSONArray childrenObjs = obj.optJSONArray(FN_SCHEDULER_ACT_CHILDREN); if (childrenObjs != null) { for (int i = 0; i < childrenObjs.length(); i++) { recursiveFindObj(childrenObjs.getJSONObject(i), p, target); } } else { - JSONObject childrenObj = obj.optJSONObject("children"); + JSONObject childrenObj = obj.optJSONObject(FN_SCHEDULER_ACT_CHILDREN); recursiveFindObj(childrenObj, p, target); } } @@ -103,17 +142,18 @@ public static SchedulingRequest schedulingRequest(int numContainers, public static void verifyNumberOfNodes(JSONObject allocation, int expectValue) throws Exception { - if (allocation.isNull("root")) { + if (allocation.isNull(FN_SCHEDULER_ACT_ROOT)) { assertEquals("State of allocation is wrong", expectValue, 0); } else { assertEquals("State of allocation is wrong", expectValue, - 1 + getNumberOfNodes(allocation.getJSONObject("root"))); + 1 + getNumberOfNodes( + allocation.getJSONObject(FN_SCHEDULER_ACT_ROOT))); } } public static int getNumberOfNodes(JSONObject allocation) throws Exception { - if (!allocation.isNull("children")) { - Object object = allocation.get("children"); + if (!allocation.isNull(FN_SCHEDULER_ACT_CHILDREN)) { + Object object = allocation.get(FN_SCHEDULER_ACT_CHILDREN); if (object.getClass() == JSONObject.class) { return 1 + getNumberOfNodes((JSONObject) object); } else { @@ -137,10 +177,10 @@ public static void verifyStateOfAllocations(JSONObject allocation, public static void verifyNumberOfAllocations(JSONObject json, int expectValue) throws Exception { - if (json.isNull("allocations")) { + if (json.isNull(FN_ACT_ALLOCATIONS)) { assertEquals("Number of allocations is wrong", expectValue, 0); } else { - Object object = json.get("allocations"); + Object object = json.get(FN_ACT_ALLOCATIONS); if (object.getClass() == JSONObject.class) { assertEquals("Number of allocations is wrong", expectValue, 1); } else if (object.getClass() == JSONArray.class) { @@ -153,31 +193,32 @@ public static void verifyNumberOfAllocations(JSONObject json, int expectValue) public static void verifyQueueOrder(JSONObject json, String expectOrder) throws Exception { String order = ""; - if (!json.isNull("root")) { - JSONObject root = json.getJSONObject("root"); - order = root.getString("name") + "-" + getQueueOrder(root); + if (!json.isNull(FN_SCHEDULER_ACT_ROOT)) { + JSONObject root = json.getJSONObject(FN_SCHEDULER_ACT_ROOT); + order = root.getString(FN_SCHEDULER_ACT_NAME) + "-" + getQueueOrder(root); } assertEquals("Order of queue is wrong", expectOrder, order.substring(0, order.length() - 1)); } public static String getQueueOrder(JSONObject node) throws Exception { - if (!node.isNull("children")) { - Object children = node.get("children"); + if (!node.isNull(FN_SCHEDULER_ACT_CHILDREN)) { + Object children = node.get(FN_SCHEDULER_ACT_CHILDREN); if (children.getClass() == JSONObject.class) { - if (!((JSONObject) children).isNull("appPriority")) { + if (!((JSONObject) children).isNull(FN_ACT_APP_PRIORITY)) { return ""; } - return ((JSONObject) children).getString("name") + "-" + getQueueOrder( - (JSONObject) children); + return ((JSONObject) children).getString(FN_SCHEDULER_ACT_NAME) + "-" + + getQueueOrder((JSONObject) children); } else if (children.getClass() == JSONArray.class) { String order = ""; for (int i = 0; i < ((JSONArray) children).length(); i++) { JSONObject child = (JSONObject) ((JSONArray) children).get(i); - if (!child.isNull("appPriority")) { + if (!child.isNull(FN_ACT_APP_PRIORITY)) { return ""; } - order += (child.getString("name") + "-" + getQueueOrder(child)); + order += (child.getString(FN_SCHEDULER_ACT_NAME) + "-" + + getQueueOrder(child)); } return order; } @@ -185,12 +226,46 @@ public static String getQueueOrder(JSONObject node) throws Exception { return ""; } + public static List getSubNodesFromJson(JSONObject json, + String[] hierachicalFieldNames) { + List results = Lists.newArrayList(json); + for (String fieldName : hierachicalFieldNames) { + results = results.stream().filter(e -> e.has(fieldName)) + .flatMap(e -> getJSONObjects(e, fieldName).stream()) + .collect(Collectors.toList()); + if (results.isEmpty()) { + return results; + } + } + return results; + } + + private static List getJSONObjects(JSONObject json, + String fieldName) { + List objects = new ArrayList<>(); + if (json.has(fieldName)) { + try { + Object tmpObj = json.get(fieldName); + if (tmpObj.getClass() == JSONObject.class) { + objects.add((JSONObject) tmpObj); + } else if (tmpObj.getClass() == JSONArray.class) { + for (int i = 0; i < ((JSONArray) tmpObj).length(); i++) { + objects.add(((JSONArray) tmpObj).getJSONObject(i)); + } + } + } catch (JSONException e) { + throw new RuntimeException(e); + } + } + return objects; + } + public static void verifyNumberOfAllocationAttempts(JSONObject allocation, int expectValue) throws Exception { - if (allocation.isNull("allocationAttempt")) { + if (allocation.isNull(FN_APP_ACT_ALLOCATION_ATTEMPTS)) { assertEquals("Number of allocation attempts is wrong", expectValue, 0); } else { - Object object = allocation.get("allocationAttempt"); + Object object = allocation.get(FN_APP_ACT_ALLOCATION_ATTEMPTS); if (object.getClass() == JSONObject.class) { assertEquals("Number of allocations attempts is wrong", expectValue, 1); } else if (object.getClass() == JSONArray.class) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesForCSWithPartitions.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesForCSWithPartitions.java index 95dffce2275..7c7e7be081c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesForCSWithPartitions.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesForCSWithPartitions.java @@ -18,26 +18,45 @@ package org.apache.hadoop.yarn.server.resourcemanager.webapp; +import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.FN_ACT_ACTIVITY_STATE; +import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.FN_ACT_ALLOCATIONS; +import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.FN_ACT_DIAGNOSTIC; +import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.FN_SCHEDULER_ACT_NAME; +import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.verifyNumberOfAllocations; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; import java.io.StringReader; +import java.util.Arrays; import java.util.HashSet; +import java.util.List; import java.util.Set; +import java.util.function.Predicate; import javax.ws.rs.core.MediaType; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Sets; import org.apache.hadoop.http.JettyUtils; +import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.NodeLabel; +import org.apache.hadoop.yarn.api.records.Priority; +import org.apache.hadoop.yarn.api.records.ResourceRequest; import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.server.resourcemanager.MockAM; +import org.apache.hadoop.yarn.server.resourcemanager.MockNM; import org.apache.hadoop.yarn.server.resourcemanager.MockRM; import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager; import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.activities.ActivityDiagnosticConstant; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.activities.ActivityState; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacitySchedulerConfiguration; +import org.apache.hadoop.yarn.util.resource.Resources; import org.apache.hadoop.yarn.webapp.GenericExceptionHandler; import org.apache.hadoop.yarn.webapp.GuiceServletConfig; import org.apache.hadoop.yarn.webapp.JerseyTestBase; @@ -242,6 +261,82 @@ public void testSchedulerPartitionsXML() throws JSONException, Exception { verifySchedulerInfoXML(dom); } + @Test + public void testPartitionInSchedulerActivities() throws Exception { + rm.start(); + rm.getRMContext().getNodeLabelManager().addLabelsToNode(ImmutableMap + .of(NodeId.newInstance("127.0.0.1", 0), Sets.newHashSet(LABEL_LX))); + + MockNM nm1 = new MockNM("127.0.0.1:1234", 2 * 1024, + rm.getResourceTrackerService()); + MockNM nm2 = new MockNM("127.0.0.2:1234", 2 * 1024, + rm.getResourceTrackerService()); + nm1.registerNode(); + nm2.registerNode(); + + try { + RMApp app1 = rm.submitApp(1024, "app1", "user1", null, QUEUE_B, LABEL_LX); + MockAM am1 = MockRM.launchAndRegisterAM(app1, rm, nm1); + am1.allocate(Arrays.asList( + ResourceRequest.newBuilder().priority(Priority.UNDEFINED) + .resourceName("*").nodeLabelExpression(LABEL_LX) + .capability(Resources.createResource(2048)).numContainers(1) + .build()), null); + + WebResource sr = resource().path(RMWSConsts.RM_WEB_SERVICE_PATH) + .path(RMWSConsts.SCHEDULER_ACTIVITIES); + ActivitiesTestUtils.requestWebResource(sr, null); + + nm1.nodeHeartbeat(true); + Thread.sleep(1000); + + JSONObject schedulerActivitiesJson = + ActivitiesTestUtils.requestWebResource(sr, null); + + /* + * verify scheduler activities + */ + verifyNumberOfAllocations(schedulerActivitiesJson, 1); + // verify queue Qb + Predicate findQueueBPred = + (obj) -> obj.optString(FN_SCHEDULER_ACT_NAME).equals(QUEUE_B); + List queueBObj = ActivitiesTestUtils.findInAllocations( + schedulerActivitiesJson.getJSONObject(FN_ACT_ALLOCATIONS), + findQueueBPred); + assertEquals(1, queueBObj.size()); + assertEquals(ActivityState.REJECTED.name(), + queueBObj.get(0).optString(FN_ACT_ACTIVITY_STATE)); + assertEquals(ActivityDiagnosticConstant.QUEUE_DO_NOT_HAVE_ENOUGH_HEADROOM + + " from " + am1.getApplicationAttemptId().getApplicationId(), + queueBObj.get(0).optString(FN_ACT_DIAGNOSTIC)); + // verify queue Qa + Predicate findQueueAPred = + (obj) -> obj.optString(FN_SCHEDULER_ACT_NAME).equals(QUEUE_A); + List queueAObj = ActivitiesTestUtils.findInAllocations( + schedulerActivitiesJson.getJSONObject(FN_ACT_ALLOCATIONS), + findQueueAPred); + assertEquals(1, queueAObj.size()); + assertEquals(ActivityState.REJECTED.name(), + queueAObj.get(0).optString(FN_ACT_ACTIVITY_STATE)); + assertEquals( + ActivityDiagnosticConstant.QUEUE_NOT_ABLE_TO_ACCESS_PARTITION, + queueAObj.get(0).optString(FN_ACT_DIAGNOSTIC)); + // verify queue Qc + Predicate findQueueCPred = + (obj) -> obj.optString(FN_SCHEDULER_ACT_NAME).equals(QUEUE_C); + List queueCObj = ActivitiesTestUtils.findInAllocations( + schedulerActivitiesJson.getJSONObject(FN_ACT_ALLOCATIONS), + findQueueCPred); + assertEquals(1, queueCObj.size()); + assertEquals(ActivityState.SKIPPED.name(), + queueCObj.get(0).optString(FN_ACT_ACTIVITY_STATE)); + assertEquals(ActivityDiagnosticConstant.QUEUE_DO_NOT_NEED_MORE_RESOURCE, + queueCObj.get(0).optString(FN_ACT_DIAGNOSTIC)); + } finally { + rm.stop(); + } + } + private void verifySchedulerInfoXML(Document dom) throws Exception { NodeList scheduler = dom.getElementsByTagName("scheduler"); assertEquals("incorrect number of elements", 1, scheduler.getLength()); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesSchedulerActivities.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesSchedulerActivities.java index 8bdecb769d0..b17583f2d3a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesSchedulerActivities.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesSchedulerActivities.java @@ -22,6 +22,8 @@ import com.sun.jersey.api.client.WebResource; import com.sun.jersey.core.util.MultivaluedMapImpl; import org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.activities.ActivityDiagnosticConstant; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.activities.ActivityState; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.http.JettyUtils; @@ -54,6 +56,19 @@ import static org.apache.hadoop.yarn.api.resource.PlacementConstraints.NODE; import static org.apache.hadoop.yarn.api.resource.PlacementConstraints.PlacementTargets.allocationTag; +import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.FN_ACT_ALLOCATIONS; +import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.FN_ACT_ALLOCATION_REQUEST_ID; +import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.FN_ACT_ACTIVITY_STATE; +import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.FN_ACT_DIAGNOSTIC; +import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.FN_ACT_FINAL_ALLOCATION_STATE; +import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.FN_ACT_NODE_ID; +import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.FN_ACT_NODE_IDS; +import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.FN_ACT_REQUEST_PRIORITY; +import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.FN_APP_ACT_ALLOCATION_ATTEMPTS; +import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.FN_APP_ACT_REQUESTS; +import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.FN_SCHEDULER_ACT_CHILDREN; +import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.FN_SCHEDULER_ACT_NAME; +import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.FN_SCHEDULER_ACT_ROOT; import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.INSUFFICIENT_RESOURCE_DIAGNOSTIC_PREFIX; import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.UNMATCHED_PARTITION_OR_PC_DIAGNOSTIC_PREFIX; import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.verifyNumberOfAllocationAttempts; @@ -62,6 +77,7 @@ import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.verifyQueueOrder; import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.verifyStateOfAllocations; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; @@ -117,9 +133,10 @@ public void testAssignMultipleContainersPerNodeHeartbeat() // Collection logic of scheduler activities changed after YARN-9313, // only one allocation should be recorded for all scenarios. verifyNumberOfAllocations(json, 1); - verifyStateOfAllocations(json.getJSONObject("allocations"), - "finalAllocationState", "ALLOCATED"); - verifyQueueOrder(json.getJSONObject("allocations"), "root-a-b-b2-b3-b1"); + verifyStateOfAllocations(json.getJSONObject(FN_ACT_ALLOCATIONS), + FN_ACT_FINAL_ALLOCATION_STATE, "ALLOCATED"); + verifyQueueOrder(json.getJSONObject(FN_ACT_ALLOCATIONS), + "root-a-b-b2-b3-b1"); } finally { rm.stop(); } @@ -167,7 +184,13 @@ public void testAssignWithoutAvailableResource() throws Exception { response.getType().toString()); json = response.getEntity(JSONObject.class); - verifyNumberOfAllocations(json, 0); + // verify scheduler activities + verifyNumberOfAllocations(json, 1); + JSONObject rootObj = json.getJSONObject(FN_ACT_ALLOCATIONS) + .getJSONObject(FN_SCHEDULER_ACT_ROOT); + assertTrue(rootObj.optString(FN_ACT_DIAGNOSTIC).startsWith( + ActivityDiagnosticConstant. + INIT_CHECK_SINGLE_NODE_RESOURCE_INSUFFICIENT)); } finally { rm.stop(); } @@ -301,10 +324,12 @@ public void testReserveNewContainer() throws Exception { verifyNumberOfAllocations(json, 1); - verifyQueueOrder(json.getJSONObject("allocations"), "root-a-b-b3-b1"); + verifyQueueOrder(json.getJSONObject(FN_ACT_ALLOCATIONS), + "root-a-b-b3-b1"); - JSONObject allocations = json.getJSONObject("allocations"); - verifyStateOfAllocations(allocations, "finalAllocationState", "RESERVED"); + JSONObject allocations = json.getJSONObject(FN_ACT_ALLOCATIONS); + verifyStateOfAllocations(allocations, FN_ACT_FINAL_ALLOCATION_STATE, + "RESERVED"); // Do a node heartbeat again without releasing container from app2 r = resource(); @@ -329,10 +354,10 @@ public void testReserveNewContainer() throws Exception { verifyNumberOfAllocations(json, 1); - verifyQueueOrder(json.getJSONObject("allocations"), "b1"); - - allocations = json.getJSONObject("allocations"); - verifyStateOfAllocations(allocations, "finalAllocationState", "SKIPPED"); + JSONObject allocation = json.getJSONObject(FN_ACT_ALLOCATIONS); + verifyQueueOrder(allocation, "b1"); + verifyStateOfAllocations(allocation, FN_ACT_FINAL_ALLOCATION_STATE, + "RESERVED"); // Finish application 2 CapacityScheduler cs = (CapacityScheduler) rm.getResourceScheduler(); @@ -365,10 +390,10 @@ public void testReserveNewContainer() throws Exception { verifyNumberOfAllocations(json, 1); - verifyQueueOrder(json.getJSONObject("allocations"), "b1"); + verifyQueueOrder(json.getJSONObject(FN_ACT_ALLOCATIONS), "b1"); - allocations = json.getJSONObject("allocations"); - verifyStateOfAllocations(allocations, "finalAllocationState", + allocations = json.getJSONObject(FN_ACT_ALLOCATIONS); + verifyStateOfAllocations(allocations, FN_ACT_FINAL_ALLOCATION_STATE, "ALLOCATED_FROM_RESERVED"); } finally { rm.stop(); @@ -411,14 +436,14 @@ public void testActivityJSON() throws Exception { verifyNumberOfAllocations(json, 1); - JSONObject allocations = json.getJSONObject("allocations"); - verifyStateOfAllocations(allocations, "finalAllocationState", + JSONObject allocations = json.getJSONObject(FN_ACT_ALLOCATIONS); + verifyStateOfAllocations(allocations, FN_ACT_FINAL_ALLOCATION_STATE, "ALLOCATED"); // Increase number of nodes to 6 since request node has been added verifyNumberOfNodes(allocations, 6); - verifyQueueOrder(json.getJSONObject("allocations"), "root-b-b1"); + verifyQueueOrder(json.getJSONObject(FN_ACT_ALLOCATIONS), "root-b-b1"); } finally { rm.stop(); } @@ -451,22 +476,25 @@ public void testAppActivityJSON() throws Exception { //Check app activities verifyNumberOfAllocations(json, 1); - JSONObject allocations = json.getJSONObject("allocations"); - verifyStateOfAllocations(allocations, "allocationState", "ALLOCATED"); + JSONObject allocations = json.getJSONObject(FN_ACT_ALLOCATIONS); + verifyStateOfAllocations(allocations, FN_ACT_ACTIVITY_STATE, + "ALLOCATED"); //Check request allocation JSONObject requestAllocationObj = - allocations.getJSONObject("requestAllocation"); - verifyStateOfAllocations(requestAllocationObj, "allocationState", + allocations.getJSONObject(FN_APP_ACT_REQUESTS); + verifyStateOfAllocations(requestAllocationObj, FN_ACT_ACTIVITY_STATE, "ALLOCATED"); - assertEquals("0", requestAllocationObj.optString("requestPriority")); - assertEquals("-1", requestAllocationObj.optString("allocationRequestId")); + assertEquals("0", + requestAllocationObj.optString(FN_ACT_REQUEST_PRIORITY)); + assertEquals("-1", + requestAllocationObj.optString(FN_ACT_ALLOCATION_REQUEST_ID)); //Check allocation attempts verifyNumberOfAllocationAttempts(requestAllocationObj, 1); JSONObject allocationAttemptObj = - requestAllocationObj.getJSONObject("allocationAttempt"); - verifyStateOfAllocations(allocationAttemptObj, "allocationState", + requestAllocationObj.getJSONObject(FN_APP_ACT_ALLOCATION_ATTEMPTS); + verifyStateOfAllocations(allocationAttemptObj, FN_ACT_ACTIVITY_STATE, "ALLOCATED"); - assertNotNull(allocationAttemptObj.get("nodeId")); + assertNotNull(allocationAttemptObj.get(FN_ACT_NODE_ID)); } finally { rm.stop(); } @@ -508,10 +536,10 @@ public void testAppAssignMultipleContainersPerNodeHeartbeat() verifyNumberOfAllocations(json, 10); - JSONArray allocations = json.getJSONArray("allocations"); + JSONArray allocations = json.getJSONArray(FN_ACT_ALLOCATIONS); for (int i = 0; i < allocations.length(); i++) { verifyStateOfAllocations(allocations.getJSONObject(i), - "allocationState", "ALLOCATED"); + FN_ACT_ACTIVITY_STATE, "ALLOCATED"); } } finally { rm.stop(); @@ -664,7 +692,7 @@ public void testInsufficientResourceDiagnostic() response.getType().toString()); JSONObject json = response.getEntity(JSONObject.class); assertEquals("waiting for next allocation", - json.getString("diagnostic")); + json.getString(FN_ACT_DIAGNOSTIC)); am1.allocate(Arrays.asList(ResourceRequest .newInstance(Priority.UNDEFINED, "*", @@ -682,15 +710,16 @@ public void testInsufficientResourceDiagnostic() json = response.getEntity(JSONObject.class); verifyNumberOfAllocations(json, 1); - JSONObject allocationObj = json.getJSONObject("allocations"); + JSONObject allocationObj = json.getJSONObject(FN_ACT_ALLOCATIONS); // check diagnostics Predicate findReqPred = - (obj) -> obj.optString("name").equals("request_-1_-1"); + (obj) -> obj.optString(FN_SCHEDULER_ACT_NAME).equals("request_-1_-1"); List app2ReqObjs = ActivitiesTestUtils.findInAllocations(allocationObj, findReqPred); assertEquals(1, app2ReqObjs.size()); - JSONObject reqChild = app2ReqObjs.get(0).getJSONObject("children"); - assertTrue(reqChild.getString("diagnostic") + JSONObject reqChild = + app2ReqObjs.get(0).getJSONObject(FN_SCHEDULER_ACT_CHILDREN); + assertTrue(reqChild.getString(FN_ACT_DIAGNOSTIC) .contains(INSUFFICIENT_RESOURCE_DIAGNOSTIC_PREFIX)); } finally { rm.stop(); @@ -728,7 +757,7 @@ public void testPlacementConstraintDiagnostic() response.getType().toString()); JSONObject json = response.getEntity(JSONObject.class); assertEquals("waiting for next allocation", - json.getString("diagnostic")); + json.getString(FN_ACT_DIAGNOSTIC)); // trigger scheduling cs.handle(new NodeUpdateSchedulerEvent( @@ -742,15 +771,16 @@ public void testPlacementConstraintDiagnostic() json = response.getEntity(JSONObject.class); verifyNumberOfAllocations(json, 1); - JSONObject allocationObj = json.getJSONObject("allocations"); + JSONObject allocationObj = json.getJSONObject(FN_ACT_ALLOCATIONS); // check diagnostics Predicate findReqPred = - (obj) -> obj.optString("name").equals("request_1_1"); + (obj) -> obj.optString(FN_SCHEDULER_ACT_NAME).equals("request_1_1"); List reqObjs = ActivitiesTestUtils.findInAllocations(allocationObj, findReqPred); assertEquals(1, reqObjs.size()); - JSONObject reqChild = reqObjs.get(0).getJSONObject("children"); - assertTrue(reqChild.getString("diagnostic") + JSONObject reqChild = + reqObjs.get(0).getJSONObject(FN_SCHEDULER_ACT_CHILDREN); + assertTrue(reqChild.getString(FN_ACT_DIAGNOSTIC) .contains(UNMATCHED_PARTITION_OR_PC_DIAGNOSTIC_PREFIX)); } finally { rm.stop(); @@ -776,7 +806,7 @@ public void testAppInsufficientResourceDiagnostic() MultivaluedMapImpl params = new MultivaluedMapImpl(); JSONObject json = ActivitiesTestUtils.requestWebResource(r, params); assertEquals("waiting for display", - json.getString("diagnostic")); + json.getString(FN_ACT_DIAGNOSTIC)); // am1 asks for 1 * 5GB container am1.allocate(Arrays.asList(ResourceRequest @@ -788,15 +818,15 @@ public void testAppInsufficientResourceDiagnostic() json = ActivitiesTestUtils.requestWebResource(r, params); verifyNumberOfAllocations(json, 1); - JSONObject allocationObj = json.getJSONObject("allocations"); + JSONObject allocationObj = json.getJSONObject(FN_ACT_ALLOCATIONS); JSONObject requestAllocationObj = - allocationObj.getJSONObject("requestAllocation"); + allocationObj.getJSONObject(FN_APP_ACT_REQUESTS); verifyNumberOfAllocationAttempts(requestAllocationObj, 1); JSONObject allocationAttemptObj = - requestAllocationObj.getJSONObject("allocationAttempt"); - verifyStateOfAllocations(allocationAttemptObj, "allocationState", + requestAllocationObj.getJSONObject(FN_APP_ACT_ALLOCATION_ATTEMPTS); + verifyStateOfAllocations(allocationAttemptObj, FN_ACT_ACTIVITY_STATE, "SKIPPED"); - assertTrue(allocationAttemptObj.optString("diagnostic") + assertTrue(allocationAttemptObj.optString(FN_ACT_DIAGNOSTIC) .contains(INSUFFICIENT_RESOURCE_DIAGNOSTIC_PREFIX)); } finally { rm.stop(); @@ -822,7 +852,7 @@ public void testAppPlacementConstraintDiagnostic() MultivaluedMapImpl params = new MultivaluedMapImpl(); JSONObject json = ActivitiesTestUtils.requestWebResource(r, params); assertEquals("waiting for display", - json.getString("diagnostic")); + json.getString(FN_ACT_DIAGNOSTIC)); // am1 asks for 1 * 5GB container with PC expression: in,node,foo PlacementConstraint pcExpression = PlacementConstraints @@ -840,15 +870,15 @@ public void testAppPlacementConstraintDiagnostic() json = ActivitiesTestUtils.requestWebResource(r, params); verifyNumberOfAllocations(json, 1); - JSONObject allocationObj = json.getJSONObject("allocations"); + JSONObject allocationObj = json.getJSONObject(FN_ACT_ALLOCATIONS); JSONObject requestAllocationObj = - allocationObj.getJSONObject("requestAllocation"); + allocationObj.getJSONObject(FN_APP_ACT_REQUESTS); verifyNumberOfAllocationAttempts(requestAllocationObj, 1); JSONObject allocationAttemptObj = - requestAllocationObj.getJSONObject("allocationAttempt"); - verifyStateOfAllocations(allocationAttemptObj, "allocationState", + requestAllocationObj.getJSONObject(FN_APP_ACT_ALLOCATION_ATTEMPTS); + verifyStateOfAllocations(allocationAttemptObj, FN_ACT_ACTIVITY_STATE, "SKIPPED"); - assertTrue(allocationAttemptObj.optString("diagnostic") + assertTrue(allocationAttemptObj.optString(FN_ACT_DIAGNOSTIC) .contains(UNMATCHED_PARTITION_OR_PC_DIAGNOSTIC_PREFIX)); } finally { rm.stop(); @@ -873,7 +903,7 @@ public void testAppFilterByRequestPrioritiesAndAllocationRequestIds() MultivaluedMapImpl params = new MultivaluedMapImpl(); JSONObject json = ActivitiesTestUtils.requestWebResource(r, params); assertEquals("waiting for display", - json.getString("diagnostic")); + json.getString(FN_ACT_DIAGNOSTIC)); // am1 asks for 1 * 1GB container with requestPriority=-1 // and allocationRequestId=1 @@ -931,11 +961,11 @@ public void testAppFilterByRequestPrioritiesAndAllocationRequestIds() filterParams2.add(RMWSConsts.REQUEST_PRIORITIES, "-1"); json = ActivitiesTestUtils.requestWebResource(r, filterParams2); verifyNumberOfAllocations(json, 2); - JSONArray allocations = json.getJSONArray("allocations"); + JSONArray allocations = json.getJSONArray(FN_ACT_ALLOCATIONS); for (int i=0; i()); @@ -1016,19 +1046,19 @@ public void testAppLimit() throws Exception { // query all app activities with invalid limit params.putSingle(RMWSConsts.LIMIT, "STRING"); json = ActivitiesTestUtils.requestWebResource(r, params); - assertEquals("limit must be integer!", json.getString("diagnostic")); + assertEquals("limit must be integer!", json.getString(FN_ACT_DIAGNOSTIC)); // query all app activities with limit = 0 params.putSingle(RMWSConsts.LIMIT, "0"); json = ActivitiesTestUtils.requestWebResource(r, params); assertEquals("limit must be greater than 0!", - json.getString("diagnostic")); + json.getString(FN_ACT_DIAGNOSTIC)); // query all app activities with limit < 0 params.putSingle(RMWSConsts.LIMIT, "-3"); json = ActivitiesTestUtils.requestWebResource(r, params); assertEquals("limit must be greater than 0!", - json.getString("diagnostic")); + json.getString(FN_ACT_DIAGNOSTIC)); } finally { rm.stop(); } @@ -1055,14 +1085,15 @@ public void testAppActions() throws Exception { params.add(RMWSConsts.ACTIONS, "get"); params.add(RMWSConsts.ACTIONS, "invalid-action"); JSONObject json = ActivitiesTestUtils.requestWebResource(r, params); - assertTrue(json.getString("diagnostic").startsWith("Got invalid action")); + assertTrue( + json.getString(FN_ACT_DIAGNOSTIC).startsWith("Got invalid action")); /* * testing get action */ params.putSingle(RMWSConsts.ACTIONS, "get"); json = ActivitiesTestUtils.requestWebResource(r, params); - assertEquals("waiting for display", json.getString("diagnostic")); + assertEquals("waiting for display", json.getString(FN_ACT_DIAGNOSTIC)); // trigger scheduling cs.handle(new NodeUpdateSchedulerEvent( @@ -1071,7 +1102,7 @@ public void testAppActions() throws Exception { // app activities won't be recorded params.putSingle(RMWSConsts.ACTIONS, "get"); json = ActivitiesTestUtils.requestWebResource(r, params); - assertEquals("waiting for display", json.getString("diagnostic")); + assertEquals("waiting for display", json.getString(FN_ACT_DIAGNOSTIC)); // trigger scheduling cs.handle(new NodeUpdateSchedulerEvent( @@ -1083,7 +1114,7 @@ public void testAppActions() throws Exception { params.putSingle(RMWSConsts.ACTIONS, "refresh"); json = ActivitiesTestUtils.requestWebResource(r, params); assertEquals("Successfully notified actions: refresh", - json.getString("diagnostic")); + json.getString(FN_ACT_DIAGNOSTIC)); // trigger scheduling cs.handle(new NodeUpdateSchedulerEvent( @@ -1149,7 +1180,7 @@ public void testAppSummary() throws Exception { MultivaluedMapImpl params = new MultivaluedMapImpl(); JSONObject json = ActivitiesTestUtils.requestWebResource(r, params); assertEquals("waiting for display", - json.getString("diagnostic")); + json.getString(FN_ACT_DIAGNOSTIC)); MockAM am1 = MockRM.launchAndRegisterAM(app1, rm, nm1); // am1 asks for 1 * 5GB container @@ -1170,23 +1201,192 @@ public void testAppSummary() throws Exception { // verify that response contains an allocation summary for all nodes verifyNumberOfAllocations(json, 1); - JSONObject allocation = json.getJSONObject("allocations"); + JSONObject allocation = json.getJSONObject(FN_ACT_ALLOCATIONS); JSONObject reqestAllocation = - allocation.getJSONObject("requestAllocation"); - JSONArray attempts = reqestAllocation.getJSONArray("allocationAttempt"); + allocation.getJSONObject(FN_APP_ACT_REQUESTS); + JSONArray attempts = reqestAllocation.getJSONArray( + FN_APP_ACT_ALLOCATION_ATTEMPTS); assertEquals(2, attempts.length()); for (int i = 0; i < attempts.length(); i++) { JSONObject attempt = attempts.getJSONObject(i); - if (attempt.getString("allocationState").equals("SKIPPED")) { - JSONArray nodeIds = attempt.optJSONArray("nodeIds"); + if (attempt.getString(FN_ACT_ACTIVITY_STATE) + .equals(ActivityState.SKIPPED.name())) { + JSONArray nodeIds = attempt.optJSONArray(FN_ACT_NODE_IDS); assertEquals(2, nodeIds.length()); - } else if (attempt.getString("allocationState").equals("RESERVED")) { + } else if (attempt.getString(FN_ACT_ACTIVITY_STATE) + .equals(ActivityState.RESERVED.name())) { assertEquals(nm1.getNodeId().toString(), - attempt.getString("nodeIds")); + attempt.getString(FN_ACT_NODE_IDS)); } } } finally { rm.stop(); } } + + @Test + public void testNodeSkippedBecauseOfRelaxLocality() throws Exception { + //Start RM so that it accepts app submissions + rm.start(); + + MockNM nm1 = new MockNM("127.0.0.1:1234", 4 * 1024, + rm.getResourceTrackerService()); + MockNM nm2 = new MockNM("127.0.0.2:1234", 4 * 1024, + rm.getResourceTrackerService()); + + nm1.registerNode(); + nm2.registerNode(); + + try { + RMApp app1 = rm.submitApp(10, "app1", "user1", null, "b1"); + MockAM am1 = MockRM.launchAndRegisterAM(app1, rm, nm1); + + am1.allocate(Arrays.asList( + ResourceRequest.newBuilder().priority(Priority.UNDEFINED) + .resourceName("127.0.0.2") + .capability(Resources.createResource(1024)).numContainers(1) + .build(), + ResourceRequest.newBuilder().priority(Priority.UNDEFINED) + .resourceName("/default-rack") + .capability(Resources.createResource(1024)).numContainers(1) + .relaxLocality(false) + .build(), + ResourceRequest.newBuilder().priority(Priority.UNDEFINED) + .resourceName("*") + .capability(Resources.createResource(1024)).numContainers(1) + .relaxLocality(false) + .build()), null); + + WebResource r = resource().path(RMWSConsts.RM_WEB_SERVICE_PATH) + .path(ActivitiesTestUtils.format(RMWSConsts.SCHEDULER_APP_ACTIVITIES, + app1.getApplicationId().toString())); + ActivitiesTestUtils.requestWebResource(r, null); + WebResource sr = resource().path(RMWSConsts.RM_WEB_SERVICE_PATH) + .path(RMWSConsts.SCHEDULER_ACTIVITIES); + ActivitiesTestUtils.requestWebResource(sr, null); + + nm1.nodeHeartbeat(true); + Thread.sleep(1000); + + JSONObject appActivitiesJson = + ActivitiesTestUtils.requestWebResource(r, null); + JSONObject schedulerActivitiesJson = + ActivitiesTestUtils.requestWebResource(sr, null); + + // verify app activities + verifyNumberOfAllocations(appActivitiesJson, 1); + List allocationAttempts = ActivitiesTestUtils + .getSubNodesFromJson(appActivitiesJson, + new String[] {FN_ACT_ALLOCATIONS, FN_APP_ACT_REQUESTS, + FN_APP_ACT_ALLOCATION_ATTEMPTS}); + assertEquals(1, allocationAttempts.size()); + assertEquals( + ActivityDiagnosticConstant.NODE_SKIPPED_BECAUSE_OF_RELAX_LOCALITY, + allocationAttempts.get(0).optString(FN_ACT_DIAGNOSTIC)); + + /* + * verify scheduler activities + */ + verifyNumberOfAllocations(schedulerActivitiesJson, 1); + // verify request activity + Predicate findA1AQueuePred = + (obj) -> obj.optString(FN_SCHEDULER_ACT_NAME).equals("request_-1_-1"); + List reqObjs = ActivitiesTestUtils.findInAllocations( + schedulerActivitiesJson.getJSONObject(FN_ACT_ALLOCATIONS), + findA1AQueuePred); + assertEquals(1, reqObjs.size()); + assertEquals(ActivityState.SKIPPED.name(), + reqObjs.get(0).optString(FN_ACT_ACTIVITY_STATE)); + // verify node activity + JSONObject nodeObj = + reqObjs.get(0).getJSONObject(FN_SCHEDULER_ACT_CHILDREN); + assertEquals(nm1.getNodeId().toString(), + nodeObj.optString(FN_ACT_NODE_ID)); + assertEquals( + ActivityDiagnosticConstant.NODE_SKIPPED_BECAUSE_OF_RELAX_LOCALITY, + nodeObj.optString(FN_ACT_DIAGNOSTIC)); + } finally { + rm.stop(); + } + } + + @Test + public void testQueueSkippedBecauseOfHeadroom() throws Exception { + //Start RM so that it accepts app submissions + rm.start(); + + MockNM nm1 = new MockNM("127.0.0.1:1234", 4 * 1024, + rm.getResourceTrackerService()); + MockNM nm2 = new MockNM("127.0.0.2:1234", 4 * 1024, + rm.getResourceTrackerService()); + nm1.registerNode(); + nm2.registerNode(); + + try { + RMApp app1 = rm.submitApp(10, "app1", "user1", null, "a1a"); + MockAM am1 = MockRM.launchAndRegisterAM(app1, rm, nm1); + + am1.allocate(Arrays.asList( + ResourceRequest.newBuilder().priority(Priority.UNDEFINED) + .resourceName("*").capability(Resources.createResource(3072)) + .numContainers(1).relaxLocality(false).build()), null); + + WebResource r = resource().path(RMWSConsts.RM_WEB_SERVICE_PATH) + .path(ActivitiesTestUtils.format(RMWSConsts.SCHEDULER_APP_ACTIVITIES, + app1.getApplicationId().toString())); + ActivitiesTestUtils.requestWebResource(r, null); + WebResource sr = resource().path(RMWSConsts.RM_WEB_SERVICE_PATH) + .path(RMWSConsts.SCHEDULER_ACTIVITIES); + ActivitiesTestUtils.requestWebResource(sr, null); + + + nm1.nodeHeartbeat(true); + Thread.sleep(1000); + + JSONObject appActivitiesJson = + ActivitiesTestUtils.requestWebResource(r, null); + JSONObject schedulerActivitiesJson = + ActivitiesTestUtils.requestWebResource(sr, null); + + // verify app activities: diagnostic should be attached at request level + // and there should be no allocation attempts at node level + verifyNumberOfAllocations(appActivitiesJson, 1); + List requestAllocations = ActivitiesTestUtils + .getSubNodesFromJson(appActivitiesJson, + new String[] {FN_ACT_ALLOCATIONS, FN_APP_ACT_REQUESTS}); + assertEquals(1, requestAllocations.size()); + assertEquals(ActivityDiagnosticConstant.QUEUE_DO_NOT_HAVE_ENOUGH_HEADROOM, + requestAllocations.get(0).optString(FN_ACT_DIAGNOSTIC)); + assertFalse( + requestAllocations.get(0).has(FN_APP_ACT_ALLOCATION_ATTEMPTS)); + + // verify scheduler activities: diagnostic should be attached at request + // level and queue level + verifyNumberOfAllocations(schedulerActivitiesJson, 1); + // verify at queue level + Predicate findA1AQueuePred = + (obj) -> obj.optString(FN_SCHEDULER_ACT_NAME).equals("a1a"); + List a1aQueueObj = ActivitiesTestUtils.findInAllocations( + schedulerActivitiesJson.getJSONObject(FN_ACT_ALLOCATIONS), + findA1AQueuePred); + assertEquals(1, a1aQueueObj.size()); + assertEquals(ActivityState.REJECTED.name(), + a1aQueueObj.get(0).optString(FN_ACT_ACTIVITY_STATE)); + assertTrue(a1aQueueObj.get(0).optString(FN_ACT_DIAGNOSTIC).startsWith( + ActivityDiagnosticConstant.QUEUE_DO_NOT_HAVE_ENOUGH_HEADROOM)); + // verify at request level + Predicate findReqPred = + (obj) -> obj.optString(FN_SCHEDULER_ACT_NAME).equals("request_-1_-1"); + List reqObj = ActivitiesTestUtils.findInAllocations( + schedulerActivitiesJson.getJSONObject(FN_ACT_ALLOCATIONS), + findReqPred); + assertEquals(1, reqObj.size()); + assertEquals(ActivityState.REJECTED.name(), + reqObj.get(0).optString(FN_ACT_ACTIVITY_STATE)); + assertTrue(reqObj.get(0).optString(FN_ACT_DIAGNOSTIC).startsWith( + ActivityDiagnosticConstant.QUEUE_DO_NOT_HAVE_ENOUGH_HEADROOM)); + } finally { + rm.stop(); + } + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesSchedulerActivitiesWithMultiNodesEnabled.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesSchedulerActivitiesWithMultiNodesEnabled.java index 8998221238a..49b2ca29f43 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesSchedulerActivitiesWithMultiNodesEnabled.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesSchedulerActivitiesWithMultiNodesEnabled.java @@ -57,6 +57,16 @@ import java.util.List; import java.util.function.Predicate; +import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.FN_ACT_ALLOCATIONS; +import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.FN_ACT_ACTIVITY_STATE; +import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.FN_ACT_COUNT; +import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.FN_ACT_DIAGNOSTIC; +import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.FN_ACT_FINAL_ALLOCATION_STATE; +import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.FN_ACT_NODE_IDS; +import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.FN_APP_ACT_ALLOCATION_ATTEMPTS; +import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.FN_APP_ACT_REQUESTS; +import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.FN_SCHEDULER_ACT_CHILDREN; +import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.FN_SCHEDULER_ACT_NAME; import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.INSUFFICIENT_RESOURCE_DIAGNOSTIC_PREFIX; import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.findInAllocations; import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.verifyNumberOfAllocationAttempts; @@ -184,9 +194,9 @@ public void testAssignContainer() throws Exception { verifyNumberOfAllocations(json, 1); - JSONObject allocations = json.getJSONObject("allocations"); + JSONObject allocations = json.getJSONObject(FN_ACT_ALLOCATIONS); verifyStateOfAllocations(allocations, - "finalAllocationState", "ALLOCATED"); + FN_ACT_FINAL_ALLOCATION_STATE, "ALLOCATED"); } finally { rm.stop(); } @@ -225,9 +235,9 @@ public void testSchedulingWithoutPendingRequests() JSONObject json = response.getEntity(JSONObject.class); verifyNumberOfAllocations(json, 1); - JSONObject allocations = json.getJSONObject("allocations"); + JSONObject allocations = json.getJSONObject(FN_ACT_ALLOCATIONS); verifyStateOfAllocations(allocations, - "finalAllocationState", "SKIPPED"); + FN_ACT_FINAL_ALLOCATION_STATE, "SKIPPED"); } finally { rm.stop(); } @@ -254,7 +264,7 @@ public void testAppAssignContainer() throws Exception { app1.getApplicationId().toString())); MultivaluedMapImpl params = new MultivaluedMapImpl(); JSONObject json = ActivitiesTestUtils.requestWebResource(r, params); - assertEquals("waiting for display", json.getString("diagnostic")); + assertEquals("waiting for display", json.getString(FN_ACT_DIAGNOSTIC)); //Trigger scheduling for this app CapacityScheduler cs = (CapacityScheduler) rm.getResourceScheduler(); @@ -267,22 +277,23 @@ public void testAppAssignContainer() throws Exception { verifyNumberOfAllocations(json, 1); - JSONObject allocationObj = json.getJSONObject("allocations"); - verifyStateOfAllocations(allocationObj, "allocationState", "ALLOCATED"); + JSONObject allocationObj = json.getJSONObject(FN_ACT_ALLOCATIONS); + verifyStateOfAllocations(allocationObj, FN_ACT_ACTIVITY_STATE, + "ALLOCATED"); JSONObject requestAllocationObj = - allocationObj.getJSONObject("requestAllocation"); + allocationObj.getJSONObject(FN_APP_ACT_REQUESTS); verifyNumberOfAllocationAttempts(requestAllocationObj, 2); - verifyStateOfAllocations(requestAllocationObj, "allocationState", + verifyStateOfAllocations(requestAllocationObj, FN_ACT_ACTIVITY_STATE, "ALLOCATED"); JSONArray allocationAttemptArray = - requestAllocationObj.getJSONArray("allocationAttempt"); + requestAllocationObj.getJSONArray(FN_APP_ACT_ALLOCATION_ATTEMPTS); JSONObject allocationAttempt1 = allocationAttemptArray.getJSONObject(0); - verifyStateOfAllocations(allocationAttempt1, "allocationState", + verifyStateOfAllocations(allocationAttempt1, FN_ACT_ACTIVITY_STATE, "SKIPPED"); - assertTrue(allocationAttempt1.optString("diagnostic") + assertTrue(allocationAttempt1.optString(FN_ACT_DIAGNOSTIC) .contains(INSUFFICIENT_RESOURCE_DIAGNOSTIC_PREFIX)); JSONObject allocationAttempt2 = allocationAttemptArray.getJSONObject(1); - verifyStateOfAllocations(allocationAttempt2, "allocationState", + verifyStateOfAllocations(allocationAttempt2, FN_ACT_ACTIVITY_STATE, "ALLOCATED"); } finally { rm.stop(); @@ -329,27 +340,29 @@ public void testInsufficientResourceDiagnostic() throws Exception { //Check app activities verifyNumberOfAllocations(json, 1); - JSONObject allocationObj = json.getJSONObject("allocations"); + JSONObject allocationObj = json.getJSONObject(FN_ACT_ALLOCATIONS); //Check diagnostic for request of app1 - Predicate findApp1Pred = (obj) -> obj.optString("name") - .equals(app1.getApplicationId().toString()); + Predicate findApp1Pred = + (obj) -> obj.optString(FN_SCHEDULER_ACT_NAME) + .equals(app1.getApplicationId().toString()); JSONObject app1Obj = findInAllocations(allocationObj, findApp1Pred).get(0); - assertEquals("SKIPPED", app1Obj.optString("allocationState")); + assertEquals("SKIPPED", app1Obj.optString(FN_ACT_ACTIVITY_STATE)); assertEquals(ActivityDiagnosticConstant.APPLICATION_DO_NOT_NEED_RESOURCE, - app1Obj.optString("diagnostic")); + app1Obj.optString(FN_ACT_DIAGNOSTIC)); //Check diagnostic for request of app2 Predicate findApp2ReqPred = - (obj) -> obj.optString("name").equals("request_1_-1"); + (obj) -> obj.optString(FN_SCHEDULER_ACT_NAME).equals("request_1_-1"); List app2ReqObjs = findInAllocations(allocationObj, findApp2ReqPred); assertEquals(1, app2ReqObjs.size()); - JSONArray app2ReqChildren = app2ReqObjs.get(0).getJSONArray("children"); + JSONArray app2ReqChildren = + app2ReqObjs.get(0).getJSONArray(FN_SCHEDULER_ACT_CHILDREN); assertEquals(4, app2ReqChildren.length()); for (int i = 0; i < app2ReqChildren.length(); i++) { JSONObject reqChild = app2ReqChildren.getJSONObject(i); - if (reqChild.getString("allocationState").equals("SKIPPED")) { - String diagnostic = reqChild.getString("diagnostic"); + if (reqChild.getString(FN_ACT_ACTIVITY_STATE).equals("SKIPPED")) { + String diagnostic = reqChild.getString(FN_ACT_DIAGNOSTIC); assertTrue( diagnostic.contains(INSUFFICIENT_RESOURCE_DIAGNOSTIC_PREFIX)); } @@ -378,7 +391,7 @@ public void testAppInsufficientResourceDiagnostic() throws Exception { app1.getApplicationId().toString())); MultivaluedMapImpl params = new MultivaluedMapImpl(); JSONObject json = ActivitiesTestUtils.requestWebResource(r, params); - assertEquals("waiting for display", json.getString("diagnostic")); + assertEquals("waiting for display", json.getString(FN_ACT_DIAGNOSTIC)); //Request two containers with different priority for am1 am1.allocate(Arrays.asList(ResourceRequest @@ -398,29 +411,31 @@ public void testAppInsufficientResourceDiagnostic() throws Exception { //Check app activities json = ActivitiesTestUtils.requestWebResource(r, params); verifyNumberOfAllocations(json, 2); - JSONArray allocationArray = json.getJSONArray("allocations"); + JSONArray allocationArray = json.getJSONArray(FN_ACT_ALLOCATIONS); //Check first activity is for second allocation with RESERVED state JSONObject allocationObj = allocationArray.getJSONObject(0); - verifyStateOfAllocations(allocationObj, "allocationState", "RESERVED"); + verifyStateOfAllocations(allocationObj, FN_ACT_ACTIVITY_STATE, + "RESERVED"); JSONObject requestAllocationObj = - allocationObj.getJSONObject("requestAllocation"); + allocationObj.getJSONObject(FN_APP_ACT_REQUESTS); verifyNumberOfAllocationAttempts(requestAllocationObj, 4); JSONArray allocationAttemptArray = - requestAllocationObj.getJSONArray("allocationAttempt"); + requestAllocationObj.getJSONArray(FN_APP_ACT_ALLOCATION_ATTEMPTS); for (int i=0; i()); @@ -472,27 +488,28 @@ public void testGroupByDiagnostics() throws Exception { //Check activities verifyNumberOfAllocations(json, 1); - JSONObject allocationObj = json.getJSONObject("allocations"); + JSONObject allocationObj = json.getJSONObject(FN_ACT_ALLOCATIONS); //Check diagnostic for request of app1 Predicate findReqPred = - (obj) -> obj.optString("name").equals("request_1_-1"); + (obj) -> obj.optString(FN_SCHEDULER_ACT_NAME).equals("request_1_-1"); List reqObjs = findInAllocations(allocationObj, findReqPred); assertEquals(1, reqObjs.size()); - JSONArray reqChildren = reqObjs.get(0).getJSONArray("children"); + JSONArray reqChildren = + reqObjs.get(0).getJSONArray(FN_SCHEDULER_ACT_CHILDREN); assertEquals(2, reqChildren.length()); for (int i = 0; i < reqChildren.length(); i++) { JSONObject reqChild = reqChildren.getJSONObject(i); - if (reqChild.getString("allocationState") + if (reqChild.getString(FN_ACT_ACTIVITY_STATE) .equals(AllocationState.SKIPPED.name())) { - assertEquals("3", reqChild.getString("count")); - assertEquals(3, reqChild.getJSONArray("nodeIds").length()); - assertTrue(reqChild.optString("diagnostic") + assertEquals("3", reqChild.getString(FN_ACT_COUNT)); + assertEquals(3, reqChild.getJSONArray(FN_ACT_NODE_IDS).length()); + assertTrue(reqChild.optString(FN_ACT_DIAGNOSTIC) .contains(INSUFFICIENT_RESOURCE_DIAGNOSTIC_PREFIX)); - } else if (reqChild.getString("allocationState") + } else if (reqChild.getString(FN_ACT_ACTIVITY_STATE) .equals(AllocationState.RESERVED.name())) { - assertEquals("1", reqChild.getString("count")); - assertNotNull(reqChild.getString("nodeIds")); + assertEquals("1", reqChild.getString(FN_ACT_COUNT)); + assertNotNull(reqChild.getString(FN_ACT_NODE_IDS)); } else { Assert.fail("Allocation state should be " + AllocationState.SKIPPED.name() + " or " @@ -528,7 +545,7 @@ public void testAppGroupByDiagnostics() throws Exception { */ params.add(RMWSConsts.GROUP_BY, "NON-EXIST-GROUP-BY"); JSONObject json = ActivitiesTestUtils.requestWebResource(r, params); - Assert.assertTrue(json.getString("diagnostic") + Assert.assertTrue(json.getString(FN_ACT_DIAGNOSTIC) .startsWith("Got invalid groupBy:")); params.remove(RMWSConsts.GROUP_BY); @@ -538,7 +555,7 @@ public void testAppGroupByDiagnostics() throws Exception { params.add(RMWSConsts.GROUP_BY, RMWSConsts.ActivitiesGroupBy. DIAGNOSTIC.name().toLowerCase()); json = ActivitiesTestUtils.requestWebResource(r, params); - assertEquals("waiting for display", json.getString("diagnostic")); + assertEquals("waiting for display", json.getString(FN_ACT_DIAGNOSTIC)); //Request two containers with different priority for am1 am1.allocate(Arrays.asList(ResourceRequest @@ -559,29 +576,30 @@ public void testAppGroupByDiagnostics() throws Exception { //Check app activities verifyNumberOfAllocations(json, 2); - JSONArray allocationArray = json.getJSONArray("allocations"); + JSONArray allocationArray = json.getJSONArray(FN_ACT_ALLOCATIONS); //Check first activity is for second allocation with RESERVED state JSONObject allocationObj = allocationArray.getJSONObject(0); - verifyStateOfAllocations(allocationObj, "allocationState", "RESERVED"); + verifyStateOfAllocations(allocationObj, FN_ACT_ACTIVITY_STATE, + "RESERVED"); JSONObject requestAllocationObj = - allocationObj.getJSONObject("requestAllocation"); + allocationObj.getJSONObject(FN_APP_ACT_REQUESTS); verifyNumberOfAllocationAttempts(requestAllocationObj, 2); JSONArray allocationAttemptArray = - requestAllocationObj.getJSONArray("allocationAttempt"); + requestAllocationObj.getJSONArray(FN_APP_ACT_ALLOCATION_ATTEMPTS); for (int i=0; i