From 38fce83f2d5f7211c4460b96e870561cab4f69ed Mon Sep 17 00:00:00 2001
From: 李巨丰 <920347627@qq.com>
Date: Thu, 5 Nov 2020 17:56:13 +0800
Subject: [PATCH 6/8] Change the locks to synchronized in the hadoop2.7 installation code
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
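
This backport restores the Hadoop 2.7 locking style in the Capacity
Scheduler: the ReentrantReadWriteLock critical sections used on newer
branches (writeLock.lock()/unlock() in try/finally) are converted to
synchronized methods and synchronized blocks on the scheduler object.
Affected paths include initScheduler, startSchedulerThreads,
serviceStop, reinitialize, nodeUpdate, updateNodeAndQueueResource and
the addApplication*/doneApplication* handlers.

The conversion is mechanical. A minimal sketch of the before/after
shape (the class and member names below are illustrative only, not
part of this patch):

    import java.util.concurrent.locks.ReentrantReadWriteLock;

    class LockStyleSketch {
      private final ReentrantReadWriteLock.WriteLock writeLock =
          new ReentrantReadWriteLock().writeLock();
      private int state;

      // Before: explicit lock with try/finally, as on newer branches.
      void updateWithExplicitLock() {
        writeLock.lock();
        try {
          state++;
        } finally {
          writeLock.unlock();
        }
      }

      // After: the object's monitor guards the whole method, matching
      // the Hadoop 2.7 locking style this series targets.
      synchronized void updateWithMonitor() {
        state++;
      }
    }

Note that synchronized drops the read/write distinction: readers now
serialize with writers on the scheduler monitor, trading some
concurrency for the simpler 2.7-era locking model.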
---
.../scheduler/capacity/CapacityScheduler.java | 5593 ++++++++---------
.../scheduler/capacity/LeafQueue.java | 224 +-
2 files changed, 2749 insertions(+), 3068 deletions(-)
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java
index 54b301a..8aeb203 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java
@@ -6,9 +6,9 @@
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -142,7 +142,7 @@
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event
- .QueueManagementChangeEvent;
+ .QueueManagementChangeEvent;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.ReleaseContainerEvent;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEvent;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEventType;
@@ -169,2930 +169,2765 @@
@Evolving
@SuppressWarnings("unchecked")
public class CapacityScheduler extends
- AbstractYarnScheduler<FiCaSchedulerApp, FiCaSchedulerNode> implements
- PreemptableResourceScheduler, CapacitySchedulerContext, Configurable,
- ResourceAllocationCommitter, MutableConfScheduler {
-
- private static final Log LOG = LogFactory.getLog(CapacityScheduler.class);
-
- private CapacitySchedulerQueueManager queueManager;
-
- // timeout to join when we stop this service
- protected final long THREAD_JOIN_TIMEOUT_MS = 1000;
-
- private PreemptionManager preemptionManager = new PreemptionManager();
-
- private volatile boolean isLazyPreemptionEnabled = false;
-
- private int offswitchPerHeartbeatLimit;
-
- private boolean assignMultipleEnabled;
-
- private int maxAssignPerHeartbeat;
-
- private CSConfigurationProvider csConfProvider;
-
- @Override
- public void setConf(Configuration conf) {
- yarnConf = conf;
- }
-
- private void validateConf(Configuration conf) {
- // validate scheduler memory allocation setting
- int minMem = conf.getInt(
- YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB,
- YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_MB);
- int maxMem = conf.getInt(
- YarnConfiguration.RM_SCHEDULER_MAXIMUM_ALLOCATION_MB,
- YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_MB);
-
- if (minMem <= 0 || minMem > maxMem) {
- throw new YarnRuntimeException("Invalid resource scheduler memory"
- + " allocation configuration"
- + ", " + YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB
- + "=" + minMem
- + ", " + YarnConfiguration.RM_SCHEDULER_MAXIMUM_ALLOCATION_MB
- + "=" + maxMem + ", min and max should be greater than 0"
- + ", max should be no smaller than min.");
- }
-
- // validate scheduler vcores allocation setting
- int minVcores = conf.getInt(
- YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES,
- YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES);
- int maxVcores = conf.getInt(
- YarnConfiguration.RM_SCHEDULER_MAXIMUM_ALLOCATION_VCORES,
- YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_VCORES);
-
- if (minVcores <= 0 || minVcores > maxVcores) {
- throw new YarnRuntimeException("Invalid resource scheduler vcores"
- + " allocation configuration"
- + ", " + YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES
- + "=" + minVcores
- + ", " + YarnConfiguration.RM_SCHEDULER_MAXIMUM_ALLOCATION_VCORES
- + "=" + maxVcores + ", min and max should be greater than 0"
- + ", max should be no smaller than min.");
- }
- }
-
- @Override
- public Configuration getConf() {
- return yarnConf;
- }
-
- private CapacitySchedulerConfiguration conf;
- private Configuration yarnConf;
-
- private ResourceCalculator calculator;
- private boolean usePortForNodeName;
-
- private boolean scheduleAsynchronously;
- private List<AsyncScheduleThread> asyncSchedulerThreads;
- private ResourceCommitterService resourceCommitterService;
- private RMNodeLabelsManager labelManager;
- private AppPriorityACLsManager appPriorityACLManager;
-
- private static boolean printedVerboseLoggingForAsyncScheduling = false;
-
- /**
- * EXPERT
- */
- private long asyncScheduleInterval;
- private static final String ASYNC_SCHEDULER_INTERVAL =
- CapacitySchedulerConfiguration.SCHEDULE_ASYNCHRONOUSLY_PREFIX
- + ".scheduling-interval-ms";
- private static final long DEFAULT_ASYNC_SCHEDULER_INTERVAL = 5;
- private long asyncMaxPendingBacklogs;
-
- public CapacityScheduler() {
- super(CapacityScheduler.class.getName());
- }
-
- @Override
- public QueueMetrics getRootQueueMetrics() {
- return getRootQueue().getMetrics();
- }
-
- public CSQueue getRootQueue() {
- return queueManager.getRootQueue();
- }
-
- @Override
- public CapacitySchedulerConfiguration getConfiguration() {
- return conf;
- }
-
- @Override
- public RMContainerTokenSecretManager getContainerTokenSecretManager() {
- return this.rmContext.getContainerTokenSecretManager();
- }
-
- @Override
- public ResourceCalculator getResourceCalculator() {
- return calculator;
- }
-
- @VisibleForTesting
- public void setResourceCalculator(ResourceCalculator rc) {
- this.calculator = rc;
- }
-
- @Override
- public int getNumClusterNodes() {
- return nodeTracker.nodeCount();
- }
-
- @Override
- public RMContext getRMContext() {
- return this.rmContext;
- }
-
- @Override
- public void setRMContext(RMContext rmContext) {
- this.rmContext = rmContext;
- }
-
- @VisibleForTesting
- void initScheduler(Configuration configuration) throws
- IOException {
- try {
- writeLock.lock();
- String confProviderStr = configuration.get(
- YarnConfiguration.SCHEDULER_CONFIGURATION_STORE_CLASS,
- YarnConfiguration.DEFAULT_CONFIGURATION_STORE);
- switch (confProviderStr) {
- case YarnConfiguration.FILE_CONFIGURATION_STORE:
- this.csConfProvider =
- new FileBasedCSConfigurationProvider(rmContext);
- break;
- case YarnConfiguration.MEMORY_CONFIGURATION_STORE:
- case YarnConfiguration.LEVELDB_CONFIGURATION_STORE:
- case YarnConfiguration.ZK_CONFIGURATION_STORE:
- this.csConfProvider = new MutableCSConfigurationProvider(rmContext);
- break;
- default:
- throw new IOException("Invalid configuration store class: " +
- confProviderStr);
- }
- this.csConfProvider.init(configuration);
- this.conf = this.csConfProvider.loadConfiguration(configuration);
- validateConf(this.conf);
- this.minimumAllocation = super.getMinimumAllocation();
- initMaximumResourceCapability(super.getMaximumAllocation());
- this.calculator = this.conf.getResourceCalculator();
- if (this.calculator instanceof DefaultResourceCalculator
- && ResourceUtils.getNumberOfKnownResourceTypes() > 2) {
- throw new YarnRuntimeException("RM uses DefaultResourceCalculator which"
- + " used only memory as resource-type but invalid resource-types"
- + " specified " + ResourceUtils.getResourceTypes() + ". Use"
- + " DomainantResourceCalculator instead to make effective use of"
- + " these resource-types");
- }
- this.usePortForNodeName = this.conf.getUsePortForNodeName();
- this.applications = new ConcurrentHashMap<>();
- this.labelManager = rmContext.getNodeLabelManager();
- this.appPriorityACLManager = new AppPriorityACLsManager(conf);
- this.queueManager = new CapacitySchedulerQueueManager(yarnConf,
- this.labelManager, this.appPriorityACLManager);
- this.queueManager.setCapacitySchedulerContext(this);
-
- this.activitiesManager = new ActivitiesManager(rmContext);
- activitiesManager.init(conf);
- initializeQueues(this.conf);
- this.isLazyPreemptionEnabled = conf.getLazyPreemptionEnabled();
-
- scheduleAsynchronously = this.conf.getScheduleAynschronously();
- asyncScheduleInterval = this.conf.getLong(ASYNC_SCHEDULER_INTERVAL,
- DEFAULT_ASYNC_SCHEDULER_INTERVAL);
-
- this.assignMultipleEnabled = this.conf.getAssignMultipleEnabled();
- this.maxAssignPerHeartbeat = this.conf.getMaxAssignPerHeartbeat();
-
- // number of threads for async scheduling
- int maxAsyncSchedulingThreads = this.conf.getInt(
- CapacitySchedulerConfiguration.SCHEDULE_ASYNCHRONOUSLY_MAXIMUM_THREAD,
- 1);
- maxAsyncSchedulingThreads = Math.max(maxAsyncSchedulingThreads, 1);
-
- if (scheduleAsynchronously) {
- asyncSchedulerThreads = new ArrayList<>();
- for (int i = 0; i < maxAsyncSchedulingThreads; i++) {
- asyncSchedulerThreads.add(new AsyncScheduleThread(this));
- }
- resourceCommitterService = new ResourceCommitterService(this);
- asyncMaxPendingBacklogs = this.conf.getInt(
- CapacitySchedulerConfiguration.
- SCHEDULE_ASYNCHRONOUSLY_MAXIMUM_PENDING_BACKLOGS,
- CapacitySchedulerConfiguration.
- DEFAULT_SCHEDULE_ASYNCHRONOUSLY_MAXIMUM_PENDING_BACKLOGS);
- }
-
- // Setup how many containers we can allocate for each round
- offswitchPerHeartbeatLimit = this.conf.getOffSwitchPerHeartbeatLimit();
-
- LOG.info("Initialized CapacityScheduler with " + "calculator="
- + getResourceCalculator().getClass() + ", " + "minimumAllocation=<"
- + getMinimumResourceCapability() + ">, " + "maximumAllocation=<"
- + getMaximumResourceCapability() + ">, " + "asynchronousScheduling="
- + scheduleAsynchronously + ", " + "asyncScheduleInterval="
- + asyncScheduleInterval + "ms");
- } finally {
- writeLock.unlock();
- }
- }
-
- private void startSchedulerThreads() {
- try {
- writeLock.lock();
- activitiesManager.start();
- if (scheduleAsynchronously) {
- Preconditions.checkNotNull(asyncSchedulerThreads,
- "asyncSchedulerThreads is null");
- for (Thread t : asyncSchedulerThreads) {
- t.start();
- }
-
- resourceCommitterService.start();
- }
- } finally {
- writeLock.unlock();
- }
- }
-
- @Override
- public void serviceInit(Configuration conf) throws Exception {
- Configuration configuration = new Configuration(conf);
- super.serviceInit(conf);
- initScheduler(configuration);
- // Initialize SchedulingMonitorManager
- schedulingMonitorManager.initialize(rmContext, conf);
- }
-
- @Override
- public void serviceStart() throws Exception {
- startSchedulerThreads();
- super.serviceStart();
- }
-
- @Override
- public void serviceStop() throws Exception {
- try {
- writeLock.lock();
- this.activitiesManager.stop();
- if (scheduleAsynchronously && asyncSchedulerThreads != null) {
- for (Thread t : asyncSchedulerThreads) {
- t.interrupt();
- t.join(THREAD_JOIN_TIMEOUT_MS);
- }
- resourceCommitterService.interrupt();
- resourceCommitterService.join(THREAD_JOIN_TIMEOUT_MS);
- }
- } finally {
- writeLock.unlock();
- }
-
- if (isConfigurationMutable()) {
- ((MutableConfigurationProvider) csConfProvider).close();
- }
- super.serviceStop();
- }
-
- @Override
- public void reinitialize(Configuration newConf, RMContext rmContext)
- throws IOException {
- writeLock.lock();
- try {
- Configuration configuration = new Configuration(newConf);
- CapacitySchedulerConfiguration oldConf = this.conf;
- this.conf = csConfProvider.loadConfiguration(configuration);
- validateConf(this.conf);
- try {
- LOG.info("Re-initializing queues...");
- refreshMaximumAllocation(
- ResourceUtils.fetchMaximumAllocationFromConfig(this.conf));
- reinitializeQueues(this.conf);
- } catch (Throwable t) {
- this.conf = oldConf;
- refreshMaximumAllocation(
- ResourceUtils.fetchMaximumAllocationFromConfig(this.conf));
- throw new IOException("Failed to re-init queues : " + t.getMessage(),
- t);
- }
-
- // update lazy preemption
- this.isLazyPreemptionEnabled = this.conf.getLazyPreemptionEnabled();
-
- // Setup how many containers we can allocate for each round
- offswitchPerHeartbeatLimit = this.conf.getOffSwitchPerHeartbeatLimit();
-
- super.reinitialize(newConf, rmContext);
- } finally {
- writeLock.unlock();
- }
- }
-
- long getAsyncScheduleInterval() {
- return asyncScheduleInterval;
- }
-
- private final static Random random = new Random(System.currentTimeMillis());
-
- private static boolean shouldSkipNodeSchedule(FiCaSchedulerNode node,
- CapacityScheduler cs, boolean printVerboseLog) {
- // Skip a node that has missed 2 heartbeats, since it might be dead and
- // we should not continue allocating containers on it.
- long timeElapsedFromLastHeartbeat =
- Time.monotonicNow() - node.getLastHeartbeatMonotonicTime();
- if (timeElapsedFromLastHeartbeat > cs.nmHeartbeatInterval * 2) {
- if (printVerboseLog && LOG.isDebugEnabled()) {
- LOG.debug("Skip scheduling on node because it haven't heartbeated for "
- + timeElapsedFromLastHeartbeat / 1000.0f + " secs");
- }
- return true;
- }
- return false;
- }
-
- /**
- * Schedule on all nodes by starting at a random point.
- * @param cs
- */
- static void schedule(CapacityScheduler cs) throws InterruptedException{
- // First randomize the start point
- int current = 0;
- Collection<FiCaSchedulerNode> nodes = cs.nodeTracker.getAllNodes();
-
- // If nodes size is 0 (i.e. no node managers are registered), we can
- // return from here itself.
- int nodeSize = nodes.size();
- if(nodeSize == 0) {
- return;
- }
- int start = random.nextInt(nodeSize);
-
- // To avoid overly verbose DEBUG logging, only print the debug log once
- // every 10 secs.
- boolean printSkipedNodeLogging = false;
- if (Time.monotonicNow() / 1000 % 10 == 0) {
- printSkipedNodeLogging = (!printedVerboseLoggingForAsyncScheduling);
- } else {
- printedVerboseLoggingForAsyncScheduling = false;
- }
-
- // Allocate containers of node [start, end)
- for (FiCaSchedulerNode node : nodes) {
- if (current++ >= start) {
- if (shouldSkipNodeSchedule(node, cs, printSkipedNodeLogging)) {
- continue;
- }
- cs.allocateContainersToNode(node.getNodeID(), false);
- }
- }
-
- current = 0;
-
- // Allocate containers of node [0, start)
- for (FiCaSchedulerNode node : nodes) {
- if (current++ > start) {
- break;
- }
- if (shouldSkipNodeSchedule(node, cs, printSkipedNodeLogging)) {
- continue;
- }
- cs.allocateContainersToNode(node.getNodeID(), false);
- }
-
- if (printSkipedNodeLogging) {
- printedVerboseLoggingForAsyncScheduling = true;
- }
-
- Thread.sleep(cs.getAsyncScheduleInterval());
- }
+ AbstractYarnScheduler<FiCaSchedulerApp, FiCaSchedulerNode> implements
+ PreemptableResourceScheduler, CapacitySchedulerContext, Configurable,
+ ResourceAllocationCommitter, MutableConfScheduler {
- static class AsyncScheduleThread extends Thread {
+ private static final Log LOG = LogFactory.getLog(CapacityScheduler.class);
- private final CapacityScheduler cs;
- private AtomicBoolean runSchedules = new AtomicBoolean(false);
+ private CapacitySchedulerQueueManager queueManager;
- public AsyncScheduleThread(CapacityScheduler cs) {
- this.cs = cs;
- setDaemon(true);
+ // timeout to join when we stop this service
+ protected final long THREAD_JOIN_TIMEOUT_MS = 1000;
+
+ private PreemptionManager preemptionManager = new PreemptionManager();
+
+ private volatile boolean isLazyPreemptionEnabled = false;
+
+ private int offswitchPerHeartbeatLimit;
+
+ private boolean assignMultipleEnabled;
+
+ private int maxAssignPerHeartbeat;
+
+ private CSConfigurationProvider csConfProvider;
+
+ @Override
+ public void setConf(Configuration conf) {
+ yarnConf = conf;
+ }
+
+ private void validateConf(Configuration conf) {
+ // validate scheduler memory allocation setting
+ int minMem = conf.getInt(
+ YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB,
+ YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_MB);
+ int maxMem = conf.getInt(
+ YarnConfiguration.RM_SCHEDULER_MAXIMUM_ALLOCATION_MB,
+ YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_MB);
+
+ if (minMem <= 0 || minMem > maxMem) {
+ throw new YarnRuntimeException("Invalid resource scheduler memory"
+ + " allocation configuration"
+ + ", " + YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB
+ + "=" + minMem
+ + ", " + YarnConfiguration.RM_SCHEDULER_MAXIMUM_ALLOCATION_MB
+ + "=" + maxMem + ", min and max should be greater than 0"
+ + ", max should be no smaller than min.");
+ }
+
+ // validate scheduler vcores allocation setting
+ int minVcores = conf.getInt(
+ YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES,
+ YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES);
+ int maxVcores = conf.getInt(
+ YarnConfiguration.RM_SCHEDULER_MAXIMUM_ALLOCATION_VCORES,
+ YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_VCORES);
+
+ if (minVcores <= 0 || minVcores > maxVcores) {
+ throw new YarnRuntimeException("Invalid resource scheduler vcores"
+ + " allocation configuration"
+ + ", " + YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES
+ + "=" + minVcores
+ + ", " + YarnConfiguration.RM_SCHEDULER_MAXIMUM_ALLOCATION_VCORES
+ + "=" + maxVcores + ", min and max should be greater than 0"
+ + ", max should be no smaller than min.");
+ }
+ }
+
+ @Override
+ public Configuration getConf() {
+ return yarnConf;
+ }
+
+ private CapacitySchedulerConfiguration conf;
+ private Configuration yarnConf;
+
+ private ResourceCalculator calculator;
+ private boolean usePortForNodeName;
+
+ private boolean scheduleAsynchronously;
+ private List<AsyncScheduleThread> asyncSchedulerThreads;
+ private ResourceCommitterService resourceCommitterService;
+ private RMNodeLabelsManager labelManager;
+ private AppPriorityACLsManager appPriorityACLManager;
+
+ private static boolean printedVerboseLoggingForAsyncScheduling = false;
+
+ /**
+ * EXPERT
+ */
+ private long asyncScheduleInterval;
+ private static final String ASYNC_SCHEDULER_INTERVAL =
+ CapacitySchedulerConfiguration.SCHEDULE_ASYNCHRONOUSLY_PREFIX
+ + ".scheduling-interval-ms";
+ private static final long DEFAULT_ASYNC_SCHEDULER_INTERVAL = 5;
+ private long asyncMaxPendingBacklogs;
+
+ public CapacityScheduler() {
+ super(CapacityScheduler.class.getName());
+ }
+
+ @Override
+ public QueueMetrics getRootQueueMetrics() {
+ return getRootQueue().getMetrics();
+ }
+
+ public CSQueue getRootQueue() {
+ return queueManager.getRootQueue();
+ }
+
+ @Override
+ public CapacitySchedulerConfiguration getConfiguration() {
+ return conf;
+ }
+
+ @Override
+ public RMContainerTokenSecretManager getContainerTokenSecretManager() {
+ return this.rmContext.getContainerTokenSecretManager();
+ }
+
+ @Override
+ public ResourceCalculator getResourceCalculator() {
+ return calculator;
+ }
+
+ @VisibleForTesting
+ public void setResourceCalculator(ResourceCalculator rc) {
+ this.calculator = rc;
+ }
+
+ @Override
+ public int getNumClusterNodes() {
+ return nodeTracker.nodeCount();
+ }
+
+ @Override
+ public RMContext getRMContext() {
+ return this.rmContext;
+ }
+
+ @Override
+ public void setRMContext(RMContext rmContext) {
+ this.rmContext = rmContext;
+ }
+
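+ // The writeLock.lock()/unlock() pair that guards this method on newer
+ // branches is replaced by method-level synchronized below, so the
+ // scheduler object's monitor provides mutual exclusion during init.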
+ @VisibleForTesting
+ synchronized void initScheduler(Configuration configuration) throws
+ IOException {
+ String confProviderStr = configuration.get(
+ YarnConfiguration.SCHEDULER_CONFIGURATION_STORE_CLASS,
+ YarnConfiguration.DEFAULT_CONFIGURATION_STORE);
+ switch (confProviderStr) {
+ case YarnConfiguration.FILE_CONFIGURATION_STORE:
+ this.csConfProvider =
+ new FileBasedCSConfigurationProvider(rmContext);
+ break;
+ case YarnConfiguration.MEMORY_CONFIGURATION_STORE:
+ case YarnConfiguration.LEVELDB_CONFIGURATION_STORE:
+ case YarnConfiguration.ZK_CONFIGURATION_STORE:
+ this.csConfProvider = new MutableCSConfigurationProvider(rmContext);
+ break;
+ default:
+ throw new IOException("Invalid configuration store class: " +
+ confProviderStr);
+ }
+ this.csConfProvider.init(configuration);
+ this.conf = this.csConfProvider.loadConfiguration(configuration);
+ validateConf(this.conf);
+ this.minimumAllocation = super.getMinimumAllocation();
+ initMaximumResourceCapability(super.getMaximumAllocation());
+ this.calculator = this.conf.getResourceCalculator();
+ if (this.calculator instanceof DefaultResourceCalculator
+ && ResourceUtils.getNumberOfKnownResourceTypes() > 2) {
+ throw new YarnRuntimeException("RM uses DefaultResourceCalculator which"
+ + " used only memory as resource-type but invalid resource-types"
+ + " specified " + ResourceUtils.getResourceTypes() + ". Use"
+ + " DomainantResourceCalculator instead to make effective use of"
+ + " these resource-types");
+ }
+ this.usePortForNodeName = this.conf.getUsePortForNodeName();
+ this.applications = new ConcurrentHashMap<>();
+ this.labelManager = rmContext.getNodeLabelManager();
+ this.appPriorityACLManager = new AppPriorityACLsManager(conf);
+ this.queueManager = new CapacitySchedulerQueueManager(yarnConf,
+ this.labelManager, this.appPriorityACLManager);
+ this.queueManager.setCapacitySchedulerContext(this);
+
+ this.activitiesManager = new ActivitiesManager(rmContext);
+ activitiesManager.init(conf);
+ initializeQueues(this.conf);
+ this.isLazyPreemptionEnabled = conf.getLazyPreemptionEnabled();
+
+ scheduleAsynchronously = this.conf.getScheduleAynschronously();
+ asyncScheduleInterval = this.conf.getLong(ASYNC_SCHEDULER_INTERVAL,
+ DEFAULT_ASYNC_SCHEDULER_INTERVAL);
+
+ this.assignMultipleEnabled = this.conf.getAssignMultipleEnabled();
+ this.maxAssignPerHeartbeat = this.conf.getMaxAssignPerHeartbeat();
+
+ // number of threads for async scheduling
+ int maxAsyncSchedulingThreads = this.conf.getInt(
+ CapacitySchedulerConfiguration.SCHEDULE_ASYNCHRONOUSLY_MAXIMUM_THREAD,
+ 1);
+ maxAsyncSchedulingThreads = Math.max(maxAsyncSchedulingThreads, 1);
+
+ if (scheduleAsynchronously) {
+ asyncSchedulerThreads = new ArrayList<>();
+ for (int i = 0; i < maxAsyncSchedulingThreads; i++) {
+ asyncSchedulerThreads.add(new AsyncScheduleThread(this));
+ }
+ resourceCommitterService = new ResourceCommitterService(this);
+ asyncMaxPendingBacklogs = this.conf.getInt(
+ CapacitySchedulerConfiguration.
+ SCHEDULE_ASYNCHRONOUSLY_MAXIMUM_PENDING_BACKLOGS,
+ CapacitySchedulerConfiguration.
+ DEFAULT_SCHEDULE_ASYNCHRONOUSLY_MAXIMUM_PENDING_BACKLOGS);
+ }
+
+ // Setup how many containers we can allocate for each round
+ offswitchPerHeartbeatLimit = this.conf.getOffSwitchPerHeartbeatLimit();
+
+ LOG.info("Initialized CapacityScheduler with " + "calculator="
+ + getResourceCalculator().getClass() + ", " + "minimumAllocation=<"
+ + getMinimumResourceCapability() + ">, " + "maximumAllocation=<"
+ + getMaximumResourceCapability() + ">, " + "asynchronousScheduling="
+ + scheduleAsynchronously + ", " + "asyncScheduleInterval="
+ + asyncScheduleInterval + "ms");
+ }
+
+ private synchronized void startSchedulerThreads() {
+ activitiesManager.start();
+ if (scheduleAsynchronously) {
+ Preconditions.checkNotNull(asyncSchedulerThreads,
+ "asyncSchedulerThreads is null");
+ for (Thread t : asyncSchedulerThreads) {
+ t.start();
+ }
+ resourceCommitterService.start();
+ }
+ }
+
+ @Override
+ public void serviceInit(Configuration conf) throws Exception {
+ Configuration configuration = new Configuration(conf);
+ super.serviceInit(conf);
+ initScheduler(configuration);
+ // Initialize SchedulingMonitorManager
+ schedulingMonitorManager.initialize(rmContext, conf);
+ }
+
+ @Override
+ public void serviceStart() throws Exception {
+ startSchedulerThreads();
+ super.serviceStart();
+ }
+
+ @Override
+ public void serviceStop() throws Exception {
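+ // Stop the activities manager and the async scheduling threads while
+ // holding the scheduler monitor; closing the mutable configuration
+ // provider and super.serviceStop() run outside the critical section.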
+ synchronized (this) {
+ this.activitiesManager.stop();
+ if (scheduleAsynchronously && asyncSchedulerThreads != null) {
+ for (Thread t : asyncSchedulerThreads) {
+ t.interrupt();
+ t.join(THREAD_JOIN_TIMEOUT_MS);
+ }
+ resourceCommitterService.interrupt();
+ resourceCommitterService.join(THREAD_JOIN_TIMEOUT_MS);
+ }
+ }
+
+ if (isConfigurationMutable()) {
+ ((MutableConfigurationProvider) csConfProvider).close();
+ }
+ super.serviceStop();
+ }
+
+ @Override
+ public synchronized void reinitialize(Configuration newConf, RMContext rmContext)
+ throws IOException {
+ Configuration configuration = new Configuration(newConf);
+ CapacitySchedulerConfiguration oldConf = this.conf;
+ this.conf = csConfProvider.loadConfiguration(configuration);
+ validateConf(this.conf);
+ try {
+ LOG.info("Re-initializing queues...");
+ refreshMaximumAllocation(
+ ResourceUtils.fetchMaximumAllocationFromConfig(this.conf));
+ reinitializeQueues(this.conf);
+ } catch (Throwable t) {
+ this.conf = oldConf;
+ refreshMaximumAllocation(
+ ResourceUtils.fetchMaximumAllocationFromConfig(this.conf));
+ throw new IOException("Failed to re-init queues : " + t.getMessage(),
+ t);
+ }
+
+ // update lazy preemption
+ this.isLazyPreemptionEnabled = this.conf.getLazyPreemptionEnabled();
+
+ // Setup how many containers we can allocate for each round
+ offswitchPerHeartbeatLimit = this.conf.getOffSwitchPerHeartbeatLimit();
+
+ super.reinitialize(newConf, rmContext);
+ }
+
+ long getAsyncScheduleInterval() {
+ return asyncScheduleInterval;
+ }
+
+ private final static Random random = new Random(System.currentTimeMillis());
+
+ private static boolean shouldSkipNodeSchedule(FiCaSchedulerNode node,
+ CapacityScheduler cs, boolean printVerboseLog) {
+ // Skip a node that has missed 2 heartbeats, since it might be dead and
+ // we should not continue allocating containers on it.
+ long timeElapsedFromLastHeartbeat =
+ Time.monotonicNow() - node.getLastHeartbeatMonotonicTime();
+ if (timeElapsedFromLastHeartbeat > cs.nmHeartbeatInterval * 2) {
+ if (printVerboseLog && LOG.isDebugEnabled()) {
+ LOG.debug("Skip scheduling on node because it haven't heartbeated for "
+ + timeElapsedFromLastHeartbeat / 1000.0f + " secs");
+ }
+ return true;
+ }
+ return false;
+ }
+
+ /**
+ * Schedule on all nodes by starting at a random point.
+ * @param cs
+ */
+ static void schedule(CapacityScheduler cs) throws InterruptedException {
+ // First randomize the start point
+ int current = 0;
+ Collection<FiCaSchedulerNode> nodes = cs.nodeTracker.getAllNodes();
+
+ // If nodes size is 0 (i.e. no node managers are registered), we can
+ // return from here itself.
+ int nodeSize = nodes.size();
+ if (nodeSize == 0) {
+ return;
+ }
+ int start = random.nextInt(nodeSize);
+
+ // To avoid overly verbose DEBUG logging, only print the debug log once
+ // every 10 secs.
+ boolean printSkipedNodeLogging = false;
+ if (Time.monotonicNow() / 1000 % 10 == 0) {
+ printSkipedNodeLogging = (!printedVerboseLoggingForAsyncScheduling);
+ } else {
+ printedVerboseLoggingForAsyncScheduling = false;
+ }
+
+ // Allocate containers of node [start, end)
+ for (FiCaSchedulerNode node : nodes) {
+ if (current++ >= start) {
+ if (shouldSkipNodeSchedule(node, cs, printSkipedNodeLogging)) {
+ continue;
+ }
+ cs.allocateContainersToNode(node.getNodeID(), false);
+ }
+ }
+
+ current = 0;
+
+ // Allocate containers of node [0, start)
+ for (FiCaSchedulerNode node : nodes) {
+ if (current++ > start) {
+ break;
+ }
+ if (shouldSkipNodeSchedule(node, cs, printSkipedNodeLogging)) {
+ continue;
+ }
+ cs.allocateContainersToNode(node.getNodeID(), false);
+ }
+
+ if (printSkipedNodeLogging) {
+ printedVerboseLoggingForAsyncScheduling = true;
+ }
+
+ Thread.sleep(cs.getAsyncScheduleInterval());
+ }
+
+ static class AsyncScheduleThread extends Thread {
+
+ private final CapacityScheduler cs;
+ private AtomicBoolean runSchedules = new AtomicBoolean(false);
+
+ public AsyncScheduleThread(CapacityScheduler cs) {
+ this.cs = cs;
+ setDaemon(true);
+ }
+
+ @Override
+ public void run() {
+ int debuggingLogCounter = 0;
+ while (!Thread.currentThread().isInterrupted()) {
+ try {
+ if (!runSchedules.get()) {
+ Thread.sleep(100);
+ } else {
+ // Don't run schedule if we have some pending backlogs already
+ if (cs.getAsyncSchedulingPendingBacklogs()
+ > cs.asyncMaxPendingBacklogs) {
+ Thread.sleep(1);
+ } else {
+ schedule(cs);
+ if (LOG.isDebugEnabled()) {
+ // Adding a debug log here to ensure that the thread is alive
+ // and running fine.
+ if (debuggingLogCounter++ > 10000) {
+ debuggingLogCounter = 0;
+ LOG.debug("AsyncScheduleThread[" + getName() + "] is running!");
+ }
+ }
+ }
+ }
+ } catch (InterruptedException ie) {
+ // keep interrupt signal
+ Thread.currentThread().interrupt();
+ }
+ }
+ LOG.info("AsyncScheduleThread[" + getName() + "] exited!");
+ }
+
+ public void beginSchedule() {
+ runSchedules.set(true);
+ }
+
+ public void suspendSchedule() {
+ runSchedules.set(false);
+ }
+
+ }
+
+ static class ResourceCommitterService extends Thread {
+ private final CapacityScheduler cs;
+ private BlockingQueue<ResourceCommitRequest<FiCaSchedulerApp, FiCaSchedulerNode>>
+ backlogs = new LinkedBlockingQueue<>();
+
+ public ResourceCommitterService(CapacityScheduler cs) {
+ this.cs = cs;
+ setDaemon(true);
+ }
+
+ @Override
+ public void run() {
+ while (!Thread.currentThread().isInterrupted()) {
+ try {
+ ResourceCommitRequest<FiCaSchedulerApp, FiCaSchedulerNode> request =
+ backlogs.take();
+
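+ // Apply one proposal at a time; the backlog queue's monitor
+ // serializes tryCommit() calls from this committer thread.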
+ synchronized (backlogs) {
+ cs.tryCommit(cs.getClusterResource(), request, true);
+ }
+
+ } catch (InterruptedException e) {
+ LOG.error(e);
+ Thread.currentThread().interrupt();
+ }
+ }
+ LOG.info("ResourceCommitterService exited!");
+ }
+
+ public void addNewCommitRequest(
+ ResourceCommitRequest<FiCaSchedulerApp, FiCaSchedulerNode> proposal) {
+ backlogs.add(proposal);
+ }
+
+ public int getPendingBacklogs() {
+ return backlogs.size();
+ }
+ }
+
+ @VisibleForTesting
+ public PlacementRule getUserGroupMappingPlacementRule() throws IOException {
+ UserGroupMappingPlacementRule ugRule = new UserGroupMappingPlacementRule();
+ ugRule.initialize(this);
+ return ugRule;
+ }
+
+ public PlacementRule getAppNameMappingPlacementRule() throws IOException {
+ AppNameMappingPlacementRule anRule = new AppNameMappingPlacementRule();
+ anRule.initialize(this);
+ return anRule;
+ }
+
+ @VisibleForTesting
+ public void updatePlacementRules() throws IOException {
+ // Initialize placement rules
+ Collection<String> placementRuleStrs = conf.getStringCollection(
+ YarnConfiguration.QUEUE_PLACEMENT_RULES);
+ List<PlacementRule> placementRules = new ArrayList<>();
+ Set<String> distingushRuleSet = new HashSet<>();
+ // fail if we get a duplicate placement rule added in
+ for (String pls : placementRuleStrs) {
+ if (!distingushRuleSet.add(pls)) {
+ throw new IOException("Invalid PlacementRule inputs which "
+ + "contains duplicate rule strings");
+ }
+ }
+
+ // add UserGroupMappingPlacementRule if absent
+ distingushRuleSet.add(YarnConfiguration.USER_GROUP_PLACEMENT_RULE);
+
+ placementRuleStrs = new ArrayList<>(distingushRuleSet);
+
+ for (String placementRuleStr : placementRuleStrs) {
+ switch (placementRuleStr) {
+ case YarnConfiguration.USER_GROUP_PLACEMENT_RULE:
+ PlacementRule ugRule = getUserGroupMappingPlacementRule();
+ if (null != ugRule) {
+ placementRules.add(ugRule);
+ }
+ break;
+ case YarnConfiguration.APP_NAME_PLACEMENT_RULE:
+ PlacementRule anRule = getAppNameMappingPlacementRule();
+ if (null != anRule) {
+ placementRules.add(anRule);
+ }
+ break;
+ default:
+ boolean isMappingNotEmpty;
+ try {
+ PlacementRule rule = PlacementFactory.getPlacementRule(
+ placementRuleStr, conf);
+ if (null != rule) {
+ try {
+ isMappingNotEmpty = rule.initialize(this);
+ } catch (IOException ie) {
+ throw new IOException(ie);
+ }
+ if (isMappingNotEmpty) {
+ placementRules.add(rule);
+ }
+ }
+ } catch (ClassNotFoundException cnfe) {
+ throw new IOException(cnfe);
+ }
+ }
+ }
+
+ rmContext.getQueuePlacementManager().updateRules(placementRules);
+ }
+
+ @Lock(CapacityScheduler.class)
+ private void initializeQueues(CapacitySchedulerConfiguration conf)
+ throws IOException {
+
+ this.queueManager.initializeQueues(conf);
+
+ updatePlacementRules();
+
+ // Notify Preemption Manager
+ preemptionManager.refreshQueues(null, this.getRootQueue());
+ }
+
+ @Lock(CapacityScheduler.class)
+ private void reinitializeQueues(CapacitySchedulerConfiguration newConf)
+ throws IOException {
+ this.queueManager.reinitializeQueues(newConf);
+ updatePlacementRules();
+
+ // Notify Preemption Manager
+ preemptionManager.refreshQueues(null, this.getRootQueue());
+ }
+
+ @Override
+ public CSQueue getQueue(String queueName) {
+ if (queueName == null) {
+ return null;
+ }
+ return this.queueManager.getQueue(queueName);
+ }
+
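+ // The application lifecycle handlers below (addApplicationOnRecovery,
+ // addApplication, addApplicationAttempt, doneApplication and
+ // doneApplicationAttempt) are method-level synchronized in this
+ // backport, replacing the writeLock sections used on newer branches.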
+ private synchronized void addApplicationOnRecovery(ApplicationId applicationId,
+ String queueName, String user,
+ Priority priority, ApplicationPlacementContext placementContext) {
+ //check if the queue needs to be auto-created during recovery
+ CSQueue queue = getOrCreateQueueFromPlacementContext(applicationId, user,
+ queueName, placementContext, true);
+
+ if (queue == null) {
+ //During a restart, this indicates a queue was removed, which is
+ //not presently supported
+ if (!getConfiguration().shouldAppFailFast(getConfig())) {
+ this.rmContext.getDispatcher().getEventHandler().handle(
+ new RMAppEvent(applicationId, RMAppEventType.KILL,
+ "Application killed on recovery as it"
+ + " was submitted to queue " + queueName
+ + " which no longer exists after restart."));
+ return;
+ } else {
+ String queueErrorMsg = "Queue named " + queueName + " missing "
+ + "during application recovery."
+ + " Queue removal during recovery is not presently "
+ + "supported by the capacity scheduler, please "
+ + "restart with all queues configured"
+ + " which were present before shutdown/restart.";
+ LOG.fatal(queueErrorMsg);
+ throw new QueueInvalidException(queueErrorMsg);
+ }
+ }
+ if (!(queue instanceof LeafQueue)) {
+ // During RM restart, this means leaf queue was converted to a parent
+ // queue, which is not supported for running apps.
+ if (!getConfiguration().shouldAppFailFast(getConfig())) {
+ this.rmContext.getDispatcher().getEventHandler().handle(
+ new RMAppEvent(applicationId, RMAppEventType.KILL,
+ "Application killed on recovery as it was "
+ + "submitted to queue " + queueName
+ + " which is no longer a leaf queue after restart."));
+ return;
+ } else {
+ String queueErrorMsg = "Queue named " + queueName
+ + " is no longer a leaf queue during application recovery."
+ + " Changing a leaf queue to a parent queue during recovery is"
+ + " not presently supported by the capacity scheduler. Please"
+ + " restart with leaf queues before shutdown/restart continuing"
+ + " as leaf queues.";
+ LOG.fatal(queueErrorMsg);
+ throw new QueueInvalidException(queueErrorMsg);
+ }
+ }
+ // If we are recovering apps in this queue while the queue is in the
+ // STOPPED state, its previous state must have been DRAINING, so we
+ // automatically transition the state back to DRAINING for recovery.
+ if (queue.getState() == QueueState.STOPPED) {
+ ((LeafQueue) queue).recoverDrainingState();
+ }
+ // Submit to the queue
+ try {
+ queue.submitApplication(applicationId, user, queueName);
+ } catch (AccessControlException ace) {
+ // Ignore the exception for recovered app as the app was previously
+ // accepted.
+ }
+ queue.getMetrics().submitApp(user);
+ SchedulerApplication<FiCaSchedulerApp> application =
+ new SchedulerApplication<>(queue, user, priority);
+ applications.put(applicationId, application);
+ LOG.info("Accepted application " + applicationId + " from user: " + user
+ + ", in queue: " + queueName);
+ if (LOG.isDebugEnabled()) {
+ LOG.debug(
+ applicationId + " is recovering. Skip notifying APP_ACCEPTED");
+ }
+ }
+
+ private CSQueue getOrCreateQueueFromPlacementContext(ApplicationId
+ applicationId, String user, String queueName,
+ ApplicationPlacementContext placementContext,
+ boolean isRecovery) {
+
+ CSQueue queue = getQueue(queueName);
+
+ if (queue == null) {
+ if (placementContext != null && placementContext.hasParentQueue()) {
+ try {
+ return autoCreateLeafQueue(placementContext);
+ } catch (YarnException | IOException e) {
+ if (isRecovery) {
+ if (!getConfiguration().shouldAppFailFast(getConfig())) {
+ LOG.error("Could not auto-create leaf queue " + queueName +
+ " due to : ", e);
+ this.rmContext.getDispatcher().getEventHandler().handle(
+ new RMAppEvent(applicationId, RMAppEventType.KILL,
+ "Application killed on recovery"
+ + " as it was submitted to queue " + queueName
+ + " which could not be auto-created"));
+ } else {
+ String queueErrorMsg =
+ "Queue named " + queueName + " could not be "
+ + "auto-created during application recovery.";
+ LOG.fatal(queueErrorMsg, e);
+ throw new QueueInvalidException(queueErrorMsg);
+ }
+ } else {
+ LOG.error("Could not auto-create leaf queue due to : ", e);
+ final String message =
+ "Application " + applicationId + " submission by user : "
+ + user
+ + " to queue : " + queueName + " failed : " + e
+ .getMessage();
+ this.rmContext.getDispatcher().getEventHandler().handle(
+ new RMAppEvent(applicationId, RMAppEventType.APP_REJECTED,
+ message));
+ }
+ }
+ }
+ }
+ return queue;
+ }
+
+ private synchronized void addApplication(ApplicationId applicationId, String queueName,
+ String user, Priority priority,
+ ApplicationPlacementContext placementContext) {
+ if (isSystemAppsLimitReached()) {
+ String message = "Maximum system application limit reached,"
+ + "cannot accept submission of application: " + applicationId;
+ this.rmContext.getDispatcher().getEventHandler().handle(
+ new RMAppEvent(applicationId, RMAppEventType.APP_REJECTED,
+ message));
+ return;
+ }
+
+ //Could be a potential auto-created leaf queue
+ CSQueue queue = getOrCreateQueueFromPlacementContext(applicationId, user,
+ queueName, placementContext, false);
+
+ if (queue == null) {
+ final String message =
+ "Application " + applicationId + " submitted by user " + user
+ + " to unknown queue: " + queueName;
+
+ this.rmContext.getDispatcher().getEventHandler().handle(
+ new RMAppEvent(applicationId, RMAppEventType.APP_REJECTED,
+ message));
+ return;
+ }
+
+ if (!(queue instanceof LeafQueue)) {
+ String message =
+ "Application " + applicationId + " submitted by user : " + user
+ + " to non-leaf queue : " + queueName;
+ this.rmContext.getDispatcher().getEventHandler().handle(
+ new RMAppEvent(applicationId, RMAppEventType.APP_REJECTED,
+ message));
+ return;
+ } else if (queue instanceof AutoCreatedLeafQueue && queue
+ .getParent() instanceof ManagedParentQueue) {
+
+ //If queue already exists and auto-queue creation was not required,
+ //placement context should not be null
+ if (placementContext == null) {
+ String message =
+ "Application " + applicationId + " submission by user : " + user
+ + " to specified queue : " + queueName + " is prohibited. "
+ + "Verify automatic queue mapping for user exists in " +
+ QUEUE_MAPPING;
+ this.rmContext.getDispatcher().getEventHandler().handle(
+ new RMAppEvent(applicationId, RMAppEventType.APP_REJECTED,
+ message));
+ return;
+ // For a queue that already exists and was not auto-created above,
+ // its parent queue should match the parent queue specified in the
+ // queue mapping
+ } else if (!queue.getParent().getQueueName().equals(
+ placementContext.getParentQueue())) {
+ String message =
+ "Auto created Leaf queue " + placementContext.getQueue() + " "
+ + "already exists under queue : " + queue
+ .getParent().getQueuePath()
+ + ".But Queue mapping configuration " +
+ CapacitySchedulerConfiguration.QUEUE_MAPPING + " has been "
+ + "updated to a different parent queue : "
+ + placementContext.getParentQueue()
+ + " for the specified user : " + user;
+ this.rmContext.getDispatcher().getEventHandler().handle(
+ new RMAppEvent(applicationId, RMAppEventType.APP_REJECTED,
+ message));
+ return;
+ }
+ }
+
+ // Submit to the queue
+ try {
+ queue.submitApplication(applicationId, user, queueName);
+ } catch (AccessControlException ace) {
+ LOG.info("Failed to submit application " + applicationId + " to queue "
+ + queueName + " from user " + user, ace);
+ this.rmContext.getDispatcher().getEventHandler().handle(
+ new RMAppEvent(applicationId, RMAppEventType.APP_REJECTED,
+ ace.toString()));
+ return;
+ }
+ // update the metrics
+ queue.getMetrics().submitApp(user);
+ SchedulerApplication<FiCaSchedulerApp> application =
+ new SchedulerApplication<>(queue, user, priority);
+ applications.put(applicationId, application);
+ LOG.info("Accepted application " + applicationId + " from user: " + user
+ + ", in queue: " + queueName);
+ rmContext.getDispatcher().getEventHandler().handle(
+ new RMAppEvent(applicationId, RMAppEventType.APP_ACCEPTED));
+ }
+
+ private synchronized void addApplicationAttempt(
+ ApplicationAttemptId applicationAttemptId,
+ boolean transferStateFromPreviousAttempt,
+ boolean isAttemptRecovering) {
+ SchedulerApplication<FiCaSchedulerApp> application = applications.get(
+ applicationAttemptId.getApplicationId());
+ if (application == null) {
+ LOG.warn("Application " + applicationAttemptId.getApplicationId()
+ + " cannot be found in scheduler.");
+ return;
+ }
+ CSQueue queue = (CSQueue) application.getQueue();
+
+ FiCaSchedulerApp attempt = new FiCaSchedulerApp(applicationAttemptId,
+ application.getUser(), queue, queue.getAbstractUsersManager(),
+ rmContext, application.getPriority(), isAttemptRecovering,
+ activitiesManager);
+ if (transferStateFromPreviousAttempt) {
+ attempt.transferStateFromPreviousAttempt(
+ application.getCurrentAppAttempt());
+ }
+ application.setCurrentAppAttempt(attempt);
+
+ // Update the attempt priority to the latest to avoid a race condition,
+ // i.e. the SchedulerApplicationAttempt is created with the old priority
+ // but is not yet set via SchedulerApplication#setCurrentAppAttempt.
+ // The scenario would be:
+ // 1. SchedulerApplicationAttempt is created with the old priority.
+ // 2. updateApplicationPriority() updates SchedulerApplication. Since
+ // currentAttempt is null, it just returns.
+ // 3. SchedulerApplicationAttempt is set in
+ // SchedulerApplication#setCurrentAppAttempt.
+ attempt.setPriority(application.getPriority());
+
+ queue.submitApplicationAttempt(attempt, application.getUser());
+ LOG.info("Added Application Attempt " + applicationAttemptId
+ + " to scheduler from user " + application.getUser() + " in queue "
+ + queue.getQueueName());
+ if (isAttemptRecovering) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug(applicationAttemptId
+ + " is recovering. Skipping notifying ATTEMPT_ADDED");
+ }
+ } else {
+ rmContext.getDispatcher().getEventHandler().handle(
+ new RMAppAttemptEvent(applicationAttemptId,
+ RMAppAttemptEventType.ATTEMPT_ADDED));
+ }
+ }
+
+ private synchronized void doneApplication(ApplicationId applicationId,
+ RMAppState finalState) {
+ SchedulerApplication<FiCaSchedulerApp> application = applications.get(
+ applicationId);
+ if (application == null) {
+ // The AppRemovedSchedulerEvent may be sent on recovery for completed
+ // apps, ignore it.
+ LOG.warn("Couldn't find application " + applicationId);
+ return;
+ }
+ CSQueue queue = (CSQueue) application.getQueue();
+ if (!(queue instanceof LeafQueue)) {
+ LOG.error("Cannot finish application " + "from non-leaf queue: " + queue
+ .getQueueName());
+ } else {
+ queue.finishApplication(applicationId, application.getUser());
+ }
+ application.stop(finalState);
+ applications.remove(applicationId);
+ }
+
+ private synchronized void doneApplicationAttempt(
+ ApplicationAttemptId applicationAttemptId,
+ RMAppAttemptState rmAppAttemptFinalState, boolean keepContainers) {
+ LOG.info("Application Attempt " + applicationAttemptId + " is done."
+ + " finalState=" + rmAppAttemptFinalState);
+
+ FiCaSchedulerApp attempt = getApplicationAttempt(applicationAttemptId);
+ SchedulerApplication<FiCaSchedulerApp> application = applications.get(
+ applicationAttemptId.getApplicationId());
+
+ if (application == null || attempt == null) {
+ LOG.info(
+ "Unknown application " + applicationAttemptId + " has completed!");
+ return;
+ }
+
+ // Release all the allocated, acquired, running containers
+ for (RMContainer rmContainer : attempt.getLiveContainers()) {
+ if (keepContainers && rmContainer.getState().equals(
+ RMContainerState.RUNNING)) {
+ // do not kill the running container in the case of work-preserving AM
+ // restart.
+ LOG.info("Skip killing " + rmContainer.getContainerId());
+ continue;
+ }
+ super.completedContainer(rmContainer, SchedulerUtils
+ .createAbnormalContainerStatus(rmContainer.getContainerId(),
+ SchedulerUtils.COMPLETED_APPLICATION),
+ RMContainerEventType.KILL);
+ }
+
+ // Release all reserved containers
+ for (RMContainer rmContainer : attempt.getReservedContainers()) {
+ super.completedContainer(rmContainer, SchedulerUtils
+ .createAbnormalContainerStatus(rmContainer.getContainerId(),
+ "Application Complete"), RMContainerEventType.KILL);
+ }
+
+ // Clean up pending requests, metrics etc.
+ attempt.stop(rmAppAttemptFinalState);
+
+ // Inform the queue
+ String queueName = attempt.getQueue().getQueueName();
+ CSQueue queue = this.getQueue(queueName);
+ if (!(queue instanceof LeafQueue)) {
+ LOG.error(
+ "Cannot finish application " + "from non-leaf queue: " + queueName);
+ } else {
+ queue.finishApplicationAttempt(attempt, queue.getQueueName());
+ }
+ }
+
+ /**
+ * Normalize a list of SchedulingRequest.
+ *
+ * @param asks the scheduling requests to normalize
+ */
+ private void normalizeSchedulingRequests(List<SchedulingRequest> asks) {
+ if (asks == null) {
+ return;
+ }
+ for (SchedulingRequest ask : asks) {
+ ResourceSizing sizing = ask.getResourceSizing();
+ if (sizing != null && sizing.getResources() != null) {
+ sizing.setResources(getNormalizedResource(sizing.getResources()));
+ }
+ }
+ }
+
+ @Override
+ @Lock(Lock.NoLock.class)
+ public Allocation allocate(ApplicationAttemptId applicationAttemptId,
+ List<ResourceRequest> ask, List<SchedulingRequest> schedulingRequests,
+ List<ContainerId> release, List<String> blacklistAdditions,
+ List<String> blacklistRemovals, ContainerUpdates updateRequests) {
+ FiCaSchedulerApp application = getApplicationAttempt(applicationAttemptId);
+ if (application == null) {
+ LOG.error("Calling allocate on removed or non existent application " +
+ applicationAttemptId.getApplicationId());
+ return EMPTY_ALLOCATION;
+ }
+
+ // The allocate call may be a leftover from a previous attempt, and it
+ // can impact the current attempt, e.g. by confusing the requests and
+ // allocations of the current attempt's AM container.
+ // Note that the outside precondition check for the attempt id may be
+ // outdated here, so double-checking it here is necessary.
+ if (!application.getApplicationAttemptId().equals(applicationAttemptId)) {
+ LOG.error("Calling allocate on previous or removed " +
+ "or non existent application attempt " + applicationAttemptId);
+ return EMPTY_ALLOCATION;
+ }
+
+ // Handle all container updates
+ handleContainerUpdates(application, updateRequests);
+
+ // Release containers
+ releaseContainers(release, application);
+
+ LeafQueue updateDemandForQueue = null;
+
+ // Sanity check for new allocation requests
+ normalizeResourceRequests(ask);
+
+ // Normalize scheduling requests
+ normalizeSchedulingRequests(schedulingRequests);
+
+ Allocation allocation;
+
+ // make sure we aren't stopping/removing the application
+ // when the allocate comes in
+ synchronized (application) {
+ if (application.isStopped()) {
+ return EMPTY_ALLOCATION;
+ }
+
+ // Process resource requests
+ if (!ask.isEmpty() || (schedulingRequests != null && !schedulingRequests
+ .isEmpty())) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug(
+ "allocate: pre-update " + applicationAttemptId + " ask size ="
+ + ask.size());
+ application.showRequests();
+ }
+
+ // Update application requests
+ if (application.updateResourceRequests(ask) || application
+ .updateSchedulingRequests(schedulingRequests)) {
+ updateDemandForQueue = (LeafQueue) application.getQueue();
+ }
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("allocate: post-update");
+ application.showRequests();
+ }
+ }
+
+ application.updateBlacklist(blacklistAdditions, blacklistRemovals);
+
+ allocation = application.getAllocation(getResourceCalculator(),
+ getClusterResource(), getMinimumResourceCapability());
+ }
+
+ if (updateDemandForQueue != null && !application
+ .isWaitingForAMContainer()) {
+ updateDemandForQueue.getOrderingPolicy().demandUpdated(application);
+ }
+
+ if (LOG.isDebugEnabled()) {
+ LOG.info("Allocation for application " + applicationAttemptId + " : "
+ + allocation + " with cluster resource : " + getClusterResource());
+ }
+ return allocation;
+ }
+
+ @Override
+ @Lock(Lock.NoLock.class)
+ public QueueInfo getQueueInfo(String queueName,
+ boolean includeChildQueues, boolean recursive)
+ throws IOException {
+ CSQueue queue = this.getQueue(queueName);
+ if (queue == null) {
+ throw new IOException("Unknown queue: " + queueName);
+ }
+ return queue.getQueueInfo(includeChildQueues, recursive);
+ }
+
+ @Override
+ @Lock(Lock.NoLock.class)
+ public List<QueueUserACLInfo> getQueueUserAclInfo() {
+ UserGroupInformation user = null;
+ try {
+ user = UserGroupInformation.getCurrentUser();
+ } catch (IOException ioe) {
+ // should never happen
+ return new ArrayList<QueueUserACLInfo>();
+ }
+
+ return getRootQueue().getQueueUserAclInfo(user);
+ }
+
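+ // nodeUpdate() is synchronized here, so with async scheduling disabled
+ // all node heartbeats serialize on the scheduler monitor.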
+ @Override
+ protected synchronized void nodeUpdate(RMNode rmNode) {
+ long begin = System.nanoTime();
+ setLastNodeUpdateTime(Time.now());
+ super.nodeUpdate(rmNode);
+
+ // Try to do scheduling
+ if (!scheduleAsynchronously) {
+ ActivitiesLogger.NODE.startNodeUpdateRecording(activitiesManager,
+ rmNode.getNodeID());
+
+ // reset allocation and reservation stats before we start doing any
+ // work
+ updateSchedulerHealth(lastNodeUpdateTime, rmNode.getNodeID(),
+ CSAssignment.NULL_ASSIGNMENT);
+
+ allocateContainersToNode(rmNode.getNodeID(), true);
+ ActivitiesLogger.NODE.finishNodeUpdateRecording(activitiesManager,
+ rmNode.getNodeID());
+ }
+
+ long latency = System.nanoTime() - begin;
+ CapacitySchedulerMetrics.getMetrics().addNodeUpdate(latency);
+ }
+
+ /**
+ * Process resource update on a node.
+ */
+ private synchronized void updateNodeAndQueueResource(RMNode nm,
+ ResourceOption resourceOption) {
+ updateNodeResource(nm, resourceOption);
+ Resource clusterResource = getClusterResource();
+ getRootQueue().updateClusterResource(clusterResource,
+ new ResourceLimits(clusterResource));
+ }
+
+ /**
+ * Process node labels update on a node.
+ */
+ private void updateLabelsOnNode(NodeId nodeId,
+ Set<String> newLabels) {
+ FiCaSchedulerNode node = nodeTracker.getNode(nodeId);
+ if (null == node) {
+ return;
+ }
+
+ // Get new partition, we have only one partition per node
+ String newPartition;
+ if (newLabels.isEmpty()) {
+ newPartition = RMNodeLabelsManager.NO_LABEL;
+ } else {
+ newPartition = newLabels.iterator().next();
+ }
+
+ // old partition as well
+ String oldPartition = node.getPartition();
+
+ // Update resources of these containers
+ for (RMContainer rmContainer : node.getCopiedListOfRunningContainers()) {
+ FiCaSchedulerApp application = getApplicationAttempt(
+ rmContainer.getApplicationAttemptId());
+ if (null != application) {
+ application.nodePartitionUpdated(rmContainer, oldPartition,
+ newPartition);
+ } else {
+ LOG.warn("There's something wrong, some RMContainers running on"
+ + " a node, but we cannot find SchedulerApplicationAttempt "
+ + "for it. Node=" + node.getNodeID() + " applicationAttemptId="
+ + rmContainer.getApplicationAttemptId());
+ continue;
+ }
+ }
+
+ // Unreserve container on this node
+ RMContainer reservedContainer = node.getReservedContainer();
+ if (null != reservedContainer) {
+ killReservedContainer(reservedContainer);
+ }
+
+ // Update node labels after we've done this
+ node.updateLabels(newLabels);
+ }
+
+ private void updateSchedulerHealth(long now, NodeId nodeId,
+ CSAssignment assignment) {
+ List<AssignmentInformation.AssignmentDetails> allocations =
+ assignment.getAssignmentInformation().getAllocationDetails();
+ List<AssignmentInformation.AssignmentDetails> reservations =
+ assignment.getAssignmentInformation().getReservationDetails();
+ if (!allocations.isEmpty()) {
+ ContainerId allocatedContainerId =
+ allocations.get(allocations.size() - 1).containerId;
+ String allocatedQueue = allocations.get(allocations.size() - 1).queue;
+ schedulerHealth.updateAllocation(now, nodeId, allocatedContainerId,
+ allocatedQueue);
+ }
+ if (!reservations.isEmpty()) {
+ ContainerId reservedContainerId =
+ reservations.get(reservations.size() - 1).containerId;
+ String reservedQueue = reservations.get(reservations.size() - 1).queue;
+ schedulerHealth.updateReservation(now, nodeId, reservedContainerId,
+ reservedQueue);
+ }
+ schedulerHealth.updateSchedulerReservationCounts(assignment
+ .getAssignmentInformation().getNumReservations());
+ schedulerHealth.updateSchedulerAllocationCounts(assignment
+ .getAssignmentInformation().getNumAllocations());
+ schedulerHealth.updateSchedulerRunDetails(now, assignment
+ .getAssignmentInformation().getAllocated(), assignment
+ .getAssignmentInformation().getReserved());
+ }
+
+ private boolean canAllocateMore(CSAssignment assignment, int offswitchCount,
+ int assignedContainers) {
+ // Current assignment shouldn't be empty
+ if (assignment == null
+ || Resources.equals(assignment.getResource(), Resources.none())) {
+ return false;
+ }
+
+ // offswitch assignment should be under threshold
+ if (offswitchCount >= offswitchPerHeartbeatLimit) {
+ return false;
+ }
+
+ // And it should not be a reserved container
+ if (assignment.getAssignmentInformation().getNumReservations() > 0) {
+ return false;
+ }
+
+ // assignMultipleEnabled should be ON,
+ // and assignedContainers should be under threshold
+ return assignMultipleEnabled
+ && (maxAssignPerHeartbeat == -1
+ || assignedContainers < maxAssignPerHeartbeat);
+ }
+
+ /**
+ * We need to make sure the node exists before doing allocation, and we
+ * construct a {@link CandidateNodeSet} before proceeding.
+ */
+ private void allocateContainersToNode(NodeId nodeId,
+ boolean withNodeHeartbeat) {
+ FiCaSchedulerNode node = getNode(nodeId);
+ if (null != node) {
+ int offswitchCount = 0;
+ int assignedContainers = 0;
+
+ CandidateNodeSet<FiCaSchedulerNode> candidates =
+ new SimpleCandidateNodeSet<>(node);
+ CSAssignment assignment = allocateContainersToNode(candidates,
+ withNodeHeartbeat);
+ // Only check if we can allocate more container on the same node when
+ // scheduling is triggered by node heartbeat
+ if (null != assignment && withNodeHeartbeat) {
+ if (assignment.getType() == NodeType.OFF_SWITCH) {
+ offswitchCount++;
+ }
+
+ if (Resources.greaterThan(calculator, getClusterResource(),
+ assignment.getResource(), Resources.none())) {
+ assignedContainers++;
+ }
+
+ while (canAllocateMore(assignment, offswitchCount,
+ assignedContainers)) {
+ // Try to see if it is possible to allocate multiple container for
+ // the same node heartbeat
+ assignment = allocateContainersToNode(candidates, true);
+
+ if (null != assignment
+ && assignment.getType() == NodeType.OFF_SWITCH) {
+ offswitchCount++;
+ }
+
+ if (null != assignment
+ && Resources.greaterThan(calculator, getClusterResource(),
+ assignment.getResource(), Resources.none())) {
+ assignedContainers++;
+ }
+ }
+
+ if (offswitchCount >= offswitchPerHeartbeatLimit) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Assigned maximum number of off-switch containers: "
+ + offswitchCount + ", assignments so far: " + assignment);
+ }
+ }
+ }
+ }
+ }
+
+ /*
+ * Logic for allocating containers on a single node (old behavior).
+ */
+ private CSAssignment allocateContainerOnSingleNode(
+ CandidateNodeSet<FiCaSchedulerNode> candidates, FiCaSchedulerNode node,
+ boolean withNodeHeartbeat) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug(
+ "Trying to schedule on node: " + node.getNodeName() + ", available: "
+ + node.getUnallocatedResource());
+ }
+
+ // Backward compatible way to make sure previous behavior which allocation
+ // driven by node heartbeat works.
+ if (getNode(node.getNodeID()) != node) {
+ LOG.error("Trying to schedule on a removed node, please double check, "
+ + "nodeId=" + node.getNodeID());
+ return null;
+ }
+
+ CSAssignment assignment;
+
+ // Assign new containers...
+ // 1. Check for reserved applications
+ // 2. Schedule if there are no reservations
+ RMContainer reservedContainer = node.getReservedContainer();
+ if (reservedContainer != null) {
+ FiCaSchedulerApp reservedApplication = getCurrentAttemptForContainer(
+ reservedContainer.getContainerId());
+ if (reservedApplication == null) {
+ LOG.error(
+ "Trying to schedule for a finished app, please double check. nodeId="
+ + node.getNodeID() + " container=" + reservedContainer
+ .getContainerId());
+ return null;
+ }
+
+ // Try to fulfill the reservation
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Trying to fulfill reservation for application "
+ + reservedApplication.getApplicationId() + " on node: " + node
+ .getNodeID());
+ }
+
+ LeafQueue queue = ((LeafQueue) reservedApplication.getQueue());
+ assignment = queue.assignContainers(getClusterResource(), candidates,
+        // TODO: we currently only consider parent limits for non-labeled
+        // resources; labeled resources should be considered as well.
+ new ResourceLimits(labelManager
+ .getResourceByLabel(RMNodeLabelsManager.NO_LABEL,
+ getClusterResource())),
+ SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY);
+
+ if (assignment.isFulfilledReservation()) {
+ if (withNodeHeartbeat) {
+ // Only update SchedulerHealth in sync scheduling, existing
+ // Data structure of SchedulerHealth need to be updated for
+ // Async mode
+ updateSchedulerHealth(lastNodeUpdateTime, node.getNodeID(),
+ assignment);
+ }
+
+ schedulerHealth.updateSchedulerFulfilledReservationCounts(1);
+
+ ActivitiesLogger.QUEUE.recordQueueActivity(activitiesManager, node,
+ queue.getParent().getQueueName(), queue.getQueueName(),
+ ActivityState.ACCEPTED, ActivityDiagnosticConstant.EMPTY);
+ ActivitiesLogger.NODE.finishAllocatedNodeAllocation(activitiesManager,
+ node, reservedContainer.getContainerId(),
+ AllocationState.ALLOCATED_FROM_RESERVED);
+ } else {
+ ActivitiesLogger.QUEUE.recordQueueActivity(activitiesManager, node,
+ queue.getParent().getQueueName(), queue.getQueueName(),
+ ActivityState.ACCEPTED, ActivityDiagnosticConstant.EMPTY);
+ ActivitiesLogger.NODE.finishAllocatedNodeAllocation(activitiesManager,
+ node, reservedContainer.getContainerId(), AllocationState.SKIPPED);
+ }
+
+ assignment.setSchedulingMode(
+ SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY);
+ submitResourceCommitRequest(getClusterResource(), assignment);
+ }
+
+ // Do not schedule if there are any reservations to fulfill on the node
+ if (node.getReservedContainer() != null) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Skipping scheduling since node " + node.getNodeID()
+ + " is reserved by application " + node.getReservedContainer()
+ .getContainerId().getApplicationAttemptId());
+ }
+ return null;
+ }
+
+    // First check whether we can schedule. When looking at one node only,
+    // try to schedule if the node has any available or killable resources
+ if (calculator.computeAvailableContainers(Resources
+ .add(node.getUnallocatedResource(), node.getTotalKillableResources()),
+ minimumAllocation) <= 0) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("This node or this node partition doesn't have available or"
+ + "killable resource");
+ }
+ return null;
+ }
+
+ return allocateOrReserveNewContainers(candidates, withNodeHeartbeat);
+ }
+
+ private CSAssignment allocateOrReserveNewContainers(
+      CandidateNodeSet<FiCaSchedulerNode> candidates,
+ boolean withNodeHeartbeat) {
+ CSAssignment assignment = getRootQueue().assignContainers(
+ getClusterResource(), candidates, new ResourceLimits(labelManager
+ .getResourceByLabel(candidates.getPartition(),
+ getClusterResource())),
+ SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY);
+
+ assignment.setSchedulingMode(SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY);
+ submitResourceCommitRequest(getClusterResource(), assignment);
+
+ if (Resources.greaterThan(calculator, getClusterResource(),
+ assignment.getResource(), Resources.none())) {
+ if (withNodeHeartbeat) {
+ updateSchedulerHealth(lastNodeUpdateTime,
+ CandidateNodeSetUtils.getSingleNode(candidates).getNodeID(),
+ assignment);
+ }
+ return assignment;
+ }
+
+ // Only do non-exclusive allocation when node has node-labels.
+ if (StringUtils.equals(candidates.getPartition(),
+ RMNodeLabelsManager.NO_LABEL)) {
+ return null;
+ }
+
+ // Only do non-exclusive allocation when the node-label supports that
+ try {
+ if (rmContext.getNodeLabelManager().isExclusiveNodeLabel(
+ candidates.getPartition())) {
+ return null;
+ }
+ } catch (IOException e) {
+ LOG.warn(
+ "Exception when trying to get exclusivity of node label=" + candidates
+ .getPartition(), e);
+ return null;
+ }
+
+ // Try to use NON_EXCLUSIVE
+ assignment = getRootQueue().assignContainers(getClusterResource(),
+ candidates,
+      // TODO: we currently only consider parent limits for non-labeled
+      // resources; labeled resources should be considered as well.
+ new ResourceLimits(labelManager
+ .getResourceByLabel(RMNodeLabelsManager.NO_LABEL,
+ getClusterResource())),
+ SchedulingMode.IGNORE_PARTITION_EXCLUSIVITY);
+ assignment.setSchedulingMode(SchedulingMode.IGNORE_PARTITION_EXCLUSIVITY);
+ submitResourceCommitRequest(getClusterResource(), assignment);
+
+ return assignment;
+ }
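To make the two-pass flow concrete: the first pass respects partition exclusivity, and only when the candidate partition is labeled and marked non-exclusive does a second, exclusivity-ignoring pass run. A hedged sketch of that control flow (the Pass enum and tryAssign hook are illustrative, not YARN API):

    // Illustrative control flow of the two scheduling passes; not YARN API.
    final class TwoPassScheduler {
      enum Pass { RESPECT_EXCLUSIVITY, IGNORE_EXCLUSIVITY }
      static final String NO_LABEL = "";

      /** Returns the pass that produced an allocation, or null if none did. */
      Pass allocate(String partition, boolean partitionExclusive,
          java.util.function.Function<Pass, Boolean> tryAssign) {
        if (tryAssign.apply(Pass.RESPECT_EXCLUSIVITY)) {
          return Pass.RESPECT_EXCLUSIVITY; // first pass succeeded
        }
        if (NO_LABEL.equals(partition) || partitionExclusive) {
          return null; // second pass only runs for non-exclusive labeled partitions
        }
        return tryAssign.apply(Pass.IGNORE_EXCLUSIVITY)
            ? Pass.IGNORE_EXCLUSIVITY : null;
      }
    }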
+
+ /*
+   * New behavior: allocate containers considering multiple nodes
+ */
+ private CSAssignment allocateContainersOnMultiNodes(
+ CandidateNodeSet candidates) {
+    // When looking at multiple nodes, try to schedule if the partition has
+    // any available or killable resources
+ if (getRootQueue().getQueueCapacities().getUsedCapacity(
+ candidates.getPartition()) >= 1.0f
+ && preemptionManager.getKillableResource(
+ CapacitySchedulerConfiguration.ROOT, candidates.getPartition())
+ == Resources.none()) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("This node or this node partition doesn't have available or"
+ + "killable resource");
+ }
+ return null;
+ }
+
+ return allocateOrReserveNewContainers(candidates, false);
+ }
+
+ @VisibleForTesting
+ CSAssignment allocateContainersToNode(
+      CandidateNodeSet<FiCaSchedulerNode> candidates,
+ boolean withNodeHeartbeat) {
+ if (rmContext.isWorkPreservingRecoveryEnabled() && !rmContext
+ .isSchedulerReadyForAllocatingContainers()) {
+ return null;
+ }
+
+ long startTime = System.nanoTime();
+
+    // Backward-compatible check to keep the previous behavior, where
+    // allocation is driven by node heartbeats, working.
+ FiCaSchedulerNode node = CandidateNodeSetUtils.getSingleNode(candidates);
+
+ // We have two different logics to handle allocation on single node / multi
+ // nodes.
+ CSAssignment assignment;
+ if (null != node) {
+ assignment = allocateContainerOnSingleNode(candidates,
+ node, withNodeHeartbeat);
+ } else {
+ assignment = allocateContainersOnMultiNodes(candidates);
+ }
+
+ if (assignment != null && assignment.getAssignmentInformation() != null
+ && assignment.getAssignmentInformation().getNumAllocations() > 0) {
+ long allocateTime = System.nanoTime() - startTime;
+ CapacitySchedulerMetrics.getMetrics().addAllocate(allocateTime);
+ }
+ return assignment;
+ }
+
+ @Override
+ public void handle(SchedulerEvent event) {
+ switch (event.getType()) {
+ case NODE_ADDED: {
+ NodeAddedSchedulerEvent nodeAddedEvent = (NodeAddedSchedulerEvent) event;
+ addNode(nodeAddedEvent.getAddedRMNode());
+ recoverContainersOnNode(nodeAddedEvent.getContainerReports(),
+ nodeAddedEvent.getAddedRMNode());
+ }
+ break;
+ case NODE_REMOVED: {
+ NodeRemovedSchedulerEvent nodeRemovedEvent = (NodeRemovedSchedulerEvent) event;
+ removeNode(nodeRemovedEvent.getRemovedRMNode());
+ }
+ break;
+ case NODE_RESOURCE_UPDATE: {
+ NodeResourceUpdateSchedulerEvent nodeResourceUpdatedEvent =
+ (NodeResourceUpdateSchedulerEvent) event;
+ updateNodeAndQueueResource(nodeResourceUpdatedEvent.getRMNode(),
+ nodeResourceUpdatedEvent.getResourceOption());
+ }
+ break;
+ case NODE_LABELS_UPDATE: {
+ NodeLabelsUpdateSchedulerEvent labelUpdateEvent =
+ (NodeLabelsUpdateSchedulerEvent) event;
+
+ updateNodeLabelsAndQueueResource(labelUpdateEvent);
+ }
+ break;
+ case NODE_UPDATE: {
+ NodeUpdateSchedulerEvent nodeUpdatedEvent = (NodeUpdateSchedulerEvent) event;
+ nodeUpdate(nodeUpdatedEvent.getRMNode());
+ }
+ break;
+ case APP_ADDED: {
+ AppAddedSchedulerEvent appAddedEvent = (AppAddedSchedulerEvent) event;
+ String queueName = resolveReservationQueueName(appAddedEvent.getQueue(),
+ appAddedEvent.getApplicationId(), appAddedEvent.getReservationID(),
+ appAddedEvent.getIsAppRecovering());
+ if (queueName != null) {
+ if (!appAddedEvent.getIsAppRecovering()) {
+ addApplication(appAddedEvent.getApplicationId(), queueName,
+ appAddedEvent.getUser(), appAddedEvent.getApplicatonPriority(),
+ appAddedEvent.getPlacementContext());
+ } else {
+ addApplicationOnRecovery(appAddedEvent.getApplicationId(), queueName,
+ appAddedEvent.getUser(), appAddedEvent.getApplicatonPriority(),
+ appAddedEvent.getPlacementContext());
+ }
+ }
+ }
+ break;
+ case APP_REMOVED: {
+ AppRemovedSchedulerEvent appRemovedEvent = (AppRemovedSchedulerEvent) event;
+ doneApplication(appRemovedEvent.getApplicationID(),
+ appRemovedEvent.getFinalState());
+ }
+ break;
+ case APP_ATTEMPT_ADDED: {
+ AppAttemptAddedSchedulerEvent appAttemptAddedEvent =
+ (AppAttemptAddedSchedulerEvent) event;
+ addApplicationAttempt(appAttemptAddedEvent.getApplicationAttemptId(),
+ appAttemptAddedEvent.getTransferStateFromPreviousAttempt(),
+ appAttemptAddedEvent.getIsAttemptRecovering());
+ }
+ break;
+ case APP_ATTEMPT_REMOVED: {
+ AppAttemptRemovedSchedulerEvent appAttemptRemovedEvent =
+ (AppAttemptRemovedSchedulerEvent) event;
+ doneApplicationAttempt(appAttemptRemovedEvent.getApplicationAttemptID(),
+ appAttemptRemovedEvent.getFinalAttemptState(),
+ appAttemptRemovedEvent.getKeepContainersAcrossAppAttempts());
+ }
+ break;
+ case CONTAINER_EXPIRED: {
+ ContainerExpiredSchedulerEvent containerExpiredEvent =
+ (ContainerExpiredSchedulerEvent) event;
+ ContainerId containerId = containerExpiredEvent.getContainerId();
+ if (containerExpiredEvent.isIncrease()) {
+ rollbackContainerUpdate(containerId);
+ } else {
+ completedContainer(getRMContainer(containerId),
+ SchedulerUtils.createAbnormalContainerStatus(
+ containerId,
+ SchedulerUtils.EXPIRED_CONTAINER),
+ RMContainerEventType.EXPIRE);
+ }
+ }
+ break;
+ case RELEASE_CONTAINER: {
+ RMContainer container = ((ReleaseContainerEvent) event).getContainer();
+ completedContainer(container,
+ SchedulerUtils.createAbnormalContainerStatus(
+ container.getContainerId(),
+ SchedulerUtils.RELEASED_CONTAINER),
+ RMContainerEventType.RELEASED);
+ }
+ break;
+ case KILL_RESERVED_CONTAINER: {
+ ContainerPreemptEvent killReservedContainerEvent =
+ (ContainerPreemptEvent) event;
+ RMContainer container = killReservedContainerEvent.getContainer();
+ killReservedContainer(container);
+ }
+ break;
+ case MARK_CONTAINER_FOR_PREEMPTION: {
+ ContainerPreemptEvent preemptContainerEvent =
+ (ContainerPreemptEvent) event;
+ ApplicationAttemptId aid = preemptContainerEvent.getAppId();
+ RMContainer containerToBePreempted = preemptContainerEvent.getContainer();
+ markContainerForPreemption(aid, containerToBePreempted);
+ }
+ break;
+ case MARK_CONTAINER_FOR_KILLABLE: {
+ ContainerPreemptEvent containerKillableEvent = (ContainerPreemptEvent) event;
+ RMContainer killableContainer = containerKillableEvent.getContainer();
+ markContainerForKillable(killableContainer);
+ }
+ break;
+ case MARK_CONTAINER_FOR_NONKILLABLE: {
+ if (isLazyPreemptionEnabled) {
+ ContainerPreemptEvent cancelKillContainerEvent =
+ (ContainerPreemptEvent) event;
+ markContainerForNonKillable(cancelKillContainerEvent.getContainer());
+ }
+ }
+ break;
+ case MANAGE_QUEUE: {
+ QueueManagementChangeEvent queueManagementChangeEvent =
+ (QueueManagementChangeEvent) event;
+ ParentQueue parentQueue = queueManagementChangeEvent.getParentQueue();
+ try {
+        final List<QueueManagementChange> queueManagementChanges =
+ queueManagementChangeEvent.getQueueManagementChanges();
+ ((ManagedParentQueue) parentQueue)
+ .validateAndApplyQueueManagementChanges(queueManagementChanges);
+ } catch (SchedulerDynamicEditException sde) {
+ LOG.error("Queue Management Change event cannot be applied for "
+ + "parent queue : " + parentQueue.getQueueName(), sde);
+ } catch (IOException ioe) {
+ LOG.error("Queue Management Change event cannot be applied for "
+ + "parent queue : " + parentQueue.getQueueName(), ioe);
+ }
+ }
+ break;
+ default:
+ LOG.error("Invalid eventtype " + event.getType() + ". Ignoring!");
+ }
+ }
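The handler is a flat switch over the event-type tag that downcasts each event to its concrete subclass before acting on it. A minimal sketch of the same dispatch pattern, with invented event types:

    // Minimal event-dispatch sketch mirroring the switch above; types invented.
    interface Event {
      Type getType();
      enum Type { NODE_ADDED, NODE_REMOVED }
    }

    final class NodeAddedEvent implements Event {
      final String nodeId;
      NodeAddedEvent(String nodeId) { this.nodeId = nodeId; }
      public Event.Type getType() { return Event.Type.NODE_ADDED; }
    }

    final class Dispatcher {
      void handle(Event event) {
        switch (event.getType()) {
        case NODE_ADDED: {
          // the type tag guarantees this downcast is safe
          NodeAddedEvent e = (NodeAddedEvent) event;
          System.out.println("add node " + e.nodeId);
        }
          break;
        default:
          System.err.println("Invalid event type " + event.getType() + ". Ignoring!");
        }
      }
    }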
+
+ /**
+ * Process node labels update.
+ */
+ private synchronized void updateNodeLabelsAndQueueResource(
+ NodeLabelsUpdateSchedulerEvent labelUpdateEvent) {
+    for (Entry<NodeId, Set<String>> entry : labelUpdateEvent
+ .getUpdatedNodeToLabels().entrySet()) {
+ NodeId id = entry.getKey();
+      Set<String> labels = entry.getValue();
+ updateLabelsOnNode(id, labels);
+ }
+ Resource clusterResource = getClusterResource();
+ getRootQueue().updateClusterResource(clusterResource,
+ new ResourceLimits(clusterResource));
+ }
+
+ private synchronized void addNode(RMNode nodeManager) {
+ FiCaSchedulerNode schedulerNode = new FiCaSchedulerNode(nodeManager,
+ usePortForNodeName, nodeManager.getNodeLabels());
+ nodeTracker.addNode(schedulerNode);
+
+ // update this node to node label manager
+ if (labelManager != null) {
+ labelManager.activateNode(nodeManager.getNodeID(),
+ schedulerNode.getTotalResource());
+ }
+
+ Resource clusterResource = getClusterResource();
+ getRootQueue().updateClusterResource(clusterResource,
+ new ResourceLimits(clusterResource));
+
+ LOG.info(
+ "Added node " + nodeManager.getNodeAddress() + " clusterResource: "
+ + clusterResource);
+
+ if (scheduleAsynchronously && getNumClusterNodes() == 1) {
+ for (AsyncScheduleThread t : asyncSchedulerThreads) {
+ t.beginSchedule();
+ }
+ }
+ }
+
+ private synchronized void removeNode(RMNode nodeInfo) {
+ // update this node to node label manager
+ if (labelManager != null) {
+ labelManager.deactivateNode(nodeInfo.getNodeID());
+ }
+
+ NodeId nodeId = nodeInfo.getNodeID();
+ FiCaSchedulerNode node = nodeTracker.getNode(nodeId);
+ if (node == null) {
+ LOG.error("Attempting to remove non-existent node " + nodeId);
+ return;
+ }
+
+ // Remove running containers
+    List<RMContainer> runningContainers =
+ node.getCopiedListOfRunningContainers();
+ for (RMContainer container : runningContainers) {
+ super.completedContainer(container, SchedulerUtils
+ .createAbnormalContainerStatus(container.getContainerId(),
+ SchedulerUtils.LOST_CONTAINER), RMContainerEventType.KILL);
+ }
+
+ // Remove reservations, if any
+ RMContainer reservedContainer = node.getReservedContainer();
+ if (reservedContainer != null) {
+ super.completedContainer(reservedContainer, SchedulerUtils
+ .createAbnormalContainerStatus(reservedContainer.getContainerId(),
+ SchedulerUtils.LOST_CONTAINER), RMContainerEventType.KILL);
+ }
+
+ nodeTracker.removeNode(nodeId);
+ Resource clusterResource = getClusterResource();
+ getRootQueue().updateClusterResource(clusterResource,
+ new ResourceLimits(clusterResource));
+ int numNodes = nodeTracker.nodeCount();
+
+ if (scheduleAsynchronously && numNodes == 0) {
+ for (AsyncScheduleThread t : asyncSchedulerThreads) {
+ t.suspendSchedule();
+ }
+ }
+
+ LOG.info(
+ "Removed node " + nodeInfo.getNodeAddress() + " clusterResource: "
+ + getClusterResource());
+ }
+
+ @Override
+ protected void completedContainerInternal(
+ RMContainer rmContainer, ContainerStatus containerStatus,
+ RMContainerEventType event) {
+ Container container = rmContainer.getContainer();
+ ContainerId containerId = container.getId();
+
+ // Get the application for the finished container
+ FiCaSchedulerApp application = getCurrentAttemptForContainer(
+ container.getId());
+ ApplicationId appId =
+ containerId.getApplicationAttemptId().getApplicationId();
+ if (application == null) {
+ LOG.info(
+ "Container " + container + " of" + " finished application " + appId
+ + " completed with event " + event);
+ return;
+ }
+
+ // Get the node on which the container was allocated
+ FiCaSchedulerNode node = getNode(container.getNodeId());
+ if (null == node) {
+ LOG.info("Container " + container + " of" + " removed node " + container
+ .getNodeId() + " completed with event " + event);
+ return;
+ }
+
+ // Inform the queue
+ LeafQueue queue = (LeafQueue) application.getQueue();
+ queue.completedContainer(getClusterResource(), application, node,
+ rmContainer, containerStatus, event, null, true);
+ if (ContainerExitStatus.PREEMPTED == containerStatus.getExitStatus()) {
+ updateQueuePreemptionMetrics(queue, rmContainer);
+ }
+ }
+
+ private void updateQueuePreemptionMetrics(
+ CSQueue queue, RMContainer rmc) {
+ QueueMetrics qMetrics = queue.getMetrics();
+ long usedMillis = rmc.getFinishTime() - rmc.getCreationTime();
+ Resource containerResource = rmc.getAllocatedResource();
+ qMetrics.preemptContainer();
+ long mbSeconds = (containerResource.getMemorySize() * usedMillis)
+ / DateUtils.MILLIS_PER_SECOND;
+ long vcSeconds = (containerResource.getVirtualCores() * usedMillis)
+ / DateUtils.MILLIS_PER_SECOND;
+ qMetrics.updatePreemptedMemoryMBSeconds(mbSeconds);
+ qMetrics.updatePreemptedVcoreSeconds(vcSeconds);
+ }
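The update converts a container's lifetime into resource-seconds: resource units multiplied by milliseconds of use, divided by 1000. For example, a 4096 MB, 2-vcore container preempted after 90 seconds charges (4096 * 90000) / 1000 = 368640 MB-seconds and (2 * 90000) / 1000 = 180 vcore-seconds. A self-contained sketch of that arithmetic (constants inlined; not the QueueMetrics API):

    // Resource-seconds arithmetic behind the preemption metrics; standalone sketch.
    final class PreemptionCharge {
      static final long MILLIS_PER_SECOND = 1000L;

      static long mbSeconds(long memoryMb, long usedMillis) {
        return (memoryMb * usedMillis) / MILLIS_PER_SECOND;
      }

      static long vcoreSeconds(long vcores, long usedMillis) {
        return (vcores * usedMillis) / MILLIS_PER_SECOND;
      }

      public static void main(String[] args) {
        long usedMillis = 90_000L; // container ran for 90 seconds
        System.out.println(mbSeconds(4096, usedMillis));  // 368640
        System.out.println(vcoreSeconds(2, usedMillis));  // 180
      }
    }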
+
+ @Lock(Lock.NoLock.class)
+ @VisibleForTesting
+ @Override
+ public FiCaSchedulerApp getApplicationAttempt(
+ ApplicationAttemptId applicationAttemptId) {
+ return super.getApplicationAttempt(applicationAttemptId);
+ }
+
+ @Lock(Lock.NoLock.class)
+ public FiCaSchedulerNode getNode(NodeId nodeId) {
+ return nodeTracker.getNode(nodeId);
+ }
+
+ @Lock(Lock.NoLock.class)
+  public List<FiCaSchedulerNode> getAllNodes() {
+ return nodeTracker.getAllNodes();
+ }
+
+ @Override
+ @Lock(Lock.NoLock.class)
+ public void recover(RMState state) throws Exception {
+ // NOT IMPLEMENTED
+ }
+
+ @Override
+ public void killReservedContainer(RMContainer container) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug(SchedulerEventType.KILL_RESERVED_CONTAINER + ":"
+ + container.toString());
+ }
+    // To think: what happens if this is no longer a reserved container,
+    // e.g. if the reservation became an allocation?
+ super.completedContainer(container,
+ SchedulerUtils.createAbnormalContainerStatus(
+ container.getContainerId(),
+ SchedulerUtils.UNRESERVED_CONTAINER),
+ RMContainerEventType.KILL);
+ }
+
+ @Override
+ public void markContainerForPreemption(ApplicationAttemptId aid,
+ RMContainer cont) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug(SchedulerEventType.MARK_CONTAINER_FOR_PREEMPTION
+ + ": appAttempt:" + aid.toString() + " container: "
+ + cont.toString());
+ }
+ FiCaSchedulerApp app = getApplicationAttempt(aid);
+ if (app != null) {
+ app.markContainerForPreemption(cont.getContainerId());
+ }
}
+ @VisibleForTesting
@Override
- public void run() {
- int debuggingLogCounter = 0;
- while (!Thread.currentThread().isInterrupted()) {
- try {
- if (!runSchedules.get()) {
- Thread.sleep(100);
- } else {
- // Don't run schedule if we have some pending backlogs already
- if (cs.getAsyncSchedulingPendingBacklogs()
- > cs.asyncMaxPendingBacklogs) {
- Thread.sleep(1);
- } else{
- schedule(cs);
- if(LOG.isDebugEnabled()) {
- // Adding a debug log here to ensure that the thread is alive
- // and running fine.
- if (debuggingLogCounter++ > 10000) {
- debuggingLogCounter = 0;
- LOG.debug("AsyncScheduleThread[" + getName() + "] is running!");
+ public void killContainer(RMContainer container) {
+ markContainerForKillable(container);
+ }
+
+ public synchronized void markContainerForKillable(
+ RMContainer killableContainer) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug(SchedulerEventType.MARK_CONTAINER_FOR_KILLABLE + ": container"
+ + killableContainer.toString());
+ }
+
+ if (!isLazyPreemptionEnabled) {
+ super.completedContainer(killableContainer, SchedulerUtils
+ .createPreemptedContainerStatus(killableContainer.getContainerId(),
+ SchedulerUtils.PREEMPTED_CONTAINER), RMContainerEventType.KILL);
+ } else {
+ FiCaSchedulerNode node = (FiCaSchedulerNode) getSchedulerNode(
+ killableContainer.getAllocatedNode());
+
+ FiCaSchedulerApp application = getCurrentAttemptForContainer(
+ killableContainer.getContainerId());
+
+ node.markContainerToKillable(killableContainer.getContainerId());
+
+      // notify PreemptionManager about the newly killable container
+ if (null != application) {
+ String leafQueueName = application.getCSLeafQueue().getQueueName();
+ getPreemptionManager().addKillableContainer(
+ new KillableContainer(killableContainer, node.getPartition(),
+ leafQueueName));
}
- }
}
- }
- } catch (InterruptedException ie) {
- // keep interrupt signal
- Thread.currentThread().interrupt();
+ }
+
+ private synchronized void markContainerForNonKillable(
+ RMContainer nonKillableContainer) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug(
+ SchedulerEventType.MARK_CONTAINER_FOR_NONKILLABLE + ": container"
+ + nonKillableContainer.toString());
+ }
+
+ FiCaSchedulerNode node = (FiCaSchedulerNode) getSchedulerNode(
+ nonKillableContainer.getAllocatedNode());
+
+ FiCaSchedulerApp application = getCurrentAttemptForContainer(
+ nonKillableContainer.getContainerId());
+
+ node.markContainerToNonKillable(nonKillableContainer.getContainerId());
+
+    // notify PreemptionManager that the container is no longer killable
+ if (null != application) {
+ String leafQueueName = application.getCSLeafQueue().getQueueName();
+ getPreemptionManager().removeKillableContainer(
+ new KillableContainer(nonKillableContainer, node.getPartition(),
+ leafQueueName));
+ }
+ }
+
+ @Override
+ public boolean checkAccess(UserGroupInformation callerUGI,
+ QueueACL acl, String queueName) {
+ CSQueue queue = getQueue(queueName);
+ if (queue == null) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("ACL not found for queue access-type " + acl + " for queue "
+ + queueName);
+ }
+ return false;
+ }
+ return queue.hasAccess(acl, callerUGI);
+ }
+
+ @Override
+  public List<ApplicationAttemptId> getAppsInQueue(String queueName) {
+ CSQueue queue = getQueue(queueName);
+ if (queue == null) {
+ return null;
+ }
+    List<ApplicationAttemptId> apps = new ArrayList<ApplicationAttemptId>();
+ queue.collectSchedulerApplications(apps);
+ return apps;
+ }
+
+ public boolean isSystemAppsLimitReached() {
+ if (getRootQueue().getNumApplications() < conf
+ .getMaximumSystemApplications()) {
+ return false;
}
- }
- LOG.info("AsyncScheduleThread[" + getName() + "] exited!");
+ return true;
+ }
+
+ private String getDefaultReservationQueueName(String planQueueName) {
+ return planQueueName + ReservationConstants.DEFAULT_QUEUE_SUFFIX;
}
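The default reservation queue is simply the plan queue's name plus a fixed suffix; in Hadoop releases of this vintage DEFAULT_QUEUE_SUFFIX is "-default", though treat that literal as an assumption here. A tiny sketch of the convention:

    // Naming convention for the plan's default queue; the literal is assumed.
    final class ReservationNames {
      static final String DEFAULT_QUEUE_SUFFIX = "-default";

      static String defaultReservationQueue(String planQueueName) {
        return planQueueName + DEFAULT_QUEUE_SUFFIX;
      }

      public static void main(String[] args) {
        System.out.println(defaultReservationQueue("teamA")); // teamA-default
      }
    }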
- public void beginSchedule() {
- runSchedules.set(true);
+ private synchronized String resolveReservationQueueName(String queueName,
+ ApplicationId applicationId, ReservationId reservationID,
+ boolean isRecovering) {
+ CSQueue queue = getQueue(queueName);
+ // Check if the queue is a plan queue
+ if ((queue == null) || !(queue instanceof PlanQueue)) {
+ return queueName;
+ }
+ if (reservationID != null) {
+ String resQName = reservationID.toString();
+ queue = getQueue(resQName);
+ if (queue == null) {
+ // reservation has terminated during failover
+ if (isRecovering && conf.getMoveOnExpiry(
+ getQueue(queueName).getQueuePath())) {
+ // move to the default child queue of the plan
+ return getDefaultReservationQueueName(queueName);
+ }
+ String message = "Application " + applicationId
+ + " submitted to a reservation which is not currently active: "
+ + resQName;
+ this.rmContext.getDispatcher().getEventHandler().handle(
+ new RMAppEvent(applicationId, RMAppEventType.APP_REJECTED,
+ message));
+ return null;
+ }
+ if (!queue.getParent().getQueueName().equals(queueName)) {
+ String message =
+ "Application: " + applicationId + " submitted to a reservation "
+ + resQName + " which does not belong to the specified queue: "
+ + queueName;
+ this.rmContext.getDispatcher().getEventHandler().handle(
+ new RMAppEvent(applicationId, RMAppEventType.APP_REJECTED,
+ message));
+ return null;
+ }
+ // use the reservation queue to run the app
+ queueName = resQName;
+ } else {
+ // use the default child queue of the plan for unreserved apps
+ queueName = getDefaultReservationQueueName(queueName);
+ }
+ return queueName;
}
- public void suspendSchedule() {
- runSchedules.set(false);
+ @Override
+ public synchronized void removeQueue(String queueName)
+ throws SchedulerDynamicEditException {
+ LOG.info("Removing queue: " + queueName);
+ CSQueue q = this.getQueue(queueName);
+ if (!(AbstractAutoCreatedLeafQueue.class.isAssignableFrom(
+ q.getClass()))) {
+ throw new SchedulerDynamicEditException(
+ "The queue that we are asked " + "to remove (" + queueName
+ + ") is not a AutoCreatedLeafQueue or ReservationQueue");
+ }
+ AbstractAutoCreatedLeafQueue disposableLeafQueue =
+ (AbstractAutoCreatedLeafQueue) q;
+ // at this point we should have no more apps
+ if (disposableLeafQueue.getNumApplications() > 0) {
+ throw new SchedulerDynamicEditException(
+ "The queue " + queueName + " is not empty " + disposableLeafQueue
+ .getApplications().size() + " active apps "
+ + disposableLeafQueue.getPendingApplications().size()
+ + " pending apps");
+ }
+
+ ((AbstractManagedParentQueue) disposableLeafQueue.getParent())
+ .removeChildQueue(q);
+ this.queueManager.removeQueue(queueName);
+ LOG.info(
+ "Removal of AutoCreatedLeafQueue " + queueName + " has succeeded");
}
- }
+ @Override
+ public synchronized void addQueue(Queue queue)
+ throws SchedulerDynamicEditException, IOException {
+ if (queue == null) {
+ throw new SchedulerDynamicEditException(
+ "Queue specified is null. Should be an implementation of "
+ + "AbstractAutoCreatedLeafQueue");
+ } else if (!(AbstractAutoCreatedLeafQueue.class
+ .isAssignableFrom(queue.getClass()))) {
+ throw new SchedulerDynamicEditException(
+ "Queue is not an implementation of "
+ + "AbstractAutoCreatedLeafQueue : " + queue.getClass());
+ }
+
+ AbstractAutoCreatedLeafQueue newQueue =
+ (AbstractAutoCreatedLeafQueue) queue;
+
+ if (newQueue.getParent() == null || !(AbstractManagedParentQueue.class.
+ isAssignableFrom(newQueue.getParent().getClass()))) {
+ throw new SchedulerDynamicEditException(
+ "ParentQueue for " + newQueue + " is not properly set"
+ + " (should be set and be a PlanQueue or ManagedParentQueue)");
+ }
- static class ResourceCommitterService extends Thread {
- private final CapacityScheduler cs;
-    private BlockingQueue<ResourceCommitRequest<FiCaSchedulerApp, FiCaSchedulerNode>>
- backlogs = new LinkedBlockingQueue<>();
+ AbstractManagedParentQueue parent =
+ (AbstractManagedParentQueue) newQueue.getParent();
+ String queuename = newQueue.getQueueName();
+ parent.addChildQueue(newQueue);
+ this.queueManager.addQueue(queuename, newQueue);
- public ResourceCommitterService(CapacityScheduler cs) {
- this.cs = cs;
- setDaemon(true);
+ LOG.info("Creation of AutoCreatedLeafQueue " + newQueue + " succeeded");
}
@Override
- public void run() {
- while (!Thread.currentThread().isInterrupted()) {
- try {
-          ResourceCommitRequest<FiCaSchedulerApp, FiCaSchedulerNode> request =
- backlogs.take();
-
- try {
- cs.writeLock.lock();
- cs.tryCommit(cs.getClusterResource(), request, true);
- } finally {
- cs.writeLock.unlock();
- }
-
- } catch (InterruptedException e) {
- LOG.error(e);
- Thread.currentThread().interrupt();
- }
- }
- LOG.info("ResourceCommitterService exited!");
- }
-
- public void addNewCommitRequest(
-        ResourceCommitRequest<FiCaSchedulerApp, FiCaSchedulerNode> proposal) {
- backlogs.add(proposal);
- }
-
- public int getPendingBacklogs() {
- return backlogs.size();
- }
- }
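The ResourceCommitterService removed above is a classic single-consumer commit thread: producers enqueue proposals, and one daemon thread drains the queue and applies each proposal under the scheduler lock. A minimal self-contained sketch of that pattern, with Request and the commit step as placeholders:

    import java.util.concurrent.BlockingQueue;
    import java.util.concurrent.LinkedBlockingQueue;

    // Single-consumer commit thread; Request and the commit step are placeholders.
    final class Committer extends Thread {
      static final class Request {
        final String payload;
        Request(String payload) { this.payload = payload; }
      }

      private final BlockingQueue<Request> backlogs = new LinkedBlockingQueue<>();

      Committer() { setDaemon(true); }

      @Override public void run() {
        while (!Thread.currentThread().isInterrupted()) {
          try {
            Request request = backlogs.take(); // blocks until a proposal arrives
            synchronized (this) {              // stand-in for the scheduler lock
              System.out.println("commit " + request.payload);
            }
          } catch (InterruptedException e) {
            Thread.currentThread().interrupt(); // keep the interrupt signal
          }
        }
      }

      void addNewCommitRequest(Request proposal) { backlogs.add(proposal); }
      int getPendingBacklogs() { return backlogs.size(); }
    }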
-
- @VisibleForTesting
- public PlacementRule getUserGroupMappingPlacementRule() throws IOException {
- try {
- readLock.lock();
- UserGroupMappingPlacementRule ugRule = new UserGroupMappingPlacementRule();
- ugRule.initialize(this);
- return ugRule;
- } finally {
- readLock.unlock();
- }
- }
-
- public PlacementRule getAppNameMappingPlacementRule() throws IOException {
- try {
- readLock.lock();
- AppNameMappingPlacementRule anRule = new AppNameMappingPlacementRule();
- anRule.initialize(this);
- return anRule;
- } finally {
- readLock.unlock();
- }
- }
-
- @VisibleForTesting
- public void updatePlacementRules() throws IOException {
- // Initialize placement rules
-    Collection<String> placementRuleStrs = conf.getStringCollection(
-        YarnConfiguration.QUEUE_PLACEMENT_RULES);
-    List<PlacementRule> placementRules = new ArrayList<>();
-    Set<String> distingushRuleSet = new HashSet<>();
- // fail the case if we get duplicate placementRule add in
- for (String pls : placementRuleStrs) {
- if (!distingushRuleSet.add(pls)) {
- throw new IOException("Invalid PlacementRule inputs which "
- + "contains duplicate rule strings");
- }
- }
-
- // add UserGroupMappingPlacementRule if absent
- distingushRuleSet.add(YarnConfiguration.USER_GROUP_PLACEMENT_RULE);
-
- placementRuleStrs = new ArrayList<>(distingushRuleSet);
-
- for (String placementRuleStr : placementRuleStrs) {
- switch (placementRuleStr) {
- case YarnConfiguration.USER_GROUP_PLACEMENT_RULE:
- PlacementRule ugRule = getUserGroupMappingPlacementRule();
- if (null != ugRule) {
- placementRules.add(ugRule);
- }
- break;
- case YarnConfiguration.APP_NAME_PLACEMENT_RULE:
- PlacementRule anRule = getAppNameMappingPlacementRule();
- if (null != anRule) {
- placementRules.add(anRule);
- }
- break;
- default:
- boolean isMappingNotEmpty;
- try {
- PlacementRule rule = PlacementFactory.getPlacementRule(
- placementRuleStr, conf);
- if (null != rule) {
+ public synchronized void setEntitlement(String inQueue, QueueEntitlement entitlement)
+ throws YarnException {
+ LeafQueue queue = this.queueManager.getAndCheckLeafQueue(inQueue);
+ AbstractManagedParentQueue parent =
+ (AbstractManagedParentQueue) queue.getParent();
+
+ if (!(AbstractAutoCreatedLeafQueue.class.isAssignableFrom(
+ queue.getClass()))) {
+ throw new SchedulerDynamicEditException(
+ "Entitlement can not be" + " modified dynamically since queue "
+ + inQueue + " is not a AutoCreatedLeafQueue");
+ }
+
+ if (parent == null || !(AbstractManagedParentQueue.class.isAssignableFrom(
+ parent.getClass()))) {
+ throw new SchedulerDynamicEditException(
+ "The parent of AutoCreatedLeafQueue " + inQueue
+ + " must be a PlanQueue/ManagedParentQueue");
+ }
+
+ AbstractAutoCreatedLeafQueue newQueue =
+ (AbstractAutoCreatedLeafQueue) queue;
+ parent.validateQueueEntitlementChange(newQueue, entitlement);
+
+ newQueue.setEntitlement(entitlement);
+
+ LOG.info("Set entitlement for AutoCreatedLeafQueue " + inQueue + " to "
+ + queue.getCapacity() + " request was (" + entitlement.getCapacity()
+ + ")");
+ }
+
+ @Override
+ public synchronized String moveApplication(ApplicationId appId,
+ String targetQueueName) throws YarnException {
+    SchedulerApplication<FiCaSchedulerApp> application =
+ applications.get(appId);
+ if (application == null) {
+ throw new YarnException("App to be moved " + appId + " not found.");
+ }
+ String sourceQueueName = application.getQueue().getQueueName();
+ LeafQueue source =
+ this.queueManager.getAndCheckLeafQueue(sourceQueueName);
+ String destQueueName = handleMoveToPlanQueue(targetQueueName);
+ LeafQueue dest = this.queueManager.getAndCheckLeafQueue(destQueueName);
+
+ String user = application.getUser();
try {
- isMappingNotEmpty = rule.initialize(this);
- } catch (IOException ie) {
- throw new IOException(ie);
- }
- if (isMappingNotEmpty) {
- placementRules.add(rule);
- }
- }
- } catch (ClassNotFoundException cnfe) {
- throw new IOException(cnfe);
- }
- }
- }
-
- rmContext.getQueuePlacementManager().updateRules(placementRules);
- }
-
- @Lock(CapacityScheduler.class)
- private void initializeQueues(CapacitySchedulerConfiguration conf)
- throws IOException {
-
- this.queueManager.initializeQueues(conf);
-
- updatePlacementRules();
-
- // Notify Preemption Manager
- preemptionManager.refreshQueues(null, this.getRootQueue());
- }
-
- @Lock(CapacityScheduler.class)
- private void reinitializeQueues(CapacitySchedulerConfiguration newConf)
- throws IOException {
- this.queueManager.reinitializeQueues(newConf);
- updatePlacementRules();
-
- // Notify Preemption Manager
- preemptionManager.refreshQueues(null, this.getRootQueue());
- }
-
- @Override
- public CSQueue getQueue(String queueName) {
- if (queueName == null) {
- return null;
- }
- return this.queueManager.getQueue(queueName);
- }
-
- private void addApplicationOnRecovery(ApplicationId applicationId,
- String queueName, String user,
- Priority priority, ApplicationPlacementContext placementContext) {
- try {
- writeLock.lock();
- //check if the queue needs to be auto-created during recovery
- CSQueue queue = getOrCreateQueueFromPlacementContext(applicationId, user,
- queueName, placementContext, true);
-
- if (queue == null) {
- //During a restart, this indicates a queue was removed, which is
- //not presently supported
- if (!getConfiguration().shouldAppFailFast(getConfig())) {
- this.rmContext.getDispatcher().getEventHandler().handle(
- new RMAppEvent(applicationId, RMAppEventType.KILL,
- "Application killed on recovery as it"
- + " was submitted to queue " + queueName
- + " which no longer exists after restart."));
- return;
- } else{
- String queueErrorMsg = "Queue named " + queueName + " missing "
- + "during application recovery."
- + " Queue removal during recovery is not presently "
- + "supported by the capacity scheduler, please "
- + "restart with all queues configured"
- + " which were present before shutdown/restart.";
- LOG.fatal(queueErrorMsg);
- throw new QueueInvalidException(queueErrorMsg);
- }
- }
- if (!(queue instanceof LeafQueue)) {
- // During RM restart, this means leaf queue was converted to a parent
- // queue, which is not supported for running apps.
- if (!getConfiguration().shouldAppFailFast(getConfig())) {
- this.rmContext.getDispatcher().getEventHandler().handle(
- new RMAppEvent(applicationId, RMAppEventType.KILL,
- "Application killed on recovery as it was "
- + "submitted to queue " + queueName
- + " which is no longer a leaf queue after restart."));
- return;
- } else{
- String queueErrorMsg = "Queue named " + queueName
- + " is no longer a leaf queue during application recovery."
- + " Changing a leaf queue to a parent queue during recovery is"
- + " not presently supported by the capacity scheduler. Please"
- + " restart with leaf queues before shutdown/restart continuing"
- + " as leaf queues.";
- LOG.fatal(queueErrorMsg);
- throw new QueueInvalidException(queueErrorMsg);
- }
- }
-      // When recovering apps in this queue while the queue is in STOPPED
-      // state, its previous state must have been DRAINING. So we
-      // automatically transition the state to DRAINING for recovery.
- if (queue.getState() == QueueState.STOPPED) {
- ((LeafQueue) queue).recoverDrainingState();
- }
- // Submit to the queue
- try {
- queue.submitApplication(applicationId, user, queueName);
- } catch (AccessControlException ace) {
- // Ignore the exception for recovered app as the app was previously
- // accepted.
- }
- queue.getMetrics().submitApp(user);
-      SchedulerApplication<FiCaSchedulerApp> application =
-          new SchedulerApplication<FiCaSchedulerApp>(queue, user, priority);
- applications.put(applicationId, application);
- LOG.info("Accepted application " + applicationId + " from user: " + user
- + ", in queue: " + queueName);
- if (LOG.isDebugEnabled()) {
- LOG.debug(
- applicationId + " is recovering. Skip notifying APP_ACCEPTED");
- }
- } finally {
- writeLock.unlock();
- }
- }
-
- private CSQueue getOrCreateQueueFromPlacementContext(ApplicationId
- applicationId, String user, String queueName,
- ApplicationPlacementContext placementContext,
- boolean isRecovery) {
-
- CSQueue queue = getQueue(queueName);
-
- if (queue == null) {
- if (placementContext != null && placementContext.hasParentQueue()) {
- try {
- return autoCreateLeafQueue(placementContext);
- } catch (YarnException | IOException e) {
- if (isRecovery) {
- if (!getConfiguration().shouldAppFailFast(getConfig())) {
- LOG.error("Could not auto-create leaf queue " + queueName +
- " due to : ", e);
- this.rmContext.getDispatcher().getEventHandler().handle(
- new RMAppEvent(applicationId, RMAppEventType.KILL,
- "Application killed on recovery"
- + " as it was submitted to queue " + queueName
- + " which could not be auto-created"));
- } else{
- String queueErrorMsg =
- "Queue named " + queueName + " could not be "
- + "auto-created during application recovery.";
- LOG.fatal(queueErrorMsg, e);
- throw new QueueInvalidException(queueErrorMsg);
- }
- } else{
- LOG.error("Could not auto-create leaf queue due to : ", e);
- final String message =
- "Application " + applicationId + " submission by user : "
- + user
- + " to queue : " + queueName + " failed : " + e
- .getMessage();
- this.rmContext.getDispatcher().getEventHandler().handle(
- new RMAppEvent(applicationId, RMAppEventType.APP_REJECTED,
- message));
- }
- }
- }
- }
- return queue;
- }
-
- private void addApplication(ApplicationId applicationId, String queueName,
- String user, Priority priority,
- ApplicationPlacementContext placementContext) {
- try {
- writeLock.lock();
- if (isSystemAppsLimitReached()) {
- String message = "Maximum system application limit reached,"
- + "cannot accept submission of application: " + applicationId;
- this.rmContext.getDispatcher().getEventHandler().handle(
- new RMAppEvent(applicationId, RMAppEventType.APP_REJECTED,
- message));
- return;
- }
-
- //Could be a potential auto-created leaf queue
- CSQueue queue = getOrCreateQueueFromPlacementContext(applicationId, user,
- queueName, placementContext, false);
-
- if (queue == null) {
- final String message =
- "Application " + applicationId + " submitted by user " + user
- + " to unknown queue: " + queueName;
-
- this.rmContext.getDispatcher().getEventHandler().handle(
- new RMAppEvent(applicationId, RMAppEventType.APP_REJECTED,
- message));
- return;
- }
-
- if (!(queue instanceof LeafQueue)) {
- String message =
- "Application " + applicationId + " submitted by user : " + user
- + " to non-leaf queue : " + queueName;
- this.rmContext.getDispatcher().getEventHandler().handle(
- new RMAppEvent(applicationId, RMAppEventType.APP_REJECTED,
- message));
- return;
- } else if (queue instanceof AutoCreatedLeafQueue && queue
- .getParent() instanceof ManagedParentQueue) {
-
- //If queue already exists and auto-queue creation was not required,
- //placement context should not be null
- if (placementContext == null) {
- String message =
- "Application " + applicationId + " submission by user : " + user
- + " to specified queue : " + queueName + " is prohibited. "
- + "Verify automatic queue mapping for user exists in " +
- QUEUE_MAPPING;
- this.rmContext.getDispatcher().getEventHandler().handle(
- new RMAppEvent(applicationId, RMAppEventType.APP_REJECTED,
- message));
- return;
- // For a queue which exists already and
- // not auto-created above, then its parent queue should match
- // the parent queue specified in queue mapping
- } else if (!queue.getParent().getQueueName().equals(
- placementContext.getParentQueue())) {
- String message =
- "Auto created Leaf queue " + placementContext.getQueue() + " "
- + "already exists under queue : " + queue
- .getParent().getQueuePath()
- + ".But Queue mapping configuration " +
- CapacitySchedulerConfiguration.QUEUE_MAPPING + " has been "
- + "updated to a different parent queue : "
- + placementContext.getParentQueue()
- + " for the specified user : " + user;
- this.rmContext.getDispatcher().getEventHandler().handle(
- new RMAppEvent(applicationId, RMAppEventType.APP_REJECTED,
- message));
- return;
- }
- }
-
- // Submit to the queue
- try {
- queue.submitApplication(applicationId, user, queueName);
- } catch (AccessControlException ace) {
- LOG.info("Failed to submit application " + applicationId + " to queue "
- + queueName + " from user " + user, ace);
- this.rmContext.getDispatcher().getEventHandler().handle(
- new RMAppEvent(applicationId, RMAppEventType.APP_REJECTED,
- ace.toString()));
- return;
- }
- // update the metrics
- queue.getMetrics().submitApp(user);
-      SchedulerApplication<FiCaSchedulerApp> application =
-          new SchedulerApplication<FiCaSchedulerApp>(queue, user, priority);
- applications.put(applicationId, application);
- LOG.info("Accepted application " + applicationId + " from user: " + user
- + ", in queue: " + queueName);
- rmContext.getDispatcher().getEventHandler().handle(
- new RMAppEvent(applicationId, RMAppEventType.APP_ACCEPTED));
- } finally {
- writeLock.unlock();
- }
- }
-
- private void addApplicationAttempt(
- ApplicationAttemptId applicationAttemptId,
- boolean transferStateFromPreviousAttempt,
- boolean isAttemptRecovering) {
- try {
- writeLock.lock();
-      SchedulerApplication<FiCaSchedulerApp> application = applications.get(
- applicationAttemptId.getApplicationId());
- if (application == null) {
- LOG.warn("Application " + applicationAttemptId.getApplicationId()
- + " cannot be found in scheduler.");
- return;
- }
- CSQueue queue = (CSQueue) application.getQueue();
-
- FiCaSchedulerApp attempt = new FiCaSchedulerApp(applicationAttemptId,
- application.getUser(), queue, queue.getAbstractUsersManager(),
- rmContext, application.getPriority(), isAttemptRecovering,
- activitiesManager);
- if (transferStateFromPreviousAttempt) {
- attempt.transferStateFromPreviousAttempt(
- application.getCurrentAppAttempt());
- }
- application.setCurrentAppAttempt(attempt);
-
-      // Update the attempt priority to the latest to avoid a race condition,
-      // i.e. SchedulerApplicationAttempt is created with the old priority but
-      // is not yet set via SchedulerApplication#setCurrentAppAttempt.
-      // The scenario would be:
-      // 1. SchedulerApplicationAttempt is created with the old priority.
-      // 2. updateApplicationPriority() updates SchedulerApplication. Since
-      // currentAttempt is null, it just returns.
-      // 3. SchedulerApplicationAttempt is set in
-      // SchedulerApplication#setCurrentAppAttempt.
- attempt.setPriority(application.getPriority());
-
- queue.submitApplicationAttempt(attempt, application.getUser());
- LOG.info("Added Application Attempt " + applicationAttemptId
- + " to scheduler from user " + application.getUser() + " in queue "
- + queue.getQueueName());
- if (isAttemptRecovering) {
- if (LOG.isDebugEnabled()) {
- LOG.debug(applicationAttemptId
- + " is recovering. Skipping notifying ATTEMPT_ADDED");
- }
- } else{
- rmContext.getDispatcher().getEventHandler().handle(
- new RMAppAttemptEvent(applicationAttemptId,
- RMAppAttemptEventType.ATTEMPT_ADDED));
- }
- } finally {
- writeLock.unlock();
- }
- }
-
- private void doneApplication(ApplicationId applicationId,
- RMAppState finalState) {
- try {
- writeLock.lock();
-      SchedulerApplication<FiCaSchedulerApp> application = applications.get(
- applicationId);
- if (application == null) {
- // The AppRemovedSchedulerEvent maybe sent on recovery for completed
- // apps, ignore it.
- LOG.warn("Couldn't find application " + applicationId);
- return;
- }
- CSQueue queue = (CSQueue) application.getQueue();
- if (!(queue instanceof LeafQueue)) {
- LOG.error("Cannot finish application " + "from non-leaf queue: " + queue
- .getQueueName());
- } else{
- queue.finishApplication(applicationId, application.getUser());
- }
- application.stop(finalState);
- applications.remove(applicationId);
- } finally {
- writeLock.unlock();
- }
- }
-
- private void doneApplicationAttempt(
- ApplicationAttemptId applicationAttemptId,
- RMAppAttemptState rmAppAttemptFinalState, boolean keepContainers) {
- try {
- writeLock.lock();
- LOG.info("Application Attempt " + applicationAttemptId + " is done."
- + " finalState=" + rmAppAttemptFinalState);
-
- FiCaSchedulerApp attempt = getApplicationAttempt(applicationAttemptId);
-      SchedulerApplication<FiCaSchedulerApp> application = applications.get(
- applicationAttemptId.getApplicationId());
-
- if (application == null || attempt == null) {
- LOG.info(
- "Unknown application " + applicationAttemptId + " has completed!");
- return;
- }
-
- // Release all the allocated, acquired, running containers
- for (RMContainer rmContainer : attempt.getLiveContainers()) {
- if (keepContainers && rmContainer.getState().equals(
- RMContainerState.RUNNING)) {
- // do not kill the running container in the case of work-preserving AM
- // restart.
- LOG.info("Skip killing " + rmContainer.getContainerId());
- continue;
- }
- super.completedContainer(rmContainer, SchedulerUtils
- .createAbnormalContainerStatus(rmContainer.getContainerId(),
- SchedulerUtils.COMPLETED_APPLICATION),
- RMContainerEventType.KILL);
- }
-
- // Release all reserved containers
- for (RMContainer rmContainer : attempt.getReservedContainers()) {
- super.completedContainer(rmContainer, SchedulerUtils
- .createAbnormalContainerStatus(rmContainer.getContainerId(),
- "Application Complete"), RMContainerEventType.KILL);
- }
-
- // Clean up pending requests, metrics etc.
- attempt.stop(rmAppAttemptFinalState);
-
- // Inform the queue
- String queueName = attempt.getQueue().getQueueName();
- CSQueue queue = this.getQueue(queueName);
- if (!(queue instanceof LeafQueue)) {
- LOG.error(
- "Cannot finish application " + "from non-leaf queue: " + queueName);
- } else{
- queue.finishApplicationAttempt(attempt, queue.getQueueName());
- }
- } finally {
- writeLock.unlock();
- }
- }
-
- /**
- * Normalize a list of SchedulingRequest.
- *
- * @param asks scheduling request
- */
-  private void normalizeSchedulingRequests(List<SchedulingRequest> asks) {
- if (asks == null) {
- return;
- }
- for (SchedulingRequest ask: asks) {
- ResourceSizing sizing = ask.getResourceSizing();
- if (sizing != null && sizing.getResources() != null) {
- sizing.setResources(getNormalizedResource(sizing.getResources()));
- }
- }
- }
-
- @Override
- @Lock(Lock.NoLock.class)
- public Allocation allocate(ApplicationAttemptId applicationAttemptId,
-      List<ResourceRequest> ask, List<SchedulingRequest> schedulingRequests,
-      List<ContainerId> release, List<String> blacklistAdditions,
-      List<String> blacklistRemovals, ContainerUpdates updateRequests) {
- FiCaSchedulerApp application = getApplicationAttempt(applicationAttemptId);
- if (application == null) {
- LOG.error("Calling allocate on removed or non existent application " +
- applicationAttemptId.getApplicationId());
- return EMPTY_ALLOCATION;
- }
-
-    // The allocate call may be a leftover from the previous attempt, and it
-    // will impact the current attempt, e.g. it can confuse the request and
-    // allocation for the current attempt's AM container.
-    // Note the outside precondition check for the attempt id may be
-    // outdated here, so double-checking it here is necessary.
- if (!application.getApplicationAttemptId().equals(applicationAttemptId)) {
- LOG.error("Calling allocate on previous or removed " +
- "or non existent application attempt " + applicationAttemptId);
- return EMPTY_ALLOCATION;
- }
-
- // Handle all container updates
- handleContainerUpdates(application, updateRequests);
-
- // Release containers
- releaseContainers(release, application);
-
- LeafQueue updateDemandForQueue = null;
-
- // Sanity check for new allocation requests
- normalizeResourceRequests(ask);
-
- // Normalize scheduling requests
- normalizeSchedulingRequests(schedulingRequests);
-
- Allocation allocation;
-
- // make sure we aren't stopping/removing the application
- // when the allocate comes in
- try {
- application.getWriteLock().lock();
- if (application.isStopped()) {
- return EMPTY_ALLOCATION;
- }
-
- // Process resource requests
- if (!ask.isEmpty() || (schedulingRequests != null && !schedulingRequests
- .isEmpty())) {
- if (LOG.isDebugEnabled()) {
- LOG.debug(
- "allocate: pre-update " + applicationAttemptId + " ask size ="
- + ask.size());
- application.showRequests();
+ dest.submitApplication(appId, user, destQueueName);
+ } catch (AccessControlException e) {
+ throw new YarnException(e);
+ }
+
+ FiCaSchedulerApp app = application.getCurrentAppAttempt();
+ if (app != null) {
+      // Move all live containers even when the app is stopped;
+      // this is required for transferStateFromPreviousAttempt
+ for (RMContainer rmContainer : app.getLiveContainers()) {
+ source.detachContainer(getClusterResource(), app, rmContainer);
+ // attach the Container to another queue
+ dest.attachContainer(getClusterResource(), app, rmContainer);
+ }
+ if (!app.isStopped()) {
+ source.finishApplicationAttempt(app, sourceQueueName);
+ // Submit to a new queue
+ dest.submitApplicationAttempt(app, user);
+ }
+ // Finish app & update metrics
+ app.move(dest);
+ }
+ source.appFinished();
+    // Detach the application.
+ source.getParent().finishApplication(appId, user);
+ application.setQueue(dest);
+ LOG.info("App: " + appId + " successfully moved from " + sourceQueueName
+ + " to: " + destQueueName);
+ return targetQueueName;
+ }
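The move follows a fixed order: admit the app at the destination first, re-home every live container, then close out the attempt at the source and re-submit it at the destination. A condensed sketch of that sequence with stand-in queue and app types:

    // Ordered steps of a queue-to-queue app move; all types are stand-ins.
    final class AppMover {
      interface Queue {
        void submitApplication(String appId, String user);
        void detachContainer(String containerId);
        void attachContainer(String containerId);
        void finishApplicationAttempt(String appId);
        void submitApplicationAttempt(String appId, String user);
      }

      static void move(String appId, String user, Iterable<String> liveContainers,
          boolean stopped, Queue source, Queue dest) {
        dest.submitApplication(appId, user);  // admission checks run first
        for (String c : liveContainers) {     // re-home every live container
          source.detachContainer(c);
          dest.attachContainer(c);
        }
        if (!stopped) {                       // running apps move their attempt too
          source.finishApplicationAttempt(appId);
          dest.submitApplicationAttempt(appId, user);
        }
      }
    }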
+
+ @Override
+ public synchronized void preValidateMoveApplication(ApplicationId appId,
+ String newQueue) throws YarnException {
+    SchedulerApplication<FiCaSchedulerApp> application =
+ applications.get(appId);
+ if (application == null) {
+ throw new YarnException("App to be moved " + appId + " not found.");
+ }
+ String sourceQueueName = application.getQueue().getQueueName();
+ this.queueManager.getAndCheckLeafQueue(sourceQueueName);
+ String destQueueName = handleMoveToPlanQueue(newQueue);
+ LeafQueue dest = this.queueManager.getAndCheckLeafQueue(destQueueName);
+ // Validation check - ACLs, submission limits for user & queue
+ String user = application.getUser();
+ // Check active partition only when attempt is available
+ FiCaSchedulerApp appAttempt =
+ getApplicationAttempt(ApplicationAttemptId.newInstance(appId, 0));
+ if (null != appAttempt) {
+ checkQueuePartition(appAttempt, dest);
+ }
+ try {
+ dest.validateSubmitApplication(appId, user, destQueueName);
+ } catch (AccessControlException e) {
+ throw new YarnException(e);
+ }
+ }
+
+  /**
+   * Check whether an application can be moved to a queue with node labels
+   * enabled. All labels used during the application's lifetime are checked.
+   *
+   * @param app the application being moved
+   * @param dest the destination leaf queue
+   * @throws YarnException if any requested label is not accessible
+   */
+ private void checkQueuePartition(FiCaSchedulerApp app, LeafQueue dest)
+ throws YarnException {
+ if (!YarnConfiguration.areNodeLabelsEnabled(conf)) {
+ return;
+ }
+    Set<String> targetqueuelabels = dest.getAccessibleNodeLabels();
+ AppSchedulingInfo schedulingInfo = app.getAppSchedulingInfo();
+    Set<String> appLabelexpressions = schedulingInfo.getRequestedPartitions();
+    // default partition access is always available, so remove the empty label
+    appLabelexpressions.remove(RMNodeLabelsManager.NO_LABEL);
+    Set<String> nonAccessiblelabels = new HashSet<String>();
+ for (String label : appLabelexpressions) {
+ if (!SchedulerUtils.checkQueueLabelExpression(targetqueuelabels, label,
+ null)) {
+ nonAccessiblelabels.add(label);
+ }
+ }
+ if (nonAccessiblelabels.size() > 0) {
+ throw new YarnException(
+ "Specified queue=" + dest.getQueueName() + " can't satisfy following "
+ + "apps label expressions =" + nonAccessiblelabels
+ + " accessible node labels =" + targetqueuelabels);
+ }
+ }
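The check collects every partition the app has requested, drops the default label (which is always accessible), and rejects the move if anything remains outside the destination queue's accessible set. A plain-sets sketch of that logic; it deliberately ignores the wildcard ANY-label case that SchedulerUtils.checkQueueLabelExpression handles:

    import java.util.HashSet;
    import java.util.Set;

    // Label-accessibility check behind the queue move; plain sets, not YARN API.
    final class PartitionCheck {
      static final String NO_LABEL = "";

      static Set<String> inaccessible(Set<String> requested, Set<String> accessible) {
        Set<String> bad = new HashSet<>(requested);
        bad.remove(NO_LABEL);      // default partition is always accessible
        bad.removeAll(accessible); // keep only labels the queue cannot reach
        return bad;
      }

      public static void main(String[] args) {
        Set<String> requested = new HashSet<>(Set.of("gpu", "", "ssd"));
        Set<String> accessible = Set.of("gpu");
        System.out.println(inaccessible(requested, accessible)); // [ssd]
      }
    }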
+
+ /** {@inheritDoc} */
+ @Override
+  public EnumSet<SchedulerResourceTypes> getSchedulingResourceTypes() {
+ if (calculator.getClass().getName()
+ .equals(DefaultResourceCalculator.class.getName())) {
+ return EnumSet.of(SchedulerResourceTypes.MEMORY);
}
+ return EnumSet.of(SchedulerResourceTypes.MEMORY, SchedulerResourceTypes.CPU);
+ }
- // Update application requests
- if (application.updateResourceRequests(ask) || application
- .updateSchedulingRequests(schedulingRequests)) {
- updateDemandForQueue = (LeafQueue) application.getQueue();
+ @Override
+ public Resource getMaximumResourceCapability(String queueName) {
+ CSQueue queue = getQueue(queueName);
+ if (queue == null) {
+ LOG.error("Unknown queue: " + queueName);
+ return getMaximumResourceCapability();
+ }
+ if (!(queue instanceof LeafQueue)) {
+ LOG.error("queue " + queueName + " is not an leaf queue");
+ return getMaximumResourceCapability();
}
- if (LOG.isDebugEnabled()) {
- LOG.debug("allocate: post-update");
- application.showRequests();
- }
- }
-
- application.updateBlacklist(blacklistAdditions, blacklistRemovals);
-
- allocation = application.getAllocation(getResourceCalculator(),
- getClusterResource(), getMinimumResourceCapability());
- } finally {
- application.getWriteLock().unlock();
- }
-
- if (updateDemandForQueue != null && !application
- .isWaitingForAMContainer()) {
- updateDemandForQueue.getOrderingPolicy().demandUpdated(application);
- }
-
- if (LOG.isDebugEnabled()) {
- LOG.info("Allocation for application " + applicationAttemptId + " : "
- + allocation + " with cluster resource : " + getClusterResource());
- }
- return allocation;
- }
-
- @Override
- @Lock(Lock.NoLock.class)
- public QueueInfo getQueueInfo(String queueName,
- boolean includeChildQueues, boolean recursive)
- throws IOException {
- CSQueue queue = null;
- queue = this.getQueue(queueName);
- if (queue == null) {
- throw new IOException("Unknown queue: " + queueName);
- }
- return queue.getQueueInfo(includeChildQueues, recursive);
- }
-
- @Override
- @Lock(Lock.NoLock.class)
-  public List<QueueUserACLInfo> getQueueUserAclInfo() {
- UserGroupInformation user = null;
- try {
- user = UserGroupInformation.getCurrentUser();
- } catch (IOException ioe) {
- // should never happen
-      return new ArrayList<QueueUserACLInfo>();
- }
-
- return getRootQueue().getQueueUserAclInfo(user);
- }
-
- @Override
- protected void nodeUpdate(RMNode rmNode) {
- long begin = System.nanoTime();
- try {
- readLock.lock();
- setLastNodeUpdateTime(Time.now());
- super.nodeUpdate(rmNode);
- } finally {
- readLock.unlock();
- }
-
- // Try to do scheduling
- if (!scheduleAsynchronously) {
- try {
- writeLock.lock();
- ActivitiesLogger.NODE.startNodeUpdateRecording(activitiesManager,
- rmNode.getNodeID());
-
- // reset allocation and reservation stats before we start doing any
- // work
- updateSchedulerHealth(lastNodeUpdateTime, rmNode.getNodeID(),
- CSAssignment.NULL_ASSIGNMENT);
-
- allocateContainersToNode(rmNode.getNodeID(), true);
- ActivitiesLogger.NODE.finishNodeUpdateRecording(activitiesManager,
- rmNode.getNodeID());
- } finally {
- writeLock.unlock();
- }
- }
-
- long latency = System.nanoTime() - begin;
- CapacitySchedulerMetrics.getMetrics().addNodeUpdate(latency);
- }
-
- /**
- * Process resource update on a node.
- */
- private void updateNodeAndQueueResource(RMNode nm,
- ResourceOption resourceOption) {
- try {
- writeLock.lock();
- updateNodeResource(nm, resourceOption);
- Resource clusterResource = getClusterResource();
- getRootQueue().updateClusterResource(clusterResource,
- new ResourceLimits(clusterResource));
- } finally {
- writeLock.unlock();
- }
- }
-
- /**
- * Process node labels update on a node.
- */
- private void updateLabelsOnNode(NodeId nodeId,
-      Set<String> newLabels) {
- FiCaSchedulerNode node = nodeTracker.getNode(nodeId);
- if (null == node) {
- return;
- }
-
- // Get new partition, we have only one partition per node
- String newPartition;
- if (newLabels.isEmpty()) {
- newPartition = RMNodeLabelsManager.NO_LABEL;
- } else{
- newPartition = newLabels.iterator().next();
- }
-
- // old partition as well
- String oldPartition = node.getPartition();
-
- // Update resources of these containers
- for (RMContainer rmContainer : node.getCopiedListOfRunningContainers()) {
- FiCaSchedulerApp application = getApplicationAttempt(
- rmContainer.getApplicationAttemptId());
- if (null != application) {
- application.nodePartitionUpdated(rmContainer, oldPartition,
- newPartition);
- } else{
- LOG.warn("There's something wrong, some RMContainers running on"
- + " a node, but we cannot find SchedulerApplicationAttempt "
- + "for it. Node=" + node.getNodeID() + " applicationAttemptId="
- + rmContainer.getApplicationAttemptId());
- continue;
- }
- }
-
- // Unreserve container on this node
- RMContainer reservedContainer = node.getReservedContainer();
- if (null != reservedContainer) {
- killReservedContainer(reservedContainer);
- }
-
- // Update node labels after we've done this
- node.updateLabels(newLabels);
- }
-
- private void updateSchedulerHealth(long now, NodeId nodeId,
- CSAssignment assignment) {
-    List<AssignmentInformation.AssignmentDetails> allocations =
-        assignment.getAssignmentInformation().getAllocationDetails();
-    List<AssignmentInformation.AssignmentDetails> reservations =
-        assignment.getAssignmentInformation().getReservationDetails();
- if (!allocations.isEmpty()) {
- ContainerId allocatedContainerId =
- allocations.get(allocations.size() - 1).containerId;
- String allocatedQueue = allocations.get(allocations.size() - 1).queue;
- schedulerHealth.updateAllocation(now, nodeId, allocatedContainerId,
- allocatedQueue);
- }
- if (!reservations.isEmpty()) {
- ContainerId reservedContainerId =
- reservations.get(reservations.size() - 1).containerId;
- String reservedQueue = reservations.get(reservations.size() - 1).queue;
- schedulerHealth.updateReservation(now, nodeId, reservedContainerId,
- reservedQueue);
- }
- schedulerHealth.updateSchedulerReservationCounts(assignment
- .getAssignmentInformation().getNumReservations());
- schedulerHealth.updateSchedulerAllocationCounts(assignment
- .getAssignmentInformation().getNumAllocations());
- schedulerHealth.updateSchedulerRunDetails(now, assignment
- .getAssignmentInformation().getAllocated(), assignment
- .getAssignmentInformation().getReserved());
- }
-
- private boolean canAllocateMore(CSAssignment assignment, int offswitchCount,
- int assignedContainers) {
- // Current assignment shouldn't be empty
- if (assignment == null
- || Resources.equals(assignment.getResource(), Resources.none())) {
- return false;
- }
-
- // offswitch assignment should be under threshold
- if (offswitchCount >= offswitchPerHeartbeatLimit) {
- return false;
- }
-
- // And it should not be a reserved container
- if (assignment.getAssignmentInformation().getNumReservations() > 0) {
- return false;
- }
-
- // assignMultipleEnabled should be ON,
- // and assignedContainers should be under threshold
- return assignMultipleEnabled
- && (maxAssignPerHeartbeat == -1
- || assignedContainers < maxAssignPerHeartbeat);
- }
-
- /**
-   * We need to make sure the node exists before doing allocation, and we
-   * construct a {@link CandidateNodeSet} before proceeding.
- */
- private void allocateContainersToNode(NodeId nodeId,
- boolean withNodeHeartbeat) {
- FiCaSchedulerNode node = getNode(nodeId);
- if (null != node) {
- int offswitchCount = 0;
- int assignedContainers = 0;
-
-      CandidateNodeSet<FiCaSchedulerNode> candidates =
- new SimpleCandidateNodeSet<>(node);
- CSAssignment assignment = allocateContainersToNode(candidates,
- withNodeHeartbeat);
-      // Only check whether we can allocate more containers on the same node
-      // when scheduling is triggered by a node heartbeat
- if (null != assignment && withNodeHeartbeat) {
- if (assignment.getType() == NodeType.OFF_SWITCH) {
- offswitchCount++;
+ // queue.getMaxAllocation returns the *configured* maximum allocation,
+ // while getMaximumResourceCapability() returns the maximum allocation
+ // taking per-node maximum resources into account. So return the
+ // (component-wise) min of the two.
+
+ Resource queueMaxAllocation = ((LeafQueue) queue).getMaximumAllocation();
+ Resource clusterMaxAllocationConsiderNodeMax =
+ getMaximumResourceCapability();
+
+ return Resources.componentwiseMin(queueMaxAllocation,
+ clusterMaxAllocationConsiderNodeMax);
+ }
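
A small worked example of the component-wise min used here, as a plain-Java stand-in; the real helper is Resources.componentwiseMin, and the two-element resource holder below is hypothetical:

final class ComponentwiseMinExample {
  // Hypothetical 2-dimension resource: {memory MB, vcores}.
  static long[] componentwiseMin(long[] a, long[] b) {
    return new long[] { Math.min(a[0], b[0]), Math.min(a[1], b[1]) };
  }

  public static void main(String[] args) {
    long[] queueMax = { 8192, 8 };     // configured queue maximum
    long[] clusterMax = { 16384, 4 };  // cluster max considering node sizes
    long[] effective = componentwiseMin(queueMax, clusterMax);
    // Each dimension is capped independently: prints "8192 MB, 4 vcores".
    System.out.println(effective[0] + " MB, " + effective[1] + " vcores");
  }
}
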
+
+ private String handleMoveToPlanQueue(String targetQueueName) {
+ CSQueue dest = getQueue(targetQueueName);
+ if (dest != null && dest instanceof PlanQueue) {
+ // use the default child reservation queue of the plan
+ targetQueueName = targetQueueName + ReservationConstants.DEFAULT_QUEUE_SUFFIX;
+ }
+ return targetQueueName;
+ }
- if (Resources.greaterThan(calculator, getClusterResource(),
- assignment.getResource(), Resources.none())) {
- assignedContainers++;
+ @Override
+ public Set<String> getPlanQueues() {
+ Set<String> ret = new HashSet<String>();
+ for (Map.Entry<String, CSQueue> l : queueManager.getQueues().entrySet()) {
+ if (l.getValue() instanceof PlanQueue) {
+ ret.add(l.getKey());
+ }
+ }
+ return ret;
+ }
- while (canAllocateMore(assignment, offswitchCount,
- assignedContainers)) {
- // Try to see if it is possible to allocate multiple containers for
- // the same node heartbeat
- assignment = allocateContainersToNode(candidates, true);
+ @Override
+ public Priority checkAndGetApplicationPriority(
+ Priority priorityRequestedByApp, UserGroupInformation user,
+ String queueName, ApplicationId applicationId) throws YarnException {
+ Priority appPriority = priorityRequestedByApp;
- if (null != assignment
- && assignment.getType() == NodeType.OFF_SWITCH) {
- offswitchCount++;
- }
+ // Verify the scenario where priority is null from submissionContext.
+ if (null == appPriority) {
+ // Verify whether submitted user has any default priority set. If so,
+ // user's default priority will get precedence over queue default.
+ // For the updateApplicationPriority call flow, this check is done in
+ // ClientRMService itself.
+ appPriority = this.appPriorityACLManager.getDefaultPriority(queueName,
+ user);
+
+ // Fall back to the default priority for the queue, but only if the
+ // user doesn't have a default. If the queue is non-existent, the
+ // cluster default priority is used.
+ if (null == appPriority) {
+ appPriority = this.queueManager.getDefaultPriorityForQueue(queueName);
+ }
- if (null != assignment
- && Resources.greaterThan(calculator, getClusterResource(),
- assignment.getResource(), Resources.none())) {
- assignedContainers++;
- }
+ LOG.info(
+ "Application '" + applicationId + "' is submitted without priority "
+ + "hence considering default queue/cluster priority: "
+ + appPriority.getPriority());
+ }
- if (offswitchCount >= offswitchPerHeartbeatLimit) {
- if (LOG.isDebugEnabled()) {
- LOG.debug("Assigned maximum number of off-switch containers: "
- + offswitchCount + ", assignments so far: " + assignment);
- }
- }
- }
- }
- }
-
- /*
- * Logics of allocate container on a single node (Old behavior)
- */
- private CSAssignment allocateContainerOnSingleNode(
- CandidateNodeSet<FiCaSchedulerNode> candidates, FiCaSchedulerNode node,
- boolean withNodeHeartbeat) {
- if (LOG.isDebugEnabled()) {
- LOG.debug(
- "Trying to schedule on node: " + node.getNodeName() + ", available: "
- + node.getUnallocatedResource());
- }
-
- // Backward-compatible check to preserve the previous behavior, where
- // allocation is driven by node heartbeats.
- if (getNode(node.getNodeID()) != node) {
- LOG.error("Trying to schedule on a removed node, please double check, "
- + "nodeId=" + node.getNodeID());
- return null;
- }
-
- CSAssignment assignment;
-
- // Assign new containers...
- // 1. Check for reserved applications
- // 2. Schedule if there are no reservations
- RMContainer reservedContainer = node.getReservedContainer();
- if (reservedContainer != null) {
- FiCaSchedulerApp reservedApplication = getCurrentAttemptForContainer(
- reservedContainer.getContainerId());
- if (reservedApplication == null) {
- LOG.error(
- "Trying to schedule for a finished app, please double check. nodeId="
- + node.getNodeID() + " container=" + reservedContainer
- .getContainerId());
- return null;
- }
-
- // Try to fulfill the reservation
- if (LOG.isDebugEnabled()) {
- LOG.debug("Trying to fulfill reservation for application "
- + reservedApplication.getApplicationId() + " on node: " + node
- .getNodeID());
- }
-
- LeafQueue queue = ((LeafQueue) reservedApplication.getQueue());
- assignment = queue.assignContainers(getClusterResource(), candidates,
- // TODO, now we only consider limits for parent for non-labeled
- // resources, should consider labeled resources as well.
- new ResourceLimits(labelManager
- .getResourceByLabel(RMNodeLabelsManager.NO_LABEL,
- getClusterResource())),
- SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY);
-
- if (assignment.isFulfilledReservation()) {
- if (withNodeHeartbeat) {
- // Only update SchedulerHealth in sync scheduling; the existing
- // SchedulerHealth data structures would need to be updated for
- // async mode
- updateSchedulerHealth(lastNodeUpdateTime, node.getNodeID(),
- assignment);
- }
-
- schedulerHealth.updateSchedulerFulfilledReservationCounts(1);
-
- ActivitiesLogger.QUEUE.recordQueueActivity(activitiesManager, node,
- queue.getParent().getQueueName(), queue.getQueueName(),
- ActivityState.ACCEPTED, ActivityDiagnosticConstant.EMPTY);
- ActivitiesLogger.NODE.finishAllocatedNodeAllocation(activitiesManager,
- node, reservedContainer.getContainerId(),
- AllocationState.ALLOCATED_FROM_RESERVED);
- } else{
- ActivitiesLogger.QUEUE.recordQueueActivity(activitiesManager, node,
- queue.getParent().getQueueName(), queue.getQueueName(),
- ActivityState.ACCEPTED, ActivityDiagnosticConstant.EMPTY);
- ActivitiesLogger.NODE.finishAllocatedNodeAllocation(activitiesManager,
- node, reservedContainer.getContainerId(), AllocationState.SKIPPED);
- }
-
- assignment.setSchedulingMode(
- SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY);
- submitResourceCommitRequest(getClusterResource(), assignment);
- }
-
- // Do not schedule if there are any reservations to fulfill on the node
- if (node.getReservedContainer() != null) {
- if (LOG.isDebugEnabled()) {
- LOG.debug("Skipping scheduling since node " + node.getNodeID()
- + " is reserved by application " + node.getReservedContainer()
- .getContainerId().getApplicationAttemptId());
- }
- return null;
- }
-
- // First check if we can schedule
- // Since we are looking at a single node this time, try to schedule
- // if the node has any available or killable resources
- if (calculator.computeAvailableContainers(Resources
- .add(node.getUnallocatedResource(), node.getTotalKillableResources()),
- minimumAllocation) <= 0) {
- if (LOG.isDebugEnabled()) {
- LOG.debug("This node or this node partition doesn't have available or"
- + "killable resource");
- }
- return null;
- }
-
- return allocateOrReserveNewContainers(candidates, withNodeHeartbeat);
- }
-
- private CSAssignment allocateOrReserveNewContainers(
- CandidateNodeSet<FiCaSchedulerNode> candidates,
- boolean withNodeHeartbeat) {
- CSAssignment assignment = getRootQueue().assignContainers(
- getClusterResource(), candidates, new ResourceLimits(labelManager
- .getResourceByLabel(candidates.getPartition(),
- getClusterResource())),
- SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY);
-
- assignment.setSchedulingMode(SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY);
- submitResourceCommitRequest(getClusterResource(), assignment);
-
- if (Resources.greaterThan(calculator, getClusterResource(),
- assignment.getResource(), Resources.none())) {
- if (withNodeHeartbeat) {
- updateSchedulerHealth(lastNodeUpdateTime,
- CandidateNodeSetUtils.getSingleNode(candidates).getNodeID(),
- assignment);
- }
- return assignment;
- }
-
- // Only do non-exclusive allocation when node has node-labels.
- if (StringUtils.equals(candidates.getPartition(),
- RMNodeLabelsManager.NO_LABEL)) {
- return null;
- }
-
- // Only do non-exclusive allocation when the node-label supports that
- try {
- if (rmContext.getNodeLabelManager().isExclusiveNodeLabel(
- candidates.getPartition())) {
- return null;
- }
- } catch (IOException e) {
- LOG.warn(
- "Exception when trying to get exclusivity of node label=" + candidates
- .getPartition(), e);
- return null;
- }
-
- // Try to use NON_EXCLUSIVE
- assignment = getRootQueue().assignContainers(getClusterResource(),
- candidates,
- // TODO, now we only consider limits for parent for non-labeled
- // resources, should consider labeled resources as well.
- new ResourceLimits(labelManager
- .getResourceByLabel(RMNodeLabelsManager.NO_LABEL,
- getClusterResource())),
- SchedulingMode.IGNORE_PARTITION_EXCLUSIVITY);
- assignment.setSchedulingMode(SchedulingMode.IGNORE_PARTITION_EXCLUSIVITY);
- submitResourceCommitRequest(getClusterResource(), assignment);
-
- return assignment;
- }
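
A condensed sketch of the two-pass policy implemented by this method, under assumed simplified types: the partition is first tried with exclusivity respected, and only a labeled, non-exclusive partition gets a second pass that ignores exclusivity:

final class TwoPassSketch {
  enum Mode { RESPECT_PARTITION_EXCLUSIVITY, IGNORE_PARTITION_EXCLUSIVITY }

  interface Partition {
    String name();          // "" is the default (no-label) partition
    boolean exclusive();
  }

  interface Assigner {
    boolean assign(Partition p, Mode mode);  // true when something allocated
  }

  static boolean allocate(Assigner root, Partition p) {
    if (root.assign(p, Mode.RESPECT_PARTITION_EXCLUSIVITY)) {
      return true;
    }
    // Second pass only for labeled, non-exclusive partitions.
    if (!p.name().isEmpty() && !p.exclusive()) {
      return root.assign(p, Mode.IGNORE_PARTITION_EXCLUSIVITY);
    }
    return false;
  }
}
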
-
- /*
- * New behavior, allocate containers considering multiple nodes
- */
- private CSAssignment allocateContainersOnMultiNodes(
- CandidateNodeSet<FiCaSchedulerNode> candidates) {
- // Since we are looking at multiple nodes this time, try to schedule
- // if the partition has any available or killable resources
- if (getRootQueue().getQueueCapacities().getUsedCapacity(
- candidates.getPartition()) >= 1.0f
- && preemptionManager.getKillableResource(
- CapacitySchedulerConfiguration.ROOT, candidates.getPartition())
- == Resources.none()) {
- if (LOG.isDebugEnabled()) {
- LOG.debug("This node or this node partition doesn't have available or"
- + "killable resource");
- }
- return null;
- }
-
- return allocateOrReserveNewContainers(candidates, false);
- }
-
- @VisibleForTesting
- CSAssignment allocateContainersToNode(
- CandidateNodeSet<FiCaSchedulerNode> candidates,
- boolean withNodeHeartbeat) {
- if (rmContext.isWorkPreservingRecoveryEnabled() && !rmContext
- .isSchedulerReadyForAllocatingContainers()) {
- return null;
- }
-
- long startTime = System.nanoTime();
-
- // Backward-compatible check to preserve the previous behavior, where
- // allocation is driven by node heartbeats.
- FiCaSchedulerNode node = CandidateNodeSetUtils.getSingleNode(candidates);
-
- // We have two different logics to handle allocation on single node / multi
- // nodes.
- CSAssignment assignment;
- if (null != node) {
- assignment = allocateContainerOnSingleNode(candidates,
- node, withNodeHeartbeat);
- } else{
- assignment = allocateContainersOnMultiNodes(candidates);
- }
-
- if (assignment != null && assignment.getAssignmentInformation() != null
- && assignment.getAssignmentInformation().getNumAllocations() > 0) {
- long allocateTime = System.nanoTime() - startTime;
- CapacitySchedulerMetrics.getMetrics().addAllocate(allocateTime);
- }
- return assignment;
- }
-
- @Override
- public void handle(SchedulerEvent event) {
- switch(event.getType()) {
- case NODE_ADDED:
- {
- NodeAddedSchedulerEvent nodeAddedEvent = (NodeAddedSchedulerEvent)event;
- addNode(nodeAddedEvent.getAddedRMNode());
- recoverContainersOnNode(nodeAddedEvent.getContainerReports(),
- nodeAddedEvent.getAddedRMNode());
- }
- break;
- case NODE_REMOVED:
- {
- NodeRemovedSchedulerEvent nodeRemovedEvent = (NodeRemovedSchedulerEvent)event;
- removeNode(nodeRemovedEvent.getRemovedRMNode());
- }
- break;
- case NODE_RESOURCE_UPDATE:
- {
- NodeResourceUpdateSchedulerEvent nodeResourceUpdatedEvent =
- (NodeResourceUpdateSchedulerEvent)event;
- updateNodeAndQueueResource(nodeResourceUpdatedEvent.getRMNode(),
- nodeResourceUpdatedEvent.getResourceOption());
- }
- break;
- case NODE_LABELS_UPDATE:
- {
- NodeLabelsUpdateSchedulerEvent labelUpdateEvent =
- (NodeLabelsUpdateSchedulerEvent) event;
-
- updateNodeLabelsAndQueueResource(labelUpdateEvent);
- }
- break;
- case NODE_UPDATE:
- {
- NodeUpdateSchedulerEvent nodeUpdatedEvent = (NodeUpdateSchedulerEvent)event;
- nodeUpdate(nodeUpdatedEvent.getRMNode());
- }
- break;
- case APP_ADDED:
- {
- AppAddedSchedulerEvent appAddedEvent = (AppAddedSchedulerEvent) event;
- String queueName = resolveReservationQueueName(appAddedEvent.getQueue(),
- appAddedEvent.getApplicationId(), appAddedEvent.getReservationID(),
- appAddedEvent.getIsAppRecovering());
- if (queueName != null) {
- if (!appAddedEvent.getIsAppRecovering()) {
- addApplication(appAddedEvent.getApplicationId(), queueName,
- appAddedEvent.getUser(), appAddedEvent.getApplicatonPriority(),
- appAddedEvent.getPlacementContext());
- } else {
- addApplicationOnRecovery(appAddedEvent.getApplicationId(), queueName,
- appAddedEvent.getUser(), appAddedEvent.getApplicatonPriority(),
- appAddedEvent.getPlacementContext());
- }
- }
- }
- break;
- case APP_REMOVED:
- {
- AppRemovedSchedulerEvent appRemovedEvent = (AppRemovedSchedulerEvent)event;
- doneApplication(appRemovedEvent.getApplicationID(),
- appRemovedEvent.getFinalState());
- }
- break;
- case APP_ATTEMPT_ADDED:
- {
- AppAttemptAddedSchedulerEvent appAttemptAddedEvent =
- (AppAttemptAddedSchedulerEvent) event;
- addApplicationAttempt(appAttemptAddedEvent.getApplicationAttemptId(),
- appAttemptAddedEvent.getTransferStateFromPreviousAttempt(),
- appAttemptAddedEvent.getIsAttemptRecovering());
- }
- break;
- case APP_ATTEMPT_REMOVED:
- {
- AppAttemptRemovedSchedulerEvent appAttemptRemovedEvent =
- (AppAttemptRemovedSchedulerEvent) event;
- doneApplicationAttempt(appAttemptRemovedEvent.getApplicationAttemptID(),
- appAttemptRemovedEvent.getFinalAttemptState(),
- appAttemptRemovedEvent.getKeepContainersAcrossAppAttempts());
- }
- break;
- case CONTAINER_EXPIRED:
- {
- ContainerExpiredSchedulerEvent containerExpiredEvent =
- (ContainerExpiredSchedulerEvent) event;
- ContainerId containerId = containerExpiredEvent.getContainerId();
- if (containerExpiredEvent.isIncrease()) {
- rollbackContainerUpdate(containerId);
- } else {
- completedContainer(getRMContainer(containerId),
- SchedulerUtils.createAbnormalContainerStatus(
- containerId,
- SchedulerUtils.EXPIRED_CONTAINER),
- RMContainerEventType.EXPIRE);
- }
- }
- break;
- case RELEASE_CONTAINER:
- {
- RMContainer container = ((ReleaseContainerEvent) event).getContainer();
- completedContainer(container,
- SchedulerUtils.createAbnormalContainerStatus(
- container.getContainerId(),
- SchedulerUtils.RELEASED_CONTAINER),
- RMContainerEventType.RELEASED);
- }
- break;
- case KILL_RESERVED_CONTAINER:
- {
- ContainerPreemptEvent killReservedContainerEvent =
- (ContainerPreemptEvent) event;
- RMContainer container = killReservedContainerEvent.getContainer();
- killReservedContainer(container);
- }
- break;
- case MARK_CONTAINER_FOR_PREEMPTION:
- {
- ContainerPreemptEvent preemptContainerEvent =
- (ContainerPreemptEvent)event;
- ApplicationAttemptId aid = preemptContainerEvent.getAppId();
- RMContainer containerToBePreempted = preemptContainerEvent.getContainer();
- markContainerForPreemption(aid, containerToBePreempted);
- }
- break;
- case MARK_CONTAINER_FOR_KILLABLE:
- {
- ContainerPreemptEvent containerKillableEvent = (ContainerPreemptEvent)event;
- RMContainer killableContainer = containerKillableEvent.getContainer();
- markContainerForKillable(killableContainer);
- }
- break;
- case MARK_CONTAINER_FOR_NONKILLABLE:
- {
- if (isLazyPreemptionEnabled) {
- ContainerPreemptEvent cancelKillContainerEvent =
- (ContainerPreemptEvent) event;
- markContainerForNonKillable(cancelKillContainerEvent.getContainer());
- }
- }
- break;
- case MANAGE_QUEUE:
- {
- QueueManagementChangeEvent queueManagementChangeEvent =
- (QueueManagementChangeEvent) event;
- ParentQueue parentQueue = queueManagementChangeEvent.getParentQueue();
- try {
- final List queueManagementChanges =
- queueManagementChangeEvent.getQueueManagementChanges();
- ((ManagedParentQueue) parentQueue)
- .validateAndApplyQueueManagementChanges(queueManagementChanges);
- } catch (SchedulerDynamicEditException sde) {
- LOG.error("Queue Management Change event cannot be applied for "
- + "parent queue : " + parentQueue.getQueueName(), sde);
- } catch (IOException ioe) {
- LOG.error("Queue Management Change event cannot be applied for "
- + "parent queue : " + parentQueue.getQueueName(), ioe);
- }
- }
- break;
- default:
- LOG.error("Invalid eventtype " + event.getType() + ". Ignoring!");
- }
- }
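
The handler above is a plain switch over the event type; a compact, self-contained sketch of the same dispatch pattern, with hypothetical types:

final class DispatchSketch {
  enum EventType { NODE_ADDED, NODE_REMOVED, NODE_UPDATE }

  interface Event { EventType type(); }

  // One switch routes typed events to handlers; unknown types are logged
  // and ignored rather than thrown, so a bad event cannot kill the loop.
  static void handle(Event event) {
    switch (event.type()) {
    case NODE_ADDED:
      System.out.println("add node, then recover its containers");
      break;
    case NODE_REMOVED:
      System.out.println("release containers, then remove node");
      break;
    default:
      System.err.println("Invalid event type " + event.type()
          + ". Ignoring!");
    }
  }
}
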
-
- /**
- * Process node labels update.
- */
- private void updateNodeLabelsAndQueueResource(
- NodeLabelsUpdateSchedulerEvent labelUpdateEvent) {
- try {
- writeLock.lock();
- for (Entry<NodeId, Set<String>> entry : labelUpdateEvent
- .getUpdatedNodeToLabels().entrySet()) {
- NodeId id = entry.getKey();
- Set<String> labels = entry.getValue();
- updateLabelsOnNode(id, labels);
- }
- Resource clusterResource = getClusterResource();
- getRootQueue().updateClusterResource(clusterResource,
- new ResourceLimits(clusterResource));
- } finally {
- writeLock.unlock();
- }
- }
-
- private void addNode(RMNode nodeManager) {
- try {
- writeLock.lock();
- FiCaSchedulerNode schedulerNode = new FiCaSchedulerNode(nodeManager,
- usePortForNodeName, nodeManager.getNodeLabels());
- nodeTracker.addNode(schedulerNode);
-
- // update this node to node label manager
- if (labelManager != null) {
- labelManager.activateNode(nodeManager.getNodeID(),
- schedulerNode.getTotalResource());
- }
-
- Resource clusterResource = getClusterResource();
- getRootQueue().updateClusterResource(clusterResource,
- new ResourceLimits(clusterResource));
-
- LOG.info(
- "Added node " + nodeManager.getNodeAddress() + " clusterResource: "
- + clusterResource);
-
- if (scheduleAsynchronously && getNumClusterNodes() == 1) {
- for (AsyncScheduleThread t : asyncSchedulerThreads) {
- t.beginSchedule();
- }
- }
- } finally {
- writeLock.unlock();
- }
- }
-
- private void removeNode(RMNode nodeInfo) {
- try {
- writeLock.lock();
- // update this node to node label manager
- if (labelManager != null) {
- labelManager.deactivateNode(nodeInfo.getNodeID());
- }
-
- NodeId nodeId = nodeInfo.getNodeID();
- FiCaSchedulerNode node = nodeTracker.getNode(nodeId);
- if (node == null) {
- LOG.error("Attempting to remove non-existent node " + nodeId);
- return;
- }
-
- // Remove running containers
- List<RMContainer> runningContainers =
- node.getCopiedListOfRunningContainers();
- for (RMContainer container : runningContainers) {
- super.completedContainer(container, SchedulerUtils
- .createAbnormalContainerStatus(container.getContainerId(),
- SchedulerUtils.LOST_CONTAINER), RMContainerEventType.KILL);
- }
-
- // Remove reservations, if any
- RMContainer reservedContainer = node.getReservedContainer();
- if (reservedContainer != null) {
- super.completedContainer(reservedContainer, SchedulerUtils
- .createAbnormalContainerStatus(reservedContainer.getContainerId(),
- SchedulerUtils.LOST_CONTAINER), RMContainerEventType.KILL);
- }
-
- nodeTracker.removeNode(nodeId);
- Resource clusterResource = getClusterResource();
- getRootQueue().updateClusterResource(clusterResource,
- new ResourceLimits(clusterResource));
- int numNodes = nodeTracker.nodeCount();
-
- if (scheduleAsynchronously && numNodes == 0) {
- for (AsyncScheduleThread t : asyncSchedulerThreads) {
- t.suspendSchedule();
- }
- }
-
- LOG.info(
- "Removed node " + nodeInfo.getNodeAddress() + " clusterResource: "
- + getClusterResource());
- } finally {
- writeLock.unlock();
- }
- }
-
- @Override
- protected void completedContainerInternal(
- RMContainer rmContainer, ContainerStatus containerStatus,
- RMContainerEventType event) {
- Container container = rmContainer.getContainer();
- ContainerId containerId = container.getId();
-
- // Get the application for the finished container
- FiCaSchedulerApp application = getCurrentAttemptForContainer(
- container.getId());
- ApplicationId appId =
- containerId.getApplicationAttemptId().getApplicationId();
- if (application == null) {
- LOG.info(
- "Container " + container + " of" + " finished application " + appId
- + " completed with event " + event);
- return;
- }
-
- // Get the node on which the container was allocated
- FiCaSchedulerNode node = getNode(container.getNodeId());
- if (null == node) {
- LOG.info("Container " + container + " of" + " removed node " + container
- .getNodeId() + " completed with event " + event);
- return;
- }
-
- // Inform the queue
- LeafQueue queue = (LeafQueue) application.getQueue();
- queue.completedContainer(getClusterResource(), application, node,
- rmContainer, containerStatus, event, null, true);
- if (ContainerExitStatus.PREEMPTED == containerStatus.getExitStatus()) {
- updateQueuePreemptionMetrics(queue, rmContainer);
- }
- }
-
- private void updateQueuePreemptionMetrics(
- CSQueue queue, RMContainer rmc) {
- QueueMetrics qMetrics = queue.getMetrics();
- long usedMillis = rmc.getFinishTime() - rmc.getCreationTime();
- Resource containerResource = rmc.getAllocatedResource();
- qMetrics.preemptContainer();
- long mbSeconds = (containerResource.getMemorySize() * usedMillis)
- / DateUtils.MILLIS_PER_SECOND;
- long vcSeconds = (containerResource.getVirtualCores() * usedMillis)
- / DateUtils.MILLIS_PER_SECOND;
- qMetrics.updatePreemptedMemoryMBSeconds(mbSeconds);
- qMetrics.updatePreemptedVcoreSeconds(vcSeconds);
- }
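
A worked example of this accounting with assumed values: a 4096 MB / 2-vcore container preempted 90 seconds after creation contributes the following to the queue's preemption metrics:

public final class PreemptionMetricsExample {
  public static void main(String[] args) {
    long usedMillis = 90_000L;   // finishTime - creationTime (assumed)
    long memoryMb = 4096L;
    long vcores = 2L;
    long mbSeconds = (memoryMb * usedMillis) / 1000L;  // 368640 MB-seconds
    long vcSeconds = (vcores * usedMillis) / 1000L;    // 180 vcore-seconds
    System.out.println(mbSeconds + " MB-seconds, " + vcSeconds
        + " vcore-seconds charged as preempted resource usage");
  }
}
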
-
- @Lock(Lock.NoLock.class)
- @VisibleForTesting
- @Override
- public FiCaSchedulerApp getApplicationAttempt(
- ApplicationAttemptId applicationAttemptId) {
- return super.getApplicationAttempt(applicationAttemptId);
- }
-
- @Lock(Lock.NoLock.class)
- public FiCaSchedulerNode getNode(NodeId nodeId) {
- return nodeTracker.getNode(nodeId);
- }
-
- @Lock(Lock.NoLock.class)
- public List<FiCaSchedulerNode> getAllNodes() {
- return nodeTracker.getAllNodes();
- }
-
- @Override
- @Lock(Lock.NoLock.class)
- public void recover(RMState state) throws Exception {
- // NOT IMPLEMENTED
- }
-
- @Override
- public void killReservedContainer(RMContainer container) {
- if(LOG.isDebugEnabled()){
- LOG.debug(SchedulerEventType.KILL_RESERVED_CONTAINER + ":"
- + container.toString());
- }
- // To think: what happens if this is no longer a reserved container,
- // e.g. if the reservation became an allocation?
- super.completedContainer(container,
- SchedulerUtils.createAbnormalContainerStatus(
- container.getContainerId(),
- SchedulerUtils.UNRESERVED_CONTAINER),
- RMContainerEventType.KILL);
- }
-
- @Override
- public void markContainerForPreemption(ApplicationAttemptId aid,
- RMContainer cont) {
- if(LOG.isDebugEnabled()){
- LOG.debug(SchedulerEventType.MARK_CONTAINER_FOR_PREEMPTION
- + ": appAttempt:" + aid.toString() + " container: "
- + cont.toString());
- }
- FiCaSchedulerApp app = getApplicationAttempt(aid);
- if (app != null) {
- app.markContainerForPreemption(cont.getContainerId());
- }
- }
-
- @VisibleForTesting
- @Override
- public void killContainer(RMContainer container) {
- markContainerForKillable(container);
- }
-
- public void markContainerForKillable(
- RMContainer killableContainer) {
- try {
- writeLock.lock();
- if (LOG.isDebugEnabled()) {
- LOG.debug(SchedulerEventType.MARK_CONTAINER_FOR_KILLABLE + ": container"
- + killableContainer.toString());
- }
-
- if (!isLazyPreemptionEnabled) {
- super.completedContainer(killableContainer, SchedulerUtils
- .createPreemptedContainerStatus(killableContainer.getContainerId(),
- SchedulerUtils.PREEMPTED_CONTAINER), RMContainerEventType.KILL);
- } else{
- FiCaSchedulerNode node = (FiCaSchedulerNode) getSchedulerNode(
- killableContainer.getAllocatedNode());
+ // Verify that the submitted priority does not exceed the maximum
+ // priority in the cluster; if it is out of bounds, cap it at the max.
+ if (appPriority.getPriority() > getMaxClusterLevelAppPriority()
+ .getPriority()) {
+ appPriority = Priority
+ .newInstance(getMaxClusterLevelAppPriority().getPriority());
+ }
- FiCaSchedulerApp application = getCurrentAttemptForContainer(
- killableContainer.getContainerId());
+ // Let's check the ACLs here.
+ if (!appPriorityACLManager.checkAccess(user, queueName, appPriority)) {
+ throw new YarnException(new AccessControlException(
+ "User " + user + " does not have permission to submit/update "
+ + applicationId + " for " + appPriority));
+ }
- node.markContainerToKillable(killableContainer.getContainerId());
+ LOG.info("Priority '" + appPriority.getPriority()
+ + "' is acceptable in queue : " + queueName + " for application: "
+ + applicationId);
- // notify PreemptionManager
- // Get the application for the finished container
- if (null != application) {
- String leafQueueName = application.getCSLeafQueue().getQueueName();
- getPreemptionManager().addKillableContainer(
- new KillableContainer(killableContainer, node.getPartition(),
- leafQueueName));
- }
- }
- } finally {
- writeLock.unlock();
- }
- }
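
A sketch of the two branches above with simplified stand-in types: when lazy preemption is disabled the container is completed (killed) immediately; when enabled it is only marked killable on its node and registered with the preemption bookkeeping, so a later allocation can reclaim it without an eager kill:

final class LazyPreemptionSketch {
  interface Container { String id(); }

  interface Actions {
    void killNow(Container c);          // immediate completion
    void markKillable(Container c);     // defer: mark on the node
    void registerKillable(Container c); // notify the preemption manager
  }

  static void markContainerForKillable(Container c, boolean lazyEnabled,
      Actions actions) {
    if (!lazyEnabled) {
      actions.killNow(c);
    } else {
      actions.markKillable(c);
      actions.registerKillable(c);
    }
  }
}
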
-
- private void markContainerForNonKillable(
- RMContainer nonKillableContainer) {
- try {
- writeLock.lock();
- if (LOG.isDebugEnabled()) {
- LOG.debug(
- SchedulerEventType.MARK_CONTAINER_FOR_NONKILLABLE + ": container"
- + nonKillableContainer.toString());
- }
-
- FiCaSchedulerNode node = (FiCaSchedulerNode) getSchedulerNode(
- nonKillableContainer.getAllocatedNode());
-
- FiCaSchedulerApp application = getCurrentAttemptForContainer(
- nonKillableContainer.getContainerId());
-
- node.markContainerToNonKillable(nonKillableContainer.getContainerId());
-
- // notify PreemptionManager
- // Get the application for the finished container
- if (null != application) {
- String leafQueueName = application.getCSLeafQueue().getQueueName();
- getPreemptionManager().removeKillableContainer(
- new KillableContainer(nonKillableContainer, node.getPartition(),
- leafQueueName));
- }
- } finally {
- writeLock.unlock();
- }
- }
-
- @Override
- public boolean checkAccess(UserGroupInformation callerUGI,
- QueueACL acl, String queueName) {
- CSQueue queue = getQueue(queueName);
- if (queue == null) {
- if (LOG.isDebugEnabled()) {
- LOG.debug("ACL not found for queue access-type " + acl + " for queue "
- + queueName);
- }
- return false;
- }
- return queue.hasAccess(acl, callerUGI);
- }
-
- @Override
- public List<ApplicationAttemptId> getAppsInQueue(String queueName) {
- CSQueue queue = getQueue(queueName);
- if (queue == null) {
- return null;
- }
- List<ApplicationAttemptId> apps = new ArrayList<ApplicationAttemptId>();
- queue.collectSchedulerApplications(apps);
- return apps;
- }
-
- public boolean isSystemAppsLimitReached() {
- return getRootQueue().getNumApplications() >= conf
- .getMaximumSystemApplications();
- }
-
- private String getDefaultReservationQueueName(String planQueueName) {
- return planQueueName + ReservationConstants.DEFAULT_QUEUE_SUFFIX;
- }
-
- private String resolveReservationQueueName(String queueName,
- ApplicationId applicationId, ReservationId reservationID,
- boolean isRecovering) {
- try {
- readLock.lock();
- CSQueue queue = getQueue(queueName);
- // Check if the queue is a plan queue
- if ((queue == null) || !(queue instanceof PlanQueue)) {
- return queueName;
- }
- if (reservationID != null) {
- String resQName = reservationID.toString();
- queue = getQueue(resQName);
- if (queue == null) {
- // reservation has terminated during failover
- if (isRecovering && conf.getMoveOnExpiry(
- getQueue(queueName).getQueuePath())) {
- // move to the default child queue of the plan
- return getDefaultReservationQueueName(queueName);
- }
- String message = "Application " + applicationId
- + " submitted to a reservation which is not currently active: "
- + resQName;
- this.rmContext.getDispatcher().getEventHandler().handle(
- new RMAppEvent(applicationId, RMAppEventType.APP_REJECTED,
- message));
- return null;
- }
- if (!queue.getParent().getQueueName().equals(queueName)) {
- String message =
- "Application: " + applicationId + " submitted to a reservation "
- + resQName + " which does not belong to the specified queue: "
- + queueName;
- this.rmContext.getDispatcher().getEventHandler().handle(
- new RMAppEvent(applicationId, RMAppEventType.APP_REJECTED,
- message));
- return null;
- }
- // use the reservation queue to run the app
- queueName = resQName;
- } else{
- // use the default child queue of the plan for unreserved apps
- queueName = getDefaultReservationQueueName(queueName);
- }
- return queueName;
- } finally {
- readLock.unlock();
- }
-
- }
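
A condensed sketch of these resolution rules. The "-default" suffix is an assumption standing in for ReservationConstants.DEFAULT_QUEUE_SUFFIX, active reservations are modeled as a plain set, and returning null stands in for rejecting the application:

import java.util.Set;

final class ReservationResolveSketch {
  static final String DEFAULT_QUEUE_SUFFIX = "-default"; // assumed value

  static String resolve(String planQueue, String reservationId,
      Set<String> activeReservationQueues, boolean isRecovering,
      boolean moveOnExpiry) {
    if (reservationId == null) {
      // Unreserved apps run in the plan's default child queue.
      return planQueue + DEFAULT_QUEUE_SUFFIX;
    }
    if (activeReservationQueues.contains(reservationId)) {
      // Reserved apps run inside their reservation queue.
      return reservationId;
    }
    if (isRecovering && moveOnExpiry) {
      // Reservation expired during failover: fall back to the default child.
      return planQueue + DEFAULT_QUEUE_SUFFIX;
    }
    return null; // stands in for APP_REJECTED
  }
}
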
-
- @Override
- public void removeQueue(String queueName)
- throws SchedulerDynamicEditException {
- try {
- writeLock.lock();
- LOG.info("Removing queue: " + queueName);
- CSQueue q = this.getQueue(queueName);
- if (!(AbstractAutoCreatedLeafQueue.class.isAssignableFrom(
- q.getClass()))) {
- throw new SchedulerDynamicEditException(
- "The queue that we are asked " + "to remove (" + queueName
- + ") is not a AutoCreatedLeafQueue or ReservationQueue");
- }
- AbstractAutoCreatedLeafQueue disposableLeafQueue =
- (AbstractAutoCreatedLeafQueue) q;
- // at this point we should have no more apps
- if (disposableLeafQueue.getNumApplications() > 0) {
- throw new SchedulerDynamicEditException(
- "The queue " + queueName + " is not empty " + disposableLeafQueue
- .getApplications().size() + " active apps "
- + disposableLeafQueue.getPendingApplications().size()
- + " pending apps");
- }
-
- ((AbstractManagedParentQueue) disposableLeafQueue.getParent())
- .removeChildQueue(q);
- this.queueManager.removeQueue(queueName);
- LOG.info(
- "Removal of AutoCreatedLeafQueue " + queueName + " has succeeded");
- } finally {
- writeLock.unlock();
- }
- }
-
- @Override
- public void addQueue(Queue queue)
- throws SchedulerDynamicEditException, IOException {
- try {
- writeLock.lock();
- if (queue == null) {
- throw new SchedulerDynamicEditException(
- "Queue specified is null. Should be an implementation of "
- + "AbstractAutoCreatedLeafQueue");
- } else if (!(AbstractAutoCreatedLeafQueue.class
- .isAssignableFrom(queue.getClass()))) {
- throw new SchedulerDynamicEditException(
- "Queue is not an implementation of "
- + "AbstractAutoCreatedLeafQueue : " + queue.getClass());
- }
-
- AbstractAutoCreatedLeafQueue newQueue =
- (AbstractAutoCreatedLeafQueue) queue;
-
- if (newQueue.getParent() == null || !(AbstractManagedParentQueue.class.
- isAssignableFrom(newQueue.getParent().getClass()))) {
- throw new SchedulerDynamicEditException(
- "ParentQueue for " + newQueue + " is not properly set"
- + " (should be set and be a PlanQueue or ManagedParentQueue)");
- }
-
- AbstractManagedParentQueue parent =
- (AbstractManagedParentQueue) newQueue.getParent();
- String queuename = newQueue.getQueueName();
- parent.addChildQueue(newQueue);
- this.queueManager.addQueue(queuename, newQueue);
-
- LOG.info("Creation of AutoCreatedLeafQueue " + newQueue + " succeeded");
- } finally {
- writeLock.unlock();
- }
- }
-
- @Override
- public void setEntitlement(String inQueue, QueueEntitlement entitlement)
- throws YarnException {
- try {
- writeLock.lock();
- LeafQueue queue = this.queueManager.getAndCheckLeafQueue(inQueue);
- AbstractManagedParentQueue parent =
- (AbstractManagedParentQueue) queue.getParent();
-
- if (!(AbstractAutoCreatedLeafQueue.class.isAssignableFrom(
- queue.getClass()))) {
- throw new SchedulerDynamicEditException(
- "Entitlement can not be" + " modified dynamically since queue "
- + inQueue + " is not a AutoCreatedLeafQueue");
- }
-
- if (parent == null || !(AbstractManagedParentQueue.class.isAssignableFrom(
- parent.getClass()))) {
- throw new SchedulerDynamicEditException(
- "The parent of AutoCreatedLeafQueue " + inQueue
- + " must be a PlanQueue/ManagedParentQueue");
- }
-
- AbstractAutoCreatedLeafQueue newQueue =
- (AbstractAutoCreatedLeafQueue) queue;
- parent.validateQueueEntitlementChange(newQueue, entitlement);
-
- newQueue.setEntitlement(entitlement);
-
- LOG.info("Set entitlement for AutoCreatedLeafQueue " + inQueue + " to "
- + queue.getCapacity() + " request was (" + entitlement.getCapacity()
- + ")");
- } finally {
- writeLock.unlock();
- }
- }
-
- @Override
- public String moveApplication(ApplicationId appId,
- String targetQueueName) throws YarnException {
- try {
- writeLock.lock();
- SchedulerApplication<FiCaSchedulerApp> application =
- applications.get(appId);
- if (application == null) {
- throw new YarnException("App to be moved " + appId + " not found.");
- }
- String sourceQueueName = application.getQueue().getQueueName();
- LeafQueue source =
- this.queueManager.getAndCheckLeafQueue(sourceQueueName);
- String destQueueName = handleMoveToPlanQueue(targetQueueName);
- LeafQueue dest = this.queueManager.getAndCheckLeafQueue(destQueueName);
-
- String user = application.getUser();
- try {
- dest.submitApplication(appId, user, destQueueName);
- } catch (AccessControlException e) {
- throw new YarnException(e);
- }
-
- FiCaSchedulerApp app = application.getCurrentAppAttempt();
- if (app != null) {
- // Move all live containers even when stopped; required for
- // transferStateFromPreviousAttempt.
- for (RMContainer rmContainer : app.getLiveContainers()) {
- source.detachContainer(getClusterResource(), app, rmContainer);
- // attach the Container to another queue
- dest.attachContainer(getClusterResource(), app, rmContainer);
- }
- if (!app.isStopped()) {
- source.finishApplicationAttempt(app, sourceQueueName);
- // Submit to a new queue
- dest.submitApplicationAttempt(app, user);
- }
- // Finish app & update metrics
- app.move(dest);
- }
- source.appFinished();
- // Detach the application..
- source.getParent().finishApplication(appId, user);
- application.setQueue(dest);
- LOG.info("App: " + appId + " successfully moved from " + sourceQueueName
- + " to: " + destQueueName);
- return targetQueueName;
- } finally {
- writeLock.unlock();
- }
- }
-
- @Override
- public void preValidateMoveApplication(ApplicationId appId,
- String newQueue) throws YarnException {
- try {
- writeLock.lock();
- SchedulerApplication<FiCaSchedulerApp> application =
- applications.get(appId);
- if (application == null) {
- throw new YarnException("App to be moved " + appId + " not found.");
- }
- String sourceQueueName = application.getQueue().getQueueName();
- this.queueManager.getAndCheckLeafQueue(sourceQueueName);
- String destQueueName = handleMoveToPlanQueue(newQueue);
- LeafQueue dest = this.queueManager.getAndCheckLeafQueue(destQueueName);
- // Validation check - ACLs, submission limits for user & queue
- String user = application.getUser();
- // Check active partition only when attempt is available
- FiCaSchedulerApp appAttempt =
- getApplicationAttempt(ApplicationAttemptId.newInstance(appId, 0));
- if (null != appAttempt) {
- checkQueuePartition(appAttempt, dest);
- }
- try {
- dest.validateSubmitApplication(appId, user, destQueueName);
- } catch (AccessControlException e) {
- throw new YarnException(e);
- }
- } finally {
- writeLock.unlock();
- }
- }
-
- /**
- * Check whether the application can be moved to a queue with node labels
- * enabled. All labels requested during the application's lifetime are checked.
- *
- * @param app
- * @param dest
- * @throws YarnException
- */
- private void checkQueuePartition(FiCaSchedulerApp app, LeafQueue dest)
- throws YarnException {
- if (!YarnConfiguration.areNodeLabelsEnabled(conf)) {
- return;
- }
- Set<String> targetqueuelabels = dest.getAccessibleNodeLabels();
- AppSchedulingInfo schedulingInfo = app.getAppSchedulingInfo();
- Set<String> appLabelexpressions = schedulingInfo.getRequestedPartitions();
- // default partition access is always available, so remove the empty label
- appLabelexpressions.remove(RMNodeLabelsManager.NO_LABEL);
- Set<String> nonAccessiblelabels = new HashSet<String>();
- for (String label : appLabelexpressions) {
- if (!SchedulerUtils.checkQueueLabelExpression(targetqueuelabels, label,
- null)) {
- nonAccessiblelabels.add(label);
- }
- }
- if (nonAccessiblelabels.size() > 0) {
- throw new YarnException(
- "Specified queue=" + dest.getQueueName() + " can't satisfy following "
- + "apps label expressions =" + nonAccessiblelabels
- + " accessible node labels =" + targetqueuelabels);
- }
- }
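
The check above reduces to a set difference over the partitions the app has requested; a minimal sketch (the real code defers to SchedulerUtils.checkQueueLabelExpression, which also honors wildcard accessibility):

import java.util.HashSet;
import java.util.Set;

final class PartitionCheckSketch {
  /** Returns the requested partitions the target queue cannot access. */
  static Set<String> nonAccessible(Set<String> queueLabels,
      Set<String> requestedPartitions) {
    Set<String> bad = new HashSet<>(requestedPartitions);
    bad.remove("");            // the default partition is always accessible
    bad.removeAll(queueLabels);
    return bad;                // non-empty => the move must be rejected
  }
}
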
-
- /** {@inheritDoc} */
- @Override
- public EnumSet<SchedulerResourceTypes> getSchedulingResourceTypes() {
- if (calculator.getClass().getName()
- .equals(DefaultResourceCalculator.class.getName())) {
- return EnumSet.of(SchedulerResourceTypes.MEMORY);
- }
- return EnumSet.of(SchedulerResourceTypes.MEMORY, SchedulerResourceTypes.CPU);
- }
-
- @Override
- public Resource getMaximumResourceCapability(String queueName) {
- CSQueue queue = getQueue(queueName);
- if (queue == null) {
- LOG.error("Unknown queue: " + queueName);
- return getMaximumResourceCapability();
- }
- if (!(queue instanceof LeafQueue)) {
- LOG.error("queue " + queueName + " is not an leaf queue");
- return getMaximumResourceCapability();
- }
-
- // queue.getMaxAllocation returns the *configured* maximum allocation,
- // while getMaximumResourceCapability() returns the maximum allocation
- // taking per-node maximum resources into account. So return the
- // (component-wise) min of the two.
-
- Resource queueMaxAllocation = ((LeafQueue)queue).getMaximumAllocation();
- Resource clusterMaxAllocationConsiderNodeMax =
- getMaximumResourceCapability();
-
- return Resources.componentwiseMin(queueMaxAllocation,
- clusterMaxAllocationConsiderNodeMax);
- }
-
- private String handleMoveToPlanQueue(String targetQueueName) {
- CSQueue dest = getQueue(targetQueueName);
- if (dest != null && dest instanceof PlanQueue) {
- // use the default child reservation queue of the plan
- targetQueueName = targetQueueName + ReservationConstants.DEFAULT_QUEUE_SUFFIX;
- }
- return targetQueueName;
- }
-
- @Override
- public Set<String> getPlanQueues() {
- Set<String> ret = new HashSet<String>();
- for (Map.Entry<String, CSQueue> l : queueManager.getQueues().entrySet()) {
- if (l.getValue() instanceof PlanQueue) {
- ret.add(l.getKey());
- }
- }
- return ret;
- }
-
- @Override
- public Priority checkAndGetApplicationPriority(
- Priority priorityRequestedByApp, UserGroupInformation user,
- String queueName, ApplicationId applicationId) throws YarnException {
- try {
- readLock.lock();
- Priority appPriority = priorityRequestedByApp;
-
- // Verify the scenario where priority is null from submissionContext.
- if (null == appPriority) {
- // Verify whether submitted user has any default priority set. If so,
- // user's default priority will get precedence over queue default.
- // For the updateApplicationPriority call flow, this check is done in
- // ClientRMService itself.
- appPriority = this.appPriorityACLManager.getDefaultPriority(queueName,
- user);
-
- // Fall back to the default priority for the queue, but only if the
- // user doesn't have a default. If the queue is non-existent, the
- // cluster default priority is used.
- if (null == appPriority) {
- appPriority = this.queueManager.getDefaultPriorityForQueue(queueName);
- }
-
- LOG.info(
- "Application '" + applicationId + "' is submitted without priority "
- + "hence considering default queue/cluster priority: "
- + appPriority.getPriority());
- }
-
- // Verify that the submitted priority does not exceed the maximum
- // priority in the cluster; if it is out of bounds, cap it at the max.
- if (appPriority.getPriority() > getMaxClusterLevelAppPriority()
- .getPriority()) {
- appPriority = Priority
- .newInstance(getMaxClusterLevelAppPriority().getPriority());
- }
-
- // Let's check the ACLs here.
- if (!appPriorityACLManager.checkAccess(user, queueName, appPriority)) {
- throw new YarnException(new AccessControlException(
- "User " + user + " does not have permission to submit/update "
- + applicationId + " for " + appPriority));
- }
-
- LOG.info("Priority '" + appPriority.getPriority()
- + "' is acceptable in queue : " + queueName + " for application: "
- + applicationId);
-
- return appPriority;
- } finally {
- readLock.unlock();
- }
- }
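
Reduced to its core, the resolution order above is: the requested priority, else the user's default, else the queue's default, then a cap at the cluster maximum. A sketch under these assumptions (the ACL check that follows is omitted):

final class PriorityResolveSketch {
  static int resolve(Integer requested, Integer userDefault,
      int queueDefault, int clusterMax) {
    int p = (requested != null) ? requested
        : (userDefault != null) ? userDefault : queueDefault;
    // Out-of-bound priorities are silently capped at the cluster maximum.
    return Math.min(p, clusterMax);
  }

  public static void main(String[] args) {
    // e.g. a requested priority of 12 with cluster max 10 is capped to 10.
    System.out.println(resolve(12, null, 0, 10)); // prints 10
  }
}
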
-
- @Override
- public Priority updateApplicationPriority(Priority newPriority,
- ApplicationId applicationId, SettableFuture<Object> future,
- UserGroupInformation user)
- throws YarnException {
- try {
- writeLock.lock();
- Priority appPriority = null;
- SchedulerApplication<FiCaSchedulerApp> application = applications
- .get(applicationId);
-
- if (application == null) {
- throw new YarnException("Application '" + applicationId
- + "' is not present, hence could not change priority.");
- }
-
- RMApp rmApp = rmContext.getRMApps().get(applicationId);
-
- appPriority = checkAndGetApplicationPriority(newPriority, user,
- rmApp.getQueue(), applicationId);
-
- if (application.getPriority().equals(appPriority)) {
- future.set(null);
- return appPriority;
- }
-
- // Update new priority in Submission Context to update to StateStore.
- rmApp.getApplicationSubmissionContext().setPriority(appPriority);
-
- // Update to state store
- ApplicationStateData appState = ApplicationStateData.newInstance(
- rmApp.getSubmitTime(), rmApp.getStartTime(),
- rmApp.getApplicationSubmissionContext(), rmApp.getUser(),
- rmApp.getCallerContext());
- appState.setApplicationTimeouts(rmApp.getApplicationTimeouts());
- rmContext.getStateStore().updateApplicationStateSynchronously(appState,
- false, future);
-
- // As we use iterator over a TreeSet for OrderingPolicy, once we change
- // priority then reinsert back to make order correct.
- LeafQueue queue = (LeafQueue) getQueue(rmApp.getQueue());
- queue.updateApplicationPriority(application, appPriority);
-
- LOG.info("Priority '" + appPriority + "' is updated in queue :"
- + rmApp.getQueue() + " for application: " + applicationId
- + " for the user: " + rmApp.getUser());
- return appPriority;
- } finally {
- writeLock.unlock();
- }
- }
-
- @Override
- public PreemptionManager getPreemptionManager() {
- return preemptionManager;
- }
-
- @Override
- public ResourceUsage getClusterResourceUsage() {
- return getRootQueue().getQueueResourceUsage();
- }
-
- private SchedulerContainer<FiCaSchedulerApp, FiCaSchedulerNode> getSchedulerContainer(
- RMContainer rmContainer, boolean allocated) {
- if (null == rmContainer) {
- return null;
- }
-
- FiCaSchedulerApp app = getApplicationAttempt(
- rmContainer.getApplicationAttemptId());
- if (null == app) { return null; }
-
- NodeId nodeId;
- // Get nodeId
- if (rmContainer.getState() == RMContainerState.RESERVED) {
- nodeId = rmContainer.getReservedNode();
- } else {
- nodeId = rmContainer.getNodeId();
- }
-
- FiCaSchedulerNode node = getNode(nodeId);
- if (null == node) {
- return null;
- }
- return new SchedulerContainer<>(app, node, rmContainer,
- // TODO, node partition should come from CSAssignment to avoid partition
- // get updated before submitting the commit
- node.getPartition(), allocated);
- }
-
- private List<SchedulerContainer<FiCaSchedulerApp, FiCaSchedulerNode>>
- getSchedulerContainersToRelease(
- CSAssignment csAssignment) {
- List<SchedulerContainer<FiCaSchedulerApp, FiCaSchedulerNode>> list = null;
-
- if (csAssignment.getContainersToKill() != null && !csAssignment
- .getContainersToKill().isEmpty()) {
- list = new ArrayList<>();
- for (RMContainer rmContainer : csAssignment.getContainersToKill()) {
- list.add(getSchedulerContainer(rmContainer, false));
- }
- }
-
- if (csAssignment.getExcessReservation() != null) {
- if (null == list) {
- list = new ArrayList<>();
- }
- list.add(
- getSchedulerContainer(csAssignment.getExcessReservation(), false));
- }
-
- return list;
- }
-
- @VisibleForTesting
- public void submitResourceCommitRequest(Resource cluster,
- CSAssignment csAssignment) {
- ResourceCommitRequest<FiCaSchedulerApp, FiCaSchedulerNode> request =
- createResourceCommitRequest(csAssignment);
-
- if (null == request) {
- return;
- }
-
- if (scheduleAsynchronously) {
- // Submit to a commit thread and commit it async-ly
- resourceCommitterService.addNewCommitRequest(request);
- } else{
- // Otherwise do it sync-ly.
- tryCommit(cluster, request, true);
- }
- }
-
- @Override
- public boolean attemptAllocationOnNode(SchedulerApplicationAttempt appAttempt,
- SchedulingRequest schedulingRequest, SchedulerNode schedulerNode) {
- if (schedulingRequest.getResourceSizing() != null) {
- if (schedulingRequest.getResourceSizing().getNumAllocations() > 1) {
- LOG.warn("The SchedulingRequest has requested more than 1 allocation," +
- " but only 1 will be attempted !!");
- }
- if (!appAttempt.isStopped()) {
- ResourceCommitRequest<FiCaSchedulerApp, FiCaSchedulerNode>
- resourceCommitRequest = createResourceCommitRequest(
- appAttempt, schedulingRequest, schedulerNode);
-
- // Validate placement constraint is satisfied before
- // committing the request.
- try {
- if (!PlacementConstraintsUtil.canSatisfyConstraints(
- appAttempt.getApplicationId(),
- schedulingRequest, schedulerNode,
- rmContext.getPlacementConstraintManager(),
- rmContext.getAllocationTagsManager())) {
- LOG.debug("Failed to allocate container for application "
- + appAttempt.getApplicationId() + " on node "
- + schedulerNode.getNodeName()
- + " because this allocation violates the"
- + " placement constraint.");
- return false;
- }
- } catch (InvalidAllocationTagsQueryException e) {
- LOG.warn("Unable to allocate container", e);
- return false;
- }
- return tryCommit(getClusterResource(), resourceCommitRequest, false);
- }
- }
- return false;
- }
-
- // This assumes numContainers = 1 for the request.
- private ResourceCommitRequest<FiCaSchedulerApp, FiCaSchedulerNode>
- createResourceCommitRequest(SchedulerApplicationAttempt appAttempt,
- SchedulingRequest schedulingRequest, SchedulerNode schedulerNode) {
- ContainerAllocationProposal<FiCaSchedulerApp, FiCaSchedulerNode> allocated =
- null;
- Resource resource = schedulingRequest.getResourceSizing().getResources();
- if (Resources.greaterThan(calculator, getClusterResource(),
- resource, Resources.none())) {
- ContainerId cId =
- ContainerId.newContainerId(appAttempt.getApplicationAttemptId(),
- appAttempt.getAppSchedulingInfo().getNewContainerId());
- Container container = BuilderUtils.newContainer(
- cId, schedulerNode.getNodeID(), schedulerNode.getHttpAddress(),
- resource, schedulingRequest.getPriority(), null,
- ExecutionType.GUARANTEED,
- schedulingRequest.getAllocationRequestId());
- RMContainer rmContainer = new RMContainerImpl(container,
- SchedulerRequestKey.extractFrom(container),
- appAttempt.getApplicationAttemptId(), container.getNodeId(),
- appAttempt.getUser(), rmContext, false);
- ((RMContainerImpl)rmContainer).setAllocationTags(
- new HashSet<>(schedulingRequest.getAllocationTags()));
-
- allocated = new ContainerAllocationProposal<>(
- getSchedulerContainer(rmContainer, true),
- null, null, NodeType.NODE_LOCAL, NodeType.NODE_LOCAL,
- SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY,
- resource);
- }
-
- if (null != allocated) {
- List<ContainerAllocationProposal<FiCaSchedulerApp, FiCaSchedulerNode>>
- allocationsList = new ArrayList<>();
- allocationsList.add(allocated);
-
- return new ResourceCommitRequest<>(allocationsList, null, null);
- }
- return null;
- }
-
- @VisibleForTesting
- public ResourceCommitRequest<FiCaSchedulerApp, FiCaSchedulerNode>
- createResourceCommitRequest(CSAssignment csAssignment) {
- ContainerAllocationProposal<FiCaSchedulerApp, FiCaSchedulerNode> allocated =
- null;
- ContainerAllocationProposal<FiCaSchedulerApp, FiCaSchedulerNode> reserved =
- null;
- List<SchedulerContainer<FiCaSchedulerApp, FiCaSchedulerNode>> released =
- null;
-
- if (Resources.greaterThan(calculator, getClusterResource(),
- csAssignment.getResource(), Resources.none())) {
- // Allocated something
- List<AssignmentInformation.AssignmentDetails> allocations =
- csAssignment.getAssignmentInformation().getAllocationDetails();
- if (!allocations.isEmpty()) {
- RMContainer rmContainer = allocations.get(0).rmContainer;
- allocated = new ContainerAllocationProposal<>(
- getSchedulerContainer(rmContainer, true),
- getSchedulerContainersToRelease(csAssignment),
- getSchedulerContainer(csAssignment.getFulfilledReservedContainer(),
- false), csAssignment.getType(),
- csAssignment.getRequestLocalityType(),
- csAssignment.getSchedulingMode() != null ?
- csAssignment.getSchedulingMode() :
- SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY,
- csAssignment.getResource());
- }
-
- // Reserved something
- List<AssignmentInformation.AssignmentDetails> reservation =
- csAssignment.getAssignmentInformation().getReservationDetails();
- if (!reservation.isEmpty()) {
- RMContainer rmContainer = reservation.get(0).rmContainer;
- reserved = new ContainerAllocationProposal<>(
- getSchedulerContainer(rmContainer, false),
- getSchedulerContainersToRelease(csAssignment),
- getSchedulerContainer(csAssignment.getFulfilledReservedContainer(),
- false), csAssignment.getType(),
- csAssignment.getRequestLocalityType(),
- csAssignment.getSchedulingMode() != null ?
- csAssignment.getSchedulingMode() :
- SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY,
- csAssignment.getResource());
- }
- }
-
- // When we don't need to allocate/reserve anything, we can feel free to
- // kill all to-release containers in the request.
- if (null == allocated && null == reserved) {
- released = getSchedulerContainersToRelease(csAssignment);
- }
-
- if (null != allocated || null != reserved || (null != released && !released
- .isEmpty())) {
- List<ContainerAllocationProposal<FiCaSchedulerApp, FiCaSchedulerNode>>
- allocationsList = null;
- if (allocated != null) {
- allocationsList = new ArrayList<>();
- allocationsList.add(allocated);
- }
-
- List<ContainerAllocationProposal<FiCaSchedulerApp, FiCaSchedulerNode>>
- reservationsList = null;
- if (reserved != null) {
- reservationsList = new ArrayList<>();
- reservationsList.add(reserved);
- }
-
- return new ResourceCommitRequest<>(allocationsList, reservationsList,
- released);
- }
-
- return null;
- }
-
- @Override
- public boolean tryCommit(Resource cluster, ResourceCommitRequest r,
- boolean updatePending) {
- long commitStart = System.nanoTime();
- ResourceCommitRequest<FiCaSchedulerApp, FiCaSchedulerNode> request =
- (ResourceCommitRequest<FiCaSchedulerApp, FiCaSchedulerNode>) r;
-
- ApplicationAttemptId attemptId = null;
-
- // We need to update unconfirmed allocated resource of application when
- // any container allocated.
- boolean updateUnconfirmedAllocatedResource =
- request.getContainersToAllocate() != null && !request
- .getContainersToAllocate().isEmpty();
-
- // find the application to accept and apply the ResourceCommitRequest
- if (request.anythingAllocatedOrReserved()) {
- ContainerAllocationProposal<FiCaSchedulerApp, FiCaSchedulerNode> c =
- request.getFirstAllocatedOrReservedContainer();
- attemptId =
- c.getAllocatedOrReservedContainer().getSchedulerApplicationAttempt()
- .getApplicationAttemptId();
- } else {
- if (!request.getContainersToRelease().isEmpty()) {
- attemptId = request.getContainersToRelease().get(0)
- .getSchedulerApplicationAttempt().getApplicationAttemptId();
- }
- }
-
- if (LOG.isDebugEnabled()) {
- LOG.debug("Try to commit allocation proposal=" + request);
- }
-
- boolean isSuccess = false;
- if (attemptId != null) {
- FiCaSchedulerApp app = getApplicationAttempt(attemptId);
- // Required sanity check for attemptId - when async scheduling is
- // enabled, the proposal might be outdated if AM failover just finished
- // and the proposal queue was not consumed in time
- if (app != null && attemptId.equals(app.getApplicationAttemptId())) {
- if (app.accept(cluster, request, updatePending)
- && app.apply(cluster, request, updatePending)) {
- long commitSuccess = System.nanoTime() - commitStart;
- CapacitySchedulerMetrics.getMetrics()
- .addCommitSuccess(commitSuccess);
- LOG.info("Allocation proposal accepted");
- isSuccess = true;
- } else{
- long commitFailed = System.nanoTime() - commitStart;
- CapacitySchedulerMetrics.getMetrics()
- .addCommitFailure(commitFailed);
- LOG.info("Failed to accept allocation proposal");
+
+ }
+
+ @Override
+ public synchronized Priority updateApplicationPriority(Priority newPriority,
+ ApplicationId applicationId, SettableFuture<Object> future,
+ UserGroupInformation user)
+ throws YarnException {
+ Priority appPriority = null;
+ SchedulerApplication<FiCaSchedulerApp> application = applications
+ .get(applicationId);
+
+ if (application == null) {
+ throw new YarnException("Application '" + applicationId
+ + "' is not present, hence could not change priority.");
+ }
+
+ RMApp rmApp = rmContext.getRMApps().get(applicationId);
+
+ appPriority = checkAndGetApplicationPriority(newPriority, user,
+ rmApp.getQueue(), applicationId);
+
+ if (application.getPriority().equals(appPriority)) {
+ future.set(null);
+ return appPriority;
+ }
+
+ // Update new priority in Submission Context to update to StateStore.
+ rmApp.getApplicationSubmissionContext().setPriority(appPriority);
+
+ // Update to state store
+ ApplicationStateData appState = ApplicationStateData.newInstance(
+ rmApp.getSubmitTime(), rmApp.getStartTime(),
+ rmApp.getApplicationSubmissionContext(), rmApp.getUser(),
+ rmApp.getCallerContext());
+ appState.setApplicationTimeouts(rmApp.getApplicationTimeouts());
+ rmContext.getStateStore().updateApplicationStateSynchronously(appState,
+ false, future);
+
+ // As we use iterator over a TreeSet for OrderingPolicy, once we change
+ // priority then reinsert back to make order correct.
+ LeafQueue queue = (LeafQueue) getQueue(rmApp.getQueue());
+ queue.updateApplicationPriority(application, appPriority);
+
+ LOG.info("Priority '" + appPriority + "' is updated in queue :"
+ + rmApp.getQueue() + " for application: " + applicationId
+ + " for the user: " + rmApp.getUser());
+ return appPriority;
+ }
+
+ @Override
+ public PreemptionManager getPreemptionManager() {
+ return preemptionManager;
+ }
+
+ @Override
+ public ResourceUsage getClusterResourceUsage() {
+ return getRootQueue().getQueueResourceUsage();
+ }
+
+ private SchedulerContainer<FiCaSchedulerApp, FiCaSchedulerNode> getSchedulerContainer(
+ RMContainer rmContainer, boolean allocated) {
+ if (null == rmContainer) {
+ return null;
+ }
- if (LOG.isDebugEnabled()) {
- LOG.debug("Allocation proposal accepted=" + isSuccess + ", proposal="
- + request);
- }
-
- // Update unconfirmed allocated resource.
- if (updateUnconfirmedAllocatedResource) {
- app.decUnconfirmedRes(request.getTotalAllocatedResource());
- }
- }
- }
- return isSuccess;
- }
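
tryCommit above is a two-phase accept/apply with latency metrics on both outcomes; a sketch under assumed minimal interfaces:

final class CommitSketch {
  interface App {
    boolean accept(Object request);  // validate against current state
    boolean apply(Object request);   // mutate state once accepted
  }

  interface Metrics {
    void addCommitSuccess(long nanos);
    void addCommitFailure(long nanos);
  }

  static boolean tryCommit(App app, Object request, Metrics metrics) {
    long start = System.nanoTime();
    // accept() is re-validated at commit time because, with async
    // scheduling, the proposal may be stale by the time it is consumed.
    boolean ok = app.accept(request) && app.apply(request);
    long elapsed = System.nanoTime() - start;
    if (ok) {
      metrics.addCommitSuccess(elapsed);
    } else {
      metrics.addCommitFailure(elapsed);
    }
    return ok;
  }
}
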
-
- public int getAsyncSchedulingPendingBacklogs() {
- if (scheduleAsynchronously) {
- return resourceCommitterService.getPendingBacklogs();
- }
- return 0;
- }
-
- @Override
- public CapacitySchedulerQueueManager getCapacitySchedulerQueueManager() {
- return this.queueManager;
- }
-
- /**
- * Try to move a reserved container to a targetNode.
- * If the targetNode is reserved by another application (other than this
- * one), the previous reservation will be cancelled.
- *
- * @param toBeMovedContainer the reserved container to be moved
- * @param targetNode the target node
- * @return true if the move succeeded; false if the targetNode is reserved
- * by a different container or the move failed for any other reason.
- */
- public boolean moveReservedContainer(RMContainer toBeMovedContainer,
- FiCaSchedulerNode targetNode) {
- try {
- writeLock.lock();
-
- if (LOG.isDebugEnabled()) {
- LOG.debug("Trying to move container=" + toBeMovedContainer + " to node="
- + targetNode.getNodeID());
- }
-
- FiCaSchedulerNode sourceNode = getNode(toBeMovedContainer.getNodeId());
- if (null == sourceNode) {
- if (LOG.isDebugEnabled()) {
- LOG.debug("Failed to move reservation, cannot find source node="
- + toBeMovedContainer.getNodeId());
+ FiCaSchedulerApp app = getApplicationAttempt(
+ rmContainer.getApplicationAttemptId());
+ if (null == app) {
+ return null;
+ }
- return false;
- }
- // Target node updated?
- if (getNode(targetNode.getNodeID()) != targetNode) {
- if (LOG.isDebugEnabled()) {
- LOG.debug(
- "Failed to move reservation, node updated or removed, moving "
- + "cancelled.");
+ NodeId nodeId;
+ // Get nodeId
+ if (rmContainer.getState() == RMContainerState.RESERVED) {
+ nodeId = rmContainer.getReservedNode();
+ } else {
+ nodeId = rmContainer.getNodeId();
+ }
- return false;
- }
- // Target node's reservation status changed?
- if (targetNode.getReservedContainer() != null) {
- if (LOG.isDebugEnabled()) {
- LOG.debug(
- "Target node's reservation status changed, moving cancelled.");
+ FiCaSchedulerNode node = getNode(nodeId);
+ if (null == node) {
+ return null;
+ }
+ return new SchedulerContainer<>(app, node, rmContainer,
+ // TODO, node partition should come from CSAssignment to avoid partition
+ // get updated before submitting the commit
+ node.getPartition(), allocated);
+ }
+
+ private List<SchedulerContainer<FiCaSchedulerApp, FiCaSchedulerNode>>
+ getSchedulerContainersToRelease(
+ CSAssignment csAssignment) {
+ List<SchedulerContainer<FiCaSchedulerApp, FiCaSchedulerNode>> list = null;
+
+ if (csAssignment.getContainersToKill() != null && !csAssignment
+ .getContainersToKill().isEmpty()) {
+ list = new ArrayList<>();
+ for (RMContainer rmContainer : csAssignment.getContainersToKill()) {
+ list.add(getSchedulerContainer(rmContainer, false));
+ }
+ }
+
+ if (csAssignment.getExcessReservation() != null) {
+ if (null == list) {
+ list = new ArrayList<>();
+ }
+ list.add(
+ getSchedulerContainer(csAssignment.getExcessReservation(), false));
+ }
+
+ return list;
+ }
+
+ @VisibleForTesting
+ public void submitResourceCommitRequest(Resource cluster,
+ CSAssignment csAssignment) {
+ ResourceCommitRequest<FiCaSchedulerApp, FiCaSchedulerNode> request =
+ createResourceCommitRequest(csAssignment);
+
+ if (null == request) {
+ return;
+ }
+
+ if (scheduleAsynchronously) {
+ // Submit to a commit thread and commit it async-ly
+ resourceCommitterService.addNewCommitRequest(request);
+ } else {
+ // Otherwise do it sync-ly.
+ tryCommit(cluster, request, true);
+ }
+ }
+
+ @Override
+ public boolean attemptAllocationOnNode(SchedulerApplicationAttempt appAttempt,
+ SchedulingRequest schedulingRequest, SchedulerNode schedulerNode) {
+ if (schedulingRequest.getResourceSizing() != null) {
+ if (schedulingRequest.getResourceSizing().getNumAllocations() > 1) {
+ LOG.warn("The SchedulingRequest has requested more than 1 allocation," +
+ " but only 1 will be attempted !!");
+ }
+ if (!appAttempt.isStopped()) {
+ ResourceCommitRequest<FiCaSchedulerApp, FiCaSchedulerNode>
+ resourceCommitRequest = createResourceCommitRequest(
+ appAttempt, schedulingRequest, schedulerNode);
+
+ // Validate placement constraint is satisfied before
+ // committing the request.
+ try {
+ if (!PlacementConstraintsUtil.canSatisfyConstraints(
+ appAttempt.getApplicationId(),
+ schedulingRequest, schedulerNode,
+ rmContext.getPlacementConstraintManager(),
+ rmContext.getAllocationTagsManager())) {
+ LOG.debug("Failed to allocate container for application "
+ + appAttempt.getApplicationId() + " on node "
+ + schedulerNode.getNodeName()
+ + " because this allocation violates the"
+ + " placement constraint.");
+ return false;
+ }
+ } catch (InvalidAllocationTagsQueryException e) {
+ LOG.warn("Unable to allocate container", e);
+ return false;
+ }
+ return tryCommit(getClusterResource(), resourceCommitRequest, false);
+ }
}
return false;
- }
+ }
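
The method above builds a proposal, validates the placement constraint, and
only then attempts the commit. Reduced to a hedged sketch with stand-in
interfaces (none of these are YARN types):

    interface ConstraintCheck { boolean satisfied(); }
    interface ProposalCommitter { boolean tryCommit(Object proposal); }

    final class AttemptSketch {
      static boolean attempt(Object proposal, ConstraintCheck check,
          ProposalCommitter committer) {
        if (proposal == null || !check.satisfied()) {
          return false; // a violated constraint never reaches the commit phase
        }
        return committer.tryCommit(proposal);
      }
    }
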
+
+ // This assumes numContainers = 1 for the request.
+ private ResourceCommitRequest<FiCaSchedulerApp, FiCaSchedulerNode>
+ createResourceCommitRequest(SchedulerApplicationAttempt appAttempt,
+ SchedulingRequest schedulingRequest, SchedulerNode schedulerNode) {
+ ContainerAllocationProposal<FiCaSchedulerApp, FiCaSchedulerNode> allocated =
+ null;
+ Resource resource = schedulingRequest.getResourceSizing().getResources();
+ if (Resources.greaterThan(calculator, getClusterResource(),
+ resource, Resources.none())) {
+ ContainerId cId =
+ ContainerId.newContainerId(appAttempt.getApplicationAttemptId(),
+ appAttempt.getAppSchedulingInfo().getNewContainerId());
+ Container container = BuilderUtils.newContainer(
+ cId, schedulerNode.getNodeID(), schedulerNode.getHttpAddress(),
+ resource, schedulingRequest.getPriority(), null,
+ ExecutionType.GUARANTEED,
+ schedulingRequest.getAllocationRequestId());
+ RMContainer rmContainer = new RMContainerImpl(container,
+ SchedulerRequestKey.extractFrom(container),
+ appAttempt.getApplicationAttemptId(), container.getNodeId(),
+ appAttempt.getUser(), rmContext, false);
+ ((RMContainerImpl) rmContainer).setAllocationTags(
+ new HashSet<>(schedulingRequest.getAllocationTags()));
+
+ allocated = new ContainerAllocationProposal<>(
+ getSchedulerContainer(rmContainer, true),
+ null, null, NodeType.NODE_LOCAL, NodeType.NODE_LOCAL,
+ SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY,
+ resource);
+ }
+
+ if (null != allocated) {
+ List<ContainerAllocationProposal<FiCaSchedulerApp, FiCaSchedulerNode>>
+ allocationsList = new ArrayList<>();
+ allocationsList.add(allocated);
+
+ return new ResourceCommitRequest<>(allocationsList, null, null);
+ }
+ return null;
+ }
+
+ @VisibleForTesting
+ public ResourceCommitRequest<FiCaSchedulerApp, FiCaSchedulerNode>
+ createResourceCommitRequest(CSAssignment csAssignment) {
+ ContainerAllocationProposal<FiCaSchedulerApp, FiCaSchedulerNode> allocated =
+ null;
+ ContainerAllocationProposal<FiCaSchedulerApp, FiCaSchedulerNode> reserved =
+ null;
+ List<SchedulerContainer<FiCaSchedulerApp, FiCaSchedulerNode>> released =
+ null;
+
+ if (Resources.greaterThan(calculator, getClusterResource(),
+ csAssignment.getResource(), Resources.none())) {
+ // Allocated something
+ List<AssignmentInformation.AssignmentDetails> allocations =
+ csAssignment.getAssignmentInformation().getAllocationDetails();
+ if (!allocations.isEmpty()) {
+ RMContainer rmContainer = allocations.get(0).rmContainer;
+ allocated = new ContainerAllocationProposal<>(
+ getSchedulerContainer(rmContainer, true),
+ getSchedulerContainersToRelease(csAssignment),
+ getSchedulerContainer(csAssignment.getFulfilledReservedContainer(),
+ false), csAssignment.getType(),
+ csAssignment.getRequestLocalityType(),
+ csAssignment.getSchedulingMode() != null ?
+ csAssignment.getSchedulingMode() :
+ SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY,
+ csAssignment.getResource());
+ }
+
+ // Reserved something
+ List<AssignmentInformation.AssignmentDetails> reservation =
+ csAssignment.getAssignmentInformation().getReservationDetails();
+ if (!reservation.isEmpty()) {
+ RMContainer rmContainer = reservation.get(0).rmContainer;
+ reserved = new ContainerAllocationProposal<>(
+ getSchedulerContainer(rmContainer, false),
+ getSchedulerContainersToRelease(csAssignment),
+ getSchedulerContainer(csAssignment.getFulfilledReservedContainer(),
+ false), csAssignment.getType(),
+ csAssignment.getRequestLocalityType(),
+ csAssignment.getSchedulingMode() != null ?
+ csAssignment.getSchedulingMode() :
+ SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY,
+ csAssignment.getResource());
+ }
+ }
+
+ // When we don't need to allocate or reserve anything, we are free to
+ // kill all to-release containers in the request.
+ if (null == allocated && null == reserved) {
+ released = getSchedulerContainersToRelease(csAssignment);
+ }
+
+ if (null != allocated || null != reserved || (null != released && !released
+ .isEmpty())) {
+ List<ContainerAllocationProposal<FiCaSchedulerApp, FiCaSchedulerNode>>
+ allocationsList = null;
+ if (allocated != null) {
+ allocationsList = new ArrayList<>();
+ allocationsList.add(allocated);
+ }
+
+ List<ContainerAllocationProposal<FiCaSchedulerApp, FiCaSchedulerNode>>
+ reservationsList = null;
+ if (reserved != null) {
+ reservationsList = new ArrayList<>();
+ reservationsList.add(reserved);
+ }
+
+ return new ResourceCommitRequest<>(allocationsList, reservationsList,
+ released);
+ }
+
+ return null;
+ }
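
The request assembled above always has a fixed shape: at most one allocation,
at most one reservation, and releases that stand alone only when nothing was
allocated or reserved. A hedged stand-alone restatement (T stands in for the
proposal and container types):

    import java.util.Collections;
    import java.util.List;

    final class CommitRequestShape<T> {
      final List<T> allocations;  // null or a singleton
      final List<T> reservations; // null or a singleton
      final List<T> releases;

      CommitRequestShape(T allocated, T reserved, List<T> released) {
        this.allocations =
            allocated == null ? null : Collections.singletonList(allocated);
        this.reservations =
            reserved == null ? null : Collections.singletonList(reserved);
        this.releases = released;
      }

      boolean anythingAllocatedOrReserved() {
        return allocations != null || reservations != null;
      }
    }
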
+
+ @Override
+ public boolean tryCommit(Resource cluster, ResourceCommitRequest r,
+ boolean updatePending) {
+ long commitStart = System.nanoTime();
+ ResourceCommitRequest<FiCaSchedulerApp, FiCaSchedulerNode> request =
+ (ResourceCommitRequest<FiCaSchedulerApp, FiCaSchedulerNode>) r;
+
+ ApplicationAttemptId attemptId = null;
+
+ // We need to update unconfirmed allocated resource of application when
+ // any container allocated.
+ boolean updateUnconfirmedAllocatedResource =
+ request.getContainersToAllocate() != null && !request
+ .getContainersToAllocate().isEmpty();
+
+ // find the application to accept and apply the ResourceCommitRequest
+ if (request.anythingAllocatedOrReserved()) {
+ ContainerAllocationProposal<FiCaSchedulerApp, FiCaSchedulerNode> c =
+ request.getFirstAllocatedOrReservedContainer();
+ attemptId =
+ c.getAllocatedOrReservedContainer().getSchedulerApplicationAttempt()
+ .getApplicationAttemptId();
+ } else {
+ if (!request.getContainersToRelease().isEmpty()) {
+ attemptId = request.getContainersToRelease().get(0)
+ .getSchedulerApplicationAttempt().getApplicationAttemptId();
+ }
+ }
- FiCaSchedulerApp app = getApplicationAttempt(
- toBeMovedContainer.getApplicationAttemptId());
- if (null == app) {
if (LOG.isDebugEnabled()) {
- LOG.debug("Cannot find to-be-moved container's application="
- + toBeMovedContainer.getApplicationAttemptId());
+ LOG.debug("Try to commit allocation proposal=" + request);
}
- return false;
- }
-
- // finally, move the reserved container
- return app.moveReservation(toBeMovedContainer, sourceNode, targetNode);
- } finally {
- writeLock.unlock();
- }
- }
-
- @Override
- public long checkAndGetApplicationLifetime(String queueName,
- long lifetimeRequestedByApp) {
- try {
- readLock.lock();
- CSQueue queue = getQueue(queueName);
- if (queue == null || !(queue instanceof LeafQueue)) {
- return lifetimeRequestedByApp;
- }
- long defaultApplicationLifetime =
- ((LeafQueue) queue).getDefaultApplicationLifetime();
- long maximumApplicationLifetime =
- ((LeafQueue) queue).getMaximumApplicationLifetime();
+ boolean isSuccess = false;
+ if (attemptId != null) {
+ FiCaSchedulerApp app = getApplicationAttempt(attemptId);
+ // Required sanity check for attemptId - when async scheduling is enabled,
+ // the proposal might be outdated if AM failover just finished
+ // and the proposal queue was not consumed in time
+ if (app != null && attemptId.equals(app.getApplicationAttemptId())) {
+ if (app.accept(cluster, request, updatePending)
+ && app.apply(cluster, request, updatePending)) {
+ long commitSuccess = System.nanoTime() - commitStart;
+ CapacitySchedulerMetrics.getMetrics()
+ .addCommitSuccess(commitSuccess);
+ LOG.info("Allocation proposal accepted");
+ isSuccess = true;
+ } else {
+ long commitFailed = System.nanoTime() - commitStart;
+ CapacitySchedulerMetrics.getMetrics()
+ .addCommitFailure(commitFailed);
+ LOG.info("Failed to accept allocation proposal");
+ }
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Allocation proposal accepted=" + isSuccess + ", proposal="
+ + request);
+ }
+
+ // Update unconfirmed allocated resource.
+ if (updateUnconfirmedAllocatedResource) {
+ app.decUnconfirmedRes(request.getTotalAllocatedResource());
+ }
+ }
+ }
+ return isSuccess;
+ }
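
tryCommit() follows an optimistic accept-then-apply protocol: the proposal
was produced without holding the scheduler's lock, so it is first validated
against current state and only applied if still valid. A hedged sketch in
which a single monitor stands in for the finer-grained application and queue
locking:

    interface TwoPhaseTarget<P> {
      boolean accept(P proposal); // validate against current state
      void apply(P proposal);     // mutate state; called only after accept
    }

    final class TwoPhaseCommitter<P> {
      boolean tryCommit(TwoPhaseTarget<P> target, P proposal) {
        synchronized (target) {
          if (!target.accept(proposal)) {
            return false; // stale proposal from async scheduling, dropped cheaply
          }
          target.apply(proposal);
          return true;
        }
      }
    }
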
+
+ public int getAsyncSchedulingPendingBacklogs() {
+ if (scheduleAsynchronously) {
+ return resourceCommitterService.getPendingBacklogs();
+ }
+ return 0;
+ }
+
+ @Override
+ public CapacitySchedulerQueueManager getCapacitySchedulerQueueManager() {
+ return this.queueManager;
+ }
+
+ /**
+ * Try to move a reserved container to a target node.
+ * If the targetNode is reserved by another application (other than this
+ * one), the previous reservation will be cancelled.
+ *
+ * @param toBeMovedContainer the reserved container to be moved
+ * @param targetNode the target node
+ * @return true if the move succeeded; false if the targetNode is reserved
+ * by a different container or the move failed for any other reason.
+ */
+ public synchronized boolean moveReservedContainer(RMContainer toBeMovedContainer,
+ FiCaSchedulerNode targetNode) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Trying to move container=" + toBeMovedContainer + " to node="
+ + targetNode.getNodeID());
+ }
+
+ FiCaSchedulerNode sourceNode = getNode(toBeMovedContainer.getNodeId());
+ if (null == sourceNode) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Failed to move reservation, cannot find source node="
+ + toBeMovedContainer.getNodeId());
+ }
+ return false;
+ }
+
+ // Target node updated?
+ if (getNode(targetNode.getNodeID()) != targetNode) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug(
+ "Failed to move reservation, node updated or removed, moving "
+ + "cancelled.");
+ }
+ return false;
+ }
+
+ // Target node's reservation status changed?
+ if (targetNode.getReservedContainer() != null) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug(
+ "Target node's reservation status changed, moving cancelled.");
+ }
+ return false;
+ }
+
+ FiCaSchedulerApp app = getApplicationAttempt(
+ toBeMovedContainer.getApplicationAttemptId());
+ if (null == app) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Cannot find to-be-moved container's application="
+ + toBeMovedContainer.getApplicationAttemptId());
+ }
+ return false;
+ }
+
+ // finally, move the reserved container
+ return app.moveReservation(toBeMovedContainer, sourceNode, targetNode);
+ }
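
The rewrite of this method shows the transformation the whole patch applies:
an explicit ReentrantReadWriteLock guard becomes an intrinsic monitor. In
miniature, on a toy class that is not part of Hadoop:

    import java.util.concurrent.locks.ReentrantReadWriteLock;

    class LockStyles {
      private final ReentrantReadWriteLock rwLock = new ReentrantReadWriteLock();
      private final ReentrantReadWriteLock.WriteLock writeLock = rwLock.writeLock();
      private int state;

      // Before: explicit write lock, released in finally.
      void incrementWithExplicitLock() {
        try {
          writeLock.lock();
          state++;
        } finally {
          writeLock.unlock();
        }
      }

      // After: the monitor is released implicitly, even on exceptions.
      synchronized void incrementSynchronized() {
        state++;
      }
    }

The synchronized form cannot leak a lock on an early return, at the cost of
serializing readers that the read/write lock would have let run in parallel.
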
+
+ @Override
+ public long checkAndGetApplicationLifetime(String queueName,
+ long lifetimeRequestedByApp) {
+ CSQueue queue = getQueue(queueName);
+ if (queue == null || !(queue instanceof LeafQueue)) {
+ return lifetimeRequestedByApp;
+ }
+
+ long defaultApplicationLifetime =
+ ((LeafQueue) queue).getDefaultApplicationLifetime();
+ long maximumApplicationLifetime =
+ ((LeafQueue) queue).getMaximumApplicationLifetime();
+
+ // check only the maximum; that's enough because the default can't
+ // exceed the maximum
+ if (maximumApplicationLifetime <= 0) {
+ return lifetimeRequestedByApp;
+ }
- // check only for maximum, that's enough because default can't
- // exceed maximum
- if (maximumApplicationLifetime <= 0) {
+ if (lifetimeRequestedByApp <= 0) {
+ return defaultApplicationLifetime;
+ } else if (lifetimeRequestedByApp > maximumApplicationLifetime) {
+ return maximumApplicationLifetime;
+ }
return lifetimeRequestedByApp;
- }
-
- if (lifetimeRequestedByApp <= 0) {
- return defaultApplicationLifetime;
- } else if (lifetimeRequestedByApp > maximumApplicationLifetime) {
- return maximumApplicationLifetime;
- }
- return lifetimeRequestedByApp;
- } finally {
- readLock.unlock();
- }
- }
-
- @Override
- public long getMaximumApplicationLifetime(String queueName) {
- CSQueue queue = getQueue(queueName);
- if (queue == null || !(queue instanceof LeafQueue)) {
- LOG.error("Unknown queue: " + queueName);
- return -1;
- }
- // In seconds
- return ((LeafQueue) queue).getMaximumApplicationLifetime();
- }
-
- @Override
- public boolean isConfigurationMutable() {
- return csConfProvider instanceof MutableConfigurationProvider;
- }
-
- @Override
- public MutableConfigurationProvider getMutableConfProvider() {
- if (isConfigurationMutable()) {
- return (MutableConfigurationProvider) csConfProvider;
- }
- return null;
- }
-
- private LeafQueue autoCreateLeafQueue(
- ApplicationPlacementContext placementContext)
- throws IOException, YarnException {
-
- AutoCreatedLeafQueue autoCreatedLeafQueue = null;
-
- String leafQueueName = placementContext.getQueue();
- String parentQueueName = placementContext.getParentQueue();
-
- if (!StringUtils.isEmpty(parentQueueName)) {
- CSQueue parentQueue = getQueue(parentQueueName);
-
- if (parentQueue != null && conf.isAutoCreateChildQueueEnabled(
- parentQueue.getQueuePath())) {
-
- ManagedParentQueue autoCreateEnabledParentQueue =
- (ManagedParentQueue) parentQueue;
- autoCreatedLeafQueue = new AutoCreatedLeafQueue(this, leafQueueName,
- autoCreateEnabledParentQueue);
-
- addQueue(autoCreatedLeafQueue);
-
- } else{
- throw new SchedulerDynamicEditException(
- "Could not auto-create leaf queue for " + leafQueueName
- + ". Queue mapping specifies an invalid parent queue "
- + "which does not exist "
- + parentQueueName);
- }
- } else{
- throw new SchedulerDynamicEditException(
- "Could not auto-create leaf queue for " + leafQueueName
- + ". Queue mapping does not specify"
- + " which parent queue it needs to be created under.");
- }
- return autoCreatedLeafQueue;
- }
-
- @Override
- public void resetSchedulerMetrics() {
- CapacitySchedulerMetrics.destroy();
- }
+ }
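
The clamping rule above, restated as a small hypothetical helper with worked
values:

    final class LifetimeClamp {
      static long clamp(long requested, long dflt, long max) {
        if (max <= 0) {
          return requested; // no maximum configured: pass the request through
        }
        if (requested <= 0) {
          return dflt; // nothing requested: fall back to the queue default
        }
        return Math.min(requested, max); // cap at the queue maximum
      }
    }
    // e.g. clamp(-1, 600, 3600) == 600 and clamp(7200, 600, 3600) == 3600
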
+
+ @Override
+ public long getMaximumApplicationLifetime(String queueName) {
+ CSQueue queue = getQueue(queueName);
+ if (queue == null || !(queue instanceof LeafQueue)) {
+ LOG.error("Unknown queue: " + queueName);
+ return -1;
+ }
+ // In seconds
+ return ((LeafQueue) queue).getMaximumApplicationLifetime();
+ }
+
+ @Override
+ public boolean isConfigurationMutable() {
+ return csConfProvider instanceof MutableConfigurationProvider;
+ }
+
+ @Override
+ public MutableConfigurationProvider getMutableConfProvider() {
+ if (isConfigurationMutable()) {
+ return (MutableConfigurationProvider) csConfProvider;
+ }
+ return null;
+ }
+
+ private LeafQueue autoCreateLeafQueue(
+ ApplicationPlacementContext placementContext)
+ throws IOException, YarnException {
+
+ AutoCreatedLeafQueue autoCreatedLeafQueue = null;
+
+ String leafQueueName = placementContext.getQueue();
+ String parentQueueName = placementContext.getParentQueue();
+
+ if (!StringUtils.isEmpty(parentQueueName)) {
+ CSQueue parentQueue = getQueue(parentQueueName);
+
+ if (parentQueue != null && conf.isAutoCreateChildQueueEnabled(
+ parentQueue.getQueuePath())) {
+
+ ManagedParentQueue autoCreateEnabledParentQueue =
+ (ManagedParentQueue) parentQueue;
+ autoCreatedLeafQueue = new AutoCreatedLeafQueue(this, leafQueueName,
+ autoCreateEnabledParentQueue);
+
+ addQueue(autoCreatedLeafQueue);
+
+ } else {
+ throw new SchedulerDynamicEditException(
+ "Could not auto-create leaf queue for " + leafQueueName
+ + ". Queue mapping specifies an invalid parent queue "
+ + "which does not exist "
+ + parentQueueName);
+ }
+ } else {
+ throw new SchedulerDynamicEditException(
+ "Could not auto-create leaf queue for " + leafQueueName
+ + ". Queue mapping does not specify"
+ + " which parent queue it needs to be created under.");
+ }
+ return autoCreatedLeafQueue;
+ }
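
Auto-creation succeeds only when the mapping names a parent that exists and
has auto-creation enabled; both failure branches above throw. A hedged
restatement of that precondition check (hypothetical names, not the CSQueue
API):

    import java.util.Map;

    final class AutoCreatePrecheck {
      static void require(Map<String, Boolean> autoCreateEnabledByParent,
          String parent, String leaf) {
        if (parent == null || parent.isEmpty()) {
          throw new IllegalStateException(
              "Queue mapping for " + leaf + " names no parent queue");
        }
        if (!Boolean.TRUE.equals(autoCreateEnabledByParent.get(parent))) {
          throw new IllegalStateException("Parent " + parent
              + " is missing or not enabled for auto-created children");
        }
      }
    }
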
+
+ @Override
+ public void resetSchedulerMetrics() {
+ CapacitySchedulerMetrics.destroy();
+ }
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java
index 5c08251..6c96e20 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java
@@ -168,11 +168,9 @@ protected void setupQueueConfigs(Resource clusterResource)
setupQueueConfigs(clusterResource, csContext.getConfiguration());
}
- protected void setupQueueConfigs(Resource clusterResource,
+ protected synchronized void setupQueueConfigs(Resource clusterResource,
CapacitySchedulerConfiguration conf) throws
IOException {
- try {
- writeLock.lock();
CapacitySchedulerConfiguration schedConf = csContext.getConfiguration();
super.setupQueueConfigs(clusterResource, conf);
@@ -335,9 +333,6 @@ protected void setupQueueConfigs(Resource clusterResource,
+ "\nmaxLifetime = " + maxApplicationLifetime + " seconds"
+ "\ndefaultLifetime = "
+ defaultApplicationLifetime + " seconds");
- } finally {
- writeLock.unlock();
- }
}
/**
@@ -405,61 +400,42 @@ void setUserLimitFactor(float userLimitFactor) {
@Override
- public int getNumApplications() {
- try {
- readLock.lock();
+ public synchronized int getNumApplications() {
return getNumPendingApplications() + getNumActiveApplications();
- } finally {
- readLock.unlock();
- }
}
- public int getNumPendingApplications() {
- try {
- readLock.lock();
+ public synchronized int getNumPendingApplications() {
return pendingOrderingPolicy.getNumSchedulableEntities();
- } finally {
- readLock.unlock();
- }
}
- public int getNumActiveApplications() {
- try {
- readLock.lock();
+ public synchronized int getNumActiveApplications() {
return orderingPolicy.getNumSchedulableEntities();
- } finally {
- readLock.unlock();
- }
}
@Private
- public int getNumPendingApplications(String user) {
- try {
- readLock.lock();
+ public synchronized int getNumPendingApplications(String user) {
User u = getUser(user);
if (null == u) {
return 0;
}
return u.getPendingApplications();
- } finally {
- readLock.unlock();
- }
}
@Private
- public int getNumActiveApplications(String user) {
- try {
- readLock.lock();
+ public synchronized int getNumActiveApplications(String user) {
User u = getUser(user);
if (null == u) {
return 0;
}
return u.getActiveApplications();
- } finally {
- readLock.unlock();
- }
}
@Private
- public int getUserLimit() {
+ public synchronized int getUserLimit() {
return usersManager.getUserLimit();
}
@@ -476,10 +452,8 @@ public QueueInfo getQueueInfo(
}
@Override
- public List<QueueUserACLInfo>
+ public synchronized List<QueueUserACLInfo>
getQueueUserAclInfo(UserGroupInformation user) {
- try {
- readLock.lock();
QueueUserACLInfo userAclInfo = recordFactory.newRecordInstance(
QueueUserACLInfo.class);
List<QueueACL> operations = new ArrayList<>();
@@ -492,15 +466,9 @@ public QueueInfo getQueueInfo(
userAclInfo.setQueueName(getQueueName());
userAclInfo.setUserAcls(operations);
return Collections.singletonList(userAclInfo);
- } finally {
- readLock.unlock();
- }
-
}
- public String toString() {
- try {
- readLock.lock();
+ public synchronized String toString() {
return queueName + ": " + "capacity=" + queueCapacities.getCapacity()
+ ", " + "absoluteCapacity=" + queueCapacities.getAbsoluteCapacity()
+ ", " + "usedResources=" + queueUsage.getUsed() + ", "
@@ -511,10 +479,6 @@ public String toString() {
getEffectiveCapacity(CommonNodeLabelsManager.NO_LABEL) +
" , effectiveMaxResource=" +
getEffectiveMaxCapacity(CommonNodeLabelsManager.NO_LABEL);
- } finally {
- readLock.unlock();
- }
-
}
@VisibleForTesting
@@ -523,22 +487,14 @@ public User getUser(String userName) {
}
@Private
- public List<AppPriorityACLGroup> getPriorityACLs() {
- try {
- readLock.lock();
+ public synchronized List<AppPriorityACLGroup> getPriorityACLs() {
return new ArrayList<>(priorityAcls);
- } finally {
- readLock.unlock();
- }
}
- protected void reinitialize(
+ protected synchronized void reinitialize(
CSQueue newlyParsedQueue, Resource clusterResource,
CapacitySchedulerConfiguration configuration) throws
IOException {
-
- try {
- writeLock.lock();
// Sanity check
if (!(newlyParsedQueue instanceof LeafQueue) || !newlyParsedQueue
.getQueuePath().equals(getQueuePath())) {
@@ -566,10 +522,6 @@ protected void reinitialize(
// queue metrics are updated, more resource may be available
// activate the pending applications if possible
activateApplications();
-
- } finally {
- writeLock.unlock();
- }
}
@Override
@@ -584,17 +536,13 @@ public void reinitialize(
public void submitApplicationAttempt(FiCaSchedulerApp application,
String userName) {
// Careful! Locking order is important!
- try {
- writeLock.lock();
-
+ synchronized (this) {
// TODO, should use getUser, use this method just to avoid UT failure
// which is caused by wrong invoking order, will fix UT separately
User user = usersManager.getUserAndAddIfAbsent(userName);
// Add the attempt to our data-structures
addApplicationAttempt(application, user);
- } finally {
- writeLock.unlock();
}
// We don't want to update metrics for move app
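
The "Careful! Locking order is important!" comments mark the convention that
keeps this conversion deadlock-free: the leaf monitor is always released
before calling into the parent, so monitors are only ever taken child-first.
A toy sketch, not the real queue classes:

    class ParentSketch {
      synchronized void finishApplicationAttempt(String appId) {
        // parent-side bookkeeping under the parent's monitor only
      }
    }

    class LeafSketch {
      private final ParentSketch parent = new ParentSketch();

      void finishApplicationAttempt(String appId) {
        synchronized (this) {
          // leaf-side bookkeeping under the leaf's monitor only
        }
        // The parent monitor is taken only after the leaf monitor is
        // released, so no thread ever holds parent-then-leaf.
        parent.finishApplicationAttempt(appId);
      }
    }
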
@@ -624,8 +572,7 @@ public void submitApplication(ApplicationId applicationId, String userName,
public void validateSubmitApplication(ApplicationId applicationId,
String userName, String queue) throws AccessControlException {
- try {
- writeLock.lock();
+ synchronized (this) {
// Check if the queue is accepting jobs
if (getState() != QueueState.RUNNING) {
String msg = "Queue " + getQueuePath()
@@ -654,10 +601,7 @@ public void validateSubmitApplication(ApplicationId applicationId,
LOG.info(msg);
throw new AccessControlException(msg);
}
- } finally {
- writeLock.unlock();
}
-
try {
getParent().validateSubmitApplication(applicationId, userName, queue);
} catch (AccessControlException ace) {
@@ -687,14 +631,12 @@ public Resource getUserAMResourceLimit() {
null);
}
- public Resource getUserAMResourceLimitPerPartition(
+ public synchronized Resource getUserAMResourceLimitPerPartition(
String nodePartition, String userName) {
float userWeight = 1.0f;
if (userName != null && getUser(userName) != null) {
userWeight = getUser(userName).getWeight();
}
- try {
- readLock.lock();
/*
* The user am resource limit is based on the same approach as the user
* limit (as it should represent a subset of that). This means that it uses
@@ -735,16 +677,10 @@ public Resource getUserAMResourceLimitPerPartition(
+ userAMLimit + ". User weight: " + userWeight);
}
return userAMLimit;
- } finally {
- readLock.unlock();
- }
-
}
- public Resource calculateAndGetAMResourceLimitPerPartition(
+ public synchronized Resource calculateAndGetAMResourceLimitPerPartition(
String nodePartition) {
- try {
- writeLock.lock();
/*
* For non-labeled partition, get the max value from resources currently
* available to the queue and the absolute resources guaranteed for the
@@ -790,14 +726,9 @@ public Resource calculateAndGetAMResourceLimitPerPartition(
+ " amResourceLimit : " + amResouceLimit);
}
return amResouceLimit;
- } finally {
- writeLock.unlock();
- }
}
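
The comment above describes the AM limit as a fraction of the larger of the
queue's currently available and guaranteed capacity. As hedged arithmetic
(the real code works on Resource objects through a ResourceCalculator):

    final class AmLimitSketch {
      static long amLimitMb(long currentMb, long guaranteedMb,
          double maxAmPercent) {
        return (long) (Math.max(currentMb, guaranteedMb) * maxAmPercent);
      }
    }
    // e.g. amLimitMb(8192, 4096, 0.1) == 819, the MB available to AMs
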
- protected void activateApplications() {
- try {
- writeLock.lock();
+ protected synchronized void activateApplications() {
// limit of allowed resource usage for application masters
Map<String, Resource> userAmPartitionLimit =
new HashMap<String, Resource>();
@@ -911,15 +842,10 @@ protected void activateApplications() {
LOG.info("Application " + applicationId + " from user: " + application
.getUser() + " activated in queue: " + getQueueName());
}
- } finally {
- writeLock.unlock();
- }
}
- private void addApplicationAttempt(FiCaSchedulerApp application,
+ private synchronized void addApplicationAttempt(FiCaSchedulerApp application,
User user) {
- try {
- writeLock.lock();
// Accept
user.submitApplication();
getPendingAppsOrderingPolicy().addSchedulableEntity(application);
@@ -946,9 +872,6 @@ private void addApplicationAttempt(FiCaSchedulerApp application,
.getActiveApplications() + " #queue-pending-applications: "
+ getNumPendingApplications() + " #queue-active-applications: "
+ getNumActiveApplications());
- } finally {
- writeLock.unlock();
- }
}
@Override
@@ -965,15 +888,14 @@ public void finishApplication(ApplicationId application, String user) {
@Override
public void finishApplicationAttempt(FiCaSchedulerApp application, String queue) {
// Careful! Locking order is important!
- removeApplicationAttempt(application, application.getUser());
+ synchronized (this) {
+ removeApplicationAttempt(application, application.getUser());
+ }
getParent().finishApplicationAttempt(application, queue);
}
- private void removeApplicationAttempt(
+ private synchronized void removeApplicationAttempt(
FiCaSchedulerApp application, String userName) {
- try {
- writeLock.lock();
-
// TODO, should use getUser, use this method just to avoid UT failure
// which is caused by wrong invoking order, will fix UT separately
User user = usersManager.getUserAndAddIfAbsent(userName);
@@ -1007,12 +929,9 @@ private void removeApplicationAttempt(
+ " #user-active-applications: " + user.getActiveApplications()
+ " #queue-pending-applications: " + getNumPendingApplications()
+ " #queue-active-applications: " + getNumActiveApplications());
- } finally {
- writeLock.unlock();
- }
}
- private FiCaSchedulerApp getApplication(
+ private synchronized FiCaSchedulerApp getApplication(
ApplicationAttemptId applicationAttemptId) {
return applicationAttemptMap.get(applicationAttemptId);
}
@@ -1227,8 +1146,7 @@ public boolean accept(Resource cluster,
// Do not check limits when allocation from a reserved container
if (allocation.getAllocateFromReservedContainer() == null) {
- try {
- readLock.lock();
+ synchronized (this) {
FiCaSchedulerApp app =
schedulerContainer.getSchedulerApplicationAttempt();
String username = app.getUser();
@@ -1258,8 +1176,6 @@ public boolean accept(Resource cluster,
}
return false;
}
- } finally {
- readLock.unlock();
}
}
@@ -1328,9 +1244,7 @@ public void apply(Resource cluster,
releaseContainers(cluster, request);
- try {
- writeLock.lock();
-
+ synchronized (this) {
if (request.anythingAllocatedOrReserved()) {
ContainerAllocationProposal<FiCaSchedulerApp, FiCaSchedulerNode>
allocation = request.getFirstAllocatedOrReservedContainer();
@@ -1361,10 +1275,7 @@ public void apply(Resource cluster,
request.getTotalReservedResource());
}
}
- } finally {
- writeLock.unlock();
}
-
if (parent != null && applyToParentQueue) {
parent.apply(cluster, request);
}
@@ -1545,11 +1456,9 @@ public Resource getResourceLimitForAllUsers(String userName,
}
@Private
- protected boolean canAssignToUser(Resource clusterResource,
+ protected synchronized boolean canAssignToUser(Resource clusterResource,
String userName, Resource limit, FiCaSchedulerApp application,
String nodePartition, ResourceLimits currentResourceLimits) {
- try {
- readLock.lock();
User user = getUser(userName);
if (user == null) {
if (LOG.isDebugEnabled()) {
@@ -1595,9 +1504,6 @@ protected boolean canAssignToUser(Resource clusterResource,
return false;
}
return true;
- } finally {
- readLock.unlock();
- }
}
private void updateSchedulerHealthForCompletedContainer(
@@ -1628,12 +1534,9 @@ private void updateSchedulerHealthForCompletedContainer(
* @param nodePartition
* Partition
*/
- public void recalculateQueueUsageRatio(Resource clusterResource,
+ public synchronized void recalculateQueueUsageRatio(Resource clusterResource,
String nodePartition) {
- try {
- writeLock.lock();
ResourceUsage queueResourceUsage = getQueueResourceUsage();
-
if (nodePartition == null) {
for (String partition : Sets.union(
getQueueCapacities().getNodePartitionsSet(),
@@ -1643,9 +1546,6 @@ public void recalculateQueueUsageRatio(Resource clusterResource,
} else {
usersManager.updateUsageRatio(nodePartition, clusterResource);
}
- } finally {
- writeLock.unlock();
- }
}
@Override
@@ -1659,8 +1559,7 @@ public void completedContainer(Resource clusterResource,
if (application != null) {
boolean removed = false;
// Careful! Locking order is important!
- writeLock.lock();
- try {
+ synchronized (this) {
Container container = rmContainer.getContainer();
// Inform the application & the node
// Note: It's safe to assume that all state changes to RMContainer
@@ -1685,11 +1584,8 @@ public void completedContainer(Resource clusterResource,
releaseResource(clusterResource, application, container.getResource(),
node.getPartition(), rmContainer);
}
- } finally {
- writeLock.unlock();
}
-
if (removed) {
// Inform the parent queue _outside_ of the leaf-queue lock
getParent().completedContainer(clusterResource, application, node,
@@ -1702,11 +1598,9 @@ public void completedContainer(Resource clusterResource,
new KillableContainer(rmContainer, node.getPartition(), queueName));
}
- void allocateResource(Resource clusterResource,
+ synchronized void allocateResource(Resource clusterResource,
SchedulerApplicationAttempt application, Resource resource,
String nodePartition, RMContainer rmContainer) {
- try {
- writeLock.lock();
super.allocateResource(clusterResource, resource, nodePartition);
// handle ignore exclusivity container
@@ -1742,16 +1636,11 @@ void allocateResource(Resource clusterResource,
+ numContainers + " headroom = " + application.getHeadroom()
+ " user-resources=" + user.getUsed());
}
- } finally {
- writeLock.unlock();
- }
}
- void releaseResource(Resource clusterResource,
+ synchronized void releaseResource(Resource clusterResource,
FiCaSchedulerApp application, Resource resource, String nodePartition,
RMContainer rmContainer) {
- try {
- writeLock.lock();
super.releaseResource(clusterResource, resource, nodePartition);
// handle ignore exclusivity container
@@ -1782,9 +1671,6 @@ void releaseResource(Resource clusterResource,
+ numContainers + " user=" + userName + " user-resources="
+ user.getUsed());
}
- } finally {
- writeLock.unlock();
- }
}
private void updateCurrentResourceLimits(
@@ -1804,10 +1690,8 @@ private void updateCurrentResourceLimits(
}
@Override
- public void updateClusterResource(Resource clusterResource,
+ public synchronized void updateClusterResource(Resource clusterResource,
ResourceLimits currentResourceLimits) {
- try {
- writeLock.lock();
updateCurrentResourceLimits(currentResourceLimits, clusterResource);
lastClusterResource = clusterResource;
@@ -1838,9 +1722,6 @@ public void updateClusterResource(Resource clusterResource,
RMNodeLabelsManager.NO_LABEL,
SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY, null);
}
- } finally {
- writeLock.unlock();
- }
}
@Override
@@ -1885,15 +1766,12 @@ public void recoverContainer(Resource clusterResource,
return;
}
// Careful! Locking order is important!
- try {
- writeLock.lock();
+ synchronized (this) {
FiCaSchedulerNode node = scheduler.getNode(
rmContainer.getContainer().getNodeId());
allocateResource(clusterResource, attempt,
rmContainer.getContainer().getResource(), node.getPartition(),
rmContainer);
- } finally {
- writeLock.unlock();
}
getParent().recoverContainer(clusterResource, attempt, rmContainer);
@@ -1946,11 +1824,9 @@ public void recoverContainer(Resource clusterResource,
* excessive preemption.
* @return Total pending resource considering user limit
*/
- public Resource getTotalPendingResourcesConsideringUserLimit(
+ public synchronized Resource getTotalPendingResourcesConsideringUserLimit(
Resource clusterResources, String partition,
boolean deductReservedFromPending) {
- try {
- readLock.lock();
Map<String, Resource> userNameToHeadroom =
new HashMap<>();
Resource totalPendingConsideringUserLimit = Resource.newInstance(0, 0);
@@ -1984,17 +1860,11 @@ public Resource getTotalPendingResourcesConsideringUserLimit(
minpendingConsideringUserLimit);
}
return totalPendingConsideringUserLimit;
- } finally {
- readLock.unlock();
- }
-
}
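
"Pending considering user limit" counts each application's pending demand
only up to its user's remaining headroom, consuming that headroom as
applications are visited. A hedged sketch on plain integers (the real code
works on Resource objects and can also deduct reserved resources):

    import java.util.List;
    import java.util.Map;

    final class PendingSketch {
      static long totalPending(Map<String, Long> headroomByUser,
          List<String> appUsers, List<Long> appPending) {
        long total = 0;
        for (int i = 0; i < appUsers.size(); i++) {
          long headroom = headroomByUser.getOrDefault(appUsers.get(i), 0L);
          long counted = Math.min(appPending.get(i), headroom);
          headroomByUser.put(appUsers.get(i), headroom - counted);
          total += counted;
        }
        return total;
      }
    }
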
@Override
- public void collectSchedulerApplications(
+ public synchronized void collectSchedulerApplications(
Collection<ApplicationAttemptId> apps) {
- try {
- readLock.lock();
for (FiCaSchedulerApp pendingApp : pendingOrderingPolicy
.getSchedulableEntities()) {
apps.add(pendingApp.getApplicationAttemptId());
@@ -2002,10 +1872,6 @@ public void collectSchedulerApplications(
for (FiCaSchedulerApp app : orderingPolicy.getSchedulableEntities()) {
apps.add(app.getApplicationAttemptId());
}
- } finally {
- readLock.unlock();
- }
-
}
@Override
@@ -2050,12 +1916,9 @@ public void detachContainer(Resource clusterResource,
* @return all ignored partition exclusivity RMContainers in the LeafQueue,
* this will be used by preemption policy.
*/
- public Map<String, TreeSet<RMContainer>>
+ public synchronized Map<String, TreeSet<RMContainer>>
getIgnoreExclusivityRMContainers() {
Map<String, TreeSet<RMContainer>> clonedMap = new HashMap<>();
- try {
- readLock.lock();
-
for (Map.Entry<String, TreeSet<RMContainer>> entry : ignorePartitionExclusivityRMContainers
.entrySet()) {
clonedMap.put(entry.getKey(), new TreeSet<>(entry.getValue()));
@@ -2063,9 +1926,6 @@ public void detachContainer(Resource clusterResource,
return clonedMap;
- } finally {
- readLock.unlock();
- }
}
public void setCapacity(float capacity) {
@@ -2102,18 +1962,13 @@ public void setMaxAMResourcePerQueuePercent(
return orderingPolicy;
}
- void setOrderingPolicy(
+ synchronized void setOrderingPolicy(
OrderingPolicy<FiCaSchedulerApp> orderingPolicy) {
- try {
- writeLock.lock();
if (null != this.orderingPolicy) {
orderingPolicy.addAllSchedulableEntities(
this.orderingPolicy.getSchedulableEntities());
}
this.orderingPolicy = orderingPolicy;
- } finally {
- writeLock.unlock();
- }
}
@Override
@@ -2121,10 +1976,9 @@ public Priority getDefaultApplicationPriority() {
return defaultAppPriorityPerQueue;
}
- public void updateApplicationPriority(SchedulerApplication<FiCaSchedulerApp> app,
+ public synchronized void updateApplicationPriority(SchedulerApplication<FiCaSchedulerApp> app,
Priority newAppPriority) {
- try {
- writeLock.lock();
+
FiCaSchedulerApp attempt = app.getCurrentAppAttempt();
boolean isActive = orderingPolicy.removeSchedulableEntity(attempt);
if (!isActive) {
@@ -2138,9 +1992,6 @@ public void updateApplicationPriority(SchedulerApplication<FiCaSchedulerApp> app
} else {
pendingOrderingPolicy.addSchedulableEntity(attempt);
}
- } finally {
- writeLock.unlock();
- }
}
public OrderingPolicy<FiCaSchedulerApp>
@@ -2174,17 +2025,12 @@ public Resource getClusterResource() {
}
@Override
- public void stopQueue() {
- try {
- writeLock.lock();
+ public synchronized void stopQueue() {
if (getNumApplications() > 0) {
updateQueueState(QueueState.DRAINING);
} else {
updateQueueState(QueueState.STOPPED);
}
- } finally {
- writeLock.unlock();
- }
}
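
stopQueue()'s drain-then-stop decision, as a one-line hypothetical helper:

    enum QueueStateSketch { RUNNING, DRAINING, STOPPED }

    final class StopSketch {
      static QueueStateSketch nextStateOnStop(int numApplications) {
        return numApplications > 0
            ? QueueStateSketch.DRAINING : QueueStateSketch.STOPPED;
      }
    }
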
/**
--
2.23.0.windows.1