diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java index 07f5a769293..c0b673381af 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java @@ -18,6 +18,7 @@ package org.apache.hadoop.yarn.server.resourcemanager; +import com.google.common.annotations.VisibleForTesting; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.curator.framework.AuthInfo; @@ -67,8 +68,6 @@ import org.apache.hadoop.yarn.server.resourcemanager.metrics.SystemMetricsPublisher; import org.apache.hadoop.yarn.server.resourcemanager.metrics.TimelineServiceV1Publisher; import org.apache.hadoop.yarn.server.resourcemanager.metrics.TimelineServiceV2Publisher; -import org.apache.hadoop.yarn.server.resourcemanager.monitor.SchedulingEditPolicy; -import org.apache.hadoop.yarn.server.resourcemanager.monitor.SchedulingMonitor; import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMDelegatedNodeLabelsUpdater; import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager; import org.apache.hadoop.yarn.server.resourcemanager.recovery.NullRMStateStore; @@ -113,8 +112,6 @@ import org.apache.zookeeper.server.auth.DigestAuthenticationProvider; import org.eclipse.jetty.webapp.WebAppContext; -import com.google.common.annotations.VisibleForTesting; - import java.io.IOException; import java.io.InputStream; import java.io.PrintStream; @@ -709,8 +706,6 @@ 
protected void serviceInit(Configuration configuration) throws Exception { } } - createSchedulerMonitors(); - masterService = createApplicationMasterService(); addService(masterService) ; rmContext.setApplicationMasterService(masterService); @@ -809,30 +804,6 @@ protected void serviceStop() throws Exception { } } - - protected void createSchedulerMonitors() { - if (conf.getBoolean(YarnConfiguration.RM_SCHEDULER_ENABLE_MONITORS, - YarnConfiguration.DEFAULT_RM_SCHEDULER_ENABLE_MONITORS)) { - LOG.info("Loading policy monitors"); - List policies = conf.getInstances( - YarnConfiguration.RM_SCHEDULER_MONITOR_POLICIES, - SchedulingEditPolicy.class); - if (policies.size() > 0) { - for (SchedulingEditPolicy policy : policies) { - LOG.info("LOADING SchedulingEditPolicy:" + policy.getPolicyName()); - // periodically check whether we need to take action to guarantee - // constraints - SchedulingMonitor mon = new SchedulingMonitor(rmContext, policy); - addService(mon); - } - } else { - LOG.warn("Policy monitors configured (" + - YarnConfiguration.RM_SCHEDULER_ENABLE_MONITORS + - ") but none specified (" + - YarnConfiguration.RM_SCHEDULER_MONITOR_POLICIES + ")"); - } - } - } } @Private diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/SchedulingMonitor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/SchedulingMonitor.java index 631d1a0f835..3e8e8eef75f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/SchedulingMonitor.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/SchedulingMonitor.java @@ -58,6 +58,7 @@ public 
synchronized SchedulingEditPolicy getSchedulingEditPolicy() { } public void serviceInit(Configuration conf) throws Exception { + LOG.info("Initializing SchedulingMonitor=" + getName()); scheduleEditPolicy.init(conf, rmContext, rmContext.getScheduler()); this.monitorInterval = scheduleEditPolicy.getMonitoringInterval(); super.serviceInit(conf); @@ -65,6 +66,7 @@ public void serviceInit(Configuration conf) throws Exception { @Override public void serviceStart() throws Exception { + LOG.info("Starting SchedulingMonitor=" + getName()); assert !stopped : "starting when already stopped"; ses = Executors.newSingleThreadScheduledExecutor(new ThreadFactory() { public Thread newThread(Runnable r) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/SchedulingMonitorManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/SchedulingMonitorManager.java new file mode 100644 index 00000000000..11ab698435f --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/SchedulingMonitorManager.java @@ -0,0 +1,162 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.resourcemanager.monitor;
+
+import com.google.common.collect.Sets;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.util.ReflectionUtils;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.exceptions.YarnException;
+import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+
+/**
+ * Manages scheduling monitors.
+ *
+ * Keeps one {@link SchedulingMonitor} per configured
+ * {@link SchedulingEditPolicy} class name and creates, starts and stops
+ * them as the configuration changes. All public methods are synchronized
+ * on this instance.
+ */
+public class SchedulingMonitorManager {
+  private static final Log LOG = LogFactory.getLog(
+      SchedulingMonitorManager.class);
+
+  // Policy class name -> monitor wrapping an instance of that policy.
+  private Map<String, SchedulingMonitor> runningSchedulingMonitors =
+      new HashMap<>();
+  private RMContext rmContext;
+
+  /**
+   * Brings the set of tracked monitors in line with the given configuration:
+   * creates monitors for newly configured policies and stops monitors whose
+   * policy is no longer configured.
+   *
+   * @param conf configuration to read the monitor settings from
+   * @param startImmediately whether newly created monitors are started
+   *        right after being initialized
+   * @throws YarnException if a configured policy class cannot be found or
+   *         is not a {@link SchedulingEditPolicy}
+   */
+  private void updateSchedulingMonitors(Configuration conf,
+      boolean startImmediately) throws YarnException {
+    boolean monitorsEnabled = conf.getBoolean(
+        YarnConfiguration.RM_SCHEDULER_ENABLE_MONITORS,
+        YarnConfiguration.DEFAULT_RM_SCHEDULER_ENABLE_MONITORS);
+
+    if (!monitorsEnabled) {
+      if (!runningSchedulingMonitors.isEmpty()) {
+        // If monitors disabled while we have some running monitors, we should
+        // stop them.
+        LOG.info("Scheduling Monitor disabled, stopping all services");
+        stopAndRemoveAll();
+      }
+
+      return;
+    }
+
+    // When monitor is enabled, loading policies
+    String[] configuredPolicies = conf.getStrings(
+        YarnConfiguration.RM_SCHEDULER_MONITOR_POLICIES);
+
+    Set<String> configurePoliciesSet = new HashSet<>();
+    if (configuredPolicies != null) {
+      for (String s : configuredPolicies) {
+        configurePoliciesSet.add(s);
+      }
+    }
+
+    if (configurePoliciesSet.isEmpty()) {
+      // Do not return early: monitors left over from a previous
+      // configuration still have to be stopped by the removal pass below.
+      LOG.warn("Policy monitors configured ("
+          + YarnConfiguration.RM_SCHEDULER_ENABLE_MONITORS
+          + ") but none specified ("
+          + YarnConfiguration.RM_SCHEDULER_MONITOR_POLICIES + ")");
+    }
+
+    // Add new monitor when needed
+    for (String s : configurePoliciesSet) {
+      if (!runningSchedulingMonitors.containsKey(s)) {
+        SchedulingMonitor mon = new SchedulingMonitor(rmContext,
+            newPolicyInstance(s));
+        mon.init(conf);
+        if (startImmediately) {
+          mon.start();
+        }
+        runningSchedulingMonitors.put(s, mon);
+      }
+    }
+
+    // Stop monitor when needed. Sets.difference() returns a live view of
+    // the map's key set, so copy it before removing entries from the map;
+    // iterating the view while removing can throw
+    // ConcurrentModificationException.
+    Set<String> disabledPolicies = new HashSet<>(Sets.difference(
+        runningSchedulingMonitors.keySet(), configurePoliciesSet));
+    for (String disabledPolicy : disabledPolicies) {
+      LOG.info("SchedulingEditPolicy=" + disabledPolicy
+          + " removed, stopping it now ...");
+      silentlyStopSchedulingMonitor(disabledPolicy);
+      runningSchedulingMonitors.remove(disabledPolicy);
+    }
+  }
+
+  /**
+   * Loads the named class and instantiates it as a policy.
+   *
+   * @param className fully qualified class name of the policy
+   * @return a new instance of the policy
+   * @throws YarnException if the class cannot be found or is not a
+   *         {@link SchedulingEditPolicy}
+   */
+  private SchedulingEditPolicy newPolicyInstance(String className)
+      throws YarnException {
+    Class<?> policyClass;
+    try {
+      policyClass = Class.forName(className);
+    } catch (ClassNotFoundException e) {
+      String message = "Failed to find class of specified policy="
+          + className;
+      LOG.warn(message, e);
+      // Keep the original exception as the cause for diagnosis.
+      throw new YarnException(message, e);
+    }
+
+    if (!SchedulingEditPolicy.class.isAssignableFrom(policyClass)) {
+      String message = "Specified policy=" + className
+          + " is not a SchedulingEditPolicy class.";
+      LOG.warn(message);
+      throw new YarnException(message);
+    }
+
+    return (SchedulingEditPolicy) ReflectionUtils.newInstance(policyClass,
+        null);
+  }
+
+  /**
+   * Stops and discards any tracked monitors, then creates and initializes
+   * (but does not start) the monitors named in the configuration.
+   *
+   * @param rmContext context passed to every monitor created from now on
+   * @param configuration configuration to read the monitor settings from
+   * @throws YarnException if a configured policy cannot be loaded
+   */
+  public synchronized void initialize(RMContext rmContext,
+      Configuration configuration) throws YarnException {
+    this.rmContext = rmContext;
+    stopAndRemoveAll();
+
+    updateSchedulingMonitors(configuration, false);
+  }
+
+  /**
+   * Applies a changed configuration: newly configured monitors are created
+   * and started immediately, monitors no longer configured are stopped.
+   *
+   * @param rmContext context passed to every monitor created from now on
+   * @param configuration configuration to read the monitor settings from
+   * @throws YarnException if a configured policy cannot be loaded
+   */
+  public synchronized void reinitialize(RMContext rmContext,
+      Configuration configuration) throws YarnException {
+    this.rmContext = rmContext;
+
+    updateSchedulingMonitors(configuration, true);
+  }
+
+  /**
+   * Starts every tracked monitor.
+   */
+  public synchronized void startAll() {
+    for (SchedulingMonitor schedulingMonitor : runningSchedulingMonitors
+        .values()) {
+      schedulingMonitor.start();
+    }
+  }
+
+  /**
+   * Stops the monitor registered under the given name, logging rather than
+   * propagating any exception raised while stopping. The entry is not
+   * removed from the map; callers do that.
+   */
+  private void silentlyStopSchedulingMonitor(String name) {
+    SchedulingMonitor mon = runningSchedulingMonitors.get(name);
+    try {
+      mon.stop();
+      LOG.info("Sucessfully stopped monitor=" + mon.getName());
+    } catch (Exception e) {
+      LOG.warn("Exception while stopping monitor=" + mon.getName(), e);
+    }
+  }
+
+  /**
+   * Stops every tracked monitor and empties the map.
+   */
+  private void stopAndRemoveAll() {
+    if (!runningSchedulingMonitors.isEmpty()) {
+      for (String schedulingMonitorName : runningSchedulingMonitors
+          .keySet()) {
+        silentlyStopSchedulingMonitor(schedulingMonitorName);
+      }
+      runningSchedulingMonitors.clear();
+    }
+  }
+
+  /**
+   * Stops and discards all tracked monitors.
+   *
+   * @throws YarnException declared for callers; not thrown by the current
+   *         implementation
+   */
+  public synchronized void stop() throws YarnException {
+    stopAndRemoveAll();
+  }
+}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java
index
f2da1fe61d0..14f170441b3 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java @@ -68,6 +68,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.RMCriticalThreadUncaughtExceptionHandler; import org.apache.hadoop.yarn.server.resourcemanager.RMServerUtils; import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager; +import org.apache.hadoop.yarn.server.resourcemanager.monitor.SchedulingMonitorManager; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEventType; @@ -168,6 +169,8 @@ // the NM in the next heartbeat. private boolean autoUpdateContainers = false; + private SchedulingMonitorManager schedulingMonitorManager; + /** * Construct the service. 
*
@@ -208,6 +211,10 @@ public void serviceInit(Configuration conf) throws Exception {
       updateThread.setDaemon(true);
     }
 
+    // Initialize SchedulingMonitorManager
+    schedulingMonitorManager = new SchedulingMonitorManager();
+    schedulingMonitorManager.initialize(rmContext, conf);
+
     super.serviceInit(conf);
   }
 
@@ -216,6 +223,8 @@ protected void serviceStart() throws Exception {
     if (updateThread != null) {
       updateThread.start();
     }
+
+    schedulingMonitorManager.startAll();
     super.serviceStart();
   }
 
@@ -225,6 +234,12 @@ protected void serviceStop() throws Exception {
       updateThread.interrupt();
       updateThread.join(THREAD_JOIN_TIMEOUT_MS);
     }
+
+    // The manager is only created in serviceInit(); guard against a stop
+    // that follows an init which failed before reaching that point.
+    if (schedulingMonitorManager != null) {
+      schedulingMonitorManager.stop();
+    }
     super.serviceStop();
   }
 
@@ -1406,4 +1421,23 @@ protected void triggerUpdate() {
       updateThreadMonitor.notify();
     }
   }
+
+  /**
+   * Propagates a configuration refresh to the scheduling monitors.
+   *
+   * @param conf configuration handed to the SchedulingMonitorManager
+   * @param rmContext context handed to the SchedulingMonitorManager
+   * @throws IOException wrapping any YarnException raised while the
+   *         monitors are being reinitialized
+   */
+  @Override
+  public void reinitialize(Configuration conf, RMContext rmContext)
+      throws IOException {
+    try {
+      LOG.info("Reinitializing SchedulingMonitorManager ...");
+      schedulingMonitorManager.reinitialize(rmContext, conf);
+    } catch (YarnException e) {
+      throw new IOException(e);
+    }
+  }
 }
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java
index d91aa55a487..94b909b2048 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java
@@ -443,6 +443,8 @@ public void reinitialize(Configuration newConf, RMContext rmContext)
       // Setup how
many containers we can allocate for each round offswitchPerHeartbeatLimit = this.conf.getOffSwitchPerHeartbeatLimit(); + + super.reinitialize(newConf, rmContext); } finally { writeLock.unlock(); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java index 37f583ec7db..3e526ecbfad 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java @@ -1396,6 +1396,7 @@ public void reinitialize(Configuration conf, RMContext rmContext) throws IOException { try { allocsLoader.reloadAllocations(); + super.reinitialize(conf, rmContext); } catch (Exception e) { LOG.error("Failed to reload allocations file", e); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java index 185d426d717..4ea4c5613b9 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java @@ -312,6 +312,7 @@ public synchronized void setRMContext(RMContext rmContext) { reinitialize(Configuration conf, RMContext rmContext) throws IOException { setConf(conf); + super.reinitialize(conf, rmContext); } @Override