diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/AbstractCSQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/AbstractCSQueue.java index 3ae22ad..73f1c0d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/AbstractCSQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/AbstractCSQueue.java @@ -47,6 +47,7 @@ import org.apache.hadoop.yarn.api.records.ResourceInformation; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.exceptions.YarnException; +import org.apache.hadoop.yarn.exceptions.YarnRuntimeException; import org.apache.hadoop.yarn.factories.RecordFactory; import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; import org.apache.hadoop.yarn.security.AccessRequest; @@ -116,6 +117,10 @@ QueueResourceQuotas queueResourceQuotas; + // -1 indicates lifetime is disabled + protected volatile long maxApplicationLifetime = -1; + protected volatile long defaultApplicationLifetime = -1; + protected enum CapacityConfigType { NONE, PERCENTAGE, ABSOLUTE_RESOURCE }; @@ -425,6 +430,19 @@ protected void setupQueueConfigs(Resource clusterResource, configuration.getMultiNodesSortingAlgorithmPolicy(getQueuePath())); this.userWeights = getUserWeightsFromHierarchy(configuration); + + maxApplicationLifetime = getInheritedMaxAppLifetime(this, configuration); + defaultApplicationLifetime = + getInheritedDefaultAppLifetime(this, configuration, + maxApplicationLifetime); + if (maxApplicationLifetime > 0 && + defaultApplicationLifetime > maxApplicationLifetime) { + throw new YarnRuntimeException( + "Default lifetime " + defaultApplicationLifetime + + " can't exceed maximum lifetime " + maxApplicationLifetime); + } + defaultApplicationLifetime = defaultApplicationLifetime > 0 + ? defaultApplicationLifetime : maxApplicationLifetime; } finally { writeLock.unlock(); } @@ -860,6 +878,53 @@ private boolean isQueueHierarchyPreemptionDisabled(CSQueue q, parentQ.getPreemptionDisabled()); } + private long getInheritedMaxAppLifetime(CSQueue q, + CapacitySchedulerConfiguration conf) { + CSQueue parentQ = q.getParent(); + long maxAppLifetime = conf.getMaximumLifetimePerQueue(q.getQueuePath()); + + // If q is the root queue, then get max app lifetime from conf. + if (parentQ == null) { + return maxAppLifetime; + } + + // If this is not the root queue, get this queue's max app lifetime + // from the conf. The parent's max app lifetime will be used if it's + // not set for this queue. + long parentsMaxAppLifetime = getParent().getMaximumApplicationLifetime(); + return (maxAppLifetime >= 0) ? maxAppLifetime : parentsMaxAppLifetime; + } + + private long getInheritedDefaultAppLifetime(CSQueue q, + CapacitySchedulerConfiguration conf, long myMaxAppLifetime) { + CSQueue parentQ = q.getParent(); + long defaultAppLifetime = conf.getDefaultLifetimePerQueue((getQueuePath())); + + // If q is the root queue, then get default app lifetime from conf. + if (parentQ == null) { + return defaultAppLifetime; + } + + // If this is not the root queue, get this queue's default app lifetime + // from the conf. The parent's default app lifetime will be used if + // it's not set for this queue. + long parentsDefaultAppLifetime = + getParent().getDefaultApplicationLifetime(); + + // If the default lifetime is not set for the current queue, then use the + // parent's default value IF it is less than the current queue's max + // lifetime. Otherwise, use the current queue's max lifetime as the default + // lifetime. + if (defaultAppLifetime < 0) { + if (parentsDefaultAppLifetime < myMaxAppLifetime) { + defaultAppLifetime = parentsDefaultAppLifetime; + } else { + defaultAppLifetime = myMaxAppLifetime; + } + } + return defaultAppLifetime; + } + /** * The specified queue is intra-queue preemptable if * 1) system-wide intra-queue preemption is turned on @@ -1317,4 +1382,12 @@ public String getMultiNodeSortingPolicyName() { public void setMultiNodeSortingPolicyName(String policyName) { this.multiNodeSortingPolicyName = policyName; } + + public long getMaximumApplicationLifetime() { + return maxApplicationLifetime; + } + + public long getDefaultApplicationLifetime() { + return defaultApplicationLifetime; + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CSQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CSQueue.java index d507e53..998aaa0 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CSQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CSQueue.java @@ -449,4 +449,19 @@ public void validateSubmitApplication(ApplicationId applicationId, * @return policy name */ String getMultiNodeSortingPolicyName(); + + /** + * Get the maximum lifetime in seconds of an application which is submitted to + * this queue. Apps can set their own lifetime timeout up to this value. + * @return max lifetime in seconds + */ + public long getMaximumApplicationLifetime(); + + /** + * Get the default lifetime in seconds of an application which is submitted to + * this queue. If an app doesn't specify its own timeout when submitted, this + * value will be used. + * @return default app lifetime + */ + public long getDefaultApplicationLifetime(); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java index 1028a7d..ff55b3c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java @@ -45,7 +45,6 @@ import org.apache.hadoop.yarn.api.records.QueueState; import org.apache.hadoop.yarn.api.records.QueueUserACLInfo; import org.apache.hadoop.yarn.api.records.Resource; -import org.apache.hadoop.yarn.exceptions.YarnRuntimeException; import org.apache.hadoop.yarn.factories.RecordFactory; import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; import org.apache.hadoop.yarn.nodelabels.CommonNodeLabelsManager; @@ -130,10 +129,6 @@ List priorityAcls = new ArrayList(); - // -1 indicates lifetime is disabled - private volatile long maxApplicationLifetime = -1; - private volatile long defaultApplicationLifetime = -1; - @SuppressWarnings({ "unchecked", "rawtypes" }) public LeafQueue(CapacitySchedulerContext cs, String queueName, CSQueue parent, CSQueue old) throws IOException { @@ -256,19 +251,6 @@ protected void setupQueueConfigs(Resource clusterResource, defaultAppPriorityPerQueue = Priority.newInstance( conf.getDefaultApplicationPriorityConfPerQueue(getQueuePath())); - maxApplicationLifetime = - conf.getMaximumLifetimePerQueue((getQueuePath())); - defaultApplicationLifetime = - conf.getDefaultLifetimePerQueue((getQueuePath())); - if (maxApplicationLifetime > 0 && - defaultApplicationLifetime > maxApplicationLifetime) { - throw new YarnRuntimeException( - "Default lifetime" + defaultApplicationLifetime - + " can't exceed maximum lifetime " + maxApplicationLifetime); - } - defaultApplicationLifetime = defaultApplicationLifetime > 0 - ? defaultApplicationLifetime : maxApplicationLifetime; - // Validate leaf queue's user's weights. int queueUL = Math.min(100, conf.getUserLimit(getQueuePath())); for (Entry e : getUserWeights().entrySet()) { @@ -2209,14 +2191,6 @@ public void stopQueue() { } } - public long getMaximumApplicationLifetime() { - return maxApplicationLifetime; - } - - public long getDefaultApplicationLifetime() { - return defaultApplicationLifetime; - } - private void updateQueuePreemptionMetrics(RMContainer rmc) { final long usedMillis = rmc.getFinishTime() - rmc.getCreationTime(); final long usedSeconds = usedMillis / DateUtils.MILLIS_PER_SECOND; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/TestApplicationLifetimeMonitor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/TestApplicationLifetimeMonitor.java index 4e40436..7c8a740 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/TestApplicationLifetimeMonitor.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/TestApplicationLifetimeMonitor.java @@ -58,6 +58,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerState; import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeImpl; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CSQueue; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacitySchedulerConfiguration; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler; @@ -390,6 +391,57 @@ public synchronized void updateApplicationStateInternal( } } + @Test(timeout = 60000) + public void testInheritAppLifetimeFromParentQueue() throws Exception { + YarnConfiguration yarnConf = conf; + long maxRootLifetime = 20L; + long defaultRootLifetime = 10L; + if (scheduler.equals(CapacityScheduler.class)) { + CapacitySchedulerConfiguration csConf = + new CapacitySchedulerConfiguration(); + csConf.setQueues(CapacitySchedulerConfiguration.ROOT, + new String[] {"default"}); + csConf.setCapacity(CapacitySchedulerConfiguration.ROOT + ".default", 100); + csConf.setMaximumLifetimePerQueue( + CapacitySchedulerConfiguration.ROOT, maxRootLifetime); + csConf.setDefaultLifetimePerQueue( + CapacitySchedulerConfiguration.ROOT, defaultRootLifetime); + yarnConf = new YarnConfiguration(csConf); + } + + MockRM rm = null; + try { + rm = new MockRM(yarnConf); + rm.start(); + + Priority appPriority = Priority.newInstance(0); + MockNM nm1 = rm.registerNode("127.0.0.1:1234", 16 * 1024); + + // user not set lifetime, so queue max lifetime will be considered. + RMApp app1 = MockRMAppSubmitter.submit(rm, + MockRMAppSubmissionData.Builder.createWithMemory(1024, rm) + .withAppPriority(appPriority) + .withApplicationTimeouts(Collections.emptyMap()) + .build()); + + nm1.nodeHeartbeat(true); + + if (scheduler.equals(CapacityScheduler.class)) { + // Supported only on capacity scheduler + + rm.waitForState(app1.getApplicationId(), RMAppState.KILLED); + long totalTimeRun = app1.getFinishTime() - app1.getSubmitTime(); + Assert.assertTrue("Application killed before default lifetime value", + totalTimeRun > (defaultRootLifetime * 1000)); + Assert.assertTrue( + "Application killed after max lifetime value " + totalTimeRun, + totalTimeRun < (maxRootLifetime * 1000)); + } + } finally { + stopRM(rm); + } + } + private CapacitySchedulerConfiguration setUpCSQueue(long maxLifetime, long defaultLifetime) { CapacitySchedulerConfiguration csConf =