diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
index 8acee57..d21c00a 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
@@ -360,7 +360,53 @@ public static boolean isAclEnabled(Configuration conf) {
   public static final String RM_CONTAINER_ALLOC_EXPIRY_INTERVAL_MS =
     RM_PREFIX + "rm.container-allocation.expiry-interval-ms";
   public static final int DEFAULT_RM_CONTAINER_ALLOC_EXPIRY_INTERVAL_MS = 600000;
-
+
+  /** If true, run the policy but do not affect the cluster with preemption and
+   * kill events. */
+  public static final String PREEMPTION_OBSERVE_ONLY =
+      RM_PREFIX + "monitor.capacity.preemption.observe_only";
+  public static final boolean DEFAULT_PREEMPTION_OBSERVE_ONLY = false;
+
+  /** Time in milliseconds between invocations of this policy. */
+  public static final String PREEMPTION_MONITORING_INTERVAL =
+      RM_PREFIX + "monitor.capacity.preemption.monitoring_interval";
+  public static final long DEFAULT_PREEMPTION_MONITORING_INTERVAL = 3000L;
+
+  /** Time in milliseconds between requesting a preemption from an application
+   * and killing the container. */
+  public static final String PREEMPTION_WAIT_TIME_BEFORE_KILL =
+      RM_PREFIX + "monitor.capacity.preemption.max_wait_before_kill";
+  public static final long DEFAULT_PREEMPTION_WAIT_TIME_BEFORE_KILL = 15000L;
+
+  /** Maximum percentage of resources preempted in a single round.
+   * By controlling this value one can throttle the pace at which containers
+   * are reclaimed from the cluster. After computing the total desired
+   * preemption, the policy scales it back within this limit. */
+  public static final String TOTAL_PREEMPTION_PER_ROUND =
+      RM_PREFIX + "monitor.capacity.preemption.total_preemption_per_round";
+  public static final float DEFAULT_TOTAL_PREEMPTION_PER_ROUND = 0.1f;
+
+  /** Maximum amount of resources above the target capacity ignored for
+   * preemption. This defines a deadzone around the target capacity that helps
+   * prevent thrashing and oscillations around the computed target balance.
+   * High values would slow the time to capacity and (absent natural
+   * completions) might prevent convergence to guaranteed capacity. */
+  public static final String PREEMPTION_MAX_IGNORED_OVER_CAPACITY =
+      RM_PREFIX + "monitor.capacity.preemption.max_ignored_over_capacity";
+  public static final float DEFAULT_PREEMPTION_MAX_IGNORED_OVER_CAPACITY =
+      0.1f;
+  /**
+   * Given a computed preemption target, account for containers naturally
+   * expiring and preempt only this percentage of the delta. This determines
+   * the rate of geometric convergence into the deadzone ({@link
+   * #PREEMPTION_MAX_IGNORED_OVER_CAPACITY}). For example, a termination factor
+   * of 0.5 will reclaim almost 95% of resources within 5 * {@link
+   * #PREEMPTION_WAIT_TIME_BEFORE_KILL}, even absent natural termination. */
+  public static final String PREEMPTION_NATURAL_TERMINATION_FACTOR =
+      RM_PREFIX + "monitor.capacity.preemption.natural_termination_factor";
+  public static final float DEFAULT_PREEMPTION_NATURAL_TERMINATION_FACTOR =
+      0.2f;
+
   /** Path to file with nodes to include.*/
   public static final String RM_NODES_INCLUDE_FILE_PATH =
       RM_PREFIX + "nodes.include-path";
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
index 506cf3d..cb3c73a 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
@@ -908,6 +908,64 @@
     <value>600000</value>
   </property>
 
+  <property>
+    <description>
+      If true, run the policy but do not affect the cluster with preemption and kill events.
+    </description>
+    <name>yarn.resourcemanager.monitor.capacity.preemption.observe_only</name>
+    <value>false</value>
+  </property>
+
+  <property>
+    <description>
+      Time in milliseconds between invocations of the
+      ProportionalCapacityPreemptionPolicy.
+    </description>
+    <name>yarn.resourcemanager.monitor.capacity.preemption.monitoring_interval</name>
+    <value>3000</value>
+  </property>
+
+  <property>
+    <description>
+      Time in milliseconds between requesting a preemption from an application and killing
+      the container.
+    </description>
+    <name>yarn.resourcemanager.monitor.capacity.preemption.max_wait_before_kill</name>
+    <value>15000</value>
+  </property>
+
+  <property>
+    <description>
+      Maximum percentage of resources preempted in a single round. By controlling this value
+      one can throttle the pace at which containers are reclaimed from the cluster. After
+      computing the total desired preemption, the policy scales it back within this limit.
+    </description>
+    <name>yarn.resourcemanager.monitor.capacity.preemption.total_preemption_per_round</name>
+    <value>0.1</value>
+  </property>
+
+  <property>
+    <description>
+      Maximum amount of resources above the target capacity ignored for preemption.
+      This defines a deadzone around the target capacity that helps prevent thrashing and
+      oscillations around the computed target balance. High values would slow the time to
+      capacity and (absent natural completions) might prevent convergence to guaranteed capacity.
+    </description>
+    <name>yarn.resourcemanager.monitor.capacity.preemption.max_ignored_over_capacity</name>
+    <value>0.1</value>
+  </property>
+
+  <property>
+    <description>
+      Given a computed preemption target, account for containers naturally expiring and preempt
+      only this percentage of the delta. This determines the rate of geometric convergence into
+      the deadzone (max_ignored_over_capacity). For example, a termination factor of 0.5 will
+      reclaim almost 95% of resources within 5 * max_wait_before_kill, even absent natural termination.
+    </description>
+    <name>yarn.resourcemanager.monitor.capacity.preemption.natural_termination_factor</name>
+    <value>0.2</value>
+  </property>
+
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/ProportionalCapacityPreemptionPolicy.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/ProportionalCapacityPreemptionPolicy.java
index 7e668b4..de6018e 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/ProportionalCapacityPreemptionPolicy.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/ProportionalCapacityPreemptionPolicy.java
@@ -25,6 +25,7 @@
 import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
 import org.apache.hadoop.yarn.api.records.ContainerId;
 import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
 import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
 import org.apache.hadoop.yarn.server.resourcemanager.monitor.SchedulingEditPolicy;
@@ -143,28 +144,28 @@ public void init(Configuration config, RMContext context,
     CapacitySchedulerConfiguration csConfig = scheduler.getConfiguration();
 
     maxIgnoredOverCapacity = csConfig.getDouble(
-        CapacitySchedulerConfiguration.PREEMPTION_MAX_IGNORED_OVER_CAPACITY,
-        CapacitySchedulerConfiguration.DEFAULT_PREEMPTION_MAX_IGNORED_OVER_CAPACITY);
+        YarnConfiguration.PREEMPTION_MAX_IGNORED_OVER_CAPACITY,
+        YarnConfiguration.DEFAULT_PREEMPTION_MAX_IGNORED_OVER_CAPACITY);
 
     naturalTerminationFactor = csConfig.getDouble(
-        CapacitySchedulerConfiguration.PREEMPTION_NATURAL_TERMINATION_FACTOR,
-        CapacitySchedulerConfiguration.DEFAULT_PREEMPTION_NATURAL_TERMINATION_FACTOR);
+        YarnConfiguration.PREEMPTION_NATURAL_TERMINATION_FACTOR,
+        YarnConfiguration.DEFAULT_PREEMPTION_NATURAL_TERMINATION_FACTOR);
 
     maxWaitTime = csConfig.getLong(
-        CapacitySchedulerConfiguration.PREEMPTION_WAIT_TIME_BEFORE_KILL,
-        CapacitySchedulerConfiguration.DEFAULT_PREEMPTION_WAIT_TIME_BEFORE_KILL);
+        YarnConfiguration.PREEMPTION_WAIT_TIME_BEFORE_KILL,
+        YarnConfiguration.DEFAULT_PREEMPTION_WAIT_TIME_BEFORE_KILL);
 
     monitoringInterval = csConfig.getLong(
-        CapacitySchedulerConfiguration.PREEMPTION_MONITORING_INTERVAL,
-        CapacitySchedulerConfiguration.DEFAULT_PREEMPTION_MONITORING_INTERVAL);
+        YarnConfiguration.PREEMPTION_MONITORING_INTERVAL,
+        YarnConfiguration.DEFAULT_PREEMPTION_MONITORING_INTERVAL);
 
     percentageClusterPreemptionAllowed = csConfig.getFloat(
-        CapacitySchedulerConfiguration.TOTAL_PREEMPTION_PER_ROUND,
-        CapacitySchedulerConfiguration.DEFAULT_TOTAL_PREEMPTION_PER_ROUND);
+        YarnConfiguration.TOTAL_PREEMPTION_PER_ROUND,
+        YarnConfiguration.DEFAULT_TOTAL_PREEMPTION_PER_ROUND);
 
     observeOnly = csConfig.getBoolean(
-        CapacitySchedulerConfiguration.PREEMPTION_OBSERVE_ONLY,
-        CapacitySchedulerConfiguration.DEFAULT_PREEMPTION_OBSERVE_ONLY);
+        YarnConfiguration.PREEMPTION_OBSERVE_ONLY,
+        YarnConfiguration.DEFAULT_PREEMPTION_OBSERVE_ONLY);
 
     lazyPreempionEnabled = csConfig.getBoolean(
         CapacitySchedulerConfiguration.LAZY_PREEMPTION_ENALBED,
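For illustration only (not part of the patch): a minimal sketch of how the relocated keys can be read through the standard Configuration API once they live in YarnConfiguration. The class name PreemptionConfigSketch is hypothetical; the keys, defaults, and getter signatures are the ones shown above.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.yarn.conf.YarnConfiguration;

    // Hypothetical example class, not part of this patch.
    public class PreemptionConfigSketch {
      public static void main(String[] args) {
        // YarnConfiguration loads yarn-default.xml and yarn-site.xml as resources.
        Configuration conf = new YarnConfiguration();

        boolean observeOnly = conf.getBoolean(
            YarnConfiguration.PREEMPTION_OBSERVE_ONLY,
            YarnConfiguration.DEFAULT_PREEMPTION_OBSERVE_ONLY);
        long monitoringInterval = conf.getLong(
            YarnConfiguration.PREEMPTION_MONITORING_INTERVAL,
            YarnConfiguration.DEFAULT_PREEMPTION_MONITORING_INTERVAL);
        float perRound = conf.getFloat(
            YarnConfiguration.TOTAL_PREEMPTION_PER_ROUND,
            YarnConfiguration.DEFAULT_TOTAL_PREEMPTION_PER_ROUND);

        System.out.println("observe_only=" + observeOnly
            + ", monitoring_interval=" + monitoringInterval + "ms"
            + ", total_preemption_per_round=" + perRound);
      }
    }
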
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerConfiguration.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerConfiguration.java
index 88e39de..3729264 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerConfiguration.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerConfiguration.java
@@ -1020,49 +1020,4 @@ public void setOrderingPolicyParameter(String queue,
   public boolean getLazyPreemptionEnabled() {
     return getBoolean(LAZY_PREEMPTION_ENALBED, DEFAULT_LAZY_PREEMPTION_ENABLED);
   }
-
-  /** If true, run the policy but do not affect the cluster with preemption and
-   * kill events. */
-  public static final String PREEMPTION_OBSERVE_ONLY =
-      "yarn.resourcemanager.monitor.capacity.preemption.observe_only";
-  public static final boolean DEFAULT_PREEMPTION_OBSERVE_ONLY = false;
-
-  /** Time in milliseconds between invocations of this policy */
-  public static final String PREEMPTION_MONITORING_INTERVAL =
-      "yarn.resourcemanager.monitor.capacity.preemption.monitoring_interval";
-  public static final long DEFAULT_PREEMPTION_MONITORING_INTERVAL = 3000L;
-
-  /** Time in milliseconds between requesting a preemption from an application
-   * and killing the container. */
-  public static final String PREEMPTION_WAIT_TIME_BEFORE_KILL =
-      "yarn.resourcemanager.monitor.capacity.preemption.max_wait_before_kill";
-  public static final long DEFAULT_PREEMPTION_WAIT_TIME_BEFORE_KILL = 15000L;
-
-  /** Maximum percentage of resources preemptionCandidates in a single round. By
-   * controlling this value one can throttle the pace at which containers are
-   * reclaimed from the cluster. After computing the total desired preemption,
-   * the policy scales it back within this limit. */
-  public static final String TOTAL_PREEMPTION_PER_ROUND =
-      "yarn.resourcemanager.monitor.capacity.preemption.total_preemption_per_round";
-  public static final float DEFAULT_TOTAL_PREEMPTION_PER_ROUND = 0.1f;
-
-  /** Maximum amount of resources above the target capacity ignored for
-   * preemption. This defines a deadzone around the target capacity that helps
-   * prevent thrashing and oscillations around the computed target balance.
-   * High values would slow the time to capacity and (absent natural
-   * completions) it might prevent convergence to guaranteed capacity. */
-  public static final String PREEMPTION_MAX_IGNORED_OVER_CAPACITY =
-      "yarn.resourcemanager.monitor.capacity.preemption.max_ignored_over_capacity";
-  public static final float DEFAULT_PREEMPTION_MAX_IGNORED_OVER_CAPACITY = 0.1f;
-  /**
-   * Given a computed preemption target, account for containers naturally
-   * expiring and preempt only this percentage of the delta. This determines
-   * the rate of geometric convergence into the deadzone ({@link
-   * #PREEMPTION_MAX_IGNORED_OVER_CAPACITY}). For example, a termination factor of 0.5
-   * will reclaim almost 95% of resources within 5 * {@link
-   * #PREEMPTION_WAIT_TIME_BEFORE_KILL}, even absent natural termination. */
-  public static final String PREEMPTION_NATURAL_TERMINATION_FACTOR =
-      "yarn.resourcemanager.monitor.capacity.preemption.natural_termination_factor";
-  public static final float DEFAULT_PREEMPTION_NATURAL_TERMINATION_FACTOR =
-      0.2f;
 }
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TestProportionalCapacityPreemptionPolicy.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TestProportionalCapacityPreemptionPolicy.java
index 3db4782..babe847 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TestProportionalCapacityPreemptionPolicy.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TestProportionalCapacityPreemptionPolicy.java
@@ -140,14 +140,14 @@ public int getValue() {
   public void setup() {
     conf = new CapacitySchedulerConfiguration(new Configuration(false));
     conf.setLong(
-        CapacitySchedulerConfiguration.PREEMPTION_WAIT_TIME_BEFORE_KILL, 10000);
-    conf.setLong(CapacitySchedulerConfiguration.PREEMPTION_MONITORING_INTERVAL,
+        YarnConfiguration.PREEMPTION_WAIT_TIME_BEFORE_KILL, 10000);
+    conf.setLong(YarnConfiguration.PREEMPTION_MONITORING_INTERVAL,
         3000);
     // report "ideal" preempt
-    conf.setFloat(CapacitySchedulerConfiguration.TOTAL_PREEMPTION_PER_ROUND,
+    conf.setFloat(YarnConfiguration.TOTAL_PREEMPTION_PER_ROUND,
         1.0f);
     conf.setFloat(
-        CapacitySchedulerConfiguration.PREEMPTION_NATURAL_TERMINATION_FACTOR,
+        YarnConfiguration.PREEMPTION_NATURAL_TERMINATION_FACTOR,
         1.0f);
     conf.set(YarnConfiguration.RM_SCHEDULER_MONITOR_POLICIES,
         ProportionalCapacityPreemptionPolicy.class.getCanonicalName());
@@ -268,7 +268,7 @@ public void testExpireKill() {
       { 3, 0, 0, 0 }, // subqueues
     };
     conf.setLong(
-        CapacitySchedulerConfiguration.PREEMPTION_WAIT_TIME_BEFORE_KILL,
+        YarnConfiguration.PREEMPTION_WAIT_TIME_BEFORE_KILL,
         killTime);
 
     ProportionalCapacityPreemptionPolicy policy = buildPolicy(qData);
@@ -307,7 +307,7 @@ public void testDeadzone() {
       { 3, 0, 0, 0 }, // subqueues
     };
     conf.setFloat(
-        CapacitySchedulerConfiguration.PREEMPTION_MAX_IGNORED_OVER_CAPACITY,
+        YarnConfiguration.PREEMPTION_MAX_IGNORED_OVER_CAPACITY,
         (float) 0.1);
     ProportionalCapacityPreemptionPolicy policy = buildPolicy(qData);
     policy.editSchedule();
@@ -597,7 +597,7 @@ public void testNaturalTermination() {
       { 3, 0, 0, 0 }, // subqueues
     };
     conf.setFloat(
-        CapacitySchedulerConfiguration.PREEMPTION_NATURAL_TERMINATION_FACTOR,
+        YarnConfiguration.PREEMPTION_NATURAL_TERMINATION_FACTOR,
         (float) 0.1);
 
     ProportionalCapacityPreemptionPolicy policy = buildPolicy(qData);
@@ -619,7 +619,7 @@ public void testObserveOnly() {
       { -1, 1, 1, 0 }, // req granularity
       { 3, 0, 0, 0 }, // subqueues
     };
-    conf.setBoolean(CapacitySchedulerConfiguration.PREEMPTION_OBSERVE_ONLY,
+    conf.setBoolean(YarnConfiguration.PREEMPTION_OBSERVE_ONLY,
         true);
     when(mCS.getConfiguration()).thenReturn(
         new CapacitySchedulerConfiguration(conf));
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TestProportionalCapacityPreemptionPolicyForNodePartitions.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TestProportionalCapacityPreemptionPolicyForNodePartitions.java
index b266665..9ade12c 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TestProportionalCapacityPreemptionPolicyForNodePartitions.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TestProportionalCapacityPreemptionPolicyForNodePartitions.java
@@ -104,14 +104,14 @@ public void setup() {
 
     conf = new CapacitySchedulerConfiguration(new Configuration(false));
     conf.setLong(
-        CapacitySchedulerConfiguration.PREEMPTION_WAIT_TIME_BEFORE_KILL, 10000);
-    conf.setLong(CapacitySchedulerConfiguration.PREEMPTION_MONITORING_INTERVAL,
+        YarnConfiguration.PREEMPTION_WAIT_TIME_BEFORE_KILL, 10000);
+    conf.setLong(YarnConfiguration.PREEMPTION_MONITORING_INTERVAL,
         3000);
     // report "ideal" preempt
-    conf.setFloat(CapacitySchedulerConfiguration.TOTAL_PREEMPTION_PER_ROUND,
+    conf.setFloat(YarnConfiguration.TOTAL_PREEMPTION_PER_ROUND,
         (float) 1.0);
     conf.setFloat(
-        CapacitySchedulerConfiguration.PREEMPTION_NATURAL_TERMINATION_FACTOR,
+        YarnConfiguration.PREEMPTION_NATURAL_TERMINATION_FACTOR,
         (float) 1.0);
 
     mClock = mock(Clock.class);
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacitySchedulerPreemption.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacitySchedulerPreemption.java
index 216ebab..a93b566 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacitySchedulerPreemption.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacitySchedulerPreemption.java
@@ -82,14 +82,14 @@ public void setUp() throws Exception {
     conf = TestUtils.getConfigurationWithMultipleQueues(this.conf);
 
     // Set preemption related configurations
-    conf.setInt(CapacitySchedulerConfiguration.PREEMPTION_WAIT_TIME_BEFORE_KILL,
+    conf.setInt(YarnConfiguration.PREEMPTION_WAIT_TIME_BEFORE_KILL,
         0);
     conf.setBoolean(CapacitySchedulerConfiguration.LAZY_PREEMPTION_ENALBED,
         true);
-    conf.setFloat(CapacitySchedulerConfiguration.TOTAL_PREEMPTION_PER_ROUND,
+    conf.setFloat(YarnConfiguration.TOTAL_PREEMPTION_PER_ROUND,
         1.0f);
     conf.setFloat(
-        CapacitySchedulerConfiguration.PREEMPTION_NATURAL_TERMINATION_FACTOR,
+        YarnConfiguration.PREEMPTION_NATURAL_TERMINATION_FACTOR,
         1.0f);
     mgr = new NullRMNodeLabelsManager();
     mgr.init(this.conf);
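To make the "almost 95%" claim in the natural_termination_factor documentation concrete (an illustrative calculation, not part of the patch): if each kill interval preempts a fraction f of the remaining delta, the uncollected portion after k intervals is (1 - f)^k, so f = 0.5 over 5 * max_wait_before_kill leaves 0.5^5, about 3% of the original target, i.e. roughly 97% reclaimed.

    // Hypothetical illustration of the geometric convergence described above;
    // not part of this patch.
    public class NaturalTerminationFactorSketch {
      public static void main(String[] args) {
        double factor = 0.5; // the example value from the documentation
        int rounds = 5;      // intervals of max_wait_before_kill
        double remaining = Math.pow(1.0 - factor, rounds);
        // Prints approximately 0.969 -- consistent with "almost 95%".
        System.out.printf("fraction reclaimed after %d rounds: %.3f%n",
            rounds, 1.0 - remaining);
      }
    }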