diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java index 0d6caebac68..07c999cfca2 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java @@ -18,35 +18,8 @@ package org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair; -import static org.apache.hadoop.yarn.conf.YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_VCORES; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertNotEquals; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertNull; -import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.when; - -import java.io.File; -import java.io.FileWriter; -import java.io.IOException; -import java.io.PrintWriter; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.stream.Collectors; - -import javax.xml.parsers.ParserConfigurationException; - import com.google.common.collect.Lists; +import com.google.common.collect.Sets; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.hadoop.ha.HAServiceProtocol; @@ -54,6 +27,7 @@ import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; import org.apache.hadoop.net.NetworkTopology; import org.apache.hadoop.security.GroupMappingServiceProvider; +import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.yarn.MockApps; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; @@ -73,8 +47,7 @@ import org.apache.hadoop.yarn.event.Dispatcher; import org.apache.hadoop.yarn.event.Event; import org.apache.hadoop.yarn.event.EventHandler; -import org.apache.hadoop.yarn.exceptions - .SchedulerInvalidResoureRequestException; +import org.apache.hadoop.yarn.exceptions.SchedulerInvalidResoureRequestException; import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.exceptions.YarnRuntimeException; import org.apache.hadoop.yarn.security.YarnAuthorizationProvider; @@ -101,8 +74,6 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode; import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeResourceUpdateEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.AbstractYarnScheduler; - - import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode; @@ -129,7 +100,33 @@ import org.mockito.Mockito; import org.xml.sax.SAXException; -import com.google.common.collect.Sets; +import javax.xml.parsers.ParserConfigurationException; +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.io.PrintWriter; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.TimeoutException; +import java.util.stream.Collectors; + +import static org.apache.hadoop.yarn.conf.YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_VCORES; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; @SuppressWarnings("unchecked") public class TestFairScheduler extends FairSchedulerTestBase { @@ -137,6 +134,9 @@ private final static String ALLOC_FILE = new File(TEST_DIR, "test-queues").getAbsolutePath(); + private static final int TIMEOUT = 10000; + private static final int WAITFOR_CHECK_INTERVAL = 100; + @Before public void setUp() throws IOException { scheduler = new FairScheduler(); @@ -577,7 +577,8 @@ public void testFairShareWithLowMaxResources() throws IOException { * @throws IOException if scheduler reinitialization fails */ @Test - public void testChildMaxResources() throws IOException { + public void testChildMaxResources() throws IOException, InterruptedException, + TimeoutException { PrintWriter out = new PrintWriter(new FileWriter(ALLOC_FILE)); out.println(""); @@ -610,14 +611,35 @@ public void testChildMaxResources() throws IOException { NodeUpdateSchedulerEvent nodeEvent = new NodeUpdateSchedulerEvent(node1); + // Send 4 node heartbeats, this should be enough to allocate 4 containers + // As we have 2 queues with capacity: 2GB,2cores, we could only have + // 4 containers at most scheduler.handle(nodeEvent); scheduler.handle(nodeEvent); scheduler.handle(nodeEvent); scheduler.handle(nodeEvent); + + //wait for that we have 4 allocated containers altogether + GenericTestUtils.waitFor(() -> + scheduler.getRootQueueMetrics().getAllocatedContainers() == 4, + WAITFOR_CHECK_INTERVAL, TIMEOUT); + + // Apps should be running with 2 containers + assertEquals("App 1 is not running with the correct number of containers", + 2, scheduler.getSchedulerApp(attId1).getLiveContainers().size()); + assertEquals("App 2 is not running with the correct number of containers", + 2, scheduler.getSchedulerApp(attId2).getLiveContainers().size()); + + //ensure that a 5th node heartbeat does not allocate more containers scheduler.handle(nodeEvent); - scheduler.handle(nodeEvent); - scheduler.handle(nodeEvent); - scheduler.handle(nodeEvent); + try { + GenericTestUtils.waitFor(() -> + scheduler.getRootQueueMetrics().getAllocatedContainers() == 5, + WAITFOR_CHECK_INTERVAL, 2000); + } catch (TimeoutException e) { + //it's okay we have timeout here, meaning that the number of allocated + //containers have never reached 5 + } // Apps should be running with 2 containers assertEquals("App 1 is not running with the correct number of containers", @@ -637,11 +659,19 @@ public void testChildMaxResources() throws IOException { scheduler.reinitialize(conf, resourceManager.getRMContext()); scheduler.update(); - scheduler.handle(nodeEvent); - scheduler.handle(nodeEvent); + + // Send 2 node heartbeats, this should be enough to allocate 2 + // more containers. + // As we have 2 queues with capacity: 3GB,3cores, we could only have + // 6 containers at most scheduler.handle(nodeEvent); scheduler.handle(nodeEvent); + //wait for that we have 6 allocated containers altogether + GenericTestUtils.waitFor(() -> + scheduler.getRootQueueMetrics().getAllocatedContainers() == 6, + WAITFOR_CHECK_INTERVAL, TIMEOUT); + // Apps should be running with 3 containers now assertEquals("App 1 is not running with the correct number of containers", 3, scheduler.getSchedulerApp(attId1).getLiveContainers().size()); @@ -657,6 +687,17 @@ public void testChildMaxResources() throws IOException { out.println(""); out.close(); + //ensure that a 5th node heartbeat does not allocate more containers + scheduler.handle(nodeEvent); + try { + GenericTestUtils.waitFor(() -> + scheduler.getRootQueueMetrics().getAllocatedContainers() == 5, + WAITFOR_CHECK_INTERVAL, 2000); + } catch (TimeoutException e) { + //it's okay we have timeout here, meaning that the number of allocated + //containers have never reached 5 + } + scheduler.reinitialize(conf, resourceManager.getRMContext()); scheduler.update();