From e8537f9919927db7e180a03a35d8c4b555ea7479 Mon Sep 17 00:00:00 2001 From: Prabhu Joseph Date: Thu, 28 May 2020 15:56:01 +0530 Subject: [PATCH] YARN-10287. Revert CS Config on any failures during reinitialize --- .../scheduler/capacity/CapacityScheduler.java | 1 + .../TestRMWebServicesConfigurationMutation.java | 73 ++++++++++++++++++---- 2 files changed, 62 insertions(+), 12 deletions(-) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java index 5cef57a..be47699 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java @@ -468,6 +468,7 @@ public void reinitialize(Configuration newConf, RMContext rmContext, reinitializeQueues(this.conf); } catch (Throwable t) { this.conf = oldConf; + reinitializeQueues(this.conf); refreshMaximumAllocation( ResourceUtils.fetchMaximumAllocationFromConfig(this.conf)); throw new IOException("Failed to re-init queues : " + t.getMessage(), diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesConfigurationMutation.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesConfigurationMutation.java index 34b7c12..3c96c6a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesConfigurationMutation.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesConfigurationMutation.java @@ -60,6 +60,7 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNull; import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; import static org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacitySchedulerConfiguration.ORDERING_POLICY; /** @@ -131,7 +132,7 @@ public void setUp() throws Exception { private static void setupQueueConfiguration( CapacitySchedulerConfiguration config) { config.setQueues(CapacitySchedulerConfiguration.ROOT, - new String[]{"a", "b", "c"}); + new String[]{"a", "b", "c", "mappedqueue"}); final String a = CapacitySchedulerConfiguration.ROOT + ".a"; config.setCapacity(a, 25f); @@ -152,6 +153,11 @@ private static void setupQueueConfiguration( final String c1 = c + ".c1"; config.setQueues(c, new String[] {"c1"}); config.setCapacity(c1, 0f); + + final String d = CapacitySchedulerConfiguration.ROOT + ".d"; + config.setCapacity(d, 0f); + config.set(CapacitySchedulerConfiguration.QUEUE_MAPPING, + "g:hadoop:mappedqueue"); } public TestRMWebServicesConfigurationMutation() { @@ -187,14 +193,14 @@ private CapacitySchedulerConfiguration getSchedulerConf() public void testGetSchedulerConf() throws Exception { CapacitySchedulerConfiguration orgConf = getSchedulerConf(); assertNotNull(orgConf); - assertEquals(3, orgConf.getQueues("root").length); + assertEquals(4, orgConf.getQueues("root").length); } @Test public void testFormatSchedulerConf() throws Exception { CapacitySchedulerConfiguration newConf = getSchedulerConf(); assertNotNull(newConf); - assertEquals(3, newConf.getQueues("root").length); + assertEquals(4, newConf.getQueues("root").length); SchedConfUpdateInfo updateInfo = new SchedConfUpdateInfo(); Map nearEmptyCapacity = new HashMap<>(); @@ -220,7 +226,7 @@ public void testFormatSchedulerConf() throws Exception { .put(ClientResponse.class); newConf = getSchedulerConf(); assertNotNull(newConf); - assertEquals(4, newConf.getQueues("root").length); + assertEquals(5, newConf.getQueues("root").length); // Format the scheduler config and validate root.formattest is not present response = r.path("ws").path("v1").path("cluster") @@ -229,7 +235,7 @@ public void testFormatSchedulerConf() throws Exception { .accept(MediaType.APPLICATION_JSON).get(ClientResponse.class); assertEquals(Status.OK.getStatusCode(), response.getStatus()); newConf = getSchedulerConf(); - assertEquals(3, newConf.getQueues("root").length); + assertEquals(4, newConf.getQueues("root").length); } private long getConfigVersion() throws Exception { @@ -255,7 +261,7 @@ public void testSchedulerConfigVersion() throws Exception { public void testAddNestedQueue() throws Exception { CapacitySchedulerConfiguration orgConf = getSchedulerConf(); assertNotNull(orgConf); - assertEquals(3, orgConf.getQueues("root").length); + assertEquals(4, orgConf.getQueues("root").length); WebResource r = resource(); @@ -290,7 +296,7 @@ public void testAddNestedQueue() throws Exception { assertEquals(Status.OK.getStatusCode(), response.getStatus()); CapacitySchedulerConfiguration newCSConf = ((CapacityScheduler) rm.getResourceScheduler()).getConfiguration(); - assertEquals(4, newCSConf.getQueues("root").length); + assertEquals(5, newCSConf.getQueues("root").length); assertEquals(2, newCSConf.getQueues("root.d").length); assertEquals(25.0f, newCSConf.getNonLabeledQueueCapacity("root.d.d1"), 0.01f); @@ -299,7 +305,7 @@ public void testAddNestedQueue() throws Exception { CapacitySchedulerConfiguration newConf = getSchedulerConf(); assertNotNull(newConf); - assertEquals(4, newConf.getQueues("root").length); + assertEquals(5, newConf.getQueues("root").length); } @Test @@ -329,7 +335,7 @@ public void testAddWithUpdate() throws Exception { assertEquals(Status.OK.getStatusCode(), response.getStatus()); CapacitySchedulerConfiguration newCSConf = ((CapacityScheduler) rm.getResourceScheduler()).getConfiguration(); - assertEquals(4, newCSConf.getQueues("root").length); + assertEquals(5, newCSConf.getQueues("root").length); assertEquals(25.0f, newCSConf.getNonLabeledQueueCapacity("root.d"), 0.01f); assertEquals(50.0f, newCSConf.getNonLabeledQueueCapacity("root.b"), 0.01f); } @@ -491,6 +497,49 @@ public void testStopWithRemoveQueue() throws Exception { } @Test + public void testRemoveQueueWhichHasQueueMapping() throws Exception { + WebResource r = resource(); + + ClientResponse response; + CapacityScheduler cs = (CapacityScheduler) rm.getResourceScheduler(); + + // Validate Queue 'mappedqueue' exists before deletion + assertNotNull("Failed to setup CapacityScheduler Configuration", + cs.getQueue("mappedqueue")); + + // Set state of queue 'mappedqueue' to STOPPED. + SchedConfUpdateInfo updateInfo = new SchedConfUpdateInfo(); + Map stoppedParam = new HashMap<>(); + stoppedParam.put(CapacitySchedulerConfiguration.STATE, + QueueState.STOPPED.toString()); + QueueConfigInfo stoppedInfo = new QueueConfigInfo("root.mappedqueue", + stoppedParam); + updateInfo.getUpdateQueueInfo().add(stoppedInfo); + + // Remove queue 'mappedqueue' using update scheduler-conf + updateInfo.getRemoveQueueInfo().add("root.mappedqueue"); + response = r.path("ws").path("v1").path("cluster") + .path("scheduler-conf").queryParam("user.name", userName) + .accept(MediaType.APPLICATION_JSON) + .entity(YarnWebServiceUtils.toJson(updateInfo, + SchedConfUpdateInfo.class), MediaType.APPLICATION_JSON) + .put(ClientResponse.class); + String responseText = response.getEntity(String.class); + + // Queue 'mappedqueue' deletion will fail as there is queue mapping present + assertEquals(Status.BAD_REQUEST.getStatusCode(), response.getStatus()); + assertTrue(responseText.contains("Failed to re-init queues : " + + "mapping contains invalid or non-leaf queue mappedqueue")); + + // Validate queue 'mappedqueue' exists after above failure + CapacitySchedulerConfiguration newCSConf = + ((CapacityScheduler) rm.getResourceScheduler()).getConfiguration(); + assertEquals(4, newCSConf.getQueues("root").length); + assertNotNull("CapacityScheduler Configuration is corrupt", + cs.getQueue("mappedqueue")); + } + + @Test public void testStopWithConvertLeafToParentQueue() throws Exception { WebResource r = resource(); ClientResponse response; @@ -544,7 +593,7 @@ public void testRemoveParentQueue() throws Exception { assertEquals(Status.OK.getStatusCode(), response.getStatus()); CapacitySchedulerConfiguration newCSConf = ((CapacityScheduler) rm.getResourceScheduler()).getConfiguration(); - assertEquals(2, newCSConf.getQueues("root").length); + assertEquals(3, newCSConf.getQueues("root").length); assertNull(newCSConf.getQueues("root.c")); } @@ -575,7 +624,7 @@ public void testRemoveParentQueueWithCapacity() throws Exception { assertEquals(Status.OK.getStatusCode(), response.getStatus()); CapacitySchedulerConfiguration newCSConf = ((CapacityScheduler) rm.getResourceScheduler()).getConfiguration(); - assertEquals(2, newCSConf.getQueues("root").length); + assertEquals(3, newCSConf.getQueues("root").length); assertEquals(100.0f, newCSConf.getNonLabeledQueueCapacity("root.b"), 0.01f); } @@ -607,7 +656,7 @@ public void testRemoveMultipleQueues() throws Exception { assertEquals(Status.OK.getStatusCode(), response.getStatus()); CapacitySchedulerConfiguration newCSConf = ((CapacityScheduler) rm.getResourceScheduler()).getConfiguration(); - assertEquals(1, newCSConf.getQueues("root").length); + assertEquals(2, newCSConf.getQueues("root").length); } private void stopQueue(String... queuePaths) throws Exception { -- 2.7.4 (Apple Git-66)