Stabilize RM test helpers: poll for app/attempt state at a fixed interval
(with an overall timeout) instead of coarse 1-2 second sleeps, and log via
log4j rather than System.out.

Review fix: MockRM.waitForState(ApplicationId, RMAppState) now sleeps
waitMsPerLoop between polls. Without the sleep the loop exhausted its
80 second budget instantly and reported a bogus timeout.

diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockAM.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockAM.java
index 5c107aa..2791de4 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockAM.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockAM.java
@@ -43,10 +43,13 @@
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState;
 import org.apache.hadoop.yarn.util.Records;
+import org.apache.log4j.Logger;
 import org.junit.Assert;
 
 public class MockAM {
 
+  private static final Logger LOG = Logger.getLogger(MockAM.class);
+
   private volatile int responseId = 0;
   private final ApplicationAttemptId attemptId;
   private RMContext context;
@@ -73,18 +76,27 @@ public void setAMRMProtocol(ApplicationMasterProtocol amRMProtocol,
   public void waitForState(RMAppAttemptState finalState) throws Exception {
     RMApp app = context.getRMApps().get(attemptId.getApplicationId());
     RMAppAttempt attempt = app.getRMAppAttempt(attemptId);
-    int timeoutSecs = 0;
+    final int timeoutMsecs = 40000;
+    final int minWaitMsecs = 1000;
+    final int waitMsPerLoop = 500;
+    int loop = 0;
     while (!finalState.equals(attempt.getAppAttemptState())
-        && timeoutSecs++ < 40) {
-      System.out
-          .println("AppAttempt : " + attemptId + " State is : "
-              + attempt.getAppAttemptState()
-              + " Waiting for state : " + finalState);
-      Thread.sleep(1000);
+        && waitMsPerLoop * loop < timeoutMsecs) {
+      LOG.info("AppAttempt : " + attemptId + " State is : " +
+          attempt.getAppAttemptState() + " Waiting for state : " +
+          finalState);
+      Thread.sleep(waitMsPerLoop);
+      loop++;
+    }
+    int waitedMsecs = waitMsPerLoop * loop;
+    if (minWaitMsecs > waitedMsecs) {
+      Thread.sleep(minWaitMsecs - waitedMsecs);
+    }
+    LOG.info("Attempt State is : " + attempt.getAppAttemptState());
+    if (waitedMsecs >= timeoutMsecs) {
+      Assert.fail("Attempt state is not correct (timedout): expected: "
+          + finalState + " actual: " + attempt.getAppAttemptState());
     }
-    System.out.println("AppAttempt State is : " + attempt.getAppAttemptState());
-    Assert.assertEquals("AppAttempt state is not correct (timedout)",
-        finalState, attempt.getAppAttemptState());
   }
 
   public RegisterApplicationMasterResponse registerAppAttempt()
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java
index 63d6557..902fd56 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java
@@ -65,6 +65,7 @@
 import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager;
 import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
+import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppImpl;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEvent;
@@ -95,6 +96,7 @@
 @SuppressWarnings("unchecked")
 public class MockRM extends ResourceManager {
 
+  static final Logger LOG = Logger.getLogger(MockRM.class);
   static final String ENABLE_WEBAPP = "mockrm.webapp.enabled";
 
   public MockRM() {
@@ -126,15 +128,22 @@ public void waitForState(ApplicationId appId, RMAppState finalState)
       throws Exception {
     RMApp app = getRMContext().getRMApps().get(appId);
     Assert.assertNotNull("app shouldn't be null", app);
-    int timeoutSecs = 0;
-    while (!finalState.equals(app.getState()) && timeoutSecs++ < 40) {
-      System.out.println("App : " + appId + " State is : " + app.getState()
-          + " Waiting for state : " + finalState);
-      Thread.sleep(2000);
+    final int timeoutMsecs = 80000;
+    final int waitMsPerLoop = 500;
+    int loop = 0;
+    while (!finalState.equals(app.getState()) &&
+        ((waitMsPerLoop * loop) < timeoutMsecs)) {
+      LOG.info("App : " + appId + " State is : " + app.getState() +
+          " Waiting for state : " + finalState);
+      Thread.sleep(waitMsPerLoop);
+      loop++;
+    }
+    int waitedMsecs = waitMsPerLoop * loop;
+    LOG.info("App State is : " + app.getState());
+    if (waitedMsecs >= timeoutMsecs) {
+      Assert.fail("App state is not correct (timedout): expected: " +
+          finalState + " actual: " + app.getState());
     }
-    System.out.println("App State is : " + app.getState());
-    Assert.assertEquals("App state is not correct (timedout)", finalState,
-        app.getState());
   }
 
   public void waitForState(ApplicationAttemptId attemptId,
@@ -143,16 +152,26 @@ public void waitForState(ApplicationAttemptId attemptId,
     RMApp app = getRMContext().getRMApps().get(attemptId.getApplicationId());
     Assert.assertNotNull("app shouldn't be null", app);
     RMAppAttempt attempt = app.getRMAppAttempt(attemptId);
-    int timeoutSecs = 0;
-    while (!finalState.equals(attempt.getAppAttemptState()) && timeoutSecs++ < 40) {
-      System.out.println("AppAttempt : " + attemptId
-          + " State is : " + attempt.getAppAttemptState()
-          + " Waiting for state : " + finalState);
-      Thread.sleep(1000);
+    final int timeoutMsecs = 40000;
+    final int minWaitMsecs = 1000;
+    final int waitMsPerLoop = 10;
+    int loop = 0;
+    while (!finalState.equals(attempt.getAppAttemptState())
+        && waitMsPerLoop * loop < timeoutMsecs) {
+      LOG.info("AppAttempt : " + attemptId + " State is : " +
+          attempt.getAppAttemptState() + " Waiting for state : " + finalState);
+      Thread.sleep(waitMsPerLoop);
+      loop++;
+    }
+    int waitedMsecs = waitMsPerLoop * loop;
+    if (minWaitMsecs > waitedMsecs) {
+      Thread.sleep(minWaitMsecs - waitedMsecs);
+    }
+    LOG.info("Attempt State is : " + attempt.getAppAttemptState());
+    if (waitedMsecs >= timeoutMsecs) {
+      Assert.fail("Attempt state is not correct (timedout): expected: "
+          + finalState + " actual: " + attempt.getAppAttemptState());
     }
-    System.out.println("Attempt State is : " + attempt.getAppAttemptState());
-    Assert.assertEquals("Attempt state is not correct (timedout)", finalState,
-        attempt.getAppAttemptState());
   }
 
   public void waitForContainerAllocated(MockNM nm, ContainerId containerId)
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationMasterService.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationMasterService.java
index ca5c7a4..8c175b5 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationMasterService.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationMasterService.java
@@ -20,6 +20,7 @@
 
 import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse;
 import org.apache.hadoop.yarn.proto.YarnServiceProtos.SchedulerResourceTypes;
+import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacitySchedulerConfiguration;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler;
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java
index c889446..92c0a51 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java
@@ -586,10 +586,28 @@ public void testRMRestartWaitForPreviousAMToFinish() throws Exception {
         .getAppAttemptState());
     Assert.assertEquals(RMAppAttemptState.LAUNCHED,rmApp.getAppAttempts()
         .get(latestAppAttemptId).getAppAttemptState());
-    
+
     rm3.waitForState(latestAppAttemptId, RMAppAttemptState.FAILED);
     rm3.waitForState(rmApp.getApplicationId(), RMAppState.ACCEPTED);
-    Assert.assertEquals(4, rmApp.getAppAttempts().size());
+    final int maxRetry = 10;
+    int retry = 0;
+    while (retry < maxRetry) {
+      // This is workaround to wait for the completion of the handler for
+      // the state transition.
+      retry++;
+      if (retry >= maxRetry) {
+        // reaches max retry. assert and abort.
+        Assert.assertEquals(4, rmApp.getAppAttempts().size());
+      }
+      if (rmApp.getAppAttempts().size() != 4) {
+        // retry.
+        Thread.sleep(100);
+      } else {
+        // assert and go next.
+        Assert.assertEquals(4, rmApp.getAppAttempts().size());
+        break;
+      }
+    }
     Assert.assertEquals(RMAppAttemptState.FAILED,
         rmApp.getAppAttempts().get(latestAppAttemptId).getAppAttemptState());
 
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestAMRestart.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestAMRestart.java
index 7befba4..3387f41 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestAMRestart.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestAMRestart.java
@@ -592,7 +592,7 @@ public void testRMRestartOrFailoverNotCountedForAMFailures()
     rm2.stop();
   }
 
-  @Test (timeout = 50000)
+  @Test (timeout = 120000)
   public void testRMAppAttemptFailuresValidityInterval() throws Exception {
     YarnConfiguration conf = new YarnConfiguration();
     conf.setClass(YarnConfiguration.RM_SCHEDULER, CapacityScheduler.class,
@@ -612,10 +612,10 @@ public void testRMAppAttemptFailuresValidityInterval() throws Exception {
         new MockNM("127.0.0.1:1234", 8000, rm1.getResourceTrackerService());
     nm1.registerNode();
 
-    // set window size to a larger number : 20s
+    // set window size to a larger number : 60s
     // we will verify the app should be failed if
-    // two continuous attempts failed in 20s.
-    RMApp app = rm1.submitApp(200, 20000);
+    // two continuous attempts failed in 60s.
+    RMApp app = rm1.submitApp(200, 60000);
     MockAM am = MockRM.launchAM(app, rm1, nm1);
 
     // Fail current attempt normally
@@ -636,8 +636,8 @@ public void testRMAppAttemptFailuresValidityInterval() throws Exception {
     rm1.waitForState(app.getApplicationId(), RMAppState.FAILED);
 
     ControlledClock clock = new ControlledClock(new SystemClock());
-    // set window size to 6s
-    RMAppImpl app1 = (RMAppImpl)rm1.submitApp(200, 6000);;
+    // set window size to 10s
+    RMAppImpl app1 = (RMAppImpl)rm1.submitApp(200, 10000);;
     app1.setSystemClock(clock);
     MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
 
@@ -655,8 +655,8 @@ public void testRMAppAttemptFailuresValidityInterval() throws Exception {
     MockAM am2 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
     am2.waitForState(RMAppAttemptState.RUNNING);
 
-    // wait for 6 seconds
-    clock.setTime(System.currentTimeMillis() + 6*1000);
+    // wait for 10 seconds
+    clock.setTime(System.currentTimeMillis() + 10*1000);
     // Fail attempt2 normally
     nm1.nodeHeartbeat(am2.getApplicationAttemptId(),
       1, ContainerState.COMPLETE);
@@ -693,8 +693,8 @@ public void testRMAppAttemptFailuresValidityInterval() throws Exception {
     MockAM am4 =
         rm2.waitForNewAMToLaunchAndRegister(app1.getApplicationId(), 4, nm1);
 
-    // wait for 6 seconds
-    clock.setTime(System.currentTimeMillis() + 6*1000);
+    // wait for 10 seconds
+    clock.setTime(System.currentTimeMillis() + 10*1000);
     // Fail attempt4 normally
     nm1
       .nodeHeartbeat(am4.getApplicationAttemptId(), 1, ContainerState.COMPLETE);