diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeHealthService.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeHealthService.java index 8103c16..6a28605 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeHealthService.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeHealthService.java @@ -78,11 +78,20 @@ private Configuration getConfForNodeHealthScript() { } private void writeNodeHealthScriptFile(String scriptStr, boolean setExecutable) - throws IOException { - PrintWriter pw = new PrintWriter(new FileOutputStream(nodeHealthscriptFile)); - pw.println(scriptStr); - pw.flush(); - pw.close(); + throws IOException { + PrintWriter pw = null; + try { + FileUtil.setWritable(nodeHealthscriptFile, true); + FileUtil.setReadable(nodeHealthscriptFile, true); + pw = new PrintWriter(new FileOutputStream(nodeHealthscriptFile)); + pw.println(scriptStr); + pw.flush(); + } finally { + // pw is still null when an earlier call in the try block threw, so guard close(). + if (pw != null) { + pw.close(); + } + } FileUtil.setExecutable(nodeHealthscriptFile, setExecutable); } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/ZKRMStateStore.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/ZKRMStateStore.java index 9f06b85..84eea39 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/ZKRMStateStore.java +++ 
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/ZKRMStateStore.java @@ -977,7 +977,7 @@ T runWithRetries() throws Exception { Thread.sleep(zkRetryInterval); continue; } - LOG.error("Error while doing ZK operation.", ke); + LOG.debug("Error while doing ZK operation.", ke); throw ke; } } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestAMRestart.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestAMRestart.java index af33ee1..c317344 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestAMRestart.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestAMRestart.java @@ -22,8 +22,6 @@ import java.util.HashMap; import java.util.List; -import org.junit.Assert; - import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse; import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse; import org.apache.hadoop.yarn.api.records.ApplicationAccessType; @@ -45,6 +43,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerState; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler; +import org.junit.Assert; import org.junit.Test; /** @@ -180,7 +179,6 @@ public void testAMRestartWithExistingContainers() throws Exception { // complete container by sending the container complete event which has 
earlier // attempt's attemptId nm1.nodeHeartbeat(am1.getApplicationAttemptId(), 3, ContainerState.COMPLETE); - rm1.waitForState(nm1, containerId3, RMContainerState.COMPLETED); // Even though the completed container containerId3 event was sent to the // earlier failed attempt, new RMAppAttempt can also capture this container @@ -189,7 +187,7 @@ public void testAMRestartWithExistingContainers() throws Exception { RMAppAttempt newAttempt = app1.getRMAppAttempt(am2.getApplicationAttemptId()); // 4 containers finished, acquired/allocated/reserved/completed. - Assert.assertEquals(4, newAttempt.getJustFinishedContainers().size()); + waitForContainersToFinish(4, newAttempt); boolean container3Exists = false, container4Exists = false, container5Exists = false, container6Exists = false; for(ContainerStatus status : newAttempt.getJustFinishedContainers()) { @@ -230,11 +228,29 @@ public void testAMRestartWithExistingContainers() throws Exception { Assert.assertFalse(schedulerNewAttempt.getLiveContainers().contains( containerId2)); // all 4 normal containers finished. - Assert.assertEquals(5, newAttempt.getJustFinishedContainers().size()); - + System.out.println("New attempt's just finished containers: " + + newAttempt.getJustFinishedContainers()); + waitForContainersToFinish(5, newAttempt); rm1.stop(); } + /** + * Polls until the attempt reports expectedNum just-finished containers, + * sleeping 100 ms between checks for at most 500 checks, then asserts the + * count so that a timeout fails the test instead of passing silently. + */ + private void waitForContainersToFinish(int expectedNum, RMAppAttempt attempt) + throws InterruptedException { + int count = 0; + while (attempt.getJustFinishedContainers().size() != expectedNum + && count < 500) { + Thread.sleep(100); + count++; + } + Assert.assertEquals(expectedNum, + attempt.getJustFinishedContainers().size()); + } + @Test public void testNMTokensRebindOnAMRestart() throws Exception { YarnConfiguration conf = new YarnConfiguration();