diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
index 8c8ad16..661958b 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
@@ -1032,6 +1032,13 @@
   public static final String AHS_WEBAPP_SPNEGO_KEYTAB_FILE_KEY =
       AHS_PREFIX + "webapp.spnego-keytab-file";
 
+  /** Directory in which RMAppImpl creates per-attempt fail flag files. */
+  public static final String YARN_AHS_FAIL_FLAG_DIR =
+      AHS_PREFIX + "fail.flag.dir";
+  /** Default value of {@link #YARN_AHS_FAIL_FLAG_DIR}. */
+  public static final String DEFAULT_YARN_AHS_FAIL_FLAG_DIR =
+      "/tmp/hadoop-yarn/fail";
+
   ////////////////////////////////
   // ATS Configs
   ////////////////////////////////
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java
index 196e89d..c8be0bd 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java
@@ -18,6 +18,7 @@
 
 package org.apache.hadoop.yarn.server.resourcemanager.rmapp;
 
+import java.io.IOException;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.EnumSet;
@@ -28,10 +29,12 @@
 import java.util.concurrent.locks.ReentrantReadWriteLock;
 import java.util.concurrent.locks.ReentrantReadWriteLock.ReadLock;
 import java.util.concurrent.locks.ReentrantReadWriteLock.WriteLock;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.security.token.Token;
 import org.apache.hadoop.util.ExitUtil;
@@ -821,6 +824,7 @@ public void transition(RMAppImpl app, RMAppEvent event) {
       if (event instanceof RMAppFailedAttemptEvent) {
         msg = app.getAppAttemptFailedDiagnostics(event);
       }
+      app.writeKilledHistoryFlag();
       LOG.info(msg);
       app.diagnostics.append(msg);
       // Inform the node for app-finish
@@ -982,6 +986,7 @@ public void transition(RMAppImpl app, RMAppEvent event) {
   private static class AppKilledTransition extends FinalTransition {
     @Override
     public void transition(RMAppImpl app, RMAppEvent event) {
+      app.writeKilledHistoryFlag();
       app.diagnostics.append(getAppKilledDiagnostics());
       super.transition(app, event);
     };
@@ -1002,7 +1007,9 @@ public void transition(RMAppImpl app, RMAppEvent event) {
 
 
   private static final class AppRejectedTransition extends FinalTransition{
+    @Override
     public void transition(RMAppImpl app, RMAppEvent event) {
+      app.writeKilledHistoryFlag();
       RMAppRejectedEvent rejectedEvent = (RMAppRejectedEvent)event;
       app.diagnostics.append(rejectedEvent.getMessage());
       super.transition(app, event);
@@ -1080,6 +1087,48 @@ public RMAppState transition(RMAppImpl app, RMAppEvent event) {
     }
   }
 
+  /**
+   * Best-effort creation of an empty flag file for the current attempt of
+   * this application under the directory configured by
+   * {@link YarnConfiguration#YARN_AHS_FAIL_FLAG_DIR}. Despite its name, the
+   * method is called from the attempt-failed, killed and rejected
+   * transitions alike. Nothing is written when there is no current attempt,
+   * and problems are logged instead of thrown so that they cannot abort the
+   * state transition that triggered the call.
+   */
+  private void writeKilledHistoryFlag() {
+    RMAppAttempt attempt = getCurrentAppAttempt();
+    if (attempt == null) {
+      return;
+    }
+    // Resolve the id once so the flag file name and the log messages agree.
+    String attemptId = attempt.getAppAttemptId().toString();
+    try {
+      Path failDir = new Path(conf.get(
+          YarnConfiguration.YARN_AHS_FAIL_FLAG_DIR,
+          YarnConfiguration.DEFAULT_YARN_AHS_FAIL_FLAG_DIR));
+      FileSystem fs = FileSystem.get(failDir.toUri(), conf);
+      // mkdirs() also succeeds when the directory already exists, so a
+      // separate (and racy) exists() check is unnecessary.
+      if (!fs.mkdirs(failDir)) {
+        LOG.warn("Unable to create fail flag directory " + failDir);
+        return;
+      }
+      Path flagFile = new Path(failDir, user + "_" + attemptId);
+      if (!fs.createNewFile(flagFile)) {
+        LOG.info("Fail flag file " + flagFile + " already exists");
+      }
+    } catch (IOException ioe) {
+      LOG.warn("Unable to write fail flag file for application "
+          + attemptId, ioe);
+    } catch (IllegalArgumentException iae) {
+      // e.g. an empty or malformed directory setting; a bad config value
+      // must not break the application state machine.
+      LOG.warn("Invalid fail flag directory configured for "
+          + YarnConfiguration.YARN_AHS_FAIL_FLAG_DIR, iae);
+    }
+  }
+
   @Override
   public String getApplicationType() {
     return this.applicationType;