diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java index e30434f..f816a21 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java @@ -1050,6 +1050,12 @@ public static final String AHS_WEBAPP_SPNEGO_KEYTAB_FILE_KEY = AHS_PREFIX + "webapp.spnego-keytab-file"; + /** Directory for fail flag files */ + public static final String YARN_AHS_FAIL_FLAG_DIR = + AHS_PREFIX + "fail.flag.dir"; + public static final String DEFAULT_YARN_AHS_FAIL_FLAG_DIR = + "/tmp/hadoop-yarn/fail"; + //////////////////////////////// // ATS Configs //////////////////////////////// diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java index d84e3d3..f66e1e8 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java @@ -18,6 +18,7 @@ package org.apache.hadoop.yarn.server.resourcemanager.rmapp; +import java.io.IOException; import java.util.Collection; import java.util.Collections; import java.util.EnumSet; @@ -28,10 +29,12 @@ import java.util.concurrent.locks.ReentrantReadWriteLock; import java.util.concurrent.locks.ReentrantReadWriteLock.ReadLock; import java.util.concurrent.locks.ReentrantReadWriteLock.WriteLock; - import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.util.ExitUtil; @@ -85,6 +88,11 @@ private static final Log LOG = LogFactory.getLog(RMAppImpl.class); private static final String UNAVAILABLE = "N/A"; + final public static FsPermission FAIL_FLAG_DIR_PERMISSION = + FsPermission.createImmutable((short) 0777); // rwxrwxrwx + final public static FsPermission FAIL_FLAG_PERMISSION = + FsPermission.createImmutable((short) 0664); // rw-rw-r-- + // Immutable fields private final ApplicationId applicationId; private final RMContext rmContext; @@ -817,6 +825,7 @@ public void transition(RMAppImpl app, RMAppEvent event) { if (event instanceof RMAppFailedAttemptEvent) { msg = app.getAppAttemptFailedDiagnostics(event); } + app.writeKilledHistoryFlag(); LOG.info(msg); app.diagnostics.append(msg); // Inform the node for app-finish @@ -986,6 +995,7 @@ public AppKilledTransition() { @Override public void transition(RMAppImpl app, RMAppEvent event) { + app.writeKilledHistoryFlag(); app.diagnostics.append(getAppKilledDiagnostics()); super.transition(app, event); }; @@ -1011,6 +1021,7 @@ public AppRejectedTransition() { } public void transition(RMAppImpl app, RMAppEvent event) { + app.writeKilledHistoryFlag(); RMAppRejectedEvent rejectedEvent = (RMAppRejectedEvent)event; app.diagnostics.append(rejectedEvent.getMessage()); super.transition(app, event); @@ -1092,6 +1103,27 @@ public RMAppState transition(RMAppImpl app, RMAppEvent event) { } } + private void writeKilledHistoryFlag() { + if (getCurrentAppAttempt() != null) { + Path failDir = new Path(conf.get(YarnConfiguration.YARN_AHS_FAIL_FLAG_DIR, + YarnConfiguration.DEFAULT_YARN_AHS_FAIL_FLAG_DIR)); + try { + FileSystem fs = FileSystem.get(failDir.toUri(), conf); + if (!fs.exists(failDir)) { + fs.mkdirs(failDir); + fs.setPermission(failDir, FAIL_FLAG_DIR_PERMISSION); + } + Path flagFile = new Path(failDir, user + "_" + + getCurrentAppAttempt().getAppAttemptId().toString()); + fs.createNewFile(flagFile); + fs.setPermission(flagFile, FAIL_FLAG_PERMISSION); + } catch (IOException ioe) { + LOG.warn("Unable to write fail flag file for application " + + getCurrentAppAttempt().getAppAttemptId(), ioe); + } + } + } + @Override public String getApplicationType() { return this.applicationType;