diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/ResourceMgrDelegate.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/ResourceMgrDelegate.java
index 74b07c2..fb5117b 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/ResourceMgrDelegate.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/ResourceMgrDelegate.java
@@ -281,9 +281,15 @@ public YarnClientApplication createApplication() throws
   @Override
   public void killApplication(ApplicationId applicationId)
       throws YarnException, IOException {
-    client.killApplication(applicationId);
+    killApplication(applicationId, null);
   }
 
+  public void killApplication(ApplicationId applicationId, String reason)
+      throws YarnException, IOException {
+    client.killApplication(applicationId, reason);
+  }
+
+
   @Override
   public ApplicationReport getApplicationReport(ApplicationId appId)
       throws YarnException, IOException {
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestMRJobs.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestMRJobs.java
index 1ca1fd4..a96e5e3 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestMRJobs.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestMRJobs.java
@@ -53,6 +53,7 @@
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.NullWritable;
 import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.ResourceMgrDelegate;
 import org.apache.hadoop.mapreduce.Counters;
 import org.apache.hadoop.mapreduce.Job;
 import org.apache.hadoop.mapreduce.JobCounter;
@@ -65,6 +66,7 @@
 import org.apache.hadoop.mapreduce.TaskID;
 import org.apache.hadoop.mapreduce.TaskReport;
 import org.apache.hadoop.mapreduce.TaskType;
+import org.apache.hadoop.mapreduce.TypeConverter;
 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
 import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter;
 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
@@ -75,6 +77,8 @@
 import org.apache.hadoop.security.token.TokenIdentifier;
 import org.apache.hadoop.util.JarFinder;
 import org.apache.hadoop.util.Shell;
+import org.apache.hadoop.yarn.client.api.YarnClient;
+import org.apache.hadoop.yarn.client.cli.ApplicationCLI;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.junit.AfterClass;
 import org.junit.Assert;
@@ -190,6 +194,66 @@ public void testSleepJob() throws IOException, InterruptedException,
     // JobStatus?)--compare against MRJobConfig.JOB_UBERTASK_ENABLE value
   }
 
+  @Test (timeout = 300000)
+  public void testSleepAppKillWithReason() throws Exception {
+    LOG.info("\n\n\nStarting testSleepAppKillWithReason().");
+
+    if (!(new File(MiniMRYarnCluster.APPJAR)).exists()) {
+      LOG.info("MRAppJar " + MiniMRYarnCluster.APPJAR
+               + " not found. Not running test.");
+      return;
+    }
+    final ApplicationCLI appCli = new ApplicationCLI();
+    final ResourceMgrDelegate client =
+        new ResourceMgrDelegate(new YarnConfiguration(mrCluster.getConfig()));
+    appCli.setClient(client);
+    appCli.setSysOutPrintStream(System.out);
+
+    final Configuration sleepConf = new Configuration(mrCluster.getConfig());
+    final SleepJob sleepJob = new SleepJob();
+    sleepJob.setConf(sleepConf);
+    final Job job = sleepJob.createJob(1, 0, 10000, 10, 0, 0);
+    job.addFileToClassPath(APP_JAR); // The AppMaster jar itself.
+    job.setJarByClass(SleepJob.class);
+    job.setMaxMapAttempts(4);
+    job.submit();
+
+    // wait until running
+    //
+    TaskReport[] reports = new TaskReport[0];
+    while ( reports.length == 0
+        || reports[0].getRunningTaskAttemptIds().isEmpty()) {
+      Thread.sleep(1000);
+      reports = job.getTaskReports(TaskType.MAP);
+    }
+
+    // kill application
+    //
+    final String killAppReason = "Kill reason for app";
+    final String reasonTail = "Don't include this";
+    final StringBuilder tooLongOfAReason =
+        new StringBuilder(YarnClient.MAX_KILL_REASON_LENGTH + 10);
+    tooLongOfAReason.append(killAppReason);
+    while (tooLongOfAReason.length() < YarnClient.MAX_KILL_REASON_LENGTH) {
+      tooLongOfAReason.append('!');
+    }
+    tooLongOfAReason.append(reasonTail);
+
+    int exitCode = appCli.run(
+        new String[] {
+            "-kill",
+            TypeConverter.toYarn(job.getJobID()).getAppId().toString(),
+            tooLongOfAReason.toString()});
+
+    Assert.assertSame(0, exitCode);
+    Assert.assertFalse("Job should fail", job.waitForCompletion(false));
+    final String jobDiagnostics = job.getStatus().getFailureInfo();
+    Assert.assertTrue("No job kill reason in: " + jobDiagnostics,
+        jobDiagnostics.contains("\"" + killAppReason + "!!!!"));
+    Assert.assertFalse("Extra characters in: " + jobDiagnostics,
+        jobDiagnostics.contains(reasonTail));
+  }
+
   protected void verifySleepJobCounters(Job job) throws InterruptedException,
       IOException {
     Counters counters = job.getCounters();
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/KillApplicationRequest.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/KillApplicationRequest.java
index 606cf4e..391b5b8 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/KillApplicationRequest.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/KillApplicationRequest.java
@@ -57,4 +57,10 @@ public static KillApplicationRequest newInstance(ApplicationId applicationId) {
   @Public
   @Stable
   public abstract void setApplicationId(ApplicationId applicationId);
+
+  @Public
+  public abstract String getReason();
+
+  @Public
+  public abstract void setReason(String reason);
 }
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_service_protos.proto b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_service_protos.proto
index a4631d1..059a129 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_service_protos.proto
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_service_protos.proto
@@ -113,6 +113,7 @@ message SubmitApplicationResponseProto {
 
 message KillApplicationRequestProto {
   optional ApplicationIdProto application_id = 1;
+  optional string reason = 2;
 }
 
 message KillApplicationResponseProto {
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/YarnClient.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/YarnClient.java
index 155ba5d..80c5d2a 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/YarnClient.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/YarnClient.java
@@ -46,6 +46,7 @@
 @InterfaceAudience.Public
 @InterfaceStability.Stable
 public abstract class YarnClient extends AbstractService {
+  public static final int MAX_KILL_REASON_LENGTH = 128;
 
   /**
    * Create a new instance of YarnClient.
@@ -101,12 +102,16 @@ public abstract ApplicationId submitApplication(ApplicationSubmissionContext app
    *
    * @param applicationId
    *          {@link ApplicationId} of the application that needs to be killed
+   * @param reason justification for application kill
    * @throws YarnException
    *           in case of errors or if YARN rejects the request due to
    *           access-control restrictions.
    * @throws IOException
    * @see #getQueueAclsInfo()
    */
+  public abstract void killApplication(ApplicationId applicationId,
+      String reason) throws YarnException, IOException;
+
   public abstract void killApplication(ApplicationId applicationId) throws
       YarnException, IOException;
 
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/YarnClientImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/YarnClientImpl.java
index a5ff9f6..26e6c11 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/YarnClientImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/YarnClientImpl.java
@@ -179,9 +179,20 @@ public YarnClientApplication createApplication()
   @Override
   public void killApplication(ApplicationId applicationId)
       throws YarnException, IOException {
+    killApplication(applicationId, null);
+  }
+
+  public void killApplication(ApplicationId applicationId, String reason)
+      throws YarnException, IOException {
     KillApplicationRequest request =
         Records.newRecord(KillApplicationRequest.class);
     request.setApplicationId(applicationId);
+    if (reason != null) {
+      if (reason.length() > YarnClient.MAX_KILL_REASON_LENGTH) {
+        reason = reason.substring(0, YarnClient.MAX_KILL_REASON_LENGTH);
+      }
+    }
+    request.setReason(reason);
 
     try {
       int pollCount = 0;
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/cli/ApplicationCLI.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/cli/ApplicationCLI.java
index a7b7d65..a391c61 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/cli/ApplicationCLI.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/cli/ApplicationCLI.java
@@ -74,7 +74,10 @@ public int run(String[] args) throws Exception {
         "Supports optional use of -appTypes to filter applications " +
         "based on application type, " +
         "and -appStates to filter applications based on application state");
-    opts.addOption(KILL_CMD, true, "Kills the application.");
+    Option killOption = new Option(KILL_CMD, true, "Kills the application.");
+    killOption.setArgs(2);
+    killOption.setArgName("Application ID [reason]");
+    opts.addOption(killOption);
     opts.addOption(HELP_CMD, false, "Displays help for all commands.");
     Option appTypeOpt = new Option(APP_TYPE_CMD, true, "Works with -list to " +
         "filter applications based on " +
@@ -90,7 +93,6 @@ public int run(String[] args) throws Exception {
     appStateOpt.setArgs(Option.UNLIMITED_VALUES);
     appStateOpt.setArgName("States");
     opts.addOption(appStateOpt);
-    opts.getOption(KILL_CMD).setArgName("Application ID");
     opts.getOption(STATUS_CMD).setArgName("Application ID");
 
     int exitCode = -1;
@@ -149,11 +151,12 @@ public int run(String[] args) throws Exception {
       }
       listApplications(appTypes, appStates);
     } else if (cliParser.hasOption(KILL_CMD)) {
-      if (args.length != 2) {
+      if (args.length < 2 || args.length > 3) {
         printUsage(opts);
         return exitCode;
       }
-      killApplication(cliParser.getOptionValue(KILL_CMD));
+      final String[] killArgs = cliParser.getOptionValues(KILL_CMD);
+      killApplication(killArgs[0], killArgs.length == 2 ? killArgs[1] : null);
     } else if (cliParser.hasOption(HELP_CMD)) {
       printUsage(opts);
       return 0;
@@ -224,10 +227,11 @@ private void listApplications(Set<String> appTypes,
    * Kills the application with the application id as appId
    *
    * @param applicationId
+   * @param reason
    * @throws YarnException
    * @throws IOException
    */
-  private void killApplication(String applicationId)
+  private void killApplication(String applicationId, String reason)
       throws YarnException, IOException {
     ApplicationId appId = ConverterUtils.toApplicationId(applicationId);
     ApplicationReport appReport = client.getApplicationReport(appId);
@@ -237,7 +241,7 @@ private void killApplication(String applicationId)
       sysout.println("Application " + applicationId + " has already finished ");
     } else {
       sysout.println("Killing application " + applicationId);
-      client.killApplication(appId);
+      client.killApplication(appId, reason);
     }
   }
 
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/KillApplicationRequestPBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/KillApplicationRequestPBImpl.java
index db97367..99ca603 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/KillApplicationRequestPBImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/KillApplicationRequestPBImpl.java
@@ -38,8 +38,8 @@
   boolean viaProto = false;
 
   private ApplicationId applicationId = null;
-
-
+  private String reason;
+
   public KillApplicationRequestPBImpl() {
     builder = KillApplicationRequestProto.newBuilder();
   }
@@ -80,6 +80,9 @@ private void mergeLocalToBuilder() {
     if (this.applicationId != null) {
       builder.setApplicationId(convertToProtoFormat(this.applicationId));
     }
+    if (reason != null) {
+      builder.setReason(reason);
+    }
   }
 
   private void mergeLocalToProto() {
@@ -119,6 +122,27 @@ public void setApplicationId(ApplicationId applicationId) {
     this.applicationId = applicationId;
   }
 
+  @Override
+  public String getReason() {
+    if (reason != null) {
+      return reason;
+    }
+    final KillApplicationRequestProtoOrBuilder p = viaProto ? proto : builder;
+    if (p.hasReason()) {
+      reason = p.getReason();
+    }
+    return reason;
+  }
+
+  @Override
+  public void setReason(String reason) {
+    maybeInitBuilder();
+    if (reason == null) {
+      builder.clearReason();
+    }
+    this.reason = reason;
+  }
+
   private ApplicationIdPBImpl convertFromProtoFormat(ApplicationIdProto p) {
     return new ApplicationIdPBImpl(p);
   }
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java
index cd2226f..959aad6 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java
@@ -90,8 +90,7 @@
 import org.apache.hadoop.yarn.security.client.RMDelegationTokenIdentifier;
 import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger.AuditConstants;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
-import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent;
-import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEventType;
+import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppKillEvent;
 import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNodeReport;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.YarnScheduler;
@@ -347,6 +346,7 @@ public KillApplicationResponse forceKillApplication(
       KillApplicationRequest request) throws YarnException {
 
     ApplicationId applicationId = request.getApplicationId();
+    final String reason = request.getReason();
 
     UserGroupInformation callerUGI;
     try {
@@ -386,7 +386,7 @@ public KillApplicationResponse forceKillApplication(
       return KillApplicationResponse.newInstance(true);
     } else {
       this.rmContext.getDispatcher().getEventHandler()
-          .handle(new RMAppEvent(applicationId, RMAppEventType.KILL));
+          .handle(new RMAppKillEvent(applicationId, reason));
       return KillApplicationResponse.newInstance(false);
     }
   }
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java
index 0bf7c81..afc0d0e 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java
@@ -929,6 +929,11 @@ private static String getAppKilledDiagnostics() {
     @SuppressWarnings("unchecked")
     @Override
     public void transition(RMAppImpl app, RMAppEvent event) {
+      final RMAppKillEvent killEvent = (RMAppKillEvent)event;
+      final String reason = killEvent.getReason();
+      if (reason != null) {
+        app.diagnostics.append("user diagnostics: \"" + reason + "\";");
+      }
       app.stateBeforeKilling = app.getState();
       app.handler.handle(new RMAppAttemptEvent(app.currentAttempt
           .getAppAttemptId(), RMAppAttemptEventType.KILL));
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppKillEvent.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppKillEvent.java
new file mode 100644
index 0000000..7fb6db3
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppKillEvent.java
@@ -0,0 +1,32 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.yarn.server.resourcemanager.rmapp;
+
+import org.apache.hadoop.yarn.api.records.ApplicationId;
+
+public class RMAppKillEvent extends RMAppEvent {
+  private final String reason;
+
+  public RMAppKillEvent(ApplicationId applicationId, String reason) {
+    super(applicationId, RMAppEventType.KILL);
+    this.reason = reason;
+  }
+
+  public String getReason() {
+    return reason;
+  }
+}
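
Usage note (not part of the patch): a minimal sketch of how the new kill-with-reason API could be driven once this patch is applied. The class name and the application id below are hypothetical, invented only for illustration.

    import org.apache.hadoop.yarn.api.records.ApplicationId;
    import org.apache.hadoop.yarn.client.api.YarnClient;
    import org.apache.hadoop.yarn.conf.YarnConfiguration;

    public class KillWithReasonExample {
      public static void main(String[] args) throws Exception {
        // Standard YarnClient lifecycle: create, init, start.
        YarnClient client = YarnClient.createYarnClient();
        client.init(new YarnConfiguration());
        client.start();
        try {
          // Hypothetical application id; normally obtained from the RM or a job client.
          ApplicationId appId = ApplicationId.newInstance(1234567890123L, 42);
          // Per YarnClientImpl above, reasons longer than
          // YarnClient.MAX_KILL_REASON_LENGTH (128) are truncated client-side
          // before the KillApplicationRequest is sent to the RM.
          client.killApplication(appId, "Killed by operator: runaway job");
        } finally {
          client.stop();
        }
      }
    }

Per the ApplicationCLI change, the same should be reachable from the command line as "yarn application -kill <Application ID> <reason>", with the reason surfacing in the application's diagnostics on the RM (see the RMAppImpl transition above).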