diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/DiagnosticsService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/DiagnosticsService.java
new file mode 100644
index 00000000000..c1a597f9fbc
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/DiagnosticsService.java
@@ -0,0 +1,202 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.resourcemanager;
+
+import com.google.common.annotations.VisibleForTesting;
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.util.Shell;
+import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.CommonIssues;
+import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.IssueType;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Optional;
+
+/**
+ * Utility methods to launch the diagnostics collector script and to parse
+ * its output.
+ */
+@InterfaceAudience.Private
+public final class DiagnosticsService {
+ private static final Logger LOG = LoggerFactory
+ .getLogger(DiagnosticsService.class);
+ private static final String PYTHON_COMMAND = "python";
+ private static final String COLON = ":";
+ private static final String COMMA = ",";
+ private static final String OUT_DIR_PREFIX = "out_dir:";
+ private static final String EXECUTION_ERROR_MESSAGE = "Error occurred " +
+ "during the execution of the diagnostic script with the command '{}'.";
+ private static final String INCORRECT_NUMBER_OF_PARAMETERS_MESSAGE =
+ "Error while parsing diagnostic option, incorrect number of " +
+ "parameters. Expected 1 or 2, but got {}. Skipping this option.";
+
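+ // Default location of the diagnostics collector script; tests can point
+ // this to another script via setScriptLocation().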
+ private static String scriptLocation = "/tmp/diagnostics_collector.py";
+
+ private DiagnosticsService() {
+ // hidden constructor
+ }
+
+ public static CommonIssues listCommonIssues() throws Exception {
+ if (Shell.WINDOWS) {
+ throw new UnsupportedOperationException("Not implemented for Windows.");
+ }
+ CommonIssues issueTypes = new CommonIssues();
+ ProcessBuilder pb = createProcessBuilder(CommandArgument.LIST_ISSUES);
+
+ List<String> result = executeCommand(pb);
+ for (String line : result) {
+ issueTypes.add(parseIssueType(line));
+ }
+
+ return issueTypes;
+ }
+
+ public static String collectIssueData(String issueId, List<String> args)
+ throws Exception {
+ if (Shell.WINDOWS) {
+ throw new UnsupportedOperationException("Not implemented for Windows.");
+ }
+ ProcessBuilder pb = createProcessBuilder(CommandArgument.COMMAND, issueId,
+ args);
+
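+ // The collector script reports where it stored its results on a line of
+ // the form "out_dir:<path>"; extract that path from the output.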
+ List<String> result = executeCommand(pb);
+ Optional<String> outputDirectory = result.stream()
+ .filter(e -> e.contains(OUT_DIR_PREFIX))
+ .findFirst();
+
+ if (!outputDirectory.isPresent()) {
+ LOG.error(EXECUTION_ERROR_MESSAGE, pb.command());
+ throw new IOException("Output directory in result not found.");
+ }
+
+ String[] splitOutputDirectory = outputDirectory.get().split(COLON);
+
+ if (splitOutputDirectory.length != 2) {
+ LOG.error(EXECUTION_ERROR_MESSAGE, pb.command());
+ throw new IOException("Output directory is invalid.");
+ }
+ return splitOutputDirectory[1];
+ }
+
+ @VisibleForTesting
+ protected static ProcessBuilder createProcessBuilder(
+ CommandArgument argument) {
+ return createProcessBuilder(argument, null, null);
+ }
+
+ @VisibleForTesting
+ protected static ProcessBuilder createProcessBuilder(
+ CommandArgument argument, String issueId, List<String> additionalArgs) {
+ List<String> commandList =
+ new ArrayList<>(Arrays.asList(PYTHON_COMMAND, scriptLocation,
+ argument.getShortOption()));
+
+ if (argument.equals(CommandArgument.COMMAND)) {
+ commandList.add(issueId);
+ if (additionalArgs != null) {
+ commandList.add(CommandArgument.ARGUMENTS.getShortOption());
+ commandList.addAll(additionalArgs);
+ }
+ }
+
+ return new ProcessBuilder(commandList);
+ }
+
+ private static List<String> executeCommand(ProcessBuilder pb)
+ throws Exception {
+ Process process = pb.start();
+ int exitCode;
+ List<String> result = new ArrayList<>();
+
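+ // Read the script's standard output line by line (stderr is not captured).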
+ try (BufferedReader reader = new BufferedReader(
+ new InputStreamReader(process.getInputStream(),
+ StandardCharsets.UTF_8))) {
+ String line;
+ while ((line = reader.readLine()) != null) {
+ result.add(line);
+ }
+ process.waitFor();
+ } catch (Exception e) {
+ LOG.error(EXECUTION_ERROR_MESSAGE, pb.command());
+ throw e;
+ }
+ exitCode = process.exitValue();
+ if (exitCode != 0) {
+ throw new IOException("The collector script exited with non-zero " +
+ "exit code: " + exitCode);
+ }
+
+ return result;
+ }
+
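+ /**
+ * Parses one line of the collector script's issue listing. The expected
+ * format is {@code <name>} or {@code <name>:<param1>,<param2>,...}.
+ *
+ * @param line a single line of the listing output
+ * @return the parsed issue type, or null if the line is malformed
+ */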
+ @VisibleForTesting
+ protected static IssueType parseIssueType(String line) {
+ String[] issueParams = line.split(COLON);
+ IssueType parsedIssueType;
+
+ if (issueParams.length < 1 || issueParams.length > 2) {
+ LOG.warn(INCORRECT_NUMBER_OF_PARAMETERS_MESSAGE,
+ issueParams.length);
+ return null;
+ } else {
+ String name = issueParams[0];
+ parsedIssueType = new IssueType(name);
+ if (issueParams.length == 2) {
+ List<String> parameterList =
+ Arrays.asList(issueParams[1].split(COMMA));
+ parsedIssueType.setParameters(parameterList);
+ }
+ }
+
+ return parsedIssueType;
+ }
+
+ @VisibleForTesting
+ protected static void setScriptLocation(String scriptLocationParam) {
+ scriptLocation = scriptLocationParam;
+ }
+
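+ /**
+ * Short command line options passed to the collector script.
+ */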
+ enum CommandArgument {
+ LIST_ISSUES("-l"),
+ COMMAND("-c"),
+ ARGUMENTS("-a");
+
+ private final String shortOption;
+
+ CommandArgument(String shortOption) {
+ this.shortOption = shortOption;
+ }
+
+ public static CommandArgument fromString(String option) {
+ for (CommandArgument arg : CommandArgument.values()) {
+ if (arg.shortOption.equals(option)) {
+ return arg;
+ }
+ }
+ return null;
+ }
+
+ public String getShortOption() {
+ return shortOption;
+ }
+ }
+}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMWSConsts.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMWSConsts.java
index 82ceed37c2d..533d521f759 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMWSConsts.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMWSConsts.java
@@ -42,6 +42,13 @@
/** Path for {@code RMWebServiceProtocol#getClusterMetricsInfo}. */
public static final String METRICS = "/metrics";
+ /** Path for {@code RMWebServices#getCommonIssueList}. */
+ public static final String COMMON_ISSUE_LIST = "/common-issues/list";
+
+ /** Path for {@code RMWebServices#getCommonIssueData}. */
+ public static final String COMMON_ISSUE_COLLECT =
+ "/common-issues/collect";
+
/** Path for {@code RMWebServiceProtocol#getSchedulerInfo}. */
public static final String SCHEDULER = "/scheduler";
@@ -213,6 +220,8 @@
// ----------------QueryParams for RMWebServiceProtocol----------------
+ public static final String ISSUEID = "issueId";
+ public static final String ISSUEARGS = "args";
public static final String TIME = "time";
public static final String STATES = "states";
public static final String NODEID = "nodeId";
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMWebServiceProtocol.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMWebServiceProtocol.java
index f2736e3773c..652baa429f7 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMWebServiceProtocol.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMWebServiceProtocol.java
@@ -19,6 +19,7 @@
package org.apache.hadoop.yarn.server.resourcemanager.webapp;
import java.io.IOException;
+import java.util.List;
import java.util.Set;
import javax.servlet.http.HttpServletRequest;
@@ -47,6 +48,7 @@
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ClusterInfo;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ClusterMetricsInfo;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ClusterUserInfo;
+import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.CommonIssues;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.DelegationToken;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.LabelsToNodesInfo;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.NodeInfo;
@@ -110,6 +112,25 @@
*/
ClusterMetricsInfo getClusterMetricsInfo();
+ /**
+ * This method retrieves the list of common diagnosable issues, and it is
+ * reachable by using {@link RMWSConsts#COMMON_ISSUE_LIST}.
+ *
+ * @return the list of available diagnostic cases
+ */
+ CommonIssues getCommonIssueList();
+
+ /**
+ * This method retrieves the diagnostic information for the selected issue,
+ * and it is reachable by using {@link RMWSConsts#COMMON_ISSUE_COLLECT}.
+ *
+ * @param issueId the selected issue's ID. It is a QueryParam.
+ * @param args the necessary arguments for diagnosing the issue.
+ * It is a QueryParam.
+ * @return the diagnostic information associated with the selected issue
+ */
+ Response getCommonIssueData(String issueId, List<String> args);
+
/**
* This method retrieves the current scheduler status, and it is reachable by
* using {@link RMWSConsts#SCHEDULER}.
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMWebServices.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMWebServices.java
index 7c4e5df5bc7..a5d4230a021 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMWebServices.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMWebServices.java
@@ -132,6 +132,7 @@
import org.apache.hadoop.yarn.security.client.RMDelegationTokenIdentifier;
import org.apache.hadoop.yarn.server.api.protocolrecords.UpdateNodeResourceRequest;
import org.apache.hadoop.yarn.server.resourcemanager.AdminService;
+import org.apache.hadoop.yarn.server.resourcemanager.DiagnosticsService;
import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger;
import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger.AuditConstants;
import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
@@ -171,6 +172,7 @@
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ClusterInfo;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ClusterMetricsInfo;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ClusterUserInfo;
+import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.CommonIssues;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.DelegationToken;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.FairSchedulerInfo;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.FifoSchedulerInfo;
@@ -212,6 +214,7 @@
import org.apache.hadoop.yarn.webapp.BadRequestException;
import org.apache.hadoop.yarn.webapp.ForbiddenException;
import org.apache.hadoop.yarn.webapp.NotFoundException;
+import org.apache.hadoop.yarn.webapp.WebAppException;
import org.apache.hadoop.yarn.webapp.util.WebAppUtils;
import org.apache.hadoop.yarn.webapp.dao.ConfInfo;
import org.apache.hadoop.yarn.webapp.dao.SchedConfUpdateInfo;
@@ -385,6 +388,42 @@ public ClusterMetricsInfo getClusterMetricsInfo() {
return new ClusterMetricsInfo(this.rm);
}
+ @GET
+ @Path(RMWSConsts.COMMON_ISSUE_LIST)
+ @Produces({ MediaType.APPLICATION_JSON + "; " + JettyUtils.UTF_8,
+ MediaType.APPLICATION_XML + "; " + JettyUtils.UTF_8 })
+ @Override
+ public CommonIssues getCommonIssueList() {
+ initForReadableEndpoints();
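+ // DiagnosticsService runs the collector script with its list option and
+ // parses each reported issue type.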
+ try {
+ return DiagnosticsService.listCommonIssues();
+ } catch (Exception e) {
+ throw new WebAppException("Error collecting the common " +
+ "issue types. Error message: " + e.getMessage() + ". " +
+ "For more information please check the ResourceManager logs.");
+ }
+ }
+
+ @GET
+ @Path(RMWSConsts.COMMON_ISSUE_COLLECT)
+ @Produces({ MediaType.APPLICATION_JSON + "; " + JettyUtils.UTF_8,
+ MediaType.APPLICATION_XML + "; " + JettyUtils.UTF_8 })
+ @Override
+ public Response getCommonIssueData(
+ @QueryParam(RMWSConsts.ISSUEID) String issueId,
+ @QueryParam(RMWSConsts.ISSUEARGS) List<String> args) {
+ initForReadableEndpoints();
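+ // The response entity is the directory path where the collector script
+ // wrote the collected diagnostic data.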
+ try {
+ return Response.status(Status.OK)
+ .entity(DiagnosticsService.collectIssueData(issueId, args))
+ .build();
+ } catch (Exception e) {
+ throw new WebAppException("Error collecting the selected " +
+ "issue data. Error message: " + e.getMessage() + ". " +
+ "For more information please check the ResourceManager logs.");
+ }
+ }
+
@GET
@Path(RMWSConsts.SCHEDULER)
@Produces({ MediaType.APPLICATION_JSON + "; " + JettyUtils.UTF_8,
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/CommonIssues.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/CommonIssues.java
new file mode 100644
index 00000000000..5ccf36eba7b
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/CommonIssues.java
@@ -0,0 +1,48 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.resourcemanager.webapp.dao;
+
+import javax.xml.bind.annotation.XmlAccessType;
+import javax.xml.bind.annotation.XmlAccessorType;
+import javax.xml.bind.annotation.XmlRootElement;
+import java.util.ArrayList;
+import java.util.List;
+
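+/**
+ * DAO object wrapping the list of diagnosable issue types returned by the
+ * common-issues REST endpoints.
+ */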
+@XmlRootElement(name = "commonIssues")
+@XmlAccessorType(XmlAccessType.FIELD)
+public class CommonIssues {
+ private List<IssueType> issue = new ArrayList<>();
+
+ public CommonIssues() {}
+
+ public void add(IssueType type) {
+ if (type != null &&
+ issue.stream().noneMatch(e -> e.getName().equals(type.getName()))) {
+ issue.add(type);
+ }
+ }
+
+ public List<IssueType> getIssueList() {
+ return issue;
+ }
+
+ public void addAll(List<IssueType> issueTypes) {
+ issue.addAll(issueTypes);
+ }
+}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/IssueType.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/IssueType.java
new file mode 100644
index 00000000000..27f96a0ae2b
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/IssueType.java
@@ -0,0 +1,61 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.resourcemanager.webapp.dao;
+
+import javax.xml.bind.annotation.XmlAccessType;
+import javax.xml.bind.annotation.XmlAccessorType;
+import javax.xml.bind.annotation.XmlRootElement;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
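+/**
+ * DAO object describing a single diagnosable issue type: its name and the
+ * parameters expected by the collector script for that issue.
+ */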
+@XmlRootElement(name = "issue")
+@XmlAccessorType(XmlAccessType.FIELD)
+public class IssueType {
+ private String name;
+ private List<String> parameters = new ArrayList<>();
+
+ public IssueType() {}
+
+ public IssueType(String name) {
+ this.name = name;
+ this.parameters = Collections.emptyList();
+ }
+
+ public IssueType(String name, List<String> parameters) {
+ this.name = name;
+ this.parameters = new ArrayList<>(parameters);
+ }
+
+ public String getName() {
+ return name;
+ }
+
+ public void setName(String name) {
+ this.name = name;
+ }
+
+ public List<String> getParameters() {
+ return parameters;
+ }
+
+ public void setParameters(List<String> parameters) {
+ this.parameters = parameters;
+ }
+}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/DiagnosticsServiceTest.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/DiagnosticsServiceTest.java
new file mode 100644
index 00000000000..b65aac8db61
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/DiagnosticsServiceTest.java
@@ -0,0 +1,199 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.resourcemanager;
+
+import org.apache.commons.collections.CollectionUtils;
+import org.apache.hadoop.util.Shell;
+import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.CommonIssues;
+import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.IssueType;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+
+import static org.junit.Assert.fail;
+
+
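+/**
+ * Tests for {@link DiagnosticsService}, driven by the stub collector script
+ * under src/test/resources/diagnostics.
+ */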
+public class DiagnosticsServiceTest {
+ private static final String ISSUE_NAME_APP_FAILED = "application_failed";
+ private static final String ISSUE_NAME_APP_FAILED_NECESSARY_ARGS =
+ "application_failed_necessary_args";
+ private static final String ISSUE_NAME_APP_HANGING = "application_hanging";
+ private static final String ISSUE_NAME_SCHED_ISSUE =
+ "scheduler_related_issue";
+ private static final String ISSUE_NAME_RM_NM_ISSUE = "rm_nm_start_failure";
+ private static final String ISSUE_ARG_APP_ID = "appId";
+ private static final String ISSUE_ARG_NODE_ID = "nodeId";
+ private static final String COLON = ":";
+ private static final String COMMA = ",";
+ private static final String OUTPUT_DIR = "/tmp";
+
+ @Before
+ public void setUp() {
+ DiagnosticsService.setScriptLocation("src/test/resources/diagnostics" +
+ "/diagnostics_collector_test.py");
+ handleWindowsRuntime();
+ }
+
+ @Test
+ public void testListCommonIssuesValidCaseWithOptionsToBeSkipped()
+ throws Exception {
+ // The test script lists six entries; a duplicate name and an entry with
+ // too many colon-separated fields are dropped, leaving four unique types.
+ CommonIssues commonIssues = DiagnosticsService.listCommonIssues();
+
+ Assert.assertEquals(4, commonIssues.getIssueList().size());
+ assertIssueEquality(ISSUE_NAME_APP_FAILED,
+ Collections.singletonList(ISSUE_ARG_APP_ID),
+ commonIssues.getIssueList().get(0));
+
+ assertIssueEquality(ISSUE_NAME_APP_HANGING,
+ Arrays.asList(ISSUE_ARG_APP_ID, ISSUE_ARG_NODE_ID),
+ commonIssues.getIssueList().get(1));
+
+ assertIssueEquality(ISSUE_NAME_SCHED_ISSUE,
+ Collections.emptyList(),
+ commonIssues.getIssueList().get(2));
+
+ assertIssueEquality(ISSUE_NAME_RM_NM_ISSUE,
+ Collections.singletonList(ISSUE_ARG_NODE_ID),
+ commonIssues.getIssueList().get(3));
+ }
+
+ @Test(expected = IOException.class)
+ public void testListCommonIssuesScriptMissing() throws Exception {
+ DiagnosticsService.setScriptLocation("/src/invalidLocation/script.py");
+ DiagnosticsService.listCommonIssues();
+ }
+
+ @Test
+ public void testCollectIssueDataValidOutput() throws Exception {
+ // valid case: the script prints out one directory
+ Assert.assertEquals(OUTPUT_DIR, DiagnosticsService.collectIssueData(
+ ISSUE_NAME_APP_FAILED, null));
+ }
+
+ @Test
+ public void testCollectIssueDataValidOutputWhenArgsArePresent()
+ throws Exception {
+ // valid case: appId and nodeId are necessary params and they are present
+ Assert.assertEquals(OUTPUT_DIR, DiagnosticsService.collectIssueData(
+ ISSUE_NAME_APP_FAILED_NECESSARY_ARGS,
+ Arrays.asList(ISSUE_ARG_APP_ID, ISSUE_ARG_NODE_ID)));
+ }
+
+ @Test(expected = IOException.class)
+ public void testCollectIssueDataInvalidOutputWhenWrongArgsArePresent()
+ throws Exception {
+ // invalid case: appId and nodeId are the required params, but two appIds
+ // are given
+ DiagnosticsService.collectIssueData(
+ ISSUE_NAME_APP_FAILED_NECESSARY_ARGS,
+ Arrays.asList(ISSUE_ARG_APP_ID, ISSUE_ARG_APP_ID));
+ }
+
+ @Test(expected = IOException.class)
+ public void testCollectIssueDataInvalidOutputEmptyDir() throws Exception {
+ // invalid case: the script prints out an empty string as directory
+ // with the correct prefix
+ DiagnosticsService.collectIssueData(ISSUE_NAME_APP_HANGING, null);
+ }
+
+ @Test(expected = IOException.class)
+ public void testCollectIssueDataInvalidOutputMissingOutputDir()
+ throws Exception {
+ // invalid case: the script doesn't print out the correct output directory
+ DiagnosticsService.collectIssueData(ISSUE_NAME_SCHED_ISSUE, null);
+ }
+
+ @Test(expected = IOException.class)
+ public void testCollectIssueDataInvalidOutputMissingPrints()
+ throws Exception {
+ // invalid case: the script doesn't print out anything
+ DiagnosticsService.collectIssueData(ISSUE_NAME_RM_NM_ISSUE, null);
+ }
+
+ @Test(expected = IOException.class)
+ public void testCollectIssueDataScriptMissing() throws Exception {
+ DiagnosticsService.setScriptLocation("/src/invalidLocation/script.py");
+ DiagnosticsService.collectIssueData(ISSUE_NAME_APP_FAILED, null);
+ }
+
+ @Test
+ public void testParseIssueTypeValidCases() {
+ // valid case: name, no parameters
+ String line = ISSUE_NAME_APP_FAILED;
+
+ assertIssueEquality(ISSUE_NAME_APP_FAILED, Collections.emptyList(),
+ DiagnosticsService.parseIssueType(line));
+
+ // valid case: name, one parameter
+ line = ISSUE_NAME_APP_FAILED + COLON + ISSUE_ARG_APP_ID;
+
+ assertIssueEquality(ISSUE_NAME_APP_FAILED,
+ Collections.singletonList(ISSUE_ARG_APP_ID),
+ DiagnosticsService.parseIssueType(line));
+
+ // valid case: name, two parameters
+ line = ISSUE_NAME_APP_FAILED + COLON + ISSUE_ARG_APP_ID +
+ COMMA + ISSUE_ARG_NODE_ID;
+
+ assertIssueEquality(ISSUE_NAME_APP_FAILED,
+ Arrays.asList(ISSUE_ARG_APP_ID, ISSUE_ARG_NODE_ID),
+ DiagnosticsService.parseIssueType(line));
+ }
+
+ @Test
+ public void testParseIssueTypeInvalidCases() {
+ // invalid case: too many values
+ String line = ISSUE_NAME_APP_FAILED + COLON + ISSUE_NAME_APP_FAILED +
+ COLON + ISSUE_NAME_APP_FAILED;
+
+ IssueType issueType = DiagnosticsService.parseIssueType(line);
+ Assert.assertNull(issueType);
+ }
+
+ private void assertIssueEquality(String expectedIssueName,
+ List<String> expectedParams,
+ IssueType actualIssue) {
+ Assert.assertEquals(expectedIssueName,
+ actualIssue.getName());
+ Assert.assertEquals(expectedParams.size(),
+ actualIssue.getParameters().size());
+ Assert.assertTrue(CollectionUtils.isEqualCollection(
+ expectedParams, actualIssue.getParameters()));
+ }
+
+ private void handleWindowsRuntime() {
+ if (Shell.WINDOWS) {
+ try {
+ DiagnosticsService.listCommonIssues();
+ fail("On Windows listCommonIssues should throw " +
+ "UnsupportedOperationException");
+ } catch (UnsupportedOperationException e) {
+ // expected: DiagnosticsService is not implemented for Windows
+ }
+ }
+ }
+}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/resources/diagnostics/diagnostics_collector_test.py b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/resources/diagnostics/diagnostics_collector_test.py
new file mode 100644
index 00000000000..0a81261cd8a
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/resources/diagnostics/diagnostics_collector_test.py
@@ -0,0 +1,79 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import print_function
+
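+# Stub collector script for DiagnosticsServiceTest: it emulates the command
+# line interface (-l, -c, -a) and the "out_dir:" output format that
+# DiagnosticsService parses.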
+import argparse
+import os
+import sys
+
+
+def application_failed():
+    # Prints the output directory correctly
+    print("application_failed")
+    print("out_dir:/tmp")
+
+
+def application_failed_necessary_args():
+    # Prints the output directory only if "appId" and "nodeId" are present
+    if args.arguments is None or len(args.arguments) != 2:
+        sys.exit(os.EX_USAGE)
+    elif args.arguments[0] == "appId" and args.arguments[1] == "nodeId":
+        print("out_dir:/tmp")
+
+
+def application_hanging():
+    # Prints an empty output directory
+    print("application_hanging")
+    print("out_dir:")
+
+
+def scheduler_related_issue():
+    # Doesn't print the output directory line at all
+    print("scheduler_related_issue")
+
+
+def rm_nm_start_failure():
+    # Doesn't print anything
+    sys.exit(os.EX_OK)
+
+
+def list_issues():
+    print("application_failed:appId",
+          "application_hanging:appId,nodeId",
+          "scheduler_related_issue",
+          "rm_nm_start_failure:nodeId",
+          "rm_nm_start_failure:nodeId",
+          "rm_nm_start_failure_1:nodeId:nodeId",
+          sep="\n")
+
+
+ISSUE_MAP = {
+    "application_failed": application_failed,
+    "application_failed_necessary_args": application_failed_necessary_args,
+    "application_hanging": application_hanging,
+    "scheduler_related_issue": scheduler_related_issue,
+    "rm_nm_start_failure": rm_nm_start_failure
+}
+
+parser = argparse.ArgumentParser()
+parser.add_argument("-l", "--list", action="store_true",
+                    help="List the available issue types.")
+parser.add_argument("-c", "--command", choices=list(ISSUE_MAP),
+                    help="Initiate the diagnostic information collection "
+                         "for diagnosing the selected issue type.")
+parser.add_argument("-a", "--arguments", nargs='*',
+                    help="The required arguments for the selected issue type.")
+args = parser.parse_args()
+
+if not (args.list or args.command):
+    parser.error('No action requested, use --list or --command')
+
+if args.list:
+    list_issues()
+    sys.exit(os.EX_OK)
+
+func = ISSUE_MAP[args.command]
+func()
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/webapp/DefaultRequestInterceptorREST.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/webapp/DefaultRequestInterceptorREST.java
index 00a8beb6684..9fcb41ba0ee 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/webapp/DefaultRequestInterceptorREST.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/webapp/DefaultRequestInterceptorREST.java
@@ -20,6 +20,7 @@
import java.io.IOException;
import java.util.HashMap;
+import java.util.List;
import java.util.Map;
import java.util.Set;
@@ -47,6 +48,7 @@
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ClusterInfo;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ClusterMetricsInfo;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ClusterUserInfo;
+import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.CommonIssues;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.DelegationToken;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.LabelsToNodesInfo;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.NodeInfo;
@@ -128,6 +130,22 @@ public ClusterMetricsInfo getClusterMetricsInfo() {
getConf());
}
+ @Override
+ public CommonIssues getCommonIssueList() {
+ return RouterWebServiceUtil.genericForward(webAppAddress, null,
+ CommonIssues.class, HTTPMethods.GET,
+ RMWSConsts.RM_WEB_SERVICE_PATH + RMWSConsts.COMMON_ISSUE_LIST,
+ null, null, getConf());
+ }
+
+ @Override
+ public Response getCommonIssueData(String issueId, List<String> args) {
+ return RouterWebServiceUtil.genericForward(webAppAddress, null,
+ Response.class, HTTPMethods.GET,
+ RMWSConsts.RM_WEB_SERVICE_PATH + RMWSConsts.COMMON_ISSUE_COLLECT,
+ null, null, getConf());
+ }
+
@Override
public SchedulerTypeInfo getSchedulerInfo() {
return RouterWebServiceUtil.genericForward(webAppAddress, null,
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/webapp/FederationInterceptorREST.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/webapp/FederationInterceptorREST.java
index ab97b1a7fd9..c5e90c48f58 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/webapp/FederationInterceptorREST.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/webapp/FederationInterceptorREST.java
@@ -75,6 +75,7 @@
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ClusterInfo;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ClusterMetricsInfo;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ClusterUserInfo;
+import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.CommonIssues;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.DelegationToken;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.LabelsToNodesInfo;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.NodeInfo;
@@ -1131,6 +1132,16 @@ public ClusterUserInfo getClusterUserInfo(HttpServletRequest hsr) {
throw new NotImplementedException("Code is not implemented");
}
+ @Override
+ public CommonIssues getCommonIssueList() {
+ throw new NotImplementedException("Code is not implemented");
+ }
+
+ @Override
+ public Response getCommonIssueData(String issueId, List<String> args) {
+ throw new NotImplementedException("Code is not implemented");
+ }
+
@Override
public SchedulerTypeInfo getSchedulerInfo() {
throw new NotImplementedException("Code is not implemented");
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/webapp/RouterWebServices.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/webapp/RouterWebServices.java
index bde46484d6b..fb9f7cf1832 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/webapp/RouterWebServices.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/webapp/RouterWebServices.java
@@ -70,6 +70,7 @@
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ClusterInfo;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ClusterMetricsInfo;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ClusterUserInfo;
+import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.CommonIssues;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.DelegationToken;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.LabelsToNodesInfo;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.NodeInfo;
@@ -356,6 +357,30 @@ public ClusterMetricsInfo getClusterMetricsInfo() {
return pipeline.getRootInterceptor().getClusterMetricsInfo();
}
+ @GET
+ @Path(RMWSConsts.COMMON_ISSUE_LIST)
+ @Produces({ MediaType.APPLICATION_JSON + "; " + JettyUtils.UTF_8,
+ MediaType.APPLICATION_XML + "; " + JettyUtils.UTF_8 })
+ @Override
+ public CommonIssues getCommonIssueList() {
+ init();
+ RequestInterceptorChainWrapper pipeline = getInterceptorChain(null);
+ return pipeline.getRootInterceptor().getCommonIssueList();
+ }
+
+ @GET
+ @Path(RMWSConsts.COMMON_ISSUE_COLLECT)
+ @Produces({ MediaType.APPLICATION_JSON + "; " + JettyUtils.UTF_8,
+ MediaType.APPLICATION_XML + "; " + JettyUtils.UTF_8 })
+ @Override
+ public Response getCommonIssueData(
+ @QueryParam(RMWSConsts.ISSUEID) String issueId,
+ @QueryParam(RMWSConsts.ISSUEARGS) List<String> args) {
+ init();
+ RequestInterceptorChainWrapper pipeline = getInterceptorChain(null);
+ return pipeline.getRootInterceptor().getCommonIssueData(issueId, args);
+ }
+
@GET
@Path(RMWSConsts.SCHEDULER)
@Produces({ MediaType.APPLICATION_JSON + "; " + JettyUtils.UTF_8,
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/webapp/MockRESTRequestInterceptor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/webapp/MockRESTRequestInterceptor.java
index 67c9d671fb1..56b790be941 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/webapp/MockRESTRequestInterceptor.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/webapp/MockRESTRequestInterceptor.java
@@ -19,6 +19,7 @@
package org.apache.hadoop.yarn.server.router.webapp;
import java.io.IOException;
+import java.util.List;
import java.util.Set;
import javax.servlet.http.HttpServletRequest;
@@ -44,6 +45,7 @@
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ClusterInfo;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ClusterMetricsInfo;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ClusterUserInfo;
+import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.CommonIssues;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.DelegationToken;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.LabelsToNodesInfo;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.NodeInfo;
@@ -96,6 +98,16 @@ public ClusterMetricsInfo getClusterMetricsInfo() {
return new ClusterMetricsInfo();
}
+ @Override // TODO these may need to be edited for testing purposes
+ public CommonIssues getCommonIssueList() {
+ return new CommonIssues();
+ }
+
+ @Override // TODO these may need to be edited for testing purposes
+ public Response getCommonIssueData(String issueId, List<String> args) {
+ return Response.ok().build();
+ }
+
@Override
public SchedulerTypeInfo getSchedulerInfo() {
return new SchedulerTypeInfo();
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/webapp/PassThroughRESTRequestInterceptor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/webapp/PassThroughRESTRequestInterceptor.java
index 142a6511b93..a77841c64d6 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/webapp/PassThroughRESTRequestInterceptor.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/webapp/PassThroughRESTRequestInterceptor.java
@@ -19,6 +19,7 @@
package org.apache.hadoop.yarn.server.router.webapp;
import java.io.IOException;
+import java.util.List;
import java.util.Set;
import javax.servlet.http.HttpServletRequest;
@@ -42,6 +43,7 @@
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ClusterInfo;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ClusterMetricsInfo;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ClusterUserInfo;
+import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.CommonIssues;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.DelegationToken;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.LabelsToNodesInfo;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.NodeInfo;
@@ -121,6 +123,16 @@ public ClusterMetricsInfo getClusterMetricsInfo() {
return getNextInterceptor().getClusterMetricsInfo();
}
+ @Override
+ public CommonIssues getCommonIssueList() {
+ return getNextInterceptor().getCommonIssueList();
+ }
+
+ @Override
+ public Response getCommonIssueData(String issueId, List<String> args) {
+ return getNextInterceptor().getCommonIssueData(issueId, args);
+ }
+
@Override
public SchedulerTypeInfo getSchedulerInfo() {
return getNextInterceptor().getSchedulerInfo();