Index: ql/src/java/org/apache/hadoop/hive/ql/exec/ExecDriver.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/ExecDriver.java	(revision 5036)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/ExecDriver.java	(working copy)
@@ -52,6 +52,8 @@
 import org.apache.hadoop.hive.ql.DriverContext;
 import org.apache.hadoop.hive.ql.QueryPlan;
 import org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter;
+import org.apache.hadoop.hive.ql.exec.errors.ErrorAndSolution;
+import org.apache.hadoop.hive.ql.exec.errors.TaskLogProcessor;
 import org.apache.hadoop.hive.ql.history.HiveHistory.Keys;
 import org.apache.hadoop.hive.ql.io.HiveKey;
 import org.apache.hadoop.hive.ql.io.HiveOutputFormat;
@@ -753,17 +755,47 @@
     return "Ended Job = " + jobId;
   }
 
+  private String getTaskAttemptLogUrl(String taskTrackerHttpAddress,
+      String taskAttemptId) {
+    return taskTrackerHttpAddress + "/tasklog?taskid=" + taskAttemptId
+        + "&all=true";
+  }
+
+  // Used for showJobFailDebugInfo
+  private static class TaskInfo {
+    String jobId;
+    HashSet<String> logUrls;
+
+    public TaskInfo(String jobId) {
+      this.jobId = jobId;
+      logUrls = new HashSet<String>();
+    }
+    public void addLogUrl(String logUrl) {
+      logUrls.add(logUrl);
+    }
+    public HashSet<String> getLogUrls() {
+      return logUrls;
+    }
+    public String getJobId() {
+      return jobId;
+    }
+  }
+
+  @SuppressWarnings("deprecation")
   private void showJobFailDebugInfo(JobConf conf, RunningJob rj)
       throws IOException {
-
+    // Mapping from task ID to the number of failures
     Map<String, Integer> failures = new HashMap<String, Integer>();
+    // Successful task IDs
     Set<String> successes = new HashSet<String>();
-    Map<String, String> taskToJob = new HashMap<String, String>();
+    Map<String, TaskInfo> taskIdToInfo = new HashMap<String, TaskInfo>();
+
     int startIndex = 0;
+    // Loop to get all task completion events because getTaskCompletionEvents
+    // only returns a subset per call
     while (true) {
-      TaskCompletionEvent[] taskCompletions = rj
-          .getTaskCompletionEvents(startIndex);
+      TaskCompletionEvent[] taskCompletions =
+          rj.getTaskCompletionEvents(startIndex);
 
       if (taskCompletions == null || taskCompletions.length == 0) {
         break;
@@ -771,22 +803,36 @@
       boolean more = true;
       for (TaskCompletionEvent t : taskCompletions) {
-        // getTaskJobIDs return Strings for compatibility with Hadoop version
-        // without
-        // TaskID or TaskAttemptID
+        // getTaskJobIDs returns Strings for compatibility with Hadoop versions
+        // without TaskID or TaskAttemptID
         String[] taskJobIds = ShimLoader.getHadoopShims().getTaskJobIDs(t);
 
         if (taskJobIds == null) {
-          console
-              .printError("Task attempt info is unavailable in this Hadoop version");
+          console.printError("Task attempt info is unavailable in "
+              + "this Hadoop version");
           more = false;
           break;
         }
 
+        // For each task completion event, get the associated task id, job id
+        // and the logs
         String taskId = taskJobIds[0];
        String jobId = taskJobIds[1];
-        taskToJob.put(taskId, jobId);
+
+        TaskInfo ti = taskIdToInfo.get(taskId);
+        if (ti == null) {
+          ti = new TaskInfo(jobId);
+          taskIdToInfo.put(taskId, ti);
+        }
+        // These tasks should have come from the same job.
+        assert (ti.getJobId().equals(jobId));
+        ti.addLogUrl(
+            getTaskAttemptLogUrl(t.getTaskTrackerHttp(), t.getTaskId()));
+
+        // If a task failed, then keep track of the total number of failures
+        // for that task (typically, a task gets re-run up to 4 times if it
+        // fails)
         if (t.getTaskStatus() != TaskCompletionEvent.Status.SUCCEEDED) {
           Integer failAttempts = failures.get(taskId);
           if (failAttempts == null) {
@@ -821,16 +867,42 @@
     }
 
     // Display Error Message for tasks with the highest failure count
-    console.printError("\nFailed tasks with most" + "(" + maxFailures + ")"
-        + " failures " + ": ");
     String jtUrl = JobTrackerURLResolver.getURL(conf);
 
     for (String task : failures.keySet()) {
       if (failures.get(task).intValue() == maxFailures) {
-        String jobId = taskToJob.get(task);
-        String taskUrl = jtUrl + "/taskdetails.jsp?jobid=" + jobId + "&tipid="
-            + task.toString();
-        console.printError("Task URL: " + taskUrl + "\n");
+        TaskInfo ti = taskIdToInfo.get(task);
+        String jobId = ti.getJobId();
+        String taskUrl = jtUrl + "/taskdetails.jsp?jobid=" + jobId + "&tipid="
+            + task.toString();
+
+        TaskLogProcessor tlp = new TaskLogProcessor(conf);
+        for (String logUrl : ti.getLogUrls()) {
+          tlp.addTaskAttemptLogUrl(logUrl);
+        }
+
+        List<ErrorAndSolution> errors = tlp.getErrors();
+
+        StringBuilder sb = new StringBuilder();
+        // We use a StringBuilder and then call printError only once, as
+        // printError will write to both stderr and the error log file. In
+        // situations where both the stderr and the log file output are
+        // simultaneously output to a single stream, this will look cleaner.
+        sb.append("\n");
+        sb.append("Task with the most failures (" + maxFailures + "): \n");
+        sb.append("-----\n");
+        sb.append("Task ID:\n  " + task + "\n\n");
+        sb.append("URL:\n  " + taskUrl + "\n");
+
+        for (ErrorAndSolution e : errors) {
+          sb.append("\n");
+          sb.append("Possible error:\n  " + e.getError() + "\n\n");
+          sb.append("Solution:\n  " + e.getSolution() + "\n");
+        }
+        sb.append("-----\n");
+
+        console.printError(sb.toString());
+
+        // Only print out one task because that's good enough for debugging.
         break;
       }
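
The while-loop above depends on RunningJob.getTaskCompletionEvents() returning events in batches, so the caller must advance startIndex until an empty batch comes back. As a self-contained illustration of that idiom (the helper class name is hypothetical, not part of this patch):

    import java.io.IOException;
    import java.util.ArrayList;
    import java.util.List;

    import org.apache.hadoop.mapred.RunningJob;
    import org.apache.hadoop.mapred.TaskCompletionEvent;

    public class TaskCompletionEventFetcher {
      // Drain all completion events for a job, one batch per RPC.
      public static List<TaskCompletionEvent> fetchAll(RunningJob rj)
          throws IOException {
        List<TaskCompletionEvent> all = new ArrayList<TaskCompletionEvent>();
        int startIndex = 0;
        while (true) {
          TaskCompletionEvent[] batch = rj.getTaskCompletionEvents(startIndex);
          if (batch == null || batch.length == 0) {
            break;
          }
          for (TaskCompletionEvent t : batch) {
            all.add(t);
          }
          // Advance past the batch we just consumed.
          startIndex += batch.length;
        }
        return all;
      }
    }
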
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/errors/DataCorruptErrorHeuristic.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/errors/DataCorruptErrorHeuristic.java	(revision 0)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/errors/DataCorruptErrorHeuristic.java	(revision 0)
@@ -0,0 +1,75 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.errors;
+
+import java.util.HashSet;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Detects the condition where there is an error with one of the input files
+ * in the query.
+ *
+ * Conditions to check:
+ * 1. EOFException in the log.
+ * 2. A line indicating the split file in the log. This is needed to
+ *    generate the proper error message.
+ */
+public class DataCorruptErrorHeuristic extends RegexErrorHeuristic {
+
+  private static final String splitRegex = "split:.*";
+  private static final String exceptionRegex = "EOFException";
+
+  public DataCorruptErrorHeuristic() {
+    queryRegex = ".*";
+    logRegexes = new HashSet<String>();
+    logRegexes.add(splitRegex);
+    logRegexes.add(exceptionRegex);
+  }
+
+  @Override
+  public ErrorAndSolution getErrorAndSolution() {
+    ErrorAndSolution es = null;
+
+    if (queryMatches) {
+      if (regexToLogLines.get(exceptionRegex).size() > 0 &&
+          regexToLogLines.get(splitRegex).size() > 0) {
+
+        // There should only be a single split line...
+        assert (regexToLogLines.get(splitRegex).size() == 1);
+        String splitLogLine = regexToLogLines.get(splitRegex).get(0);
+
+        // Extract only 'split: hdfs://...'
+        Pattern p = Pattern.compile(splitRegex, Pattern.CASE_INSENSITIVE);
+        Matcher m = p.matcher(splitLogLine);
+        m.find();
+        String splitStr = m.group();
+
+        es = new ErrorAndSolution(
+            "Data file " + splitStr + " is corrupted.",
+            "Replace the file, e.g. by re-running the query that produced "
+            + "the source table or partition.");
+      }
+    }
+    reset();
+    return es;
+  }
+}
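
For illustration, the extraction step above is a plain Pattern/Matcher find over the collected log line. A runnable sketch with an invented log line (class name and sample text are hypothetical):

    import java.util.regex.Matcher;
    import java.util.regex.Pattern;

    public class SplitRegexDemo {
      public static void main(String[] args) {
        // The log line is invented for illustration.
        String logLine = "2010-03-01 12:00:00,000 INFO ExecMapper: "
            + "split: hdfs://namenode:9000/user/hive/warehouse/src/kv1.txt:0+5812";
        Matcher m = Pattern.compile("split:.*", Pattern.CASE_INSENSITIVE)
            .matcher(logLine);
        if (m.find()) {
          // Prints everything from "split:" to the end of the line.
          System.out.println(m.group());
        }
      }
    }
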
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/errors/TaskLogProcessor.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/errors/TaskLogProcessor.java	(revision 0)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/errors/TaskLogProcessor.java	(revision 0)
@@ -0,0 +1,177 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.errors;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.mapred.JobConf;
+
+/**
+ * TaskLogProcessor reads the logs from failed task attempts and tries to
+ * figure out what the cause of the error was, using various heuristics.
+ */
+public class TaskLogProcessor {
+
+  Map<ErrorHeuristic, HeuristicStats> heuristics =
+      new HashMap<ErrorHeuristic, HeuristicStats>();
+  List<String> taskLogUrls = new ArrayList<String>();
+
+  JobConf conf = null;
+  // Query is the Hive query string, e.g. "SELECT * FROM src;", associated
+  // with this set of task logs
+  String query = null;
+
+  public TaskLogProcessor(JobConf conf) {
+    this.conf = conf;
+    query = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEQUERYSTRING);
+
+    heuristics.put(new ScriptErrorHeuristic(), new HeuristicStats());
+    heuristics.put(new MapAggrMemErrorHeuristic(), new HeuristicStats());
+    heuristics.put(new DataCorruptErrorHeuristic(), new HeuristicStats());
+    for (ErrorHeuristic e : heuristics.keySet()) {
+      e.init(query, conf);
+    }
+  }
+
+  /**
+   * Adds a task log URL for the heuristics to read through.
+   * @param url
+   */
+  public void addTaskAttemptLogUrl(String url) {
+    taskLogUrls.add(url);
+  }
+
+  private static class HeuristicStats {
+
+    // The number of times the heuristic has returned non-null errors
+    int triggerCount = 0;
+    // All ErrorAndSolutions that the heuristic has generated. For the same
+    // error they should be identical, though it's possible that different
+    // file paths etc. could generate different error messages
+    List<ErrorAndSolution> ens = new ArrayList<ErrorAndSolution>();
+
+    HeuristicStats() {
+    }
+
+    int getTriggerCount() {
+      return triggerCount;
+    }
+
+    void incTriggerCount() {
+      triggerCount++;
+    }
+
+    List<ErrorAndSolution> getErrorAndSolutions() {
+      return ens;
+    }
+
+    void addErrorAndSolution(ErrorAndSolution e) {
+      ens.add(e);
+    }
+  }
+
+  /**
+   * Processes the provided task logs using the known error heuristics to get
+   * the matching errors.
+   * @return the ErrorAndSolutions from the ErrorHeuristic that most
+   * frequently generated matches. In case of ties, multiple
+   * ErrorAndSolutions will be returned.
+   */
+  public List<ErrorAndSolution> getErrors() {
+
+    for (String urlString : taskLogUrls) {
+
+      // Open the log file, and read in a line. Then feed the line into
+      // each of the ErrorHeuristics. Repeat for all the lines in the log.
+      URL taskAttemptLogUrl;
+      try {
+        taskAttemptLogUrl = new URL(urlString);
+      } catch (MalformedURLException e) {
+        throw new RuntimeException("Bad task log URL", e);
+      }
+      BufferedReader in;
+      try {
+        in = new BufferedReader(
+            new InputStreamReader(taskAttemptLogUrl.openStream()));
+        String inputLine;
+        while ((inputLine = in.readLine()) != null) {
+          for (ErrorHeuristic e : heuristics.keySet()) {
+            e.processLogLine(inputLine);
+          }
+        }
+        in.close();
+      } catch (IOException e) {
+        throw new RuntimeException("Error while reading from task log URL", e);
+      }
+
+      // Once the lines of the log file have been fed into the
+      // ErrorHeuristics, see if any of them have detected anything. If any
+      // has, record the ErrorAndSolution it gave so we can later return the
+      // most frequently occurring error
+      for (Entry<ErrorHeuristic, HeuristicStats> ent : heuristics.entrySet()) {
+        ErrorHeuristic eh = ent.getKey();
+        HeuristicStats hs = ent.getValue();
+
+        ErrorAndSolution es = eh.getErrorAndSolution();
+        if (es != null) {
+          hs.incTriggerCount();
+          hs.addErrorAndSolution(es);
+        }
+      }
+    }
+
+    // Return the errors that occur the most frequently
+    int max = 0;
+    for (HeuristicStats hs : heuristics.values()) {
+      if (hs.getTriggerCount() > max) {
+        max = hs.getTriggerCount();
+      }
+    }
+
+    List<ErrorAndSolution> errors = new ArrayList<ErrorAndSolution>();
+    for (HeuristicStats hs : heuristics.values()) {
+      if (hs.getTriggerCount() == max) {
+        if (hs.getErrorAndSolutions().size() > 0) {
+          // An error heuristic could have generated a different
+          // ErrorAndSolution for each task attempt, but most likely they
+          // are the same. Plus, one of those is probably good enough for
+          // debugging
+          errors.add(hs.getErrorAndSolutions().get(0));
+        }
+      }
+    }
+
+    return errors;
+  }
+}
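
A usage sketch for the class above (not part of the patch): the caller supplies a JobConf that carries the query under HiveConf.ConfVars.HIVEQUERYSTRING, plus the attempt log URLs gathered in ExecDriver. The class and method names here are hypothetical:

    import java.util.List;

    import org.apache.hadoop.hive.ql.exec.errors.ErrorAndSolution;
    import org.apache.hadoop.hive.ql.exec.errors.TaskLogProcessor;
    import org.apache.hadoop.mapred.JobConf;

    public class TaskLogProcessorExample {
      // Feed all attempt log URLs to the processor and print its diagnoses.
      public static void printLikelyErrors(JobConf conf, List<String> logUrls) {
        TaskLogProcessor tlp = new TaskLogProcessor(conf);
        for (String url : logUrls) {
          tlp.addTaskAttemptLogUrl(url);
        }
        for (ErrorAndSolution e : tlp.getErrors()) {
          System.err.println("Possible error: " + e.getError());
          System.err.println("Solution: " + e.getSolution());
        }
      }
    }
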
"SELECT * FROM src;" associated with + // this set of tasks logs + String query = null; + + public TaskLogProcessor(JobConf conf) { + this.conf = conf; + query = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEQUERYSTRING); + + heuristics.put(new ScriptErrorHeuristic(), new HeuristicStats()); + heuristics.put(new MapAggrMemErrorHeuristic(), new HeuristicStats()); + heuristics.put(new DataCorruptErrorHeuristic(), new HeuristicStats()); + for(ErrorHeuristic e : heuristics.keySet()) { + e.init(query, conf); + } + } + + /** + * Adds a task log URL for the heuristics to read through + * @param url + */ + public void addTaskAttemptLogUrl(String url) { + taskLogUrls.add(url); + } + + private static class HeuristicStats { + + // The number of times eh has returned non-null errors + int triggerCount = 0; + // All ErrorAndSolutions that ErrorHeuristic has generated. For the same error, they + // should be the same though it's possible that different file paths etc + // could generate different error messages + List ens = new ArrayList(); + + HeuristicStats() { + } + + int getTriggerCount() { + return triggerCount; + } + + void incTriggerCount() { + triggerCount++; + } + + List getErrorAndSolutions() { + return ens; + } + + void addErrorAndSolution(ErrorAndSolution e) { + ens.add(e); + } + } + + /** + * Processes the provided task logs using the known error heuristics to get + * the matching errors + * @return A ErrorAndSolution from the ErrorHeuristic that most frequently + * generated matches. In case of ties, multiple ErrorAndSolutions will be + * returned. + */ + public List getErrors() { + + for(String urlString : taskLogUrls) { + + // Open the log file, and read in a line. Then feed the line into + // each of the ErrorHeuristics. Repeat for all the lines in the log. + URL taskAttemptLogUrl; + try { + taskAttemptLogUrl = new URL(urlString); + } catch(MalformedURLException e) { + throw new RuntimeException("Bad task log url", e); + } + BufferedReader in; + try { + in = new BufferedReader( + new InputStreamReader(taskAttemptLogUrl.openStream())); + String inputLine; + while ((inputLine = in.readLine()) != null) { + for(ErrorHeuristic e : heuristics.keySet()) { + e.processLogLine(inputLine); + } + } + in.close(); + } catch (IOException e) { + throw new RuntimeException("Error while reading from task log url", e); + } + + // Once the lines of the log file have been fed into the ErrorHeuristics, + // see if they have detected anything. If any has, record + // what ErrorAndSolution it gave so we can later return the most + // frequently occurring error + for(Entry ent : heuristics.entrySet()) { + ErrorHeuristic eh = ent.getKey(); + HeuristicStats hs = ent.getValue(); + + ErrorAndSolution es = eh.getErrorAndSolution(); + if(es != null) { + hs.incTriggerCount(); + hs.addErrorAndSolution(es); + } + } + + } + + // Return the errors that occur the most frequently + int max = 0; + for(HeuristicStats hs : heuristics.values()) { + if(hs.getTriggerCount() > max) { + max = hs.getTriggerCount(); + } + } + + List errors = new ArrayList(); + for(HeuristicStats hs : heuristics.values()) { + if(hs.getTriggerCount() == max) { + if(hs.getErrorAndSolutions().size() > 0) { + // An error heuristic could have generated different ErrorAndSolution + // for each task attempt, but most likely they are the same. 
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/errors/MapAggrMemErrorHeuristic.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/errors/MapAggrMemErrorHeuristic.java	(revision 0)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/errors/MapAggrMemErrorHeuristic.java	(revision 0)
@@ -0,0 +1,76 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.errors;
+
+import java.util.HashSet;
+import java.util.List;
+
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.mapred.JobConf;
+
+/**
+ * Detects out-of-memory errors when hash tables used in map-side aggregation
+ * of group-by queries take up too much memory.
+ *
+ * Conditions to check:
+ *
+ * 1. The query contains a group by.
+ * 2. Map-side aggregation is turned on.
+ * 3. There is an out-of-memory exception in the log.
+ */
+public class MapAggrMemErrorHeuristic extends RegexErrorHeuristic {
+
+  private static final String outOfMemoryRegex = "OutOfMemoryError";
+  private boolean configMatches = false;
+
+  public MapAggrMemErrorHeuristic() {
+    queryRegex = "group by";
+    logRegexes = new HashSet<String>();
+    logRegexes.add(outOfMemoryRegex);
+  }
+
+  @Override
+  public void init(String query, JobConf conf) {
+    super.init(query, conf);
+    configMatches = HiveConf.getBoolVar(conf,
+        HiveConf.ConfVars.HIVEMAPSIDEAGGREGATE);
+  }
+
+  @Override
+  public ErrorAndSolution getErrorAndSolution() {
+    ErrorAndSolution es = null;
+    if (queryMatches && configMatches) {
+      List<String> matchingLines = regexToLogLines.get(outOfMemoryRegex);
+
+      if (matchingLines.size() > 0) {
+        String confName = HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY.toString();
+        float confValue = HiveConf.getFloatVar(conf,
+            HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY);
+
+        es = new ErrorAndSolution(
+            "Out of memory due to hash maps used in map-side aggregation.",
+            "Currently " + confName + " is set to " + confValue + ". "
+            + "Try setting it to a lower value, e.g. "
+            + "'set " + confName + " = " + (confValue / 2) + ";'");
+      }
+    }
+    reset();
+    return es;
+  }
+}
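
The suggested fix is a configuration change. A minimal sketch (assumed usage, not part of the patch) of applying it programmatically, assuming ConfVars.varname is accessible as it is elsewhere in HiveConf:

    import org.apache.hadoop.hive.conf.HiveConf;
    import org.apache.hadoop.mapred.JobConf;

    public class MapAggrMemoryTuning {
      // Halve the map-side aggregation hash memory fraction on the conf,
      // mirroring the remediation the heuristic's message recommends.
      public static void halveHashMemory(JobConf conf) {
        float current = HiveConf.getFloatVar(conf,
            HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY);
        conf.setFloat(HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY.varname,
            current / 2);
      }
    }
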
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/errors/ErrorHeuristic.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/errors/ErrorHeuristic.java	(revision 0)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/errors/ErrorHeuristic.java	(revision 0)
@@ -0,0 +1,62 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.errors;
+
+import org.apache.hadoop.mapred.JobConf;
+
+/**
+ * Classes implementing ErrorHeuristic are able to generate a possible cause
+ * and solution for Hive jobs that have failed, by examining the query, the
+ * task log files, and the job configuration.
+ *
+ * A class implementing ErrorHeuristic should only detect one type of error.
+ */
+public interface ErrorHeuristic {
+
+  /**
+   * Initialize this error heuristic. Must be called before any other methods
+   * are called.
+   * @param query
+   * @param jobConf
+   */
+  void init(String query, JobConf jobConf);
+
+  /**
+   * Process the given log line. It should be called for every line in the
+   * task log file, in sequence.
+   *
+   * @param line
+   */
+  void processLogLine(String line);
+
+  /**
+   * Examine the Hive query, the job configuration, and the lines from the
+   * task log seen so far through processLogLine(), and generate a possible
+   * cause/solution. Once this method is called, the implementing class
+   * should be reset to the state before any processLogLine() calls were
+   * made.
+   *
+   * @return a matching error, or null if a suitable match wasn't found.
+   */
+  ErrorAndSolution getErrorAndSolution();
+}
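
A minimal from-scratch implementation sketch of this contract (the condition and class name are invented, not part of the patch), showing init(), per-line processing, and the reset-on-getErrorAndSolution() requirement:

    package org.apache.hadoop.hive.ql.exec.errors;

    import org.apache.hadoop.mapred.JobConf;

    public class MissingClassErrorHeuristic implements ErrorHeuristic {

      private boolean sawMissingClass = false;

      @Override
      public void init(String query, JobConf jobConf) {
        // This heuristic needs neither the query nor the conf.
      }

      @Override
      public void processLogLine(String line) {
        if (line.contains("ClassNotFoundException")) {
          sawMissingClass = true;
        }
      }

      @Override
      public ErrorAndSolution getErrorAndSolution() {
        ErrorAndSolution es = null;
        if (sawMissingClass) {
          es = new ErrorAndSolution(
              "A required class was not found on the task's classpath.",
              "Check that auxiliary jars are shipped with the job, "
              + "e.g. via 'add jar'.");
        }
        sawMissingClass = false; // reset to the pre-processLogLine() state
        return es;
      }
    }
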
+ */ + +package org.apache.hadoop.hive.ql.exec.errors; + +import java.util.List; +import java.util.Map; + +import org.apache.hadoop.mapred.JobConf; + +/** + * Classes implementing ErrorHeuristic are able to generate a possible cause and + * solution for Hive jobs that have failed by examining the query, task log + * files, and the job configuration. + * + * A class implementing ErrorHeuristic should only detect one type of error. + * + */ +public interface ErrorHeuristic { + + /** + * Initialize this error heuristic. Must be called before any other methods + * are called + * @param query + * @param jobConf + */ + void init(String query, JobConf jobConf); + + /** + * Process the given log line. It should be called for every line in the task + * log file, in sequence. + * + * @param line + */ + void processLogLine(String line); + + /** + * Examine the hive query, job configuration, and the lines from the task log + * seen so far though processLogLine() and generate a possible cause/solution. + * Once this method is called, the implementing class should be reset to the + * state before any processLogLine() calls were made. + * + * @return a matching error, or null if a suitable match wasn't found. + * + */ + ErrorAndSolution getErrorAndSolution(); +} Index: ql/src/java/org/apache/hadoop/hive/ql/exec/errors/ErrorAndSolution.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/errors/ErrorAndSolution.java (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/errors/ErrorAndSolution.java (revision 0) @@ -0,0 +1,56 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.errors; + +/** + * Immutable class for storing a possible error and a way to resolve the error + */ +public class ErrorAndSolution { + + String error = null; + String solution = null; + + ErrorAndSolution(String error, String solution) { + this.error = error; + this.solution = solution; + } + + public String getError() { + return error; + } + + public String getSolution() { + return solution; + } + + @Override + public boolean equals(Object o) { + if( !(o instanceof ErrorAndSolution)) { + return false; + } + ErrorAndSolution e = (ErrorAndSolution)o; + + return e.error == this.error && e.solution == this.solution; + } + + @Override + public int hashCode() { + return error.hashCode() * 37 + solution.hashCode(); + } +} Index: ql/src/java/org/apache/hadoop/hive/ql/exec/errors/ScriptErrorHeuristic.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/errors/ScriptErrorHeuristic.java (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/errors/ScriptErrorHeuristic.java (revision 0) @@ -0,0 +1,79 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.errors; + +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * Detects when a query has failed because a user's script that was specified in + * transform returns a non-zero error code. + * + * Conditions to check: + * + * 1. "Script failed with code " is in the log + * + */ + +public class ScriptErrorHeuristic extends RegexErrorHeuristic { + + private static final String failedRegex = "Script failed with code [0-9]+"; + + public ScriptErrorHeuristic() { + queryRegex = ".*"; + logRegexes = new HashSet(); + logRegexes.add(failedRegex); + } + + @Override + public ErrorAndSolution getErrorAndSolution() { + ErrorAndSolution es = null; + + if(queryMatches) { + for(List matchingLines : regexToLogLines.values()) { + // There should really only be one line with "Script failed..." + if (matchingLines.size() > 0) { + assert(matchingLines.size() == 1); + + // Get "Script failed with code " + Matcher m1 = Pattern.compile(failedRegex).matcher(matchingLines.get(0)); + m1.find(); + String failedStr = m1.group(); + + // Get "" + Matcher m2 = Pattern.compile("[0-9]+").matcher(failedStr); + m2.find(); + String errorCode = m2.group(); + + es = new ErrorAndSolution( + "A user-supplied transfrom script has exited with error code " + + errorCode + " instead of 0.", + "Verify that the script can properly handle all the input rows " + + "without throwing exceptions and exits properly."); + } + } + } + + reset(); + return es; + } +}