diff --git llap-debug/pom.xml llap-debug/pom.xml new file mode 100644 index 0000000000..23a52e7c99 --- /dev/null +++ llap-debug/pom.xml @@ -0,0 +1,103 @@ + + + + 4.0.0 + + org.apache.hive + hive + 3.0.0-SNAPSHOT + ../pom.xml + + + hive-llap-debug + jar + Llap Debug tool + + + .. + + + + + org.apache.hive + hive-llap-server + ${project.version} + + + com.google.inject + guice + provided + + + org.apache.httpcomponents + httpclient + provided + + + org.codehaus.jackson + jackson-core-asl + provided + + + org.apache.hadoop + hadoop-common + provided + + + org.apache.hadoop + hadoop-yarn-api + provided + + + junit + junit + test + + + org.mockito + mockito-all + test + + + + + + + maven-assembly-plugin + + + + org.apache.hive.tez.tools.debug.Main + + + + jar-with-dependencies + + + + + make-assembly + package + + single + + + + + + + diff --git llap-debug/src/main/java/org/apache/hive/tez/tools/debug/AMArtifactsHelper.java llap-debug/src/main/java/org/apache/hive/tez/tools/debug/AMArtifactsHelper.java new file mode 100644 index 0000000000..32771353c9 --- /dev/null +++ llap-debug/src/main/java/org/apache/hive/tez/tools/debug/AMArtifactsHelper.java @@ -0,0 +1,84 @@ +package org.apache.hive.tez.tools.debug; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.List; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hive.tez.tools.debug.framework.Artifact; +import org.apache.hive.tez.tools.debug.framework.Params.ContainerLogsInfo; +import org.apache.http.client.HttpClient; +import org.codehaus.jackson.map.ObjectMapper; +import org.codehaus.jackson.JsonProcessingException; +import org.codehaus.jackson.map.DeserializationConfig.Feature; +import org.codehaus.jackson.type.TypeReference; + +import com.google.inject.Inject; +import com.google.inject.Singleton; + +@Singleton +public class AMArtifactsHelper { + + private static final String RM_WS_PREFIX = "/ws/v1/cluster"; + private static final String AHS_WS_PREFIX = "/ws/v1/applicationhistory"; + + private final HttpClient httpClient; + private final String rmAddress; + private final String ahsAddress; + private final ObjectMapper mapper; + + @Inject + public AMArtifactsHelper(Configuration conf, HttpClient httpClient) { + this.httpClient = httpClient; + if (YarnConfiguration.useHttps(conf)) { + rmAddress = "https://" + conf.get(YarnConfiguration.RM_WEBAPP_HTTPS_ADDRESS, + YarnConfiguration.DEFAULT_RM_WEBAPP_HTTPS_ADDRESS); + ahsAddress = "https://" + conf.get(YarnConfiguration.TIMELINE_SERVICE_WEBAPP_HTTPS_ADDRESS, + YarnConfiguration.DEFAULT_TIMELINE_SERVICE_WEBAPP_HTTPS_ADDRESS); + } else { + rmAddress = "http://" + conf.get(YarnConfiguration.RM_WEBAPP_ADDRESS, + YarnConfiguration.DEFAULT_RM_WEBAPP_ADDRESS); + ahsAddress = "http://" + conf.get(YarnConfiguration.TIMELINE_SERVICE_WEBAPP_ADDRESS, + YarnConfiguration.DEFAULT_TIMELINE_SERVICE_WEBAPP_ADDRESS); + } + mapper = new ObjectMapper(); + mapper.configure(Feature.FAIL_ON_UNKNOWN_PROPERTIES, false); + mapper.configure(Feature.ACCEPT_SINGLE_VALUE_AS_ARRAY, true); + } + + public Artifact getAMInfoArtifact(String name, String appId) { + String attemptsUrl = rmAddress + RM_WS_PREFIX + "/apps/" + appId + "/appattempts"; + return new HttpArtifact(httpClient, name, attemptsUrl, true); + } + + public Artifact getLogListArtifact(String name, String containerId, String nodeId) { + String logsListUrl = ahsAddress + AHS_WS_PREFIX + "/containers/" + containerId + "/logs"; + if (nodeId != null) { + logsListUrl += "?nm.id=" + nodeId; + } + return new HttpArtifact(httpClient, name, logsListUrl, true); + } + + public Artifact getLogArtifact(String name, String containerId, String logFile, String nodeId) { + String logUrl = ahsAddress + AHS_WS_PREFIX + "/containers/" + containerId + "/logs/" + logFile; + if (nodeId != null) { + logUrl += "?nm.id=" + nodeId; + } + return new HttpArtifact(httpClient, name, logUrl, false); + } + + public static class ContainerLogs { + public List containerLogsInfo; + } + public List parseContainerLogs(Path path) throws IOException { + TypeReference> typeRef = new TypeReference>(){}; + // The api return with containerLogsInfo or without it, hence trying both formats. + try { + return mapper.readValue(Files.newInputStream(path), ContainerLogs.class).containerLogsInfo; + } catch (JsonProcessingException e) { + return mapper.readValue(Files.newInputStream(path), typeRef); + } + } +} diff --git llap-debug/src/main/java/org/apache/hive/tez/tools/debug/ATSArtifactHelper.java llap-debug/src/main/java/org/apache/hive/tez/tools/debug/ATSArtifactHelper.java new file mode 100644 index 0000000000..c87035a37e --- /dev/null +++ llap-debug/src/main/java/org/apache/hive/tez/tools/debug/ATSArtifactHelper.java @@ -0,0 +1,71 @@ +package org.apache.hive.tez.tools.debug; + +import java.net.URISyntaxException; +import java.util.List; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hive.tez.tools.debug.framework.Artifact; +import org.apache.http.client.HttpClient; +import org.apache.http.client.utils.URIBuilder; + +import com.google.inject.Inject; +import com.google.inject.Singleton; + +@Singleton +public class ATSArtifactHelper { + // This is a private field in yarn timeline client :-(. + private static final String ATS_PATH_PREFIX = "/ws/v1/timeline/"; + + private final HttpClient httpClient; + private final String atsAddress; + + public static class ATSEvent { + public long timestamp; + public String eventtype; + // ignored eventinfo + } + public static class ATSLog { + public List events; + public String entitytype; + public String entity; + public long starttime; + // ignored domain, relatedentities, primaryfilters, otherinfo. + } + + @Inject + public ATSArtifactHelper(Configuration conf, HttpClient httpClient) { + this.httpClient = httpClient; + if (YarnConfiguration.useHttps(conf)) { + atsAddress = "https://" + conf.get(YarnConfiguration.TIMELINE_SERVICE_WEBAPP_HTTPS_ADDRESS, + YarnConfiguration.DEFAULT_TIMELINE_SERVICE_WEBAPP_HTTPS_ADDRESS); + } else { + atsAddress = "http://" + conf.get(YarnConfiguration.TIMELINE_SERVICE_WEBAPP_ADDRESS, + YarnConfiguration.DEFAULT_TIMELINE_SERVICE_WEBAPP_ADDRESS); + } + } + + public Artifact getEntityArtifact(String name, String entityType, String entityId) { + try { + URIBuilder builder = new URIBuilder(atsAddress); + builder.setPath(ATS_PATH_PREFIX + entityType + "/" + entityId); + String url = builder.build().toString(); + return new HttpArtifact(httpClient, name, url, false); + } catch (URISyntaxException e) { + throw new RuntimeException("Invalid atsAddress: " + atsAddress, e); + } + } + + public Artifact getChildEntityArtifact(String name, String entityType, String rootEntityType, + String rootEntityId) { + try { + URIBuilder builder = new URIBuilder(atsAddress); + builder.setPath(ATS_PATH_PREFIX + entityType); + builder.setParameter("primaryFilter", rootEntityType + ":" + rootEntityId); + String url = builder.build().toString(); + return new HttpArtifact(httpClient, name, url, false); + } catch (URISyntaxException e) { + throw new RuntimeException("Invalid atsAddress: " + atsAddress, e); + } + } +} diff --git llap-debug/src/main/java/org/apache/hive/tez/tools/debug/HttpArtifact.java llap-debug/src/main/java/org/apache/hive/tez/tools/debug/HttpArtifact.java new file mode 100644 index 0000000000..af9cdf4067 --- /dev/null +++ llap-debug/src/main/java/org/apache/hive/tez/tools/debug/HttpArtifact.java @@ -0,0 +1,64 @@ +package org.apache.hive.tez.tools.debug; + +import java.io.IOException; +import java.io.InputStream; +import java.nio.file.Files; +import java.nio.file.Path; + +import org.apache.commons.io.IOUtils; +import org.apache.hive.tez.tools.debug.framework.Artifact; +import org.apache.hive.tez.tools.debug.framework.ArtifactDownloadException; +import org.apache.http.HttpResponse; +import org.apache.http.client.HttpClient; +import org.apache.http.client.methods.HttpGet; + +/** + * Class to download a http resource into a given path. + */ +public class HttpArtifact implements Artifact { + + private final HttpClient client; + private final String name; + private final String url; + private final boolean isTemp; + + public HttpArtifact(HttpClient client, String name, String url, boolean isTemp) { + this.client = client; + this.name = name; + this.url = url; + this.isTemp = isTemp; + } + + @Override + public String getName() { + return name; + } + + @Override + public void downloadInto(Path path) throws ArtifactDownloadException { + System.out.println("Downloading: " + url); + // Try to use nio to transfer the streaming data from http into the outputstream of the path. + // We are using 3 buffers while with nio we should be able to download with one. + InputStream entityStream = null; + try { + HttpGet httpGet = new HttpGet(url); + HttpResponse response = client.execute(httpGet); + entityStream = response.getEntity().getContent(); + Files.copy(entityStream, path); + } catch (IOException e) { + throw new ArtifactDownloadException(e); + } finally { + IOUtils.closeQuietly(entityStream); + } + } + + @Override + public boolean isTemp() { + return isTemp; + } + + @Override + public String toString() { + return "HttpArtifact[Name: " + name + ", URL: " + url + "]"; + } +} diff --git llap-debug/src/main/java/org/apache/hive/tez/tools/debug/Main.java llap-debug/src/main/java/org/apache/hive/tez/tools/debug/Main.java new file mode 100644 index 0000000000..3dca068587 --- /dev/null +++ llap-debug/src/main/java/org/apache/hive/tez/tools/debug/Main.java @@ -0,0 +1,86 @@ +package org.apache.hive.tez.tools.debug; + +import java.io.File; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hive.tez.tools.debug.framework.ArtifactAggregator; +import org.apache.hive.tez.tools.debug.framework.ArtifactSourceCreator; +import org.apache.hive.tez.tools.debug.framework.ArtifactSourceType; +import org.apache.hive.tez.tools.debug.framework.Params; + +public class Main { + public static void main(String[] args) { + String dagId = null; + String queryId = null; + File outputFile = null; + List sourceTypes = new ArrayList<>(); + if (args.length % 2 != 0) { + usage("Got extra arguments."); + } + for (int i = 0; i < args.length; i += 2) { + if (args[i].equals("--dagId")) { + dagId = args[i + 1]; + } else if (args[i].equals("--queryId")) { + queryId = args[i + 1]; + } else if (args[i].equals("--outputFile")) { + outputFile = new File(args[i + 1]); + } else if (args[i].equals("--sourceType")) { + try { + sourceTypes.add(ArtifactSourceType.valueOf(args[i + 1])); + } catch (IllegalArgumentException e) { + usage("Invalid source type: " + args[i + 1]); + } + } else { + usage("Unknown option: " + args[i]); + } + } + if ((dagId == null && queryId == null) || (dagId != null && queryId != null)) { + usage("Specify either dagId or queryId."); + } + Params params = new Params(); + if (dagId != null) { + params.setTezDagId(dagId); + } + if (queryId != null) { + params.setHiveQueryId(queryId); + } + if (outputFile == null) { + outputFile = new File((dagId == null ? queryId : dagId) + ".zip"); + } + if (outputFile.exists()) { + usage("File already exists: " + outputFile.getAbsolutePath()); + } + + // Add command line option to download only a subset of sources. + if (sourceTypes.isEmpty()) { + sourceTypes = Arrays.asList(ArtifactSourceType.values()); + } + + ExecutorService service = Executors.newFixedThreadPool(1); + try (ArtifactAggregator aggregator = new ArtifactAggregator(new Configuration(), service, + params, outputFile.getAbsolutePath(), sourceTypes)) { + aggregator.aggregate(); + } catch (Exception e) { + System.err.println("Error occured while trying to create aggregator: " + e.getMessage()); + e.printStackTrace(); + System.exit(1); + } finally { + service.shutdownNow(); + } + } + + private static void usage(String msg) { + if (msg != null) { + System.err.println(msg); + } + System.err.println( + "Usage: download_aggregate <--dagId dagId | --queryId queryId> " + + "[--sourceType ]... [--outputFile outputFile]"); + System.exit(1); + } +} diff --git llap-debug/src/main/java/org/apache/hive/tez/tools/debug/framework/Artifact.java llap-debug/src/main/java/org/apache/hive/tez/tools/debug/framework/Artifact.java new file mode 100644 index 0000000000..352ecf0fea --- /dev/null +++ llap-debug/src/main/java/org/apache/hive/tez/tools/debug/framework/Artifact.java @@ -0,0 +1,29 @@ +package org.apache.hive.tez.tools.debug.framework; + +import java.nio.file.Path; + +/** + * Interface representing an Artifact which should be downloaded. + */ +public interface Artifact { + /** + * Name used by ArtifactAggregator as the path of the file in the archive. Use '/' as separator + * in the path. This should be unique across all sources. + * + * @return Name of the artifact. + */ + String getName(); + + /** + * Download this resource into the path. + * @param path The path into which the resource should be copied. + * @throws ArtifactDownloadException + */ + void downloadInto(Path path) throws ArtifactDownloadException; + + /** + * If this is a temporary artifact then the aggregator will delete this file. + * @return true if this is a temporary artifact. + */ + boolean isTemp(); +} diff --git llap-debug/src/main/java/org/apache/hive/tez/tools/debug/framework/ArtifactAggregator.java llap-debug/src/main/java/org/apache/hive/tez/tools/debug/framework/ArtifactAggregator.java new file mode 100644 index 0000000000..af4b32ca9e --- /dev/null +++ llap-debug/src/main/java/org/apache/hive/tez/tools/debug/framework/ArtifactAggregator.java @@ -0,0 +1,182 @@ +package org.apache.hive.tez.tools.debug.framework; + +import java.io.IOException; +import java.io.OutputStream; +import java.io.PrintWriter; +import java.io.StringWriter; +import java.net.URI; +import java.nio.file.FileSystem; +import java.nio.file.FileSystems; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.StandardOpenOption; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Future; + +import org.apache.hadoop.conf.Configuration; +import org.apache.http.client.HttpClient; +import org.apache.http.impl.client.CloseableHttpClient; +import org.apache.http.impl.client.HttpClients; +import org.codehaus.jackson.JsonGenerator; +import org.codehaus.jackson.map.ObjectMapper; +import org.codehaus.jackson.map.annotate.JsonSerialize.Inclusion; + +import com.google.common.collect.ImmutableMap; +import com.google.inject.Binder; +import com.google.inject.Guice; +import com.google.inject.Injector; +import com.google.inject.Module; + +/** + * Class to download and aggregate all the logs into a zip file. + */ +public class ArtifactAggregator implements AutoCloseable { + private final ExecutorService service; + private final Params params; + private final CloseableHttpClient httpClient; + private final FileSystem zipfs; + private final List pendingSources; + private final Map artifactSource; + + public ArtifactAggregator(final Configuration conf, ExecutorService service, Params params, + String zipFilePath, List sourceTypes) throws IOException { + this.service = service; + this.params = params; + this.httpClient = HttpClients.createDefault(); + this.artifactSource = new HashMap<>(); + this.zipfs = FileSystems.newFileSystem(URI.create("jar:file:" + zipFilePath), + ImmutableMap.of("create", "true", "encoding", "UTF-8")); + + Injector injector = Guice.createInjector(new Module() { + @Override + public void configure(Binder binder) { + binder.bind(HttpClient.class).toInstance(httpClient); + binder.bind(Configuration.class).toInstance(conf); + } + }); + this.pendingSources = new ArrayList<>(sourceTypes.size()); + for (ArtifactSourceCreator sourceType : sourceTypes) { + pendingSources.add(sourceType.getSource(injector)); + } + } + + public void aggregate() throws IOException { + final Map errors = new HashMap<>(); + while (!pendingSources.isEmpty()) { + List artifacts = collectDownloadableArtifacts(errors); + if (artifacts.isEmpty() && !pendingSources.isEmpty()) { + // Artifacts is empty, but some sources are pending. + // Can be because dagId was given, queryId could not be found, etc ... + break; + } + List> futures = new ArrayList<>(artifacts.size()); + for (final Artifact artifact : artifacts) { + final Path path = getArtifactPath(artifact.getName()); + futures.add(service.submit(new Runnable() { + public void run() { + try { + artifact.downloadInto(path); + artifactSource.get(artifact).updateParams(params, artifact, path); + } catch (Throwable t) { + errors.put(artifact.getName(), t); + } finally { + if (artifact.isTemp()) { + try { + Files.delete(path); + } catch (IOException ignore) { + } + } + } + } + })); + } + // Its important that we wait for all futures in this stage, there are some cases where all + // downloads/updates of one stage should finish before we start the next stage. + for (Future future : futures) { + try { + future.get(); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + return; + } catch (ExecutionException e) { + // ignore, this should not happen, we catch all throwable and serialize into error + } + } + } + writeErrors(errors); + } + + private Path getArtifactPath(String artifactName) throws IOException { + final Path path = zipfs.getPath("/", artifactName.split("/")); + Path parent = path.getParent(); + if (parent != null) { + Files.createDirectories(parent); + } + return path; + } + + @Override + public void close() throws IOException { + try { + if (zipfs != null) { + zipfs.close(); + } + } finally { + if (httpClient != null) { + httpClient.close(); + } + } + } + + private List collectDownloadableArtifacts(Map errors) { + List artifacts = new ArrayList<>(); + Iterator iter = pendingSources.iterator(); + while (iter.hasNext()) { + ArtifactSource source = iter.next(); + if (source.hasRequiredParams(params)) { + try { + for (Artifact artifact : source.getArtifacts(params)) { + artifacts.add(artifact); + artifactSource.put(artifact, source); + } + } catch (ArtifactDownloadException e) { + errors.put(source.getClass().getSimpleName(), e); + } + iter.remove(); + } + } + return artifacts; + } + + private void writeErrors(Map errors) throws IOException { + if (errors.isEmpty()) { + return; + } + Path path = zipfs.getPath("ERRORS"); + OutputStream stream = null; + try { + stream = Files.newOutputStream(path, StandardOpenOption.CREATE_NEW, StandardOpenOption.WRITE); + ObjectMapper objectMapper = new ObjectMapper(); + objectMapper.setSerializationInclusion(Inclusion.NON_NULL); + JsonGenerator generator = objectMapper.getJsonFactory().createJsonGenerator(stream); + generator.writeStartObject(); + for (Entry entry : errors.entrySet()) { + StringWriter writer = new StringWriter(); + entry.getValue().printStackTrace(new PrintWriter(writer)); + generator.writeStringField(entry.getKey(), writer.toString()); + } + generator.writeEndObject(); + generator.close(); + } finally { + // We should not close a stream from ZipFileSystem, I have no clue why. + // IOUtils.closeQuietly(stream); + } + } +} diff --git llap-debug/src/main/java/org/apache/hive/tez/tools/debug/framework/ArtifactDownloadException.java llap-debug/src/main/java/org/apache/hive/tez/tools/debug/framework/ArtifactDownloadException.java new file mode 100644 index 0000000000..565e2177df --- /dev/null +++ llap-debug/src/main/java/org/apache/hive/tez/tools/debug/framework/ArtifactDownloadException.java @@ -0,0 +1,18 @@ +package org.apache.hive.tez.tools.debug.framework; + +public class ArtifactDownloadException extends Exception { + + private static final long serialVersionUID = -3632196021073193293L; + + public ArtifactDownloadException(String message, Throwable cause) { + super(message, cause); + } + + public ArtifactDownloadException(String message) { + super(message); + } + + public ArtifactDownloadException(Throwable cause) { + super(cause); + } +} diff --git llap-debug/src/main/java/org/apache/hive/tez/tools/debug/framework/ArtifactSource.java llap-debug/src/main/java/org/apache/hive/tez/tools/debug/framework/ArtifactSource.java new file mode 100644 index 0000000000..b787e79cb0 --- /dev/null +++ llap-debug/src/main/java/org/apache/hive/tez/tools/debug/framework/ArtifactSource.java @@ -0,0 +1,10 @@ +package org.apache.hive.tez.tools.debug.framework; + +import java.nio.file.Path; +import java.util.List; + +public interface ArtifactSource { + boolean hasRequiredParams(Params params); + List getArtifacts(Params params) throws ArtifactDownloadException; + void updateParams(Params params, Artifact artifact, Path path) throws ArtifactDownloadException; +} diff --git llap-debug/src/main/java/org/apache/hive/tez/tools/debug/framework/ArtifactSourceCreator.java llap-debug/src/main/java/org/apache/hive/tez/tools/debug/framework/ArtifactSourceCreator.java new file mode 100644 index 0000000000..cc5160af5b --- /dev/null +++ llap-debug/src/main/java/org/apache/hive/tez/tools/debug/framework/ArtifactSourceCreator.java @@ -0,0 +1,7 @@ +package org.apache.hive.tez.tools.debug.framework; + +import com.google.inject.Injector; + +public interface ArtifactSourceCreator { + ArtifactSource getSource(Injector injector); +} diff --git llap-debug/src/main/java/org/apache/hive/tez/tools/debug/framework/ArtifactSourceType.java llap-debug/src/main/java/org/apache/hive/tez/tools/debug/framework/ArtifactSourceType.java new file mode 100644 index 0000000000..48126fed87 --- /dev/null +++ llap-debug/src/main/java/org/apache/hive/tez/tools/debug/framework/ArtifactSourceType.java @@ -0,0 +1,55 @@ +package org.apache.hive.tez.tools.debug.framework; + +import org.apache.hive.tez.tools.debug.source.DummyArtifacts; +import org.apache.hive.tez.tools.debug.source.HiveATSArtifacts; +import org.apache.hive.tez.tools.debug.source.LlapDeamonLogsArtifacts; +import org.apache.hive.tez.tools.debug.source.LlapDeamonLogsListArtifacts; +import org.apache.hive.tez.tools.debug.source.SliderAMInfoArtifacts; +import org.apache.hive.tez.tools.debug.source.SliderAMLogsArtifacts; +import org.apache.hive.tez.tools.debug.source.SliderAMLogsListArtifacts; +import org.apache.hive.tez.tools.debug.source.SliderInstanceJmx; +import org.apache.hive.tez.tools.debug.source.SliderInstanceStack; +import org.apache.hive.tez.tools.debug.source.SliderStatusArtifacts; +import org.apache.hive.tez.tools.debug.source.TezAMInfoArtifacts; +import org.apache.hive.tez.tools.debug.source.TezAMLogsArtifacts; +import org.apache.hive.tez.tools.debug.source.TezAMLogsListArtifacts; +import org.apache.hive.tez.tools.debug.source.TezATSArtifacts; +import org.apache.hive.tez.tools.debug.source.TezTasksLogsArtifacts; +import org.apache.hive.tez.tools.debug.source.TezTasksLogsListArtifacts; + +import com.google.inject.Injector; + +public enum ArtifactSourceType implements ArtifactSourceCreator { + TEZ_ATS(TezATSArtifacts.class), + TEZ_AM_INFO(TezAMInfoArtifacts.class), + TEZ_AM_LOG_INFO(TezAMLogsListArtifacts.class), + TEZ_AM_LOGS(TezAMLogsArtifacts.class), + TEZ_AM_JMX(DummyArtifacts.class), + TEZ_AM_STACK(DummyArtifacts.class), + TEZ_TASK_LOGS_INFO(TezTasksLogsListArtifacts.class), + TEZ_TASK_LOGS(TezTasksLogsArtifacts.class), + LLAP_DEAMON_LOGS_INFO(LlapDeamonLogsListArtifacts.class), + LLAP_DEAMON_LOGS(LlapDeamonLogsArtifacts.class), + SLIDER_STATUS(SliderStatusArtifacts.class), + SLIDER_INSTANCE_JMX(SliderInstanceJmx.class), + SLIDER_INSTANCE_STACK(SliderInstanceStack.class), + SLIDER_AM_INFO(SliderAMInfoArtifacts.class), + SLIDER_AM_LOG_INFO(SliderAMLogsListArtifacts.class), + SLIDER_AM_LOGS(SliderAMLogsArtifacts.class), + TEZ_CONFIG(DummyArtifacts.class), + TEZ_HIVE2_CONFIG(DummyArtifacts.class), + HIVE_ATS(HiveATSArtifacts.class), + HIVE_CONFIG(DummyArtifacts.class), + HIVE2_CONFIG(DummyArtifacts.class), + HADOOP_CONFIG(DummyArtifacts.class), + HIVESERVER2_LOG(DummyArtifacts.class); + + private final Class sourceClass; + private ArtifactSourceType(Class sourceClass) { + this.sourceClass = sourceClass; + } + + public ArtifactSource getSource(Injector injector) { + return injector.getInstance(sourceClass); + } +} diff --git llap-debug/src/main/java/org/apache/hive/tez/tools/debug/framework/Params.java llap-debug/src/main/java/org/apache/hive/tez/tools/debug/framework/Params.java new file mode 100644 index 0000000000..e25ddf3fc6 --- /dev/null +++ llap-debug/src/main/java/org/apache/hive/tez/tools/debug/framework/Params.java @@ -0,0 +1,206 @@ +package org.apache.hive.tez.tools.debug.framework; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Map.Entry; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; + +import org.apache.hive.tez.tools.debug.AMArtifactsHelper; + +public class Params { + private String appType; + + // Tez information. + private String tezDagId; + private String tezAmAppId; + + private final AppLogs tezAmLogs = new AppLogs(); + private final AppLogs tezTaskLogs = new AppLogs(); + + // Slider AM info. + private String sliderAppId; + private final AppLogs sliderAmLogs = new AppLogs(); + private Set sliderInstanceUrls; + + // Hive information. + private String hiveQueryId; + + // Start and End time of query/dag. + private long startTime = 0; + private long endTime = Long.MAX_VALUE; + + public static class AppLogs { + // Node -> Container -> log + private final ConcurrentHashMap>> + appLogs = new ConcurrentHashMap<>(); + + private boolean finishedContainers; + private boolean finishedLogs; + + public void addLog(String nodeId, String containerId, List logs) { + if (!appLogs.containsKey(nodeId)) { + appLogs.putIfAbsent(nodeId, new ConcurrentHashMap>()); + } + appLogs.get(nodeId).put(containerId, logs); + } + + public void addContainer(String nodeId, String containerId) { + addLog(nodeId, containerId, Collections.emptyList()); + } + + public boolean isFinishedContainers() { + return finishedContainers; + } + + public void finishContainers() { + this.finishedContainers = true; + } + + public boolean isFinishedLogs() { + return finishedLogs; + } + + public void finishLogs() { + this.finishedLogs = true; + } + + public List getLogListArtifacts(AMArtifactsHelper helper, String name) { + List artifacts = new ArrayList<>(); + for (Entry>> entry : + appLogs.entrySet()) { + String nodeId = entry.getKey(); + for (String containerId : entry.getValue().keySet()) { + artifacts.add(helper.getLogListArtifact(name + "/" + containerId + ".logs.json", + containerId, nodeId)); + } + } + return artifacts; + } + + public List getLogArtifacts(AMArtifactsHelper helper, String name) { + List artifacts = new ArrayList<>(); + for (Entry>> entry : + appLogs.entrySet()) { + String nodeId = entry.getKey(); + for (Entry> e : entry.getValue().entrySet()) { + String containerId = e.getKey(); + for (ContainerLogInfo log: e.getValue()) { + artifacts.add(helper.getLogArtifact(name + "/" + containerId + "/" + log.fileName, + containerId, log.fileName, nodeId)); + } + } + } + return artifacts; + } + } + + public static class ContainerLogInfo { + public ContainerLogInfo() {} + public ContainerLogInfo(String fileName, long fileSize, String lastModifiedTime) { + this.fileName = fileName; + this.fileSize = fileSize; + this.lastModifiedTime = lastModifiedTime; + } + public String fileName; + public long fileSize; + public String lastModifiedTime; + } + + public static class ContainerLogsInfo { + public List containerLogInfo; + public String containerId; + public String nodeId; + } + + public String getTezDagId() { + return tezDagId; + } + + public String getAppType() { + return appType; + } + + public void setAppType(String appType) { + this.appType = appType; + } + + public void setTezDagId(String tezDagId) { + this.tezDagId = tezDagId; + } + + public String getTezAmAppId() { + return tezAmAppId; + } + + public void setTezAmAppId(String tezAmAppId) { + this.tezAmAppId = tezAmAppId; + } + + + public AppLogs getTezAmLogs() { + return tezAmLogs; + } + + public AppLogs getTezTaskLogs() { + return tezTaskLogs; + } + + public String getSliderAppId() { + return sliderAppId; + } + + public void setSliderAppId(String sliderAppId) { + this.sliderAppId = sliderAppId; + } + + public AppLogs getSliderAmLogs() { + return sliderAmLogs; + } + + public Set getSliderInstanceUrls() { + return sliderInstanceUrls; + } + + public void setSliderInstanceUrls(Set sliderInstanceUrls) { + this.sliderInstanceUrls = sliderInstanceUrls; + } + + public String getHiveQueryId() { + return hiveQueryId; + } + + public void setHiveQueryId(String hiveQueryId) { + this.hiveQueryId = hiveQueryId; + } + + public long getStartTime() { + return startTime; + } + + public void updateStartTime(long startTime) { + if (this.startTime == 0 || startTime < this.startTime) { + this.startTime = startTime; + } + } + + public long getEndTime() { + return endTime; + } + + public void updateEndTime(long endTime) { + if (this.endTime == Long.MAX_VALUE || endTime > this.endTime) { + this.endTime = endTime; + } + } + + public boolean shouldIncludeArtifact(long startTime, long endTime) { + if (endTime == 0) { + endTime = Long.MAX_VALUE; + } + // overlap is true if one of them started when other was running. + return (this.startTime <= startTime && startTime <= this.endTime) || + (startTime <= this.startTime && this.startTime <= endTime); + } +} diff --git llap-debug/src/main/java/org/apache/hive/tez/tools/debug/source/AMInfoArtifacts.java llap-debug/src/main/java/org/apache/hive/tez/tools/debug/source/AMInfoArtifacts.java new file mode 100644 index 0000000000..02e05eb7af --- /dev/null +++ llap-debug/src/main/java/org/apache/hive/tez/tools/debug/source/AMInfoArtifacts.java @@ -0,0 +1,87 @@ +package org.apache.hive.tez.tools.debug.source; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Collections; +import java.util.List; + +import org.apache.hive.tez.tools.debug.AMArtifactsHelper; +import org.apache.hive.tez.tools.debug.framework.Artifact; +import org.apache.hive.tez.tools.debug.framework.ArtifactDownloadException; +import org.apache.hive.tez.tools.debug.framework.ArtifactSource; +import org.apache.hive.tez.tools.debug.framework.Params; +import org.apache.hive.tez.tools.debug.framework.Params.AppLogs; +import org.codehaus.jackson.map.DeserializationConfig.Feature; +import org.codehaus.jackson.map.ObjectMapper; +import org.codehaus.jackson.map.annotate.JsonRootName; + +public abstract class AMInfoArtifacts implements ArtifactSource { + + protected final AMArtifactsHelper helper; + protected final ObjectMapper mapper; + + public AMInfoArtifacts(AMArtifactsHelper helper) { + this.helper = helper; + this.mapper = new ObjectMapper(); + mapper.configure(Feature.UNWRAP_ROOT_VALUE, true); + mapper.configure(Feature.FAIL_ON_UNKNOWN_PROPERTIES, false); + mapper.configure(Feature.ACCEPT_SINGLE_VALUE_AS_ARRAY, true); + } + + public abstract String getArtifactName(); + + public abstract String getAmId(Params params); + + public abstract AppLogs getAMAppLogs(Params params); + + @Override + public boolean hasRequiredParams(Params params) { + return getAmId(params) != null; + } + + @Override + public List getArtifacts(Params params) { + return Collections.singletonList( + helper.getAMInfoArtifact(getArtifactName(), getAmId(params))); + } + + @JsonRootName("appAttempts") + public static class AMInfo { + public List appAttempt; + } + + public static class AppAttempt { + public int id; + public long startTime; + public long finishedTime; + public String containerId; + public String nodeId; + public String appAttemptId; + } + + @Override + public void updateParams(Params params, Artifact artifact, Path path) + throws ArtifactDownloadException { + AppLogs amLogs = getAMAppLogs(params); + if (amLogs.isFinishedContainers()) { + return; + } + if (artifact.getName().equals(getArtifactName())) { + AMInfo amInfo; + try { + amInfo = mapper.readValue(Files.newInputStream(path), AMInfo.class); + } catch (IOException e) { + throw new ArtifactDownloadException("Error reading value:", e); + } + if (amInfo != null && amInfo.appAttempt != null) { + for (AppAttempt attempt: amInfo.appAttempt) { + if (params.shouldIncludeArtifact(attempt.startTime, attempt.finishedTime)) { + amLogs.addContainer(attempt.nodeId, attempt.containerId); + } + } + amLogs.finishContainers(); + } + } + } +} diff --git llap-debug/src/main/java/org/apache/hive/tez/tools/debug/source/DummyArtifacts.java llap-debug/src/main/java/org/apache/hive/tez/tools/debug/source/DummyArtifacts.java new file mode 100644 index 0000000000..0dc9e44804 --- /dev/null +++ llap-debug/src/main/java/org/apache/hive/tez/tools/debug/source/DummyArtifacts.java @@ -0,0 +1,26 @@ +package org.apache.hive.tez.tools.debug.source; + +import java.nio.file.Path; +import java.util.Collections; +import java.util.List; + +import org.apache.hive.tez.tools.debug.framework.Artifact; +import org.apache.hive.tez.tools.debug.framework.ArtifactSource; +import org.apache.hive.tez.tools.debug.framework.Params; + +public class DummyArtifacts implements ArtifactSource { + + @Override + public List getArtifacts(Params params) { + return Collections.emptyList(); + } + + @Override + public void updateParams(Params params, Artifact artifact, Path path) { + } + + @Override + public boolean hasRequiredParams(Params params) { + return false; + } +} diff --git llap-debug/src/main/java/org/apache/hive/tez/tools/debug/source/HiveATSArtifacts.java llap-debug/src/main/java/org/apache/hive/tez/tools/debug/source/HiveATSArtifacts.java new file mode 100644 index 0000000000..627a6ceae7 --- /dev/null +++ llap-debug/src/main/java/org/apache/hive/tez/tools/debug/source/HiveATSArtifacts.java @@ -0,0 +1,102 @@ +package org.apache.hive.tez.tools.debug.source; + +import java.io.IOException; +import java.io.InputStream; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.List; + +import org.apache.hive.tez.tools.debug.ATSArtifactHelper; +import org.apache.hive.tez.tools.debug.ATSArtifactHelper.ATSEvent; +import org.apache.hive.tez.tools.debug.ATSArtifactHelper.ATSLog; +import org.apache.hive.tez.tools.debug.framework.Artifact; +import org.apache.hive.tez.tools.debug.framework.ArtifactDownloadException; +import org.apache.hive.tez.tools.debug.framework.ArtifactSource; +import org.apache.hive.tez.tools.debug.framework.Params; +import org.codehaus.jackson.JsonNode; +import org.codehaus.jackson.map.DeserializationConfig.Feature; +import org.codehaus.jackson.map.ObjectMapper; + +import com.google.common.collect.ImmutableList; +import com.google.inject.Inject; + +public class HiveATSArtifacts implements ArtifactSource { + + private final ATSArtifactHelper helper; + private final ObjectMapper mapper; + + @Inject + public HiveATSArtifacts(ATSArtifactHelper helper) { + this.helper = helper; + this.mapper = new ObjectMapper(); + mapper.configure(Feature.FAIL_ON_UNKNOWN_PROPERTIES, false); + mapper.configure(Feature.ACCEPT_SINGLE_VALUE_AS_ARRAY, true); + } + + @Override + public List getArtifacts(Params params) { + return ImmutableList.of(helper.getEntityArtifact("HIVE_QUERY", "HIVE_QUERY_ID", + params.getHiveQueryId())); + } + + @Override + public void updateParams(Params params, Artifact artifact, Path path) + throws ArtifactDownloadException { + if (artifact.getName().equals("HIVE_QUERY")) { + InputStream stream; + JsonNode node; + try { + stream = Files.newInputStream(path); + node = mapper.readTree(stream); + } catch (IOException e) { + throw new ArtifactDownloadException(e); + } + if (node == null) { + return; + } + if (params.getAppType() == null) { + String appType = node.path("primaryfilters").path("executionmode").path(0).asText(); + if (appType != null && !appType.isEmpty()) { + params.setAppType(appType); + } + } + JsonNode other = node.get("otherinfo"); + if (other == null) { + return; + } + // Get and update dag id/hive query id. + if (params.getTezAmAppId() == null) { + String appId = other.path("APP_ID").asText(); + if (appId != null && !appId.isEmpty()) { + params.setTezAmAppId(appId); + } + } + if (params.getTezDagId() == null) { + String dagId = other.path("DAG_ID").asText(); + if (dagId != null && !dagId.isEmpty()) { + params.setTezDagId(dagId); + } + } + ATSLog log; + try { + log = mapper.treeToValue(node, ATSLog.class); + } catch (IOException e) { + throw new ArtifactDownloadException(e); + } + for (ATSEvent event : log.events) { + if (event.eventtype != null) { + if (event.eventtype.equals("QUERY_SUBMITTED")) { + params.updateStartTime(event.timestamp); + } else if (event.eventtype.equals("QUERY_COMPLETED")) { + params.updateEndTime(event.timestamp); + } + } + } + } + } + + @Override + public boolean hasRequiredParams(Params params) { + return params.getHiveQueryId() != null; + } +} diff --git llap-debug/src/main/java/org/apache/hive/tez/tools/debug/source/LlapDeamonLogsArtifacts.java llap-debug/src/main/java/org/apache/hive/tez/tools/debug/source/LlapDeamonLogsArtifacts.java new file mode 100644 index 0000000000..377d29de00 --- /dev/null +++ llap-debug/src/main/java/org/apache/hive/tez/tools/debug/source/LlapDeamonLogsArtifacts.java @@ -0,0 +1,39 @@ +package org.apache.hive.tez.tools.debug.source; + +import java.nio.file.Path; +import java.util.List; + +import org.apache.hive.tez.tools.debug.AMArtifactsHelper; +import org.apache.hive.tez.tools.debug.framework.Artifact; +import org.apache.hive.tez.tools.debug.framework.ArtifactDownloadException; +import org.apache.hive.tez.tools.debug.framework.ArtifactSource; +import org.apache.hive.tez.tools.debug.framework.Params; + +import com.google.inject.Inject; + +public class LlapDeamonLogsArtifacts implements ArtifactSource { + + private final AMArtifactsHelper helper; + + @Inject + public LlapDeamonLogsArtifacts(AMArtifactsHelper helper) { + this.helper = helper; + } + + @Override + public boolean hasRequiredParams(Params params) { + return params.getAppType() != null && params.getAppType().equals("LLAP") && + params.getTezTaskLogs().isFinishedLogs(); + } + + @Override + public List getArtifacts(Params params) { + return params.getTezTaskLogs().getLogArtifacts(helper, "LLAP/LOGS"); + } + + @Override + public void updateParams(Params params, Artifact artifact, Path path) + throws ArtifactDownloadException { + } + +} diff --git llap-debug/src/main/java/org/apache/hive/tez/tools/debug/source/LlapDeamonLogsListArtifacts.java llap-debug/src/main/java/org/apache/hive/tez/tools/debug/source/LlapDeamonLogsListArtifacts.java new file mode 100644 index 0000000000..73ae692edd --- /dev/null +++ llap-debug/src/main/java/org/apache/hive/tez/tools/debug/source/LlapDeamonLogsListArtifacts.java @@ -0,0 +1,130 @@ +package org.apache.hive.tez.tools.debug.source; + +import java.io.IOException; +import java.nio.file.Path; +import java.util.Calendar; +import java.util.Iterator; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.apache.hive.tez.tools.debug.AMArtifactsHelper; +import org.apache.hive.tez.tools.debug.framework.Artifact; +import org.apache.hive.tez.tools.debug.framework.ArtifactDownloadException; +import org.apache.hive.tez.tools.debug.framework.ArtifactSource; +import org.apache.hive.tez.tools.debug.framework.Params; +import org.apache.hive.tez.tools.debug.framework.Params.ContainerLogInfo; +import org.apache.hive.tez.tools.debug.framework.Params.ContainerLogsInfo; + +import com.google.inject.Inject; + +public class LlapDeamonLogsListArtifacts implements ArtifactSource { + + private final AMArtifactsHelper helper; + + @Inject + public LlapDeamonLogsListArtifacts(AMArtifactsHelper helper) { + this.helper = helper; + } + + @Override + public boolean hasRequiredParams(Params params) { + return params.getAppType() != null && params.getAppType().equals("LLAP") && + params.getTezTaskLogs().isFinishedContainers(); + } + + @Override + public List getArtifacts(Params params) { + return params.getTezTaskLogs().getLogListArtifacts(helper, "LLAP/LOGS"); + } + + @Override + public void updateParams(Params params, Artifact artifact, Path path) + throws ArtifactDownloadException { + List logsInfoList; + try { + logsInfoList = helper.parseContainerLogs(path); + } catch (IOException e) { + throw new ArtifactDownloadException(e); + } + if (logsInfoList != null) { + for (ContainerLogsInfo logsInfo : logsInfoList) { + filterLogs(logsInfo.containerLogInfo, params); + params.getTezTaskLogs().addLog( + logsInfo.nodeId, logsInfo.containerId, logsInfo.containerLogInfo); + } + // This is not correct, but we have no way to tell all the logs have downloaded + params.getTezTaskLogs().finishLogs(); + } + } + + private void filterLogs(List containerLogInfo, Params params) { + String hiveQueryId = params.getHiveQueryId(); + Iterator iter = containerLogInfo.iterator(); + while (iter.hasNext()) { + String fileName = iter.next().fileName; + if (fileName.startsWith("llap-daemon")) { + long startTime = getLlapLogsStartTime(fileName); + // Hourly rotation. + if (startTime > 0 && !params.shouldIncludeArtifact(startTime, startTime + 60 * 60 * 1000)) { + iter.remove(); + } + } else if (fileName.startsWith("llapdaemon_history")) { + long startTime = getLlapHistoryStartTime(fileName); + // Daily rotation. + long endTime = startTime + 24 * 60 * 60 * 1000; + if (startTime > 0 && !params.shouldIncludeArtifact(startTime, endTime)) { + iter.remove(); + } + } else if (!fileName.startsWith(hiveQueryId) && !fileName.startsWith("gc.log")) { + iter.remove(); + } + } + } + + // llapdeamon-history.log_2017-11-25_1.done + private static final Pattern llapDaemonHistoryLogPattern = + Pattern.compile("log_(\\d+)-(\\d+)-(\\d+)_\\d+"); + private long getLlapHistoryStartTime(String fileName) { + long startTime = 0; + Matcher matcher = llapDaemonHistoryLogPattern.matcher(fileName); + if (matcher.find()) { + int year = Integer.parseInt(matcher.group(1)); + int month = Integer.parseInt(matcher.group(2)); + int date = Integer.parseInt(matcher.group(3)); + startTime = new Calendar.Builder().setDate(year, month - 1, date).build() + .getTimeInMillis(); + } else if (fileName.endsWith(".log")) { + Calendar cal = Calendar.getInstance(); + cal.set(Calendar.HOUR_OF_DAY, 0); + cal.set(Calendar.MINUTE, 0); + cal.set(Calendar.SECOND, 0); + cal.set(Calendar.MILLISECOND, 0); + startTime = cal.getTimeInMillis(); + } + return startTime; + } + + // LLAP Deamon log: llap-deamon--.log_2017-11-25-00_1.done + private static final Pattern llapDaemonLogPattern = + Pattern.compile("log_(\\d+)-(\\d+)-(\\d+)-(\\d+)_\\d+"); + private long getLlapLogsStartTime(String fileName) { + long startTime = 0; + Matcher matcher = llapDaemonLogPattern.matcher(fileName); + if (matcher.find()) { + int year = Integer.parseInt(matcher.group(1)); + int month = Integer.parseInt(matcher.group(2)); + int date = Integer.parseInt(matcher.group(3)); + int hour = Integer.parseInt(matcher.group(4)); + startTime = new Calendar.Builder().setDate(year, month - 1, date) + .setTimeOfDay(hour, 0, 0).build().getTimeInMillis(); + } else if (fileName.endsWith(".log")) { + Calendar cal = Calendar.getInstance(); + cal.set(Calendar.MINUTE, 0); + cal.set(Calendar.SECOND, 0); + cal.set(Calendar.MILLISECOND, 0); + startTime = cal.getTimeInMillis(); + } + return startTime; + } +} diff --git llap-debug/src/main/java/org/apache/hive/tez/tools/debug/source/SliderAMInfoArtifacts.java llap-debug/src/main/java/org/apache/hive/tez/tools/debug/source/SliderAMInfoArtifacts.java new file mode 100644 index 0000000000..36a1ff0251 --- /dev/null +++ llap-debug/src/main/java/org/apache/hive/tez/tools/debug/source/SliderAMInfoArtifacts.java @@ -0,0 +1,30 @@ +package org.apache.hive.tez.tools.debug.source; + +import org.apache.hive.tez.tools.debug.AMArtifactsHelper; +import org.apache.hive.tez.tools.debug.framework.Params; +import org.apache.hive.tez.tools.debug.framework.Params.AppLogs; + +import com.google.inject.Inject; + +public class SliderAMInfoArtifacts extends AMInfoArtifacts { + + @Inject + public SliderAMInfoArtifacts(AMArtifactsHelper helper) { + super(helper); + } + + @Override + public String getArtifactName() { + return "SLIDER_AM/INFO"; + } + + @Override + public String getAmId(Params params) { + return params.getSliderAppId(); + } + + @Override + public AppLogs getAMAppLogs(Params params) { + return params.getSliderAmLogs(); + } +} diff --git llap-debug/src/main/java/org/apache/hive/tez/tools/debug/source/SliderAMLogsArtifacts.java llap-debug/src/main/java/org/apache/hive/tez/tools/debug/source/SliderAMLogsArtifacts.java new file mode 100644 index 0000000000..28ac236e63 --- /dev/null +++ llap-debug/src/main/java/org/apache/hive/tez/tools/debug/source/SliderAMLogsArtifacts.java @@ -0,0 +1,37 @@ +package org.apache.hive.tez.tools.debug.source; + +import java.nio.file.Path; +import java.util.List; + +import org.apache.hive.tez.tools.debug.AMArtifactsHelper; +import org.apache.hive.tez.tools.debug.framework.Artifact; +import org.apache.hive.tez.tools.debug.framework.ArtifactDownloadException; +import org.apache.hive.tez.tools.debug.framework.ArtifactSource; +import org.apache.hive.tez.tools.debug.framework.Params; + +import com.google.inject.Inject; + +public class SliderAMLogsArtifacts implements ArtifactSource { + + private final AMArtifactsHelper helper; + + @Inject + public SliderAMLogsArtifacts(AMArtifactsHelper helper) { + this.helper = helper; + } + + @Override + public boolean hasRequiredParams(Params params) { + return params.getSliderAmLogs().isFinishedLogs(); + } + + @Override + public List getArtifacts(Params params) { + return params.getSliderAmLogs().getLogArtifacts(helper, "SLIDER_AM/LOGS"); + } + + @Override + public void updateParams(Params params, Artifact artifact, Path path) + throws ArtifactDownloadException { + } +} diff --git llap-debug/src/main/java/org/apache/hive/tez/tools/debug/source/SliderAMLogsListArtifacts.java llap-debug/src/main/java/org/apache/hive/tez/tools/debug/source/SliderAMLogsListArtifacts.java new file mode 100644 index 0000000000..8f447c7d5c --- /dev/null +++ llap-debug/src/main/java/org/apache/hive/tez/tools/debug/source/SliderAMLogsListArtifacts.java @@ -0,0 +1,55 @@ +package org.apache.hive.tez.tools.debug.source; + +import java.io.IOException; +import java.nio.file.Path; +import java.util.List; + +import org.apache.hive.tez.tools.debug.AMArtifactsHelper; +import org.apache.hive.tez.tools.debug.framework.Artifact; +import org.apache.hive.tez.tools.debug.framework.ArtifactDownloadException; +import org.apache.hive.tez.tools.debug.framework.ArtifactSource; +import org.apache.hive.tez.tools.debug.framework.Params; +import org.apache.hive.tez.tools.debug.framework.Params.ContainerLogsInfo; + +import com.google.inject.Inject; + +public class SliderAMLogsListArtifacts implements ArtifactSource { + + private final AMArtifactsHelper helper; + + @Inject + public SliderAMLogsListArtifacts(AMArtifactsHelper helper) { + this.helper = helper; + } + + @Override + public boolean hasRequiredParams(Params params) { + return params.getSliderAmLogs().isFinishedContainers(); + } + + @Override + public List getArtifacts(Params params) { + return params.getSliderAmLogs().getLogListArtifacts(helper, "SLIDER_AM/LOGS"); + } + + @Override + public void updateParams(Params params, Artifact artifact, Path path) + throws ArtifactDownloadException { + List logsInfoList; + try { + logsInfoList = helper.parseContainerLogs(path); + } catch (IOException e) { + throw new ArtifactDownloadException(e); + } + if (logsInfoList != null) { + for (ContainerLogsInfo logsInfo : logsInfoList) { + // filterLogs(logsInfo.containerLogInfo, params); + params.getSliderAmLogs().addLog(logsInfo.nodeId, logsInfo.containerId, + logsInfo.containerLogInfo); + } + // This is not correct, but we have no way to tell all the logs have downloaded + params.getSliderAmLogs().finishLogs(); + } + } + +} diff --git llap-debug/src/main/java/org/apache/hive/tez/tools/debug/source/SliderInstanceJmx.java llap-debug/src/main/java/org/apache/hive/tez/tools/debug/source/SliderInstanceJmx.java new file mode 100644 index 0000000000..472295fadb --- /dev/null +++ llap-debug/src/main/java/org/apache/hive/tez/tools/debug/source/SliderInstanceJmx.java @@ -0,0 +1,54 @@ +package org.apache.hive.tez.tools.debug.source; + +import java.net.URISyntaxException; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.List; + +import org.apache.hive.tez.tools.debug.HttpArtifact; +import org.apache.hive.tez.tools.debug.framework.Artifact; +import org.apache.hive.tez.tools.debug.framework.ArtifactDownloadException; +import org.apache.hive.tez.tools.debug.framework.ArtifactSource; +import org.apache.hive.tez.tools.debug.framework.Params; +import org.apache.http.client.HttpClient; +import org.apache.http.client.utils.URIBuilder; + +import com.google.inject.Inject; + +public class SliderInstanceJmx implements ArtifactSource { + + private final HttpClient client; + + @Inject + public SliderInstanceJmx(HttpClient client) { + this.client = client; + } + + @Override + public boolean hasRequiredParams(Params params) { + return params.getSliderInstanceUrls() != null; + } + + @Override + public List getArtifacts(Params params) { + List artifacts = new ArrayList<>(); + for (String url : params.getSliderInstanceUrls()) { + try { + URIBuilder builder = new URIBuilder(url); + builder.setPath(builder.getPath() + "/jmx"); + artifacts.add(new HttpArtifact(client, + "SLIDER_AM/" + builder.getHost() + ":" + builder.getPort() + "/jmx", + builder.build().toString(), false)); + } catch (URISyntaxException e) { + // Return this to user. + e.printStackTrace(); + } + } + return artifacts; + } + + @Override + public void updateParams(Params params, Artifact artifact, Path path) + throws ArtifactDownloadException { + } +} diff --git llap-debug/src/main/java/org/apache/hive/tez/tools/debug/source/SliderInstanceStack.java llap-debug/src/main/java/org/apache/hive/tez/tools/debug/source/SliderInstanceStack.java new file mode 100644 index 0000000000..72e0316f9b --- /dev/null +++ llap-debug/src/main/java/org/apache/hive/tez/tools/debug/source/SliderInstanceStack.java @@ -0,0 +1,54 @@ +package org.apache.hive.tez.tools.debug.source; + +import java.net.URISyntaxException; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.List; + +import org.apache.hive.tez.tools.debug.HttpArtifact; +import org.apache.hive.tez.tools.debug.framework.Artifact; +import org.apache.hive.tez.tools.debug.framework.ArtifactDownloadException; +import org.apache.hive.tez.tools.debug.framework.ArtifactSource; +import org.apache.hive.tez.tools.debug.framework.Params; +import org.apache.http.client.HttpClient; +import org.apache.http.client.utils.URIBuilder; + +import com.google.inject.Inject; + +public class SliderInstanceStack implements ArtifactSource { + + private final HttpClient client; + + @Inject + public SliderInstanceStack(HttpClient client) { + this.client = client; + } + + @Override + public boolean hasRequiredParams(Params params) { + return params.getSliderInstanceUrls() != null; + } + + @Override + public List getArtifacts(Params params) { + List artifacts = new ArrayList<>(); + for (String url : params.getSliderInstanceUrls()) { + try { + URIBuilder builder = new URIBuilder(url); + builder.setPath(builder.getPath() + "/stacks"); + artifacts.add(new HttpArtifact(client, + "SLIDER_AM/" + builder.getHost() + ":" + builder.getPort() + "/stacks", + builder.build().toString(), false)); + } catch (URISyntaxException e) { + // Return this to user. + e.printStackTrace(); + } + } + return artifacts; + } + + @Override + public void updateParams(Params params, Artifact artifact, Path path) + throws ArtifactDownloadException { + } +} diff --git llap-debug/src/main/java/org/apache/hive/tez/tools/debug/source/SliderStatusArtifacts.java llap-debug/src/main/java/org/apache/hive/tez/tools/debug/source/SliderStatusArtifacts.java new file mode 100644 index 0000000000..9426e202b0 --- /dev/null +++ llap-debug/src/main/java/org/apache/hive/tez/tools/debug/source/SliderStatusArtifacts.java @@ -0,0 +1,114 @@ +package org.apache.hive.tez.tools.debug.source; + +import java.io.IOException; +import java.io.PrintWriter; +import java.io.PushbackReader; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import org.apache.hadoop.hive.llap.cli.LlapStatusOptionsProcessor; +import org.apache.hadoop.hive.llap.cli.LlapStatusServiceDriver; +import org.apache.hadoop.hive.llap.cli.LlapStatusServiceDriver.ExitCode; +import org.apache.hadoop.hive.llap.cli.LlapStatusServiceDriver.LlapStatusCliException; +import org.apache.hive.tez.tools.debug.framework.Artifact; +import org.apache.hive.tez.tools.debug.framework.ArtifactDownloadException; +import org.apache.hive.tez.tools.debug.framework.ArtifactSource; +import org.apache.hive.tez.tools.debug.framework.Params; +import org.codehaus.jackson.JsonNode; +import org.codehaus.jackson.map.ObjectMapper; + +import com.google.inject.Inject; + +public class SliderStatusArtifacts implements ArtifactSource { + + private final ObjectMapper mapper; + + @Inject + public SliderStatusArtifacts() { + this.mapper = new ObjectMapper(); + } + + @Override + public boolean hasRequiredParams(Params params) { + return params.getAppType() != null && params.getAppType().equals("LLAP"); + } + + @Override + public List getArtifacts(Params params) { + return Collections.singletonList(new Artifact() { + @Override + public boolean isTemp() { + return false; + } + + @Override + public String getName() { + return "SLIDER_AM/STATUS"; + } + + @Override + public void downloadInto(Path path) throws ArtifactDownloadException { + // Run the driver and get the status. + LlapStatusServiceDriver serviceDriver = new LlapStatusServiceDriver(); + int ret = serviceDriver.run(new LlapStatusOptionsProcessor.LlapStatusOptions(null), 0); + if (ret == ExitCode.SUCCESS.getInt()) { + try { + PrintWriter writer = new PrintWriter(Files.newBufferedWriter(path)); + serviceDriver.outputJson(writer); + writer.flush(); + writer.close(); + } catch (IOException | LlapStatusCliException e) { + throw new ArtifactDownloadException("Error trying to serialize status.", e); + } + } else { + throw new ArtifactDownloadException("Error trying to fetch status, ret code: " + ret); + } + } + }); + } + + @Override + public void updateParams(Params params, Artifact artifact, Path path) + throws ArtifactDownloadException { + JsonNode tree; + try { + PushbackReader reader = new PushbackReader(Files.newBufferedReader(path)); + for (;;) { + int ch = reader.read(); + if (ch < 0) { + reader.close(); + return; + } + if (ch == '{') { + reader.unread(ch); + break; + } + } + tree = mapper.readTree(reader); + } catch (IOException e) { + throw new ArtifactDownloadException(e); + } + if (tree == null) { + return; + } + String sliderAppId = tree.path("amInfo").path("appId").asText(); + if (!sliderAppId.isEmpty()) { + params.setSliderAppId(sliderAppId); + } + JsonNode instances = tree.path("runningInstances"); + if (instances.isArray()) { + Set inst = new HashSet<>(); + for (int i = 0; i < instances.size(); ++i) { + String nodeUrl = instances.path(i).path("webUrl").asText(); + if (!nodeUrl.isEmpty()) { + inst.add(nodeUrl); + } + } + params.setSliderInstanceUrls(inst); + } + } +} diff --git llap-debug/src/main/java/org/apache/hive/tez/tools/debug/source/TezAMInfoArtifacts.java llap-debug/src/main/java/org/apache/hive/tez/tools/debug/source/TezAMInfoArtifacts.java new file mode 100644 index 0000000000..1800e110db --- /dev/null +++ llap-debug/src/main/java/org/apache/hive/tez/tools/debug/source/TezAMInfoArtifacts.java @@ -0,0 +1,30 @@ +package org.apache.hive.tez.tools.debug.source; + +import org.apache.hive.tez.tools.debug.AMArtifactsHelper; +import org.apache.hive.tez.tools.debug.framework.Params; +import org.apache.hive.tez.tools.debug.framework.Params.AppLogs; + +import com.google.inject.Inject; + +public class TezAMInfoArtifacts extends AMInfoArtifacts { + + @Inject + public TezAMInfoArtifacts(AMArtifactsHelper helper) { + super(helper); + } + + @Override + public String getArtifactName() { + return "TEZ_AM/INFO"; + } + + @Override + public String getAmId(Params params) { + return params.getTezAmAppId(); + } + + @Override + public AppLogs getAMAppLogs(Params params) { + return params.getTezAmLogs(); + } +} diff --git llap-debug/src/main/java/org/apache/hive/tez/tools/debug/source/TezAMLogsArtifacts.java llap-debug/src/main/java/org/apache/hive/tez/tools/debug/source/TezAMLogsArtifacts.java new file mode 100644 index 0000000000..75f8977ab9 --- /dev/null +++ llap-debug/src/main/java/org/apache/hive/tez/tools/debug/source/TezAMLogsArtifacts.java @@ -0,0 +1,37 @@ +package org.apache.hive.tez.tools.debug.source; + +import java.nio.file.Path; +import java.util.List; + +import org.apache.hive.tez.tools.debug.AMArtifactsHelper; +import org.apache.hive.tez.tools.debug.framework.Artifact; +import org.apache.hive.tez.tools.debug.framework.ArtifactDownloadException; +import org.apache.hive.tez.tools.debug.framework.ArtifactSource; +import org.apache.hive.tez.tools.debug.framework.Params; + +import com.google.inject.Inject; + +public class TezAMLogsArtifacts implements ArtifactSource { + + private final AMArtifactsHelper helper; + + @Inject + public TezAMLogsArtifacts(AMArtifactsHelper helper) { + this.helper = helper; + } + + @Override + public boolean hasRequiredParams(Params params) { + return params.getTezAmLogs().isFinishedLogs(); + } + + @Override + public List getArtifacts(Params params) { + return params.getTezAmLogs().getLogArtifacts(helper, "TEZ_AM/LOGS"); + } + + @Override + public void updateParams(Params params, Artifact artifact, Path path) + throws ArtifactDownloadException { + } +} diff --git llap-debug/src/main/java/org/apache/hive/tez/tools/debug/source/TezAMLogsListArtifacts.java llap-debug/src/main/java/org/apache/hive/tez/tools/debug/source/TezAMLogsListArtifacts.java new file mode 100644 index 0000000000..47f8f431db --- /dev/null +++ llap-debug/src/main/java/org/apache/hive/tez/tools/debug/source/TezAMLogsListArtifacts.java @@ -0,0 +1,69 @@ +package org.apache.hive.tez.tools.debug.source; + +import java.io.IOException; +import java.nio.file.Path; +import java.util.Iterator; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.apache.hive.tez.tools.debug.AMArtifactsHelper; +import org.apache.hive.tez.tools.debug.framework.Artifact; +import org.apache.hive.tez.tools.debug.framework.ArtifactDownloadException; +import org.apache.hive.tez.tools.debug.framework.ArtifactSource; +import org.apache.hive.tez.tools.debug.framework.Params; +import org.apache.hive.tez.tools.debug.framework.Params.ContainerLogInfo; +import org.apache.hive.tez.tools.debug.framework.Params.ContainerLogsInfo; + +import com.google.inject.Inject; + +public class TezAMLogsListArtifacts implements ArtifactSource { + + private final AMArtifactsHelper helper; + + @Inject + public TezAMLogsListArtifacts(AMArtifactsHelper helper) { + this.helper = helper; + } + + @Override + public boolean hasRequiredParams(Params params) { + return params.getTezAmLogs().isFinishedContainers(); + } + + @Override + public List getArtifacts(Params params) { + return params.getTezAmLogs().getLogListArtifacts(helper, "TEZ_AM/LOGS"); + } + + @Override + public void updateParams(Params params, Artifact artifact, Path path) + throws ArtifactDownloadException { + List logsInfoList; + try { + logsInfoList = helper.parseContainerLogs(path); + } catch (IOException e) { + throw new ArtifactDownloadException(e); + } + if (logsInfoList != null) { + for (ContainerLogsInfo logsInfo : logsInfoList) { + filterLogs(logsInfo.containerLogInfo, params.getTezDagId()); + params.getTezAmLogs().addLog(logsInfo.nodeId, logsInfo.containerId, + logsInfo.containerLogInfo); + } + // This is not correct, but we have no way to tell all the logs have downloaded + params.getTezAmLogs().finishLogs(); + } + } + + private static Pattern dagPattern = Pattern.compile("dag_\\d+_\\d+_\\d+"); + private void filterLogs(List logs, String dagId) { + Iterator iter = logs.iterator(); + while (iter.hasNext()) { + Matcher matcher = dagPattern.matcher(iter.next().fileName); + if (matcher.find() && !matcher.group().equals(dagId)) { + iter.remove(); + } + } + } +} diff --git llap-debug/src/main/java/org/apache/hive/tez/tools/debug/source/TezATSArtifacts.java llap-debug/src/main/java/org/apache/hive/tez/tools/debug/source/TezATSArtifacts.java new file mode 100644 index 0000000000..97caef0880 --- /dev/null +++ llap-debug/src/main/java/org/apache/hive/tez/tools/debug/source/TezATSArtifacts.java @@ -0,0 +1,144 @@ +package org.apache.hive.tez.tools.debug.source; + +import java.io.IOException; +import java.io.InputStream; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.apache.hive.tez.tools.debug.ATSArtifactHelper; +import org.apache.hive.tez.tools.debug.ATSArtifactHelper.ATSEvent; +import org.apache.hive.tez.tools.debug.ATSArtifactHelper.ATSLog; +import org.apache.hive.tez.tools.debug.framework.Artifact; +import org.apache.hive.tez.tools.debug.framework.ArtifactDownloadException; +import org.apache.hive.tez.tools.debug.framework.ArtifactSource; +import org.apache.hive.tez.tools.debug.framework.Params; +import org.apache.hive.tez.tools.debug.framework.Params.AppLogs; +import org.codehaus.jackson.JsonNode; +import org.codehaus.jackson.map.DeserializationConfig.Feature; +import org.codehaus.jackson.map.ObjectMapper; + +import com.google.common.collect.ImmutableList; +import com.google.inject.Inject; + +public class TezATSArtifacts implements ArtifactSource { + + private final ATSArtifactHelper helper; + private final ObjectMapper mapper; + private static final Pattern logsPattern = Pattern.compile( + "^.*applicationhistory/containers/(.*?)/logs.*\\?nm.id=(.+:[\\d+]+).*$"); + + @Inject + public TezATSArtifacts(ATSArtifactHelper helper) { + this.helper = helper; + this.mapper = new ObjectMapper(); + mapper.configure(Feature.FAIL_ON_UNKNOWN_PROPERTIES, false); + } + + @Override + public List getArtifacts(Params params) { + String dagId = params.getTezDagId(); + return ImmutableList.of( + helper.getEntityArtifact("TEZ_ATS/DAG", "TEZ_DAG_ID", dagId), + helper.getEntityArtifact("TEZ_ATS/DAG_EXTRAINFO", "TEZ_DAG_EXTRA_INFO", dagId), + helper.getChildEntityArtifact("TEZ_ATS/VERTEX", "TEZ_VERTEX_ID", "TEZ_DAG_ID", dagId), + helper.getChildEntityArtifact("TEZ_ATS/TASK", "TEZ_TASK_ID", "TEZ_DAG_ID", dagId), + helper.getChildEntityArtifact("TEZ_ATS/TASK_ATTEMPT", "TEZ_TASK_ATTEMPT_ID", + "TEZ_DAG_ID", dagId)); + } + + @Override + public void updateParams(Params params, Artifact artifact, Path path) + throws ArtifactDownloadException { + try { + if (artifact.getName().equals("TEZ_ATS/DAG")) { + extractDagData(params, path); + } + if (artifact.getName().equals("TEZ_ATS/TASK_ATTEMPT")) { + extractTaskContainers(params, path); + } + } catch (IOException e) { + throw new ArtifactDownloadException(e); + } + } + + private void extractTaskContainers(Params params, Path path) throws IOException { + AppLogs appLogs = params.getTezTaskLogs(); + if (appLogs.isFinishedContainers()) { + return; + } + InputStream stream = Files.newInputStream(path); + JsonNode node = mapper.readTree(stream); + if (node == null || !node.isObject()) { + return; + } + node = node.get("entities"); + if (node == null || !node.isArray()) { + return; + } + for (int i = 0; i < node.size(); ++i) { + JsonNode entity = node.get(i).path("otherinfo"); + // TODO(hjp): Check if there is inProgressLogsURL and try this out. + String logsUrl = entity.path("completedLogsURL").asText(); + if (logsUrl == null || logsUrl.isEmpty()) { + continue; + } + Matcher matcher = logsPattern.matcher(logsUrl); + if (matcher.matches()) { + String containerId = matcher.group(1); + String nodeId = matcher.group(2); + appLogs.addContainer(nodeId, containerId); + } else { + String containerId = entity.path("containerId").asText(); + String nodeId = entity.path("nodeId").asText(); + if (!containerId.isEmpty() && !nodeId.isEmpty()) { + appLogs.addContainer(nodeId, containerId); + } + } + } + appLogs.finishContainers(); + } + + private void extractDagData(Params params, Path path) throws IOException { + InputStream stream = Files.newInputStream(path); + JsonNode node = mapper.readTree(stream); + if (node == null) { + return; + } + JsonNode other = node.get("otherinfo"); + if (other == null) { + return; + } + // Get and update dag id/hive query id. + if (params.getTezAmAppId() == null) { + String appId = other.path("applicationId").asText(); + if (appId != null && !appId.isEmpty()) { + params.setTezAmAppId(appId); + } + } + if (params.getHiveQueryId() == null) { + String callerType = other.path("callerType").asText(); + String callerId = other.path("callerId").asText(); + if (!callerType.isEmpty() && !callerId.isEmpty() && callerType.equals("HIVE_QUERY_ID")) { + params.setHiveQueryId(callerId); + } + } + ATSLog log = mapper.treeToValue(node, ATSLog.class); + for (ATSEvent event : log.events) { + if (event.eventtype != null) { + if (event.eventtype.equals("DAG_SUBMITTED")) { + params.updateStartTime(event.timestamp); + } else if (event.eventtype.equals("DAG_FINISHED")) { + params.updateEndTime(event.timestamp); + } + } + } + } + + @Override + public boolean hasRequiredParams(Params params) { + return params.getTezDagId() != null; + } +} diff --git llap-debug/src/main/java/org/apache/hive/tez/tools/debug/source/TezTasksLogsArtifacts.java llap-debug/src/main/java/org/apache/hive/tez/tools/debug/source/TezTasksLogsArtifacts.java new file mode 100644 index 0000000000..eeee017456 --- /dev/null +++ llap-debug/src/main/java/org/apache/hive/tez/tools/debug/source/TezTasksLogsArtifacts.java @@ -0,0 +1,39 @@ +package org.apache.hive.tez.tools.debug.source; + +import java.nio.file.Path; +import java.util.List; + +import org.apache.hive.tez.tools.debug.AMArtifactsHelper; +import org.apache.hive.tez.tools.debug.framework.Artifact; +import org.apache.hive.tez.tools.debug.framework.ArtifactDownloadException; +import org.apache.hive.tez.tools.debug.framework.ArtifactSource; +import org.apache.hive.tez.tools.debug.framework.Params; + +import com.google.inject.Inject; + +public class TezTasksLogsArtifacts implements ArtifactSource { + + private final AMArtifactsHelper helper; + + @Inject + public TezTasksLogsArtifacts(AMArtifactsHelper helper) { + this.helper = helper; + } + + @Override + public boolean hasRequiredParams(Params params) { + return params.getAppType() != null && params.getAppType().equals("TEZ") && + params.getTezTaskLogs().isFinishedLogs(); + } + + @Override + public List getArtifacts(Params params) { + return params.getTezTaskLogs().getLogArtifacts(helper, "TEZ_TASKS/LOGS"); + } + + @Override + public void updateParams(Params params, Artifact artifact, Path path) + throws ArtifactDownloadException { + } + +} diff --git llap-debug/src/main/java/org/apache/hive/tez/tools/debug/source/TezTasksLogsListArtifacts.java llap-debug/src/main/java/org/apache/hive/tez/tools/debug/source/TezTasksLogsListArtifacts.java new file mode 100644 index 0000000000..dbc44ee7bc --- /dev/null +++ llap-debug/src/main/java/org/apache/hive/tez/tools/debug/source/TezTasksLogsListArtifacts.java @@ -0,0 +1,68 @@ +package org.apache.hive.tez.tools.debug.source; + +import java.io.IOException; +import java.nio.file.Path; +import java.util.Iterator; +import java.util.List; + +import org.apache.hive.tez.tools.debug.AMArtifactsHelper; +import org.apache.hive.tez.tools.debug.framework.Artifact; +import org.apache.hive.tez.tools.debug.framework.ArtifactDownloadException; +import org.apache.hive.tez.tools.debug.framework.ArtifactSource; +import org.apache.hive.tez.tools.debug.framework.Params; +import org.apache.hive.tez.tools.debug.framework.Params.ContainerLogInfo; +import org.apache.hive.tez.tools.debug.framework.Params.ContainerLogsInfo; + +import com.google.inject.Inject; + +public class TezTasksLogsListArtifacts implements ArtifactSource { + private final AMArtifactsHelper helper; + + @Inject + public TezTasksLogsListArtifacts(AMArtifactsHelper helper) { + this.helper = helper; + } + + @Override + public boolean hasRequiredParams(Params params) { + return params.getAppType() != null && params.getAppType().equals("TEZ") && + params.getTezTaskLogs().isFinishedContainers(); + } + + @Override + public List getArtifacts(Params params) { + return params.getTezTaskLogs().getLogListArtifacts(helper, "TEZ_TASKS/LOGS"); + } + + @Override + public void updateParams(Params params, Artifact artifact, Path path) + throws ArtifactDownloadException { + List logsInfoList; + try { + logsInfoList = helper.parseContainerLogs(path); + } catch (IOException e) { + throw new ArtifactDownloadException(e); + } + if (logsInfoList != null) { + for (ContainerLogsInfo logsInfo : logsInfoList) { + filterLogs(logsInfo.containerLogInfo, params); + params.getTezTaskLogs().addLog( + logsInfo.nodeId, logsInfo.containerId, logsInfo.containerLogInfo); + } + // This is not correct, but we have no way to tell all the logs have downloaded + params.getTezTaskLogs().finishLogs(); + } + } + + private void filterLogs(List containerLogInfo, Params params) { + String syslogPrefix = "syslog_attempt_" + params.getTezDagId().substring(4) + "_"; + Iterator iter = containerLogInfo.iterator(); + while (iter.hasNext()) { + String fileName = iter.next().fileName; + if (fileName.startsWith("syslog_attempt_") && !fileName.startsWith(syslogPrefix)) { + iter.remove(); + } + } + } + +} diff --git llap-debug/src/test/java/org/apache/hive/tez/tools/debug/framework/TestArtifactAggregator.java llap-debug/src/test/java/org/apache/hive/tez/tools/debug/framework/TestArtifactAggregator.java new file mode 100644 index 0000000000..ea2451d7c3 --- /dev/null +++ llap-debug/src/test/java/org/apache/hive/tez/tools/debug/framework/TestArtifactAggregator.java @@ -0,0 +1,254 @@ +package org.apache.hive.tez.tools.debug.framework; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; + +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.StandardOpenOption; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.zip.ZipEntry; +import java.util.zip.ZipFile; + +import org.apache.commons.io.IOUtils; +import org.apache.hadoop.conf.Configuration; +import org.codehaus.jackson.JsonNode; +import org.codehaus.jackson.map.ObjectMapper; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +import com.google.inject.Injector; + +public class TestArtifactAggregator { + private ExecutorService service; + private Configuration conf; + private Params params; + private File file; + private ZipFile zipFile; + + @Before + public void setup() throws Exception { + service = Executors.newSingleThreadExecutor(); + conf = new Configuration(false); + params = new Params(); + file = File.createTempFile("test-tmp", ".zip"); + file.delete(); + } + + @After + public void teardown() throws Exception { + service.shutdownNow(); + if (zipFile != null) { + zipFile.close(); + } + if (file != null) { + file.delete(); + } + } + + private void run(ArtifactSource ... sources) throws Exception { + ArrayList creators = new ArrayList<>(sources.length); + for (ArtifactSource source : sources) { + creators.add(new TestSourceCreator<>(source)); + } + try (ArtifactAggregator aggregator = new ArtifactAggregator(conf, service, params, + file.getAbsolutePath(), creators)) { + aggregator.aggregate(); + } + zipFile = new ZipFile(file); + } + + @Test + public void testEmptySources() throws Exception { + run(); + assertNotNull(zipFile); + assertEquals(0, zipFile.size()); + } + + @Test + public void testSingleSourceSingleArtifactSuccess() throws Exception { + TestArtifactSource source = new TestArtifactSource() + .setRequiredParams(true) + .addArtifact(new TestArtifact("Artifact1", "Content1", false)); + run(source); + assertEquals(1, zipFile.size()); + assertEquals("Content1", readEntry("Artifact1")); + } + + @Test + public void testSingleSourceSingleArtifactFailure() throws Exception { + TestArtifactSource source = new TestArtifactSource() + .setRequiredParams(true) + .addArtifact(new TestArtifact("Test1", new ArtifactDownloadException("Err1"), false)); + run(source); + assertEquals(1, zipFile.size()); + JsonNode node = new ObjectMapper().readTree(readEntry("ERRORS")); + assertError(node, "Test1", "Err1"); + } + + @Test + public void testSingleSourceMulipleArtifactsSuccess() throws Exception { + TestArtifactSource source = new TestArtifactSource() + .setRequiredParams(true) + .addArtifact(new TestArtifact("Test1", "Content1", false)) + .addArtifact(new TestArtifact("Test2", "Content2", false)); + run(source); + assertEquals(2, zipFile.size()); + assertEquals("Content1", readEntry("Test1")); + assertEquals("Content2", readEntry("Test2")); + } + + @Test + public void testMultipleSourceChainingSuccess() throws Exception { + TestArtifactSource source2 = new TestArtifactSource() + .addArtifact(new TestArtifact("Test2", "Content2", false)); + TestArtifactSource source1 = new TestArtifactSource() + .setRequiredParams(true) + .addArtifact(new TestArtifact("Test1", "Content1", false)) + .addSource(source2); + run(source1, source2); + assertEquals(2, zipFile.size()); + assertEquals("Content1", readEntry("Test1")); + assertEquals("Content2", readEntry("Test2")); + } + + @Test + public void testAll() throws Exception { + TestArtifactSource source4 = new TestArtifactSource() + .addArtifact(new TestArtifact("Test7", "Content7", false)); + TestArtifactSource source3 = new TestArtifactSource() + .addArtifact(new TestArtifact("Test6", new ArtifactDownloadException("Err6"), false)) + .addSource(source4); + TestArtifactSource source2 = new TestArtifactSource() + .addArtifact(new TestArtifact("Test5", "Content5", false)); + TestArtifactSource source1 = new TestArtifactSource() + .setRequiredParams(true) + .addArtifact(new TestArtifact("Test1", "Content1", false)) + .addArtifact(new TestArtifact("Test2", "Content2", true)) + .addArtifact(new TestArtifact("Test3", new ArtifactDownloadException("Err3"), false)) + .addArtifact(new TestArtifact("Test4", new ArtifactDownloadException("Err4"), true)) + .addSource(source2) + .addSource(source3); + run(source4, source1, source2, source3); + + assertEquals(3, zipFile.size()); + JsonNode node = new ObjectMapper().readTree(readEntry("ERRORS")); + + assertEquals("Content1", readEntry("Test1")); + assertNull(zipFile.getEntry("Test2")); + assertError(node, "Test3", "Err3"); + assertError(node, "Test4", "Err4"); + assertEquals("Content5", readEntry("Test5")); + assertError(node, "Test6", "Err6"); + assertNull(zipFile.getEntry("Test7")); + } + + private void assertError(JsonNode node, String name, String err) { + assertTrue(node.path(name).asText().contains(err)); + } + + private String readEntry(String entryName) throws IOException { + ZipEntry entry = zipFile.getEntry(entryName); + assertNotNull(entry); + byte[] buffer = new byte[(int) entry.getSize()]; + IOUtils.readFully(zipFile.getInputStream(entry), buffer); + return new String(buffer); + } + + private static class TestSourceCreator + implements ArtifactSourceCreator { + final ArtifactSource source; + + TestSourceCreator(ArtifactSource source) { + this.source = source; + } + + @Override + public ArtifactSource getSource(Injector injector) { + assertNotNull(injector); + return source; + } + } + + private static class TestArtifact implements Artifact { + private boolean isTemp; + private Object content; + private String name; + + TestArtifact(String name, Object content, boolean isTemp) { + this.name = name; + this.content = content; + this.isTemp = isTemp; + } + + @Override + public String getName() { + return name; + } + + @Override + public void downloadInto(Path path) throws ArtifactDownloadException { + if (content instanceof ArtifactDownloadException) { + throw (ArtifactDownloadException)content; + } + try { + Files.write(path, content.toString().getBytes(), StandardOpenOption.CREATE_NEW, + StandardOpenOption.WRITE); + } catch (IOException e) { + throw new ArtifactDownloadException(e); + } + } + + @Override + public boolean isTemp() { + return isTemp; + } + } + + private static class TestArtifactSource implements ArtifactSource { + private boolean hasRequiredParams = false; + private List artifacts = new ArrayList<>(); + private List sources = new ArrayList<>(); + + public TestArtifactSource setRequiredParams(boolean val) { + hasRequiredParams = val; + return this; + } + + public TestArtifactSource addArtifact(Artifact artifact) { + artifacts.add(artifact); + return this; + } + + public TestArtifactSource addSource(TestArtifactSource source) { + sources.add(source); + return this; + } + + @Override + public boolean hasRequiredParams(Params params) { + return hasRequiredParams; + } + + @Override + public List getArtifacts(Params params) { + return artifacts; + } + + @Override + public void updateParams(Params params, Artifact artifact, Path path) + throws ArtifactDownloadException { + for (TestArtifactSource source : sources) { + source.setRequiredParams(true); + } + } + } +} diff --git llap-debug/src/test/java/org/apache/hive/tez/tools/debug/framework/TestArtifactSourceType.java llap-debug/src/test/java/org/apache/hive/tez/tools/debug/framework/TestArtifactSourceType.java new file mode 100644 index 0000000000..e82a0c2d00 --- /dev/null +++ llap-debug/src/test/java/org/apache/hive/tez/tools/debug/framework/TestArtifactSourceType.java @@ -0,0 +1,33 @@ +package org.apache.hive.tez.tools.debug.framework; + +import static org.junit.Assert.assertNotNull; + +import org.apache.hadoop.conf.Configuration; +import org.apache.http.client.HttpClient; +import org.apache.http.impl.client.CloseableHttpClient; +import org.apache.http.impl.client.HttpClients; +import org.junit.Test; + +import com.google.inject.Binder; +import com.google.inject.Guice; +import com.google.inject.Injector; +import com.google.inject.Module; + +public class TestArtifactSourceType { + + @Test + public void testCreateAllTypes() { + CloseableHttpClient httpClient = HttpClients.createDefault(); + Configuration conf = new Configuration(false); + Injector injector = Guice.createInjector(new Module() { + @Override + public void configure(Binder binder) { + binder.bind(HttpClient.class).toInstance(httpClient); + binder.bind(Configuration.class).toInstance(conf); + } + }); + for (ArtifactSourceType type : ArtifactSourceType.values()) { + assertNotNull(type.getSource(injector)); + } + } +} diff --git llap-debug/src/test/java/org/apache/hive/tez/tools/debug/source/TestArtifacts.java llap-debug/src/test/java/org/apache/hive/tez/tools/debug/source/TestArtifacts.java new file mode 100644 index 0000000000..9d8080256d --- /dev/null +++ llap-debug/src/test/java/org/apache/hive/tez/tools/debug/source/TestArtifacts.java @@ -0,0 +1,193 @@ +package org.apache.hive.tez.tools.debug.source; + +import static org.junit.Assert.*; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.StandardCopyOption; +import java.util.ArrayList; +import java.util.List; +import java.util.Set; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hive.tez.tools.debug.framework.Artifact; +import org.apache.hive.tez.tools.debug.framework.Params; +import org.apache.hive.tez.tools.debug.framework.Params.AppLogs; +import org.apache.hive.tez.tools.debug.framework.Params.ContainerLogInfo; +import org.apache.http.client.HttpClient; +import org.apache.http.impl.client.CloseableHttpClient; +import org.apache.http.impl.client.HttpClients; +import org.junit.After; +import org.junit.Test; + +import com.google.common.collect.Lists; +import com.google.common.collect.Sets; +import com.google.inject.Binder; +import com.google.inject.Guice; +import com.google.inject.Injector; +import com.google.inject.Module; + +public class TestArtifacts { + private final Injector injector; + private final List tempFiles; + + public TestArtifacts() { + CloseableHttpClient httpClient = HttpClients.createDefault(); + Configuration conf = new Configuration(false); + injector = Guice.createInjector(new Module() { + @Override + public void configure(Binder binder) { + binder.bind(HttpClient.class).toInstance(httpClient); + binder.bind(Configuration.class).toInstance(conf); + } + }); + tempFiles = new ArrayList<>(); + } + + @After + public void teardown() throws Exception { + for (Path path : tempFiles) { + Files.delete(path); + } + tempFiles.clear(); + } + + private void checkArtifacts(List artifacts, String ... names) { + Set pending = Sets.newHashSet(names); + for (Artifact artifact : artifacts) { + assertTrue("Unexpected artifact: " + artifact.getName(), pending.remove(artifact.getName())); + } + assertTrue("Artifact(s) expected but not found: " + pending, pending.isEmpty()); + } + + private Artifact findArtifact(List artifacts, String name) { + for (Artifact artifact : artifacts) { + if (artifact.getName().equals(name)) { + return artifact; + } + } + return null; + } + + private Path getTempPath(String content) throws IOException { + Path path = Files.createTempFile("Test-tmp", ".tmp"); + tempFiles.add(path); + Files.copy(new ByteArrayInputStream(content.getBytes()), path, + StandardCopyOption.REPLACE_EXISTING); + return path; + } + + @Test + public void testHiveATSArtifacts() throws Exception { + HiveATSArtifacts source = injector.getInstance(HiveATSArtifacts.class); + Params params = new Params(); + + assertFalse(source.hasRequiredParams(params)); + + params.setHiveQueryId("test-hive-queryid"); + assertTrue(source.hasRequiredParams(params)); + + List artifacts = source.getArtifacts(params); + checkArtifacts(artifacts, "HIVE_QUERY"); + + Path path = getTempPath( + "{\"entitytype\":\"HIVE_QUERY_ID\",\"entity\":\"hive-query-test-1\",\"starttime\":" + + "1498800924974,\"primaryfilters\":{\"executionmode\":[\"LLAP\"]},\"otherinfo\":" + + "{\"APP_ID\":\"application_test_1\",\"DAG_ID\":\"dag_test_1_1\"},\"events\":" + + "[{\"timestamp\":1498800925974,\"eventtype\":\"QUERY_SUBMITTED\"},{\"timestamp\":" + + "1498800927974,\"eventtype\":\"QUERY_COMPLETED\"}]}"); + source.updateParams(params, artifacts.get(0), path); + + assertEquals("dag_test_1_1", params.getTezDagId()); + assertEquals("application_test_1", params.getTezAmAppId()); + assertEquals("LLAP", params.getAppType()); + assertEquals(1498800925974L, params.getStartTime()); + assertEquals(1498800927974L, params.getEndTime()); + } + + @Test + public void testTezATSArtifacts() throws Exception { + TezATSArtifacts source = injector.getInstance(TezATSArtifacts.class); + Params params = new Params(); + + assertFalse(source.hasRequiredParams(params)); + + params.setTezDagId("test-tez-dag-id"); + assertTrue(source.hasRequiredParams(params)); + + List artifacts = source.getArtifacts(params); + checkArtifacts(artifacts, "TEZ_ATS/DAG", "TEZ_ATS/DAG_EXTRAINFO", "TEZ_ATS/VERTEX", + "TEZ_ATS/TASK", "TEZ_ATS/TASK_ATTEMPT"); + + Path path = getTempPath( + "{\"entitytype\":\"TEZ_DAG_ID\",\"entity\":\"dag_test_1_1\",\"starttime\":1498800924974," + + "\"otherinfo\":{\"applicationId\":\"app_test_1\",\"callerType\":\"HIVE_QUERY_ID\"," + + "\"callerId\":\"query_test_1\"},\"events\":[{\"timestamp\":1498800925974,\"eventtype\":" + + "\"DAG_SUBMITTED\"},{\"timestamp\":1498800927974,\"eventtype\":\"DAG_FINISHED\"}]}"); + source.updateParams(params, findArtifact(artifacts, "TEZ_ATS/DAG"), path); + + assertEquals("app_test_1", params.getTezAmAppId()); + assertEquals("query_test_1", params.getHiveQueryId()); + assertEquals(1498800925974L, params.getStartTime()); + assertEquals(1498800927974L, params.getEndTime()); + + assertFalse(params.getTezTaskLogs().isFinishedContainers()); + path = getTempPath("{\"entities\":[{\"entitytype\":\"TEZ_TASK_ATTEMPT_ID\"," + + "\"entity\":\"attempt_1_1_2\",\"starttime\":1498800924974,\"otherinfo\":" + + "{\"completedLogsURL\":\"http://host/path/applicationhistory/containers/" + + "container_1/logs\\\\?nm.id=node_1:1234\"}},{\"entitytype\":\"TEZ_TASK_ATTEMPT_" + + "ID\",\"entity\":\"attempt_1_1_3\",\"starttime\":1498800924974,\"otherinfo\":" + + "{\"containerId\":\"container_2\",\"nodeId\":\"node_2:3456\"}}]}"); + source.updateParams(params, findArtifact(artifacts, "TEZ_ATS/TASK_ATTEMPT"), path); + assertTrue(params.getTezTaskLogs().isFinishedContainers()); + } + + @Test + public void testLlapDeamonLogsListArtifacts() throws Exception { + LlapDeamonLogsListArtifacts source = injector.getInstance( + LlapDeamonLogsListArtifacts.class); + Params params = new Params(); + params.setHiveQueryId("hqid-1"); + + assertFalse(source.hasRequiredParams(params)); + params.setAppType("LLAP"); + AppLogs taskLogs = params.getTezTaskLogs(); + taskLogs.addContainer("test-node-id-1:8888", "test-container-id-1"); + taskLogs.addContainer("test-node-id-1:8888", "test-container-id-2"); + taskLogs.addContainer("test-node-id-2:8888", "test-container-id-3"); + taskLogs.finishContainers(); + + assertTrue(source.hasRequiredParams(params)); + + List artifacts = source.getArtifacts(params); + assertEquals(3, artifacts.size()); + + Path path = getTempPath("{\"containerLogsInfo\":{\"containerLogInfo\":[{\"fileName\":\"launch_container.sh\",\"fileSize\":\"4245\",\"lastModifiedTime\":\"Tue Jul 04 05:51:44 +0000 2017\"},{\"fileName\":\"directory.info\",\"fileSize\":\"10843\",\"lastModifiedTime\":\"Tue Jul 04 05:51:44 +0000 2017\"},{\"fileName\":\"slider-agent.out\",\"fileSize\":\"45\",\"lastModifiedTime\":\"Tue Jul 04 05:51:45 +0000 2017\"},{\"fileName\":\"slider-agent.log\",\"fileSize\":\"29998\",\"lastModifiedTime\":\"Tue Jul 04 05:52:02 +0000 2017\"},{\"fileName\":\"command-1.json\",\"fileSize\":\"6974\",\"lastModifiedTime\":\"Tue Jul 04 05:51:56 +0000 2017\"},{\"fileName\":\"output-1.txt\",\"fileSize\":\"1656\",\"lastModifiedTime\":\"Tue Jul 04 05:51:56 +0000 2017\"},{\"fileName\":\"errors-1.txt\",\"fileSize\":\"0\",\"lastModifiedTime\":\"Tue Jul 04 05:51:56 +0000 2017\"},{\"fileName\":\"command-2.json\",\"fileSize\":\"6937\",\"lastModifiedTime\":\"Tue Jul 04 05:52:00 +0000 2017\"},{\"fileName\":\"output-2.txt\",\"fileSize\":\"162\",\"lastModifiedTime\":\"Tue Jul 04 05:52:00 +0000 2017\"},{\"fileName\":\"errors-2.txt\",\"fileSize\":\"0\",\"lastModifiedTime\":\"Tue Jul 04 05:52:00 +0000 2017\"},{\"fileName\":\"shell.out\",\"fileSize\":\"2999\",\"lastModifiedTime\":\"Tue Jul 04 05:52:00 +0000 2017\"},{\"fileName\":\"llap-daemon-hive-ctr-e133-1493418528701-155152-01-000004.hwx.site.out\",\"fileSize\":\"14021\",\"lastModifiedTime\":\"Tue Jul 04 06:10:26 +0000 2017\"},{\"fileName\":\"status_command_stdout.txt\",\"fileSize\":\"0\",\"lastModifiedTime\":\"Tue Jul 04 06:41:27 +0000 2017\"},{\"fileName\":\"status_command_stderr.txt\",\"fileSize\":\"0\",\"lastModifiedTime\":\"Tue Jul 04 06:41:27 +0000 2017\"},{\"fileName\":\"gc.log.0.current\",\"fileSize\":\"8559\",\"lastModifiedTime\":\"Tue Jul 04 06:09:46 +0000 2017\"},{\"fileName\":\"llapdaemon_history.log\",\"fileSize\":\"2132\",\"lastModifiedTime\":\"Tue Jul 04 06:10:26 +0000 2017\"},{\"fileName\":\"llap-daemon-hive-ctr-e133-1493418528701-155152-01-000004.hwx.site.log_2017-07-04-05_1.done\",\"fileSize\":\"41968\",\"lastModifiedTime\":\"Tue Jul 04 05:53:13 +0000 2017\"},{\"fileName\":\"llap-daemon-hive-ctr-e133-1493418528701-155152-01-000004.hwx.site.log\",\"fileSize\":\"16357\",\"lastModifiedTime\":\"Tue Jul 04 06:10:26 +0000 2017\"},{\"fileName\":\"hive_20170704060941_28dd6d01-2d6c-46f8-964b-e57f26e720ff-dag_1499147207464_0004_1.log.done\",\"fileSize\":\"16967\",\"lastModifiedTime\":\"Tue Jul 04 06:09:47 +0000 2017\"},{\"fileName\":\"hive_20170704061024_bf72f7f6-5684-4e16-95ff-8725e7eba4cf-dag_1499147207464_0004_2.log.done\",\"fileSize\":\"34530\",\"lastModifiedTime\":\"Tue Jul 04 06:10:26 +0000 2017\"},{\"fileName\":\"hive_20170704061024_bf72f7f6-5684-4e16-95ff-8725e7eba4cf-dag_1499147207464_0004_2.log\",\"fileSize\":\"176\",\"lastModifiedTime\":\"Tue Jul 04 06:10:26 +0000 2017\"},{\"fileName\":\"status_command.json\",\"fileSize\":\"2896\",\"lastModifiedTime\":\"Tue Jul 04 06:41:27 +0000 2017\"}],\"logAggregationType\":\"LOCAL\",\"containerId\":\"container_1499147207464_0003_01_000002\",\"nodeId\":\"ctr-e133-1493418528701-155152-01-000004.hwx.site:45454\"}}"); + source.updateParams(params, artifacts.get(0), path); + AppLogs logs = params.getTezTaskLogs(); + assertTrue(logs.isFinishedLogs()); + } + + @Test + public void testLlapDeamonLogsArtifacts() throws Exception { + LlapDeamonLogsArtifacts source = injector.getInstance(LlapDeamonLogsArtifacts.class); + Params params = new Params(); + + assertFalse(source.hasRequiredParams(params)); + params.setAppType("LLAP"); + AppLogs taskLogs = params.getTezTaskLogs(); + taskLogs.addLog("test-node-id-1:8888", "test-container-id-1", + Lists.newArrayList(new ContainerLogInfo("test-1", 10, "01-01-2017"))); + taskLogs.addLog("test-node-id-2:8888", "test-container-id-2", + Lists.newArrayList(new ContainerLogInfo("test-2", 10, "01-01-2017"), + new ContainerLogInfo("test-3", 10, "01-01-2017"))); + taskLogs.finishLogs(); + + assertTrue(source.hasRequiredParams(params)); + + List artifacts = source.getArtifacts(params); + assertEquals(3, artifacts.size()); + } +} diff --git pom.xml pom.xml index 85f1222e65..e9fd9fc26e 100644 --- pom.xml +++ pom.xml @@ -53,6 +53,7 @@ llap-ext-client llap-tez llap-server + llap-debug shims spark-client storage-api @@ -206,6 +207,7 @@ 3.0.0 0.6.0 2.2.4 + 4.1.0 @@ -850,6 +852,11 @@ jamon-runtime ${jamon-runtime.version} + + com.google.inject + guice + ${guice.version} + diff --git service/pom.xml service/pom.xml index b4c0d8f28d..c02fd3bca5 100644 --- service/pom.xml +++ service/pom.xml @@ -64,6 +64,11 @@ + + org.apache.hive + hive-llap-debug + ${project.version} + diff --git service/src/java/org/apache/hive/http/LogDownloadServlet.java service/src/java/org/apache/hive/http/LogDownloadServlet.java new file mode 100644 index 0000000000..64ebae97bf --- /dev/null +++ service/src/java/org/apache/hive/http/LogDownloadServlet.java @@ -0,0 +1,75 @@ +package org.apache.hive.http; + +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.util.Arrays; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.atomic.AtomicLong; +import java.util.regex.Pattern; + +import javax.activation.MimetypesFileTypeMap; +import javax.servlet.http.HttpServlet; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hive.tez.tools.debug.framework.ArtifactAggregator; +import org.apache.hive.tez.tools.debug.framework.ArtifactSourceType; +import org.apache.hive.tez.tools.debug.framework.Params; + +public class LogDownloadServlet extends HttpServlet { + private static final long serialVersionUID = 5333509911685834129L; + + private static final AtomicLong counter = new AtomicLong(); + + private Configuration getConfFromContext() { + Configuration conf = (Configuration)getServletContext().getAttribute( + HttpServer.CONF_CONTEXT_ATTRIBUTE); + assert conf != null; + return conf; + } + + private static final Pattern validId = Pattern.compile("^[0-9a-zA-Z\\-_]+$"); + @Override + public void doGet(HttpServletRequest request, HttpServletResponse response) throws IOException { + String dagId = request.getParameter("dagId"); + String queryId = request.getParameter("queryId"); + if (queryId == null && dagId == null) { + response.sendError(HttpServletResponse.SC_BAD_REQUEST, "Please specifiy dagId or queryId"); + return; + } + String fileName = queryId == null ? dagId : queryId; + if (!validId.matcher(fileName).matches()) { + fileName = "debug_logs"; + } + + // TODO: Support partial source type downloads. + // String[] sourceTypes = request.getParameterValues("sourceType"); + + Params params = new Params(); + params.setTezDagId(dagId); + params.setHiveQueryId(queryId); + + File tmpDir = (File) getServletContext().getAttribute("javax.servlet.context.tmpdir"); + File tmpFile = new File(tmpDir, fileName + "-" + counter.incrementAndGet() + ".zip"); + + // TODO: Use a shared executor service for this with more threads. + ExecutorService service = Executors.newFixedThreadPool(1); + try (ArtifactAggregator aggregator = new ArtifactAggregator(getConfFromContext(), service, + params, tmpFile.getCanonicalPath(), Arrays.asList(ArtifactSourceType.values()))) { + aggregator.aggregate(); + } finally { + service.shutdown(); + } + try { + response.setContentType(MimetypesFileTypeMap.getDefaultFileTypeMap().getContentType(tmpFile)); + response.setHeader("Content-Disposition", "attachment; filename=" + fileName + ".zip"); + Files.copy(Paths.get(tmpFile.getCanonicalPath()), response.getOutputStream()); + } finally { + tmpFile.delete(); + } + } +} diff --git service/src/java/org/apache/hive/service/server/HiveServer2.java service/src/java/org/apache/hive/service/server/HiveServer2.java index e5f449122b..0cd36c6db6 100644 --- service/src/java/org/apache/hive/service/server/HiveServer2.java +++ service/src/java/org/apache/hive/service/server/HiveServer2.java @@ -71,6 +71,7 @@ import org.apache.hive.common.util.ShutdownHookManager; import org.apache.hive.http.HttpServer; import org.apache.hive.http.LlapServlet; +import org.apache.hive.http.LogDownloadServlet; import org.apache.hive.service.CompositeService; import org.apache.hive.service.ServiceException; import org.apache.hive.service.cli.CLIService; @@ -91,7 +92,6 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Joiner; -import com.google.common.base.Preconditions; /** * HiveServer2. @@ -223,6 +223,7 @@ public void run() { builder.setUseSPNEGO(true); } builder.addServlet("llap", LlapServlet.class); + builder.addServlet("llap_debug_logs", LogDownloadServlet.class); builder.setContextRootRewriteTarget("/hiveserver2.jsp"); webServer = builder.build(); webServer.addServlet("query_page", "/query_page", QueryProfileServlet.class);