diff --git a/testutils/gen-report.py b/testutils/gen-report.py
new file mode 100644
index 0000000..791da5c
--- /dev/null
+++ b/testutils/gen-report.py
@@ -0,0 +1,240 @@
+#!/usr/bin/python
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This script targets python 2 (urllib2, Queue, print statements).
+# Third-party dependencies: BeautifulSoup (v3), xmltodict and ascii_graph.
+from BeautifulSoup import BeautifulSoup
+import urllib2
+import xmltodict
+import json
+import Queue
+from threading import Thread
+from collections import OrderedDict
+import itertools
+from ascii_graph import Pyasciigraph
+import argparse
+import os
+
+# default build number used for the Apache Hive precommit test report
+REPORTS_DIR = "/tmp/slow-test-reports"
+BUILD_NUMBER = 830
+TOP_K = 25
+# parsed reports, appended to by the downloader threads (list.append is
+# atomic under the GIL, so no extra locking is needed here)
+json_dumps = []
+
+# parallel xml report downloader
+class ReportDownloader(Thread):
+    def __init__(self, q):
+        Thread.__init__(self)
+        self.q = q
+
+    def run(self):
+        while True:
+            # get the next report link from the queue
+            link = self.q.get()
+            xmlFile = urllib2.urlopen(link)
+            xmlData = xmlFile.read()
+            d = xmltodict.parse(xmlData, xml_attribs=True)
+            # drop the <properties> element; only the timing data is needed
+            d['testsuite'].pop('properties', None)
+            json_dumps.append(d)
+            self.q.task_done()
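+
+# NOTE: this script assumes surefire-style junit xml reports of roughly
+# this shape (inferred from the attribute lookups below, not a guarantee
+# of the report format):
+#   <testsuite name="..." tests="N" time="S">
+#     <testcase classname="..." name="..." time="S"/>
+#   </testsuite>
+# xmltodict maps attributes to '@'-prefixed keys, so the code reads
+# d['testsuite']['@name'], ['@tests'], ['@time'] and each testcase's
+# '@classname' / '@name' / '@time'.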
+
+def get_links(rootUrl):
+    # scrape the report index page for links to the individual xml reports
+    html_page = urllib2.urlopen(rootUrl)
+    soup = BeautifulSoup(html_page)
+    result = []
+    for link in soup.findAll('a'):
+        href = link.get('href')
+        if href and href.endswith('.xml'):
+            result.append(rootUrl + "/" + href)
+
+    return result
+
+def take(iterable, n=TOP_K):
+    # first n items of an iterable
+    return list(itertools.islice(iterable, 0, n))
+
+def plot_testsuite_time(json_data, top_k=TOP_K, ascii_graph=False, report_file=None):
+    # aggregate execution time per testsuite and report the top-k entries
+    suite_time = {}
+
+    overall_time = 0.0
+    for suite in json_data:
+        name = suite['testsuite']['@name'].rsplit(".", 1)[-1]
+        time = float(suite['testsuite']['@time'].replace(',', ''))
+        overall_time += time
+        if name in suite_time:
+            suite_time[name] = suite_time[name] + time
+        else:
+            suite_time[name] = time
+
+    d_descending = OrderedDict(sorted(suite_time.items(),
+                               key=lambda kv: kv[1], reverse=True))
+
+    gdata = []
+    for k, v in take(d_descending.iteritems(), top_k):
+        gdata.append((k, v))
+
+    print '\nTop ' + str(top_k) + ' testsuites in terms of execution time (in seconds).. [Total time: ' + str(overall_time) + ' seconds]'
+    if ascii_graph:
+        graph = Pyasciigraph()
+        for line in graph.graph('', gdata):
+            print line
+    else:
+        for line in gdata:
+            print line[0] + "\t" + str(line[1])
+
+    if report_file is not None:
+        with open(report_file, "w") as f:
+            f.write('Top ' + str(top_k) + ' testsuites in terms of execution time (in seconds).. [Total time: ' + str(overall_time) + ' seconds]\n')
+            for line in gdata:
+                f.write(line[0] + "\t" + str(line[1]) + "\n")
+
+def plot_testcase_time(json_data, top_k=TOP_K, ascii_graph=False, report_file=None):
+    # aggregate execution time per testcase and report the top-k entries
+    testcase_time = {}
+
+    overall_time = 0.0
+    for suite in json_data:
+        if int(suite['testsuite']['@tests']) > 0:
+            testcases = suite['testsuite']['testcase']
+            # xmltodict yields a dict (not a list) when a suite has one testcase
+            if isinstance(testcases, dict):
+                testcases = [testcases]
+            for t in testcases:
+                if isinstance(t, dict):
+                    name = t['@classname'].rsplit(".", 1)[-1] + "_" + t['@name']
+                    time = float(t['@time'].replace(',', ''))
+                    overall_time += time
+                    if name in testcase_time:
+                        testcase_time[name] = testcase_time[name] + time
+                    else:
+                        testcase_time[name] = time
+        else:
+            print "Empty batch detected for testsuite: " + suite['testsuite']['@name'] + " which took " + suite['testsuite']['@time'] + "s"
+
+    d_descending = OrderedDict(sorted(testcase_time.items(),
+                               key=lambda kv: kv[1], reverse=True))
+
+    gdata = []
+    for k, v in take(d_descending.iteritems(), top_k):
+        gdata.append((k, v))
+
+    print '\nTop ' + str(top_k) + ' testcases in terms of execution time (in seconds).. [Total time: ' + str(overall_time) + ' seconds]'
+    if ascii_graph:
+        graph = Pyasciigraph()
+        for line in graph.graph('', gdata):
+            print line
+    else:
+        for line in gdata:
+            print line[0] + "\t" + str(line[1])
+
+    if report_file is not None:
+        # append so the testsuite section written above is preserved
+        with open(report_file, "a") as f:
+            f.write('\nTop ' + str(top_k) + ' testcases in terms of execution time (in seconds).. [Total time: ' + str(overall_time) + ' seconds]\n')
+            for line in gdata:
+                f.write(line[0] + "\t" + str(line[1]) + "\n")
+
+def get_latest_build_with_report(build_number):
+    # find the highest build number that already has a saved report on disk
+    latest_report = build_number
+    if not os.path.exists(REPORTS_DIR):
+        os.makedirs(REPORTS_DIR)
+    for i in os.listdir(REPORTS_DIR):
+        if i.endswith(".txt"):
+            current_report = int(i.split(".txt")[0])
+            if current_report > latest_report:
+                latest_report = current_report
+
+    return latest_report
+
+def get_pending_report_list(last_report, precommit_url):
+    # probe successive build numbers until the report url returns an error
+    next_report = last_report
+    pending_reports = []
+    done = False
+    while not done:
+        try:
+            urllib2.urlopen(precommit_url % next_report)
+            pending_reports.append(next_report)
+            next_report += 1
+        except urllib2.HTTPError:
+            done = True
+
+    return pending_reports
+
+def print_report(reportUrl, json_dump, top_k, ascii_graph, report_file=None):
+    links = get_links(reportUrl)
+    # Create a queue to communicate with the worker threads
+    q = Queue.Queue()
+    print "\nProcessing " + str(len(links)) + " test xml reports from " + reportUrl + ".."
+    # Create 8 worker threads
+    for x in range(8):
+        worker = ReportDownloader(q)
+        # Setting daemon to True will let the main thread exit even though the workers are blocking
+        worker.daemon = True
+        worker.start()
+
+    # Put the report links into the queue
+    for link in links:
+        q.put(link)
+
+    # Cause the main thread to wait for the queue to finish processing all the tasks
+    q.join()
+
+    # dump the test reports in json format
+    if json_dump:
+        with open('data.json', 'w') as outfile:
+            json.dump(json_dumps, outfile, indent=2)
+
+    # print or plot the top-k tests on the console
+    plot_testsuite_time(json_dumps, top_k, ascii_graph, report_file)
+    plot_testcase_time(json_dumps, top_k, ascii_graph, report_file)
+    # reset the shared list for the next report
+    del json_dumps[:]
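+
+# Example invocations (build numbers below are illustrative):
+#   ./gen-report.py                  # report for the latest build found
+#   ./gen-report.py -b 830 -k 10 -a  # top-10 ascii graph for build 830
+#   ./gen-report.py -l               # also generate every missing report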
+
+def main():
+    parser = argparse.ArgumentParser(description='Program to print a top-k test report for Apache Hive precommit tests')
+    parser.add_argument('-b', action='store', dest='build_number', help='build number of the test run; defaults to the latest apache hive precommit test run')
+    parser.add_argument('-u', action='store', dest='report_url', help='url for the test report')
+    parser.add_argument('-j', action='store_true', default=False, dest='json_dump', help='dump the test reports in json format')
+    parser.add_argument('-k', action='store', dest='top_k', type=int, help='print the top k testsuites and testcases to the console')
+    parser.add_argument('-a', action='store_true', default=False, dest='ascii_graph', help='render the report as an ascii graph')
+    parser.add_argument('-l', action='store_true', default=False, dest='latest_report', help='generate all missing reports up to the latest build number')
+    args = parser.parse_args()
+
+    precommit_url = "http://104.198.109.242/logs/PreCommit-HIVE-Build-%s/test-results/"
+    last_report = get_latest_build_with_report(BUILD_NUMBER)
+    pending_reports = get_pending_report_list(last_report, precommit_url)
+
+    build = last_report
+    if args.build_number is not None:
+        build = args.build_number
+
+    reportUrl = precommit_url % build
+    if args.report_url is not None:
+        reportUrl = args.report_url
+
+    json_dump = args.json_dump
+
+    top_k = TOP_K
+    if args.top_k is not None:
+        top_k = args.top_k
+
+    ascii_graph = args.ascii_graph
+
+    print_report(reportUrl, json_dump, top_k, ascii_graph, REPORTS_DIR + "/" + str(build) + ".txt")
+
+    if args.latest_report:
+        for l in pending_reports:
+            reportUrl = precommit_url % l
+            print_report(reportUrl, json_dump, top_k, ascii_graph, REPORTS_DIR + "/" + str(l) + ".txt")
+
+if __name__ == '__main__':
+    main()