Uploaded image for project: 'Apache Arrow'
  1. Apache Arrow
  2. ARROW-8456

[Release] Add Python script to help curate JIRA issues

    XML Word Printable JSON

Details

    • Improvement
    • Status: Resolved
    • Major
    • Resolution: Fixed
    • None
    • 1.0.0
    • Developer Tools
    • None

    Description

      The following script produces reports like https://gist.github.com/kszucs/9857ef69c92a230ce5a5068551b83ed8

      import re
      import warnings
      from io import StringIO

      import pandas as pd
      import pygit2
      from jira import JIRA
      
      
      class Patch:
          """A commit paired with the JIRA ticket parsed from its title line.

          Attributes:
              commit: the wrapped commit object; only its ``message`` and ``id``
                  attributes are read by this class.
              issue_key: ticket key such as ``ARROW-1234`` or ``PARQUET-12`` when
                  the first line of the commit message starts with one,
                  otherwise ``None``.
              msg: the rest of the title after the ticket key (and an optional
                  colon), stripped of surrounding whitespace; ``''`` when no
                  ticket key was found.
          """

          # Raw string so that the regex escapes are not interpreted as (invalid)
          # string escapes. Matches only at the start of the title line.
          _TITLE_PATTERN = re.compile(
              r"(?P<ticket>((ARROW|PARQUET)-\d+)):?(?P<msg>.*)"
          )

          def __init__(self, commit):
              self.commit = commit
              self.issue_key, self.msg = self._parse(commit.message)

          @staticmethod
          def _first_line(message):
              """Return the first line of ``message`` or ``''`` if it has none."""
              lines = message.splitlines()
              return lines[0] if lines else ''

          def _parse(self, message):
              """Split a commit message title into ``(issue_key, msg)``.

              Returns ``(None, '')`` when the title does not start with an
              ARROW-/PARQUET- ticket key.
              """
              m = self._TITLE_PATTERN.match(self._first_line(message))
              if m is None:
                  return None, ''

              # Drop the blank that usually follows "ARROW-123:" so the text can
              # be used directly as a markdown link label.
              return m.group('ticket'), m.group('msg').strip()

          @property
          def shortmessage(self):
              """Title without the ticket prefix, or the whole title line."""
              if self.msg:
                  return self.msg
              return self._first_line(self.commit.message)

          @property
          def sha(self):
              """Identifier of the wrapped commit (``commit.id``)."""
              return self.commit.id

          @property
          def issue_url(self):
              """Browse URL of the ticket on issues.apache.org."""
              return 'https://issues.apache.org/jira/browse/{}'.format(self.issue_key)

          @property
          def commit_url(self):
              """URL of the commit in the apache/arrow GitHub repository."""
              return 'https://github.com/apache/arrow/commit/{}'.format(self.sha)

          def to_markdown(self):
              """Render the patch as one newline-terminated markdown line.

              With a ticket: ``[KEY](issue url): [title](commit url)``;
              without one only the ``[title](commit url)`` link is produced.
              """
              commit_link = "[{}]({})".format(self.shortmessage, self.commit_url)
              if self.issue_key is None:
                  return commit_link + "\n"
              return "[{}]({}): {}\n".format(
                  self.issue_key, self.issue_url, commit_link
              )
          
          
      # Value passed as ``maxResults`` to ``jira.search_issues`` in
      # ``Release.issues``.
      JIRA_SEARCH_LIMIT = 10000
      # Alternative smaller limit, left disabled (presumably for quick
      # experiments -- confirm before re-enabling):
      # JIRA_SEARCH_LIMIT = 50
      
      
      class Release:
          """Release object for querying issues and commits

          Usage:
              jira = JIRA(
                  {'server': 'https://issues.apache.org/jira'},
                  basic_auth=(user, password)
              )
              repo = pygit2.Repository('path/to/arrow/repo')

              release = Release(jira, repo, '0.15.1', '0.15.0')
              # show the patches in application order
              for patch in release.patches():
                  print(patch.sha)
              # cherry-pick the patches to a branch
              release.apply_patches_to('a-branch')
          """

          def __init__(self, jira, repo, version, previous_version):
              """Store the JIRA client, the repository and the version pair.

              ``version`` is the release being curated, ``previous_version`` the
              release whose tag marks where the commit walk stops. Issue and
              patch lists are fetched lazily and cached on the instance.
              """
              self.jira = jira
              self.repo = repo
              self.version = version
              self.previous_version = previous_version
              self._issues = None
              self._patches = None

          def _tag(self, version):
              """Resolve ``refs/tags/apache-arrow-<version>`` in the repository."""
              return self.repo.revparse_single(f'refs/tags/apache-arrow-{version}')

          def issues(self):
              """Return the ARROW issues whose fixVersion is this release (cached)."""
              # FIXME(kszucs): paginate instead of maxresults
              if self._issues is None:
                  query = f'project=ARROW AND fixVersion={self.version}'
                  self._issues = self.jira.search_issues(
                      query, maxResults=JIRA_SEARCH_LIMIT
                  )
              return self._issues

          def patches(self):
              """Commits on master that are not reachable from the previous tag.

              Each commit is wrapped in a ``Patch``. The walk uses
              ``GIT_SORT_TOPOLOGICAL | GIT_SORT_REVERSE``; the result is cached.
              """
              if self._patches is None:
                  previous_tag = self._tag(self.previous_version)
                  master = self.repo.branches['master']
                  ordering = pygit2.GIT_SORT_TOPOLOGICAL | pygit2.GIT_SORT_REVERSE
                  walker = self.repo.walk(master.target, ordering)
                  # exclude everything that already belongs to the previous release
                  walker.hide(previous_tag.oid)
                  self._patches = [Patch(commit) for commit in walker]

              return self._patches

          def curate(self):
              """Classify patches against the issues assigned to this release.

              Returns:
                  A 4-tuple ``(within, outside, nojira, nopatch)``:
                  within  -- patches whose ticket is assigned to this release
                  outside -- patches whose ticket is not assigned to this release
                  nojira  -- patches without a ticket key in their title
                  nopatch -- keys of release issues no patch refers to, in the
                             order returned by ``issues()``
              """
              release_keys = [issue.key for issue in self.issues()]
              known_keys = set(release_keys)
              # Tracked separately instead of removing from ``known_keys`` so that
              # a second commit for the same ticket is still counted as "within".
              patched_keys = set()

              within, outside, nojira = [], [], []
              for patch in self.patches():
                  key = patch.issue_key
                  if key is None:
                      nojira.append(patch)
                  elif key in known_keys:
                      within.append(patch)
                      patched_keys.add(key)
                  else:
                      outside.append(patch)

              # remaining jira tickets
              nopatch = [key for key in release_keys if key not in patched_keys]

              return within, outside, nojira, nopatch

          def curation_report(self):
              """Build the markdown-flavoured text report of ``curate()``.

              The report lists the issue and patch totals followed by one section
              per category returned by ``curate()``.
              """
              within, outside, nojira, nopatch = self.curate()
              version = self.version

              parts = [
                  'Total number of JIRA tickets assigned to version {}: {}\n'
                  .format(version, len(self.issues())),
                  '\n',
                  'Total number of applied patches since {}: {}\n'
                  .format(self.previous_version, len(self.patches())),
                  '\n\n',
              ]

              patch_sections = [
                  ('Patches with assigned issue in {}:\n'.format(version), within),
                  ('Patches with assigned issue outside of {}:\n'.format(version),
                   outside),
                  ('Patches without assigned issue:\n', nojira),
              ]
              for title, patches in patch_sections:
                  parts.append(title)
                  parts.extend("- {}".format(p.to_markdown()) for p in patches)
                  parts.append('\n\n')

              parts.append('JIRAs in {} without assigned patch:\n'.format(version))
              for issue_key in nopatch:
                  url = 'https://issues.apache.org/jira/browse/{}'.format(issue_key)
                  parts.append("- [{}]({})\n".format(issue_key, url))

              return ''.join(parts)

          def apply_patches_to(self, branch_name):
              """Create ``branch_name`` at the previous tag and re-apply all patches.

              Every patch is applied by merging the commit's tree onto the current
              branch head, using the tree of the commit's first parent as the
              merge ancestor, and committing the result with the original author,
              committer and message.

              Raises:
                  pygit2.GitError: re-raised after the new branch has been deleted.
              """
              repo = self.repo
              previous_tag = self._tag(self.previous_version)
              branch = repo.create_branch(branch_name, previous_tag.get_object())
              try:
                  head = branch.target
                  for patch in self.patches():
                      # patches() yields Patch wrappers; git data lives on .commit
                      commit = patch.commit
                      parent_tree = commit.parents[0].tree
                      index = repo.merge_trees(parent_tree, head, commit.oid)
                      tree_id = index.write_tree(repo)
                      head = repo.create_commit(
                          branch.name,
                          commit.author,
                          commit.committer,
                          commit.message,
                          tree_id,
                          [head]
                      )
              except pygit2.GitError:
                  repo.branches[branch_name].delete()
                  raise
      
      
      import os
      import pygit2
      from jira import JIRA 
      
      # Credentials come from the environment; os.environ.get yields None for
      # a variable that is not set.
      _jira_auth = (
          os.environ.get('APACHE_JIRA_USER'),
          os.environ.get('APACHE_JIRA_PASSWORD'),
      )
      jira = JIRA({'server': 'https://issues.apache.org/jira'}, basic_auth=_jira_auth)

      # The script expects to be run from inside the repository checkout.
      repo = pygit2.Repository('.')

      # Curate 0.17.0 against the commits applied since the 0.16.0 tag.
      release = Release(jira, repo, version='0.17.0', previous_version='0.16.0')
      report = release.curation_report()
      

      Attachments

        Issue Links

          Activity

            People

              kszucs Krisztian Szucs
              kszucs Krisztian Szucs
              Votes:
              0 Vote for this issue
              Watchers:
              2 Start watching this issue

              Dates

                Created:
                Updated:
                Resolved: