#!/usr/bin/env python3
# Copyright (c) 2021, Facebook
#
# SPDX-License-Identifier: Apache-2.0

# Provenance: this is scripts/release/bug_bash.py as added by commit
# 6c37258f5ba801e898ce239dd05c4ad752a43b9d, "scripts: add script to extract
# top-ten bug-bashers" (Christopher Friedt, Tue, 17 Aug 2021 08:10:07 -0400).
#
# Commit message:
#   This script allows us to programmatically query bug-bashers within a
#   user-supplied time-window. For example, we held a "Bug Bash Week"
#   August 1-7, 2021 (it was announced a week early though).
#
#   The output of the script prints the "top ten" bug bashers in
#   tab-separated columns in descending order. The first column is the
#   number of bugs squashed and the second column is the github user id.
#
#   Signed-off-by: Christopher Friedt
#
# Companion changes carried by the same commit:
#   CODEOWNERS: add "/scripts/release/bug_bash.py @cfriedt"
#   scripts/requirements-extras.txt: add "PyGithub" under the comment
#     "# used by scripts/release/bug_bash.py for generating top ten bug
#     squashers"

"""Query the Top-Ten Bug Bashers

This script will query the top-ten Bug Bashers in a specified date window.

The default output is one line per user, tab-separated: the number of bugs
squashed, then the GitHub login, in descending order of score. With -a/--all
it instead prints one line per counted bug: issue number, login and title.

Usage:
    ./scripts/release/bug_bash.py -t ~/.ghtoken -b 2021-07-26 -e 2021-08-07
    GITHUB_TOKEN="..." ./scripts/release/bug_bash.py -b 2021-07-26 -e 2021-08-07
"""

import argparse
from collections import Counter
from datetime import datetime, timedelta, timezone
import itertools
import operator
import os

# Requires PyGithub. The import is guarded so that the module (and --help)
# still works without the optional dependency; main() reports the problem.
try:
    from github import Github
except ImportError:
    Github = None


def parse_args(argv=None):
    """Parse the command line and resolve the GitHub token.

    argv is the list of arguments to parse; None (the default) means
    sys.argv[1:], which is what argparse does on its own.

    Returns the argparse namespace with an extra ``token`` attribute. The
    token is read from the file given with -t/--token if present, otherwise
    from the GITHUB_TOKEN environment variable.

    Raises ValueError if the end date precedes the begin date, or if no
    non-empty token can be found.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-a', '--all', dest='all',
                        help='Show all bugs squashed', action='store_true')
    parser.add_argument('-t', '--token', dest='tokenfile',
                        help='File containing GitHub token', metavar='FILE')
    parser.add_argument('-b', '--begin', dest='begin',
                        help='begin date (YYYY-mm-dd)', metavar='date',
                        type=valid_date_type, required=True)
    parser.add_argument('-e', '--end', dest='end',
                        help='end date (YYYY-mm-dd)', metavar='date',
                        type=valid_date_type, required=True)

    args = parser.parse_args(argv)

    if args.end < args.begin:
        raise ValueError(
            'end date {} is before begin date {}'.format(args.end, args.begin))

    if args.tokenfile:
        with open(args.tokenfile, 'r') as file:
            token = file.read().strip()
    else:
        token = os.environ.get('GITHUB_TOKEN', '').strip()

    # an empty file or an empty variable is not a credential either
    if not token:
        raise ValueError('No credentials specified')

    args.token = token

    return args


def _naive_utc(when):
    """Return ``when`` as a naive datetime expressed in UTC.

    Naive values are returned unchanged (they are taken to be UTC already);
    aware values are converted to UTC and stripped of their tzinfo, so both
    kinds can be compared with each other.
    """
    if when.tzinfo is None:
        return when
    return when.astimezone(timezone.utc).replace(tzinfo=None)


class BugBashTally:
    """Tally merged bug-fix pull requests per author within a date window.

    The window is [begin_date, end_date + 1 day), i.e. the whole end day is
    included. GitHub is queried at most once per instance; the result is
    cached, including an empty one.
    """

    def __init__(self, gh, begin_date, end_date):
        """Create a BugBashTally object with the provided Github object,
        begin datetime object, and end datetime object"""
        self._gh = gh
        self._repo = gh.get_repo('zephyrproject-rtos/zephyr')
        self._begin_date = begin_date
        self._end_date = end_date

        self._issues = []
        self._pulls = []
        # a separate flag, because "no bugs in the window" is a valid,
        # cacheable answer that an empty list cannot express on its own
        self._fetched = False

    def get_tally(self):
        """Return a dict with (key = user, value = score)"""
        return dict(Counter(pull.user.login for pull in self.get_pulls()))

    def get_rev_tally(self):
        """Return a dict with (key = score, value = list of users) sorted
        in descending order of score"""
        # there may be ties, hence a list of users per score
        by_score = {}
        for user, score in self.get_tally().items():
            by_score.setdefault(score, []).append(user)

        # sort in descending order by score
        return dict(
            sorted(by_score.items(), key=operator.itemgetter(0), reverse=True))

    def get_top_ten(self):
        """Return a list of at most ten (score, user) tuples sorted in
        descending order of score"""
        # do not sort users by login - hopefully fair-ish
        ranked = ((score, user)
                  for score, users in self.get_rev_tally().items()
                  for user in users)
        return list(itertools.islice(ranked, 10))

    def get_pulls(self):
        """Return GitHub pull requests that squash bugs in the provided
        date window, index-aligned with get_issues()"""
        self.get_issues()

        return self._pulls

    def get_issues(self):
        """Return GitHub issues representing bugs in the provided date
        window, index-aligned with get_pulls()"""
        if self._fetched:
            return self._issues

        begin = _naive_utc(self._begin_date)
        cutoff = _naive_utc(self._end_date) + timedelta(days=1)
        candidates = self._repo.get_issues(
            state='closed', labels=['bug'], since=self._begin_date)

        # collect into locals and publish only after a complete scan, so an
        # exception half-way through cannot leave a partial result cached
        issues = []
        pulls = []
        for issue in candidates:
            closed_at = issue.closed_at
            if closed_at is None:
                continue

            # the PyGithub API and v3 REST API do not facilitate 'until'
            # or 'end date' :-/
            # closed_at is normalised because it may be timezone-aware while
            # the command-line dates are naive
            closed_at = _naive_utc(closed_at)
            if closed_at < begin or closed_at >= cutoff:
                continue

            ipr = issue.pull_request
            if ipr is None:
                # NOTE(review): in the GitHub REST API an issue carries a
                # pull_request entry only when the issue itself is a pull
                # request, so this keeps pull requests labelled 'bug' rather
                # than issues closed by one - confirm this is intended
                continue

            prid = int(ipr.html_url.split('/')[-1])
            pull = self._repo.get_pull(prid)
            if not pull.merged:
                # pull requests that were not merged do not count
                continue

            pulls.append(pull)
            issues.append(issue)

        self._issues = issues
        self._pulls = pulls
        self._fetched = True

        return self._issues


# https://gist.github.com/monkut/e60eea811ef085a6540f
def valid_date_type(arg_date_str):
    """custom argparse *date* type for user dates values given from the
    command line

    Returns a naive datetime at midnight of the given day; raises
    argparse.ArgumentTypeError if the string is not in YYYY-MM-DD format.
    """
    try:
        return datetime.strptime(arg_date_str, "%Y-%m-%d")
    except ValueError:
        msg = "Given Date ({0}) not valid! Expected format, YYYY-MM-DD!".format(
            arg_date_str)
        raise argparse.ArgumentTypeError(msg) from None


def print_top_ten(top_ten):
    """Print the top-ten bug bashers, one "score<TAB>user" line each"""
    for score, user in top_ten:
        # print tab-separated values, to allow for ./script ... > foo.tsv
        print('{}\t{}'.format(score, user))


def main():
    """Entry point: parse arguments, query GitHub and print the result"""
    args = parse_args()
    if Github is None:
        raise SystemExit(
            'PyGithub is required to run this script (pip install PyGithub)')

    bbt = BugBashTally(Github(args.token), args.begin, args.end)
    if args.all:
        # print one issue per line; both lists are built in lockstep
        for issue, pull in zip(bbt.get_issues(), bbt.get_pulls()):
            print('{}\t{}\t{}'.format(
                issue.number, pull.user.login, pull.title))
    else:
        # print the top ten
        print_top_ten(bbt.get_top_ten())


if __name__ == '__main__':
    main()