| #!/usr/bin/env python3 |
| # Copyright (c) 2024 Intel Corp. |
| # SPDX-License-Identifier: Apache-2.0 |
| |
# Script that operates on merged PRs and sends data to Elasticsearch for
# further inspection using the PR dashboard at
# https://kibana.zephyrproject.io/
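#
# Typical invocations (the PR number, date range, and index name below are
# illustrative):
#
#   --pull-request 12345 --dry-run
#   --range 2023-01-01..2023-01-05 --es-index pr-stats
#
# GITHUB_TOKEN must be set; uploading additionally requires
# ELASTICSEARCH_SERVER, ELASTICSEARCH_KEY and, unless --es-index is given,
# PR_STAT_ES_INDEX.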
| |
import argparse
import os
import pprint
import sys
from datetime import timedelta

from elasticsearch import Elasticsearch
from elasticsearch.helpers import bulk
from github import Github
| |
| |
| date_format = '%Y-%m-%d %H:%M:%S' |
| |
| def parse_args() -> argparse.Namespace: |
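    """Parse and return the command line arguments."""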
| parser = argparse.ArgumentParser( |
| formatter_class=argparse.RawDescriptionHelpFormatter, allow_abbrev=False) |
| |
| parser.add_argument('--pull-request', help='pull request number', type=int) |
| parser.add_argument('--range', help='execute based on a date range, for example 2023-01-01..2023-01-05') |
| parser.add_argument('--repo', help='github repo', default='zephyrproject-rtos/zephyr') |
| parser.add_argument('--es-index', help='Elasticsearch index') |
    parser.add_argument('-y', '--dry-run', action="store_true", help='dry run, do not upload data')
| |
| return parser.parse_args() |
| |
| def gendata(data, index): |
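    """Wrap each record in the action dict expected by elasticsearch.helpers.bulk()."""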
| for t in data: |
| yield { |
| "_index": index, |
| "_source": t |
| } |
| |
| def process_pr(pr): |
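    """Collect review statistics for a merged PR and return them as a flat
    dict ready for Elasticsearch indexing.
    """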
| reviews = pr.get_reviews() |
    print(f'#{pr.number}: {pr.title} - {pr.comments} Comments, reviews: {reviews.totalCount}, '
          f'{len(pr.assignees)} Assignees (Updated {pr.updated_at})')
| assignee_reviews = 0 |
| prj = {} |
| |
| assignees = [] |
    labels = [label.name for label in pr.labels]
| |
| reviewers = set() |
| for review in reviews: |
| # get list of all approved reviews |
| if review.user and review.state == 'APPROVED': |
| reviewers.add(review.user.login) |
| |
| for assignee in pr.assignees: |
| # list assignees for later checks |
| assignees.append(assignee.login) |
| if assignee.login in reviewers: |
| assignee_reviews += 1 |
| |
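    # review_rule: "yes" if the review requirements were met, "na" if they do
    # not apply to this PR, "no" otherwise.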
| if assignee_reviews > 0 or pr.merged_by.login in assignees: |
| # in case of assignee reviews or if PR was merged by an assignee |
| prj['review_rule'] = "yes" |
| elif not pr.assignees or \ |
| (pr.user.login in assignees and len(assignees) == 1) or \ |
| ('Trivial' in labels or 'Hotfix' in labels): |
| # in case where no assignees set or if submitter is the only assignee |
| # or in case of trivial or hotfixes |
| prj['review_rule'] = "na" |
| else: |
| # everything else |
        prj['review_rule'] = "no"

    created = pr.created_at
    # If a PR was made ready for review from draft, calculate based on when it
    # was (last) moved out of draft.
    for event in pr.get_issue_events():
        if event.event == 'ready_for_review':
            created = event.created_at
| |
| # calculate time the PR was in review, hours and business days. |
| delta = pr.closed_at - created |
| deltah = delta.total_seconds() / 3600 |
| prj['hours_open'] = deltah |
| |
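    # One datetime per full day the PR was open, used below to count business
    # days.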
    dates = (created + timedelta(days=idx + 1) for idx in range((pr.closed_at - created).days))
| |
    # Count the business days the PR was open; the guidelines require at least 2.
| business_days = sum(1 for day in dates if day.weekday() < 5) |
| prj['business_days_open'] = business_days |
| |
| trivial = 'Trivial' in labels |
| hotfix = 'Hotfix' in labels |
| min_review_time_rule = "no" |
| |
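    # Minimum review time guideline: hotfixes may go in immediately, trivial
    # changes need at least 4 hours, everything else at least 2 business days.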
| if hotfix or (trivial and deltah >= 4) or business_days >= 2: |
| min_review_time_rule = "yes" |
| |
| prj['time_rule'] = min_review_time_rule |
| |
    # This is all data we can easily get through the Github API and it serves
    # as the basis for displaying trends and metrics. The data can be extended
    # in the future if we find more useful information through the API.
| |
| prj['nr'] = pr.number |
| prj['url'] = pr.url |
| prj['title'] = pr.title |
| prj['comments'] = pr.comments |
| prj['reviews'] = reviews.totalCount |
| prj['assignees'] = assignees |
    prj['updated'] = pr.updated_at.strftime(date_format)
    prj['created'] = pr.created_at.strftime(date_format)
    prj['closed'] = pr.closed_at.strftime(date_format)
| prj['merged_by'] = pr.merged_by.login |
| prj['submitted_by'] = pr.user.login |
| prj['changed_files'] = pr.changed_files |
| prj['additions'] = pr.additions |
| prj['deletions'] = pr.deletions |
| prj['commits'] = pr.commits |
| # The branch we are targeting. main vs release branches. |
| prj['base'] = pr.base.ref |
| |
| # list all reviewers |
| prj['reviewers'] = list(reviewers) |
| prj['labels'] = labels |
| |
| return prj |
| |
| def main(): |
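    """Fetch merged PRs per the command line arguments and optionally upload
    the collected data to Elasticsearch.
    """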
| args = parse_args() |
| token = os.environ.get('GITHUB_TOKEN') |
| if not token: |
        sys.exit('Github token not set in the environment; please set the '
                 'GITHUB_TOKEN environment variable and retry.')
| |
| gh = Github(token) |
| json_list = [] |
| gh_repo = gh.get_repo(args.repo) |
| |
    if args.pull_request:
        pr = gh_repo.get_pull(args.pull_request)
        if not pr.merged:
            sys.exit(f'PR #{args.pull_request} is not merged, nothing to do.')
        prj = process_pr(pr)
        json_list.append(prj)
    elif args.range:
        query = f'repo:{args.repo} merged:{args.range} is:pr is:closed sort:updated-desc base:main'
        prs = gh.search_issues(query=query)
        for _pr in prs:
            pr = gh_repo.get_pull(_pr.number)
            prj = process_pr(pr)
            json_list.append(prj)
    else:
        sys.exit('Please specify one of --pull-request or --range.')
| |
    if json_list and not args.dry_run:
        # Send data over to elasticsearch. All three environment lookups can
        # raise KeyError, so the client setup lives inside the try block too.
        try:
            es = Elasticsearch(
                [os.environ['ELASTICSEARCH_SERVER']],
                api_key=os.environ['ELASTICSEARCH_KEY'],
                verify_certs=False
            )
            if args.es_index:
                index = args.es_index
            else:
                index = os.environ['PR_STAT_ES_INDEX']
            bulk(es, gendata(json_list, index))
        except KeyError as e:
            print(f"Error: {e} not set.")
            print(json_list)

| if args.dry_run: |
| pprint.pprint(json_list) |
| |
| if __name__ == "__main__": |
| main() |