#!/usr/bin/env python3

# Copyright (c) 2019 Nordic Semiconductor ASA
# SPDX-License-Identifier: Apache-2.0

"""
Lists maintainers for files or commits. Similar in function to
scripts/get_maintainer.pl from Linux, but geared towards GitHub. The mapping is
in MAINTAINERS.yml.

The comment at the top of MAINTAINERS.yml in Zephyr documents the file format.

See the help texts for the various subcommands for more information. They can
be viewed with e.g.

    ./get_maintainer.py path --help

This executable doubles as a Python library. Identifiers not prefixed with '_'
are part of the library API. The library documentation can be viewed with this
command:

    $ pydoc get_maintainer
"""
24
25import argparse
26import operator
27import os
28import pathlib
29import re
30import shlex
31import subprocess
32import sys
33
34from yaml import load, YAMLError
35try:
36 # Use the speedier C LibYAML parser if available
Moritz Fischer6a76b982022-03-30 11:20:43 -070037 from yaml import CSafeLoader as SafeLoader
Anas Nashif8d8875b2020-04-27 14:16:59 -040038except ImportError:
Moritz Fischer6a76b982022-03-30 11:20:43 -070039 from yaml import SafeLoader
Anas Nashif8d8875b2020-04-27 14:16:59 -040040
41
def _main():
    # Entry point when run as an executable: parses the command line, loads
    # the maintainers file, and hands both to the selected subcommand

    args = _parse_args()

    try:
        maintainers = Maintainers(args.maintainers)
        args.cmd_fn(maintainers, args)
    except (MaintainersError, GitError) as err:
        # Known failure modes are reported as a short error message rather
        # than as a traceback
        _serr(err)
50
51
def _parse_args():
    # Parses arguments when run as an executable.
    #
    # Returns the argparse namespace. Besides the parsed options, it has a
    # 'cmd_fn' attribute holding the Maintainers method that implements the
    # selected subcommand (called as cmd_fn(maintainers, args)).
    #
    # Exits with the usage text if no subcommand was given.

    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=__doc__, allow_abbrev=False)

    parser.add_argument(
        "-m", "--maintainers",
        metavar="MAINTAINERS_FILE",
        help="Maintainers file to load. If not specified, MAINTAINERS.yml in "
             "the top-level repository directory is used, and must exist. "
             "Paths in the maintainers file will always be taken as relative "
             "to the top-level directory.")

    subparsers = parser.add_subparsers(
        help="Available commands (each has a separate --help text)")

    path_parser = subparsers.add_parser(
        "path",
        help="List area(s) for paths")
    path_parser.add_argument(
        "paths",
        metavar="PATH",
        nargs="*",
        help="Path to list areas for")
    path_parser.set_defaults(cmd_fn=Maintainers._path_cmd)

    commits_parser = subparsers.add_parser(
        "commits",
        help="List area(s) for commit range")
    commits_parser.add_argument(
        "commits",
        metavar="COMMIT_RANGE",
        nargs="*",
        help="Commit range to list areas for (default: HEAD~..)")
    commits_parser.set_defaults(cmd_fn=Maintainers._commits_cmd)

    list_parser = subparsers.add_parser(
        "list",
        help="List files in areas")
    list_parser.add_argument(
        "area",
        metavar="AREA",
        nargs="?",
        # Non-orphaned files are the ones that DO appear in some area
        help="Name of area to list files in. If not specified, all "
             "non-orphaned files are listed (all files that appear in "
             "some area).")
    list_parser.set_defaults(cmd_fn=Maintainers._list_cmd)

    areas_parser = subparsers.add_parser(
        "areas",
        help="List areas and maintainers")
    areas_parser.add_argument(
        "maintainer",
        metavar="MAINTAINER",
        nargs="?",
        help="List all areas maintained by maintainer.")
    areas_parser.set_defaults(cmd_fn=Maintainers._areas_cmd)

    orphaned_parser = subparsers.add_parser(
        "orphaned",
        help="List orphaned files (files that do not appear in any area)")
    orphaned_parser.add_argument(
        "path",
        metavar="PATH",
        nargs="?",
        help="Limit to files under PATH")
    orphaned_parser.set_defaults(cmd_fn=Maintainers._orphaned_cmd)

    count_parser = subparsers.add_parser(
        "count",
        help="Count areas, unique maintainers, and / or unique collaborators")
    count_parser.add_argument(
        "-a",
        "--count-areas",
        action="store_true",
        help="Count the number of areas")
    count_parser.add_argument(
        "-c",
        "--count-collaborators",
        action="store_true",
        help="Count the number of unique collaborators")
    count_parser.add_argument(
        "-n",
        "--count-maintainers",
        action="store_true",
        help="Count the number of unique maintainers")
    count_parser.add_argument(
        "-o",
        "--count-unmaintained",
        action="store_true",
        help="Count the number of unmaintained areas")
    count_parser.set_defaults(cmd_fn=Maintainers._count_cmd)

    args = parser.parse_args()
    if not hasattr(args, "cmd_fn"):
        # Called without a subcommand, so no set_defaults() has run
        sys.exit(parser.format_usage().rstrip())

    return args
154
155
class Maintainers:
    """
    Represents the contents of a maintainers YAML file.

    These attributes are available:

    areas:
      A dictionary that maps area names to Area instances, for all areas
      defined in the maintainers file

    filename:
      The path to the maintainers file
    """
    def __init__(self, filename=None):
        """
        Creates a Maintainers instance.

        filename (default: None):
          Path to the maintainers file to parse. If None, or if the given path
          does not exist, MAINTAINERS.yml in the top-level directory of the
          Git repository is used, and must exist.

          When 'filename' is used, paths are taken as relative to the
          directory that contains it. Otherwise, they are taken as relative to
          the top-level directory of the Git repository.
        """
        if (filename is not None) and (pathlib.Path(filename).exists()):
            self.filename = pathlib.Path(filename)
            self._toplevel = self.filename.parent
        else:
            self._toplevel = pathlib.Path(_git("rev-parse", "--show-toplevel"))
            self.filename = self._toplevel / "MAINTAINERS.yml"

        self.areas = {}
        for area_name, area_dict in _load_maintainers(self.filename).items():
            area = Area()
            area.name = area_name
            area.status = area_dict.get("status")
            area.maintainers = area_dict.get("maintainers", [])
            area.collaborators = area_dict.get("collaborators", [])
            area.inform = area_dict.get("inform", [])
            area.labels = area_dict.get("labels", [])
            area.tests = area_dict.get("tests", [])
            area.tags = area_dict.get("tags", [])
            area.description = area_dict.get("description")

            # area._match_fn(path) tests if the path matches files and/or
            # files-regex
            area._match_fn = \
                _get_match_fn(area_dict.get("files"),
                              area_dict.get("files-regex"))

            # Like area._match_fn(path), but for files-exclude and
            # files-regex-exclude
            area._exclude_match_fn = \
                _get_match_fn(area_dict.get("files-exclude"),
                              area_dict.get("files-regex-exclude"))

            self.areas[area_name] = area

    def path2areas(self, path):
        """
        Returns a list of Area instances for the areas that contain 'path',
        taken as relative to the current directory
        """
        # Make directory paths end in '/' so that foo/bar matches foo/bar/.
        # Skip this check in _contains() itself, because the isdir() makes it
        # twice as slow in cases where it's not needed.
        is_dir = os.path.isdir(path)

        # Make 'path' relative to the repository root and normalize it.
        # normpath() would remove a trailing '/', so we add it afterwards.
        path = os.path.normpath(os.path.join(
            os.path.relpath(os.getcwd(), self._toplevel),
            path))

        if is_dir:
            path += "/"

        return [area for area in self.areas.values()
                if area._contains(path)]

    def commits2areas(self, commits):
        """
        Returns a set() of Area instances for the areas that contain files that
        are modified by the commit range in 'commits'. 'commits' could be e.g.
        "HEAD~..", to inspect the tip commit
        """
        res = set()
        # Final '--' is to make sure 'commits' is interpreted as a commit range
        # rather than a path. That might give better error messages.
        for path in _git("diff", "--name-only", commits, "--").splitlines():
            res.update(self.path2areas(path))
        return res

    def __repr__(self):
        return "<Maintainers for '{}'>".format(self.filename)

    #
    # Command-line subcommands
    #

    def _path_cmd(self, args):
        # 'path' subcommand implementation. Prints the areas for the paths in
        # args.paths, followed by any paths that aren't in any area.

        # Validate all paths up front, so that nothing is printed if one of
        # them is bad
        for path in args.paths:
            if not os.path.exists(path):
                _serr("'{}': no such file or directory".format(path))

        res = set()
        orphaned = []
        for path in args.paths:
            areas = self.path2areas(path)
            res.update(areas)
            if not areas:
                orphaned.append(path)

        _print_areas(res)
        if orphaned:
            if res:
                print()
            print("Orphaned paths (not in any area):\n" + "\n".join(orphaned))

    def _commits_cmd(self, args):
        # 'commits' subcommand implementation. Prints the areas touched by the
        # commit ranges in args.commits (default: the tip commit).

        commits = args.commits or ("HEAD~..",)
        _print_areas({area for commit_range in commits
                           for area in self.commits2areas(commit_range)})

    def _areas_cmd(self, args):
        # 'areas' subcommand implementation. Prints each area together with
        # its maintainers. If args.maintainer is given, only areas maintained
        # by that person are printed.

        for area in self.areas.values():
            if args.maintainer and args.maintainer not in area.maintainers:
                continue
            print("{:25}\t{}".format(area.name, ",".join(area.maintainers)))

    def _count_cmd(self, args):
        # 'count' subcommand implementation. Prints the counts selected by the
        # args.count_* flags.

        # If no specific count is requested, print them all. This is kept in
        # a local flag so that the caller's 'args' isn't modified.
        show_all = not (args.count_areas or args.count_collaborators or
                        args.count_maintainers or args.count_unmaintained)

        unmaintained = 0
        collaborators = set()
        maintainers = set()

        for area in self.areas.values():
            if area.status == 'maintained':
                # Only maintainers of areas with status 'maintained' count
                maintainers.update(area.maintainers)
            elif area.status == 'unmaintained':
                # 'odd fixes' and 'obsolete' are separate statuses and are
                # not reported as unmaintained
                unmaintained += 1
            collaborators.update(area.collaborators)

        if show_all or args.count_areas:
            print('{:14}\t{}'.format('areas:', len(self.areas)))
        if show_all or args.count_maintainers:
            print('{:14}\t{}'.format('maintainers:', len(maintainers)))
        if show_all or args.count_collaborators:
            print('{:14}\t{}'.format('collaborators:', len(collaborators)))
        if show_all or args.count_unmaintained:
            print('{:14}\t{}'.format('unmaintained:', unmaintained))

    def _list_cmd(self, args):
        # 'list' subcommand implementation. Prints the files that git tracks
        # which belong to args.area, or to any area if args.area is None.

        if args.area is None:
            # List all files that appear in some area
            areas = self.areas.values()
        else:
            # List all files that appear in the given area
            area = self.areas.get(args.area)
            if area is None:
                _serr("'{}': no such area defined in '{}'"
                      .format(args.area, self.filename))
            areas = (area,)

        for path in _ls_files():
            if any(area._contains(path) for area in areas):
                print(path)

    def _orphaned_cmd(self, args):
        # 'orphaned' subcommand implementation. Prints the files that git
        # tracks (under args.path, if given) that aren't in any area.

        if args.path is not None and not os.path.exists(args.path):
            _serr("'{}': no such file or directory".format(args.path))

        for path in _ls_files(args.path):
            if not any(area._contains(path) for area in self.areas.values()):
                print(path)
354
355
class Area:
    """
    Represents an entry for an area in MAINTAINERS.yml.

    These attributes are available:

    name:
      The name of the area, which is its key in MAINTAINERS.yml.

    status:
      The status of the area, as a string. None if the area has no 'status'
      key. See MAINTAINERS.yml.

    maintainers:
      List of maintainers. Empty if the area has no 'maintainers' key.

    collaborators:
      List of collaborators. Empty if the area has no 'collaborators' key.

    inform:
      List of people to inform on pull requests. Empty if the area has no
      'inform' key.

    labels:
      List of GitHub labels for the area. Empty if the area has no 'labels'
      key.

    tests:
      List of strings from the 'tests' key. Empty if the area has no 'tests'
      key.

    tags:
      List of strings from the 'tags' key. Empty if the area has no 'tags'
      key.

    description:
      Text from 'description' key, or None if the area has no 'description'
      key
    """
    def _contains(self, path):
        # Returns True if the area contains 'path', and False otherwise.
        #
        # A path is contained if it is matched by self._match_fn and not
        # matched by self._exclude_match_fn. Either function may be None,
        # which means that it matches nothing.
        #
        # The result is always a real bool, never the raw match object (or
        # None) returned by the match functions.

        if not (self._match_fn and self._match_fn(path)):
            return False

        return not (self._exclude_match_fn and self._exclude_match_fn(path))

    def __repr__(self):
        return "<Area {}>".format(self.name)
392
393
def _print_areas(areas):
    # Prints a summary of each Area in 'areas', ordered by area name, with an
    # empty line between consecutive areas. List-valued attributes are shown
    # comma-separated, and a missing description is shown as an empty string.

    template = "\n".join((
        "{}",
        "\tstatus: {}",
        "\tmaintainers: {}",
        "\tcollaborators: {}",
        "\tinform: {}",
        "\tlabels: {}",
        "\ttests: {}",
        "\ttags: {}",
        "\tdescription: {}",
    ))

    ordered = sorted(areas, key=operator.attrgetter("name"))
    for index, area in enumerate(ordered):
        if index:
            # Separator before every area except the first
            print()

        fields = (
            area.name,
            area.status,
            ", ".join(area.maintainers),
            ", ".join(area.collaborators),
            ", ".join(area.inform),
            ", ".join(area.labels),
            ", ".join(area.tests),
            ", ".join(area.tags),
            area.description or "",
        )
        print(template.format(*fields))
419
420
def _get_match_fn(globs, regexes):
    # Constructs a single regex that tests for matches against the globs in
    # 'globs' and the regexes in 'regexes'. Parts are joined with '|' (OR).
    # Returns the search() method of the compiled regex.
    #
    # Returns None if there are neither globs nor regexes, which should be
    # interpreted as no match.
    #
    # In globs, '*' matches any run of characters except '/', and '?' matches
    # a single character except '/'. A glob that ends in '/' matches
    # everything below that directory, while any other glob must match the
    # whole path. Bracket expressions like [ab] are passed through to the
    # regex unchanged. Other characters match themselves.

    if not (globs or regexes):
        return None

    alternatives = []

    if globs:
        glob_regexes = []
        for glob in globs:
            # Escape characters that are special in regexes but literal in
            # globs, so that e.g. 'lib/c++/' matches the directory 'lib/c++'
            # instead of having '+' act as a quantifier. This must happen
            # before the wildcards are expanded, since the expansions contain
            # '^'.
            glob_regex = re.sub(r"[.+(){}^$|\\]", r"\\\g<0>", glob)

            # Construct a regex equivalent to the glob
            glob_regex = glob_regex.replace("*", "[^/]*") \
                                   .replace("?", "[^/]")

            if not glob.endswith("/"):
                # Require a full match for globs that don't end in /
                glob_regex += "$"

            glob_regexes.append(glob_regex)

        # The glob regexes must anchor to the beginning of the path, since we
        # return search(). (?:) is a non-capturing group.
        alternatives.append("^(?:{})".format("|".join(glob_regexes)))

    if regexes:
        # Regexes from the maintainers file are used as-is, unanchored
        alternatives.extend(regexes)

    return re.compile("|".join(alternatives)).search
457
458
def _load_maintainers(path):
    # Returns the parsed contents of the maintainers file at 'path', also
    # running checks on the contents. The returned format is plain Python
    # dicts/lists/etc., mirroring the structure of the file.
    #
    # 'path' is passed on to _check_maintainers(), which expects a
    # pathlib.Path instance.
    #
    # Raises MaintainersError if the file is not valid YAML. The original
    # YAMLError is chained as the cause.

    with open(path, encoding="utf-8") as f:
        try:
            yaml = load(f, Loader=SafeLoader)
        except YAMLError as e:
            raise MaintainersError(
                "{}: YAML error: {}".format(path, e)) from e

    # The file is fully parsed at this point, so it doesn't need to stay open
    # while the contents are checked
    _check_maintainers(path, yaml)
    return yaml
472
473
def _check_maintainers(maints_path, yaml):
    # Checks the maintainers data in 'yaml', which comes from the maintainers
    # file at maints_path, which is a pathlib.Path instance.
    #
    # Glob patterns are checked against the files in the directory that
    # contains the maintainers file.
    #
    # Problems are reported through _err(), with the filename prepended to the
    # message. Returns None if no problem is found.

    root = maints_path.parent

    def ferr(msg):
        _err("{}: {}".format(maints_path, msg))  # Prepend the filename

    if not isinstance(yaml, dict):
        ferr("empty or malformed YAML (not a dict)")

    ok_keys = {"status", "maintainers", "collaborators", "inform", "files",
               "files-exclude", "files-regex", "files-regex-exclude",
               "labels", "description", "tests", "tags"}

    ok_status = {"maintained", "odd fixes", "unmaintained", "obsolete"}
    # For messages. Sorted so that the message is the same on every run.
    ok_status_s = ", ".join('"' + s + '"' for s in sorted(ok_status))

    # All of these must be lists of strings if present. The exclude keys are
    # included because their values are iterated over further down.
    list_keys = ("maintainers", "collaborators", "inform", "files",
                 "files-exclude", "files-regex", "files-regex-exclude",
                 "labels", "tags", "tests")

    for area_name, area_dict in yaml.items():
        if not isinstance(area_dict, dict):
            ferr("malformed entry for area '{}' (not a dict)"
                 .format(area_name))

        for key in area_dict:
            if key not in ok_keys:
                ferr("unknown key '{}' in area '{}'"
                     .format(key, area_name))

        if "status" in area_dict and \
           area_dict["status"] not in ok_status:
            ferr("bad 'status' key on area '{}', should be one of {}"
                 .format(area_name, ok_status_s))

        if not area_dict.keys() & {"files", "files-regex"}:
            ferr("either 'files' or 'files-regex' (or both) must be specified "
                 "for area '{}'".format(area_name))

        for list_name in list_keys:
            if list_name in area_dict:
                lst = area_dict[list_name]
                if not (isinstance(lst, list) and
                        all(isinstance(elm, str) for elm in lst)):
                    ferr("malformed '{}' value for area '{}' -- should "
                         "be a list of strings".format(list_name, area_name))

        for files_key in "files", "files-exclude":
            if files_key in area_dict:
                for glob_pattern in area_dict[files_key]:
                    # This could be changed if it turns out to be too slow,
                    # e.g. to only check non-globbing filenames. The tuple() is
                    # needed due to pathlib's glob() returning a generator.
                    paths = tuple(root.glob(glob_pattern))
                    if not paths:
                        ferr("glob pattern '{}' in '{}' in area '{}' does not "
                             "match any files".format(glob_pattern, files_key,
                                                      area_name))
                    if not glob_pattern.endswith("/"):
                        if all(path.is_dir() for path in paths):
                            ferr("glob pattern '{}' in '{}' in area '{}' "
                                 "matches only directories, but has no "
                                 "trailing '/'"
                                 .format(glob_pattern, files_key,
                                         area_name))

        for files_regex_key in "files-regex", "files-regex-exclude":
            if files_regex_key in area_dict:
                for regex in area_dict[files_regex_key]:
                    try:
                        re.compile(regex)
                    except re.error as e:
                        ferr("bad regular expression '{}' in '{}' in "
                             "'{}': {}".format(regex, files_regex_key,
                                               area_name, e.msg))

        if "description" in area_dict and \
           not isinstance(area_dict["description"], str):
            ferr("malformed 'description' value for area '{}' -- should be a "
                 "string".format(area_name))
554
555
def _git(*args):
    # Helper for running a Git command. Returns the stdout output, decoded as
    # UTF-8 and rstrip()ed. Called like _git("diff", "--name-only").
    #
    # Errors are reported through _giterr(): if the git executable can't be
    # found or started, or if the command exits with a non-zero status.

    command = ("git",) + args
    # Shell-quoted version of the command, only used in error messages
    command_text = " ".join(map(shlex.quote, command))

    try:
        process = subprocess.Popen(
            command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except FileNotFoundError:
        _giterr("git executable not found (when running '{}'). Check that "
                "it's in listed in the PATH environment variable"
                .format(command_text))
    except OSError as e:
        _giterr("error running '{}': {}".format(command_text, e))

    out_bytes, err_bytes = process.communicate()
    out_text = out_bytes.decode("utf-8")

    if process.returncode != 0:
        _giterr("error running '{}'\n\nstdout:\n{}\nstderr:\n{}".format(
            command_text, out_text, err_bytes.decode("utf-8")))

    return out_text.rstrip()
580
581
def _ls_files(path=None):
    # Returns the output lines of 'git ls-files' as a list, limited to 'path'
    # if it is given

    git_args = ("ls-files",) if path is None else ("ls-files", path)
    listing = _git(*git_args)
    return listing.splitlines()
587
588
def _err(msg):
    # Reports a problem with the maintainers file by raising MaintainersError
    # with 'msg' as the message
    raise MaintainersError(msg)
591
592
def _giterr(msg):
    # Reports a Git-related problem by raising GitError with 'msg' as the
    # message
    raise GitError(msg)
595
596
def _serr(msg):
    # For reporting errors when get_maintainer.py is run as a script.
    # sys.exit() shouldn't be used otherwise.
    #
    # Exits with a message made up of the script name, as invoked, followed
    # by 'msg'.
    script_name = sys.argv[0]
    message = "{}: error: {}".format(script_name, msg)
    sys.exit(message)
601
602
class MaintainersError(Exception):
    """
    Exception raised for MAINTAINERS.yml-related errors
    """
605
606
class GitError(Exception):
    """
    Exception raised for Git-related errors
    """
609
610
# Only run the command-line interface when executed as a script, not when
# imported as a library
if __name__ == "__main__":
    _main()