blob: f9f02a6e4d2f528260c301bc57912afaf89a6f6c [file] [log] [blame]
Anas Nashif8d8875b2020-04-27 14:16:59 -04001#!/usr/bin/env python3
2
3# Copyright (c) 2019 Nordic Semiconductor ASA
4# SPDX-License-Identifier: Apache-2.0
5
6"""
7Lists maintainers for files or commits. Similar in function to
8scripts/get_maintainer.pl from Linux, but geared towards GitHub. The mapping is
9in MAINTAINERS.yml.
10
11The comment at the top of MAINTAINERS.yml in Zephyr documents the file format.
12
13See the help texts for the various subcommands for more information. They can
14be viewed with e.g.
15
16 ./get_maintainer.py path --help
17
18This executable doubles as a Python library. Identifiers not prefixed with '_'
19are part of the library API. The library documentation can be viewed with this
20command:
21
22 $ pydoc get_maintainer
23"""
24
25import argparse
26import operator
27import os
28import pathlib
29import re
30import shlex
31import subprocess
32import sys
33
34from yaml import load, YAMLError
35try:
36 # Use the speedier C LibYAML parser if available
Moritz Fischer6a76b982022-03-30 11:20:43 -070037 from yaml import CSafeLoader as SafeLoader
Anas Nashif8d8875b2020-04-27 14:16:59 -040038except ImportError:
Moritz Fischer6a76b982022-03-30 11:20:43 -070039 from yaml import SafeLoader
Anas Nashif8d8875b2020-04-27 14:16:59 -040040
41
def _main():
    # Script entry point: parses the command line, loads the maintainers file
    # and hands both over to the function registered for the chosen subcommand

    cli_args = _parse_args()
    try:
        maintainers = Maintainers(cli_args.maintainers)
        cli_args.cmd_fn(maintainers, cli_args)
    except (MaintainersError, GitError) as err:
        # Expected failures are reported as a plain error message
        _serr(err)
50
51
def _parse_args():
    # Parses arguments when run as an executable.
    #
    # Returns the argparse namespace. Besides the options of the selected
    # subcommand, it always has a 'cmd_fn' attribute holding the (unbound)
    # Maintainers method that implements the subcommand; it is called as
    # cmd_fn(maintainers_instance, args).
    #
    # Exits with the usage text if no subcommand was given.

    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=__doc__)

    parser.add_argument(
        "-m", "--maintainers",
        metavar="MAINTAINERS_FILE",
        help="Maintainers file to load. If not specified, MAINTAINERS.yml in "
             "the top-level repository directory is used, and must exist. "
             "Paths in the maintainers file will always be taken as relative "
             "to the top-level directory.")

    subparsers = parser.add_subparsers(
        help="Available commands (each has a separate --help text)")

    id_parser = subparsers.add_parser(
        "path",
        help="List area(s) for paths")
    id_parser.add_argument(
        "paths",
        metavar="PATH",
        nargs="*",
        help="Path to list areas for")
    id_parser.set_defaults(cmd_fn=Maintainers._path_cmd)

    commits_parser = subparsers.add_parser(
        "commits",
        help="List area(s) for commit range")
    commits_parser.add_argument(
        "commits",
        metavar="COMMIT_RANGE",
        nargs="*",
        help="Commit range to list areas for (default: HEAD~..)")
    commits_parser.set_defaults(cmd_fn=Maintainers._commits_cmd)

    list_parser = subparsers.add_parser(
        "list",
        help="List files in areas")
    list_parser.add_argument(
        "area",
        metavar="AREA",
        nargs="?",
        # Non-orphaned files are the ones that DO belong to some area (the
        # 'orphaned' subcommand lists the complement)
        help="Name of area to list files in. If not specified, all "
             "non-orphaned files are listed (all files that appear in at "
             "least one area).")
    list_parser.set_defaults(cmd_fn=Maintainers._list_cmd)

    areas_parser = subparsers.add_parser(
        "areas",
        help="List areas and maintainers")
    areas_parser.add_argument(
        "maintainer",
        metavar="MAINTAINER",
        nargs="?",
        help="List all areas maintained by maintainer.")

    areas_parser.set_defaults(cmd_fn=Maintainers._areas_cmd)

    orphaned_parser = subparsers.add_parser(
        "orphaned",
        help="List orphaned files (files that do not appear in any area)")
    orphaned_parser.add_argument(
        "path",
        metavar="PATH",
        nargs="?",
        help="Limit to files under PATH")
    orphaned_parser.set_defaults(cmd_fn=Maintainers._orphaned_cmd)

    count_parser = subparsers.add_parser(
        "count",
        help="Count areas, unique maintainers, and / or unique collaborators")
    count_parser.add_argument(
        "-a",
        "--count-areas",
        action="store_true",
        help="Count the number of areas")
    count_parser.add_argument(
        "-c",
        "--count-collaborators",
        action="store_true",
        help="Count the number of unique collaborators")
    count_parser.add_argument(
        "-n",
        "--count-maintainers",
        action="store_true",
        help="Count the number of unique maintainers")
    count_parser.add_argument(
        "-o",
        "--count-unmaintained",
        action="store_true",
        help="Count the number of unmaintained areas")
    count_parser.set_defaults(cmd_fn=Maintainers._count_cmd)

    args = parser.parse_args()
    if not hasattr(args, "cmd_fn"):
        # Called without a subcommand
        sys.exit(parser.format_usage().rstrip())

    return args
154
155
class Maintainers:
    """
    Represents the contents of a maintainers YAML file.

    These attributes are available:

    areas:
        A dictionary that maps area names to Area instances, for all areas
        defined in the maintainers file

    filename:
        The path to the maintainers file
    """
    def __init__(self, filename=None):
        """
        Creates a Maintainers instance.

        filename (default: None):
            Path to the maintainers file to parse. If None, MAINTAINERS.yml in
            the top-level directory of the Git repository is used, and must
            exist.

        Raises GitError if the top-level directory can't be determined, and
        MaintainersError if the maintainers file is malformed.
        """
        self._toplevel = pathlib.Path(_git("rev-parse", "--show-toplevel"))

        if filename is None:
            self.filename = self._toplevel / "MAINTAINERS.yml"
        else:
            self.filename = pathlib.Path(filename)

        self.areas = {}
        for area_name, area_dict in _load_maintainers(self.filename).items():
            area = Area()
            area.name = area_name
            area.status = area_dict.get("status")
            area.maintainers = area_dict.get("maintainers", [])
            area.collaborators = area_dict.get("collaborators", [])
            area.inform = area_dict.get("inform", [])
            area.labels = area_dict.get("labels", [])
            area.description = area_dict.get("description")

            # area._match_fn(path) tests if the path matches files and/or
            # files-regex
            area._match_fn = \
                _get_match_fn(area_dict.get("files"),
                              area_dict.get("files-regex"))

            # Like area._match_fn(path), but for files-exclude and
            # files-regex-exclude
            area._exclude_match_fn = \
                _get_match_fn(area_dict.get("files-exclude"),
                              area_dict.get("files-regex-exclude"))

            self.areas[area_name] = area

    def path2areas(self, path):
        """
        Returns a list of Area instances for the areas that contain 'path',
        taken as relative to the current directory
        """
        # Make directory paths end in '/' so that foo/bar matches foo/bar/.
        # Skip this check in _contains() itself, because the isdir() makes it
        # twice as slow in cases where it's not needed.
        is_dir = os.path.isdir(path)

        # Make 'path' relative to the repository root and normalize it.
        # normpath() would remove a trailing '/', so we add it afterwards.
        path = os.path.normpath(os.path.join(
            os.path.relpath(os.getcwd(), self._toplevel),
            path))

        if is_dir:
            path += "/"

        return self._toplevel_path2areas(path)

    def commits2areas(self, commits):
        """
        Returns a set() of Area instances for the areas that contain files that
        are modified by the commit range in 'commits'. 'commits' could be e.g.
        "HEAD~..", to inspect the tip commit
        """
        res = set()
        # Final '--' is to make sure 'commits' is interpreted as a commit range
        # rather than a path. That might give better error messages.
        for path in _git("diff", "--name-only", commits, "--").splitlines():
            # 'git diff --name-only' prints paths relative to the top-level
            # directory no matter where it is run from, so they must not go
            # through path2areas(), which resolves paths against the current
            # directory
            if (self._toplevel / path).is_dir():
                path += "/"
            res.update(self._toplevel_path2areas(path))
        return res

    def __repr__(self):
        return "<Maintainers for '{}'>".format(self.filename)

    def _toplevel_path2areas(self, path):
        # Returns a list of the areas that contain 'path', which must already
        # be normalized, relative to the top-level directory, and end in '/'
        # if it is a directory

        return [area for area in self.areas.values()
                if area._contains(path)]

    #
    # Command-line subcommands
    #
    # These are registered as unbound methods by _parse_args() and called as
    # cmd_fn(maintainers_instance, args)
    #

    def _path_cmd(self, args):
        # 'path' subcommand implementation

        for path in args.paths:
            if not os.path.exists(path):
                _serr("'{}': no such file or directory".format(path))

        res = set()
        orphaned = []
        for path in args.paths:
            areas = self.path2areas(path)
            res.update(areas)
            if not areas:
                orphaned.append(path)

        _print_areas(res)
        if orphaned:
            if res:
                print()
            print("Orphaned paths (not in any area):\n" + "\n".join(orphaned))

    def _commits_cmd(self, args):
        # 'commits' subcommand implementation

        commits = args.commits or ("HEAD~..",)
        _print_areas({area for commit_range in commits
                      for area in self.commits2areas(commit_range)})

    def _areas_cmd(self, args):
        # 'areas' subcommand implementation. Lists all areas, or just the ones
        # maintained by args.maintainer if it is given.

        for area in self.areas.values():
            if not args.maintainer or args.maintainer in area.maintainers:
                print("{:25}\t{}".format(area.name, ",".join(area.maintainers)))

    def _count_cmd(self, args):
        # 'count' subcommand implementation

        if not (args.count_areas or args.count_collaborators or
                args.count_maintainers or args.count_unmaintained):
            # if no specific count is provided, print them all
            args.count_areas = True
            args.count_collaborators = True
            args.count_maintainers = True
            args.count_unmaintained = True

        unmaintained = 0
        collaborators = set()
        maintainers = set()

        for area in self.areas.values():
            if area.status == 'maintained':
                maintainers.update(area.maintainers)
            elif area.status in ('odd fixes', 'unmaintained'):
                # 'odd fixes' areas have always been counted here. Areas that
                # are explicitly marked 'unmaintained' belong in the count too.
                unmaintained += 1
            collaborators.update(area.collaborators)

        if args.count_areas:
            print('{:14}\t{}'.format('areas:', len(self.areas)))
        if args.count_maintainers:
            print('{:14}\t{}'.format('maintainers:', len(maintainers)))
        if args.count_collaborators:
            print('{:14}\t{}'.format('collaborators:', len(collaborators)))
        if args.count_unmaintained:
            print('{:14}\t{}'.format('unmaintained:', unmaintained))

    def _list_cmd(self, args):
        # 'list' subcommand implementation

        if args.area is None:
            # List all files that appear in some area
            for path in _ls_files():
                if any(area._contains(path) for area in self.areas.values()):
                    print(path)
        else:
            # List all files that appear in the given area
            area = self.areas.get(args.area)
            if area is None:
                _serr("'{}': no such area defined in '{}'"
                      .format(args.area, self.filename))

            for path in _ls_files():
                if area._contains(path):
                    print(path)

    def _orphaned_cmd(self, args):
        # 'orphaned' subcommand implementation

        if args.path is not None and not os.path.exists(args.path):
            _serr("'{}': no such file or directory".format(args.path))

        for path in _ls_files(args.path):
            if not any(area._contains(path) for area in self.areas.values()):
                print(path)
352
353
class Area:
    """
    Represents an entry for an area in MAINTAINERS.yml.

    Instances are created and filled in by Maintainers; this class defines no
    constructor of its own.

    These attributes are available:

    name:
        The name of the area, i.e. its key in MAINTAINERS.yml.

    status:
        The status of the area, as a string. None if the area has no 'status'
        key. See MAINTAINERS.yml.

    maintainers:
        List of maintainers. Empty if the area has no 'maintainers' key.

    collaborators:
        List of collaborators. Empty if the area has no 'collaborators' key.

    inform:
        List of people to inform on pull requests. Empty if the area has no
        'inform' key.

    labels:
        List of GitHub labels for the area. Empty if the area has no 'labels'
        key.

    description:
        Text from 'description' key, or None if the area has no 'description'
        key
    """
    def _contains(self, path):
        # Returns True if the area contains 'path', and False otherwise.
        #
        # self._match_fn and self._exclude_match_fn are either None (meaning
        # "matches nothing") or a function that returns a true value for
        # matching paths. The result is converted to a proper bool so that
        # callers never see None or a regex match object.

        if not (self._match_fn and self._match_fn(path)):
            return False

        return not (self._exclude_match_fn and self._exclude_match_fn(path))

    def __repr__(self):
        return "<Area {}>".format(self.name)
390
391
def _print_areas(areas):
    # Prints a summary of each Area in 'areas' to stdout, ordered by area
    # name, with an empty line between consecutive entries

    template = """\
{}
\tstatus: {}
\tmaintainers: {}
\tcollaborators: {}
\tinform: {}
\tlabels: {}
\tdescription: {}"""

    ordered = sorted(areas, key=operator.attrgetter("name"))
    for index, area in enumerate(ordered):
        if index:
            # Separator line before every entry except the first
            print()

        fields = (
            area.name,
            area.status,
            ", ".join(area.maintainers),
            ", ".join(area.collaborators),
            ", ".join(area.inform),
            ", ".join(area.labels),
            area.description or "",
        )
        print(template.format(*fields))
413
414
def _glob_to_regex(glob):
    # Returns a regex (as a string, not anchored at the start) equivalent to
    # the MAINTAINERS.yml glob 'glob':
    #
    #   *      matches any run of characters except '/'
    #   ?      matches any single character except '/'
    #   [...]  is passed through unchanged as a character class
    #
    # Everything else is matched literally. re.escape() is used for that, so
    # that characters like '+', '(' or '$' in filenames are not mistaken for
    # regex syntax.

    parts = []
    pos = 0
    while pos < len(glob):
        char = glob[pos]

        if char == "[":
            # Look for the closing bracket, requiring at least one character
            # inside the class. An unterminated '[' is treated literally.
            closing = glob.find("]", pos + 2)
            if closing != -1:
                parts.append(glob[pos:closing + 1])
                pos = closing + 1
                continue

        if char == "*":
            parts.append("[^/]*")
        elif char == "?":
            parts.append("[^/]")
        else:
            parts.append(re.escape(char))
        pos += 1

    return "".join(parts)


def _get_match_fn(globs, regexes):
    # Constructs a single regex that tests for matches against the globs in
    # 'globs' and the regexes in 'regexes'. Parts are joined with '|' (OR).
    # Returns the search() method of the compiled regex.
    #
    # Returns None if there are neither globs nor regexes, which should be
    # interpreted as no match.

    if not (globs or regexes):
        return None

    alternatives = []

    if globs:
        glob_regexes = []
        for glob in globs:
            # Construct a regex equivalent to the glob
            glob_regex = _glob_to_regex(glob)

            if not glob.endswith("/"):
                # Require a full match for globs that don't end in /
                glob_regex += "$"

            glob_regexes.append(glob_regex)

        # The glob regexes must anchor to the beginning of the path, since we
        # return search(). (?:) is a non-capturing group.
        alternatives.append("^(?:{})".format("|".join(glob_regexes)))

    if regexes:
        # User-supplied regexes are used as-is (and are not anchored)
        alternatives.extend(regexes)

    return re.compile("|".join(alternatives)).search
451
452
def _load_maintainers(path):
    # Returns the parsed contents of the maintainers file at 'path' (a
    # pathlib.Path), also running checks on the contents. The returned format
    # is plain Python dicts/lists/etc., mirroring the structure of the file.
    #
    # Raises MaintainersError if the file is not valid YAML or fails the
    # checks in _check_maintainers().

    with open(path, encoding="utf-8") as f:
        try:
            yaml = load(f, Loader=SafeLoader)
        except YAMLError as e:
            # Chain the original exception so that the parser's own traceback
            # isn't lost for library users
            raise MaintainersError(
                "{}: YAML error: {}".format(path, e)) from e

    _check_maintainers(path, yaml)
    return yaml
466
467
def _check_maintainers(maints_path, yaml):
    # Checks the maintainers data in 'yaml', which comes from the maintainers
    # file at maints_path, which is a pathlib.Path instance.
    #
    # Raises MaintainersError (via _err()) on the first problem found. Glob
    # patterns are checked against the directory that contains maints_path.

    root = maints_path.parent

    def ferr(msg):
        _err("{}: {}".format(maints_path, msg))  # Prepend the filename

    if not isinstance(yaml, dict):
        ferr("empty or malformed YAML (not a dict)")

    ok_keys = {"status", "maintainers", "collaborators", "inform", "files",
               "files-exclude", "files-regex", "files-regex-exclude",
               "labels", "description"}

    # A tuple rather than a set, to get a stable order in the error message
    ok_status = ("maintained", "odd fixes", "unmaintained", "obsolete")
    ok_status_s = ", ".join('"' + s + '"' for s in ok_status)  # For messages

    # Keys whose value must be a list of strings. The *-exclude keys are
    # included since they are iterated over below, just like 'files' and
    # 'files-regex'.
    list_keys = ("maintainers", "collaborators", "inform", "files",
                 "files-exclude", "files-regex", "files-regex-exclude",
                 "labels")

    for area_name, area_dict in yaml.items():
        if not isinstance(area_dict, dict):
            ferr("malformed entry for area '{}' (not a dict)"
                 .format(area_name))

        for key in area_dict:
            if key not in ok_keys:
                ferr("unknown key '{}' in area '{}'"
                     .format(key, area_name))

        if "status" in area_dict:
            status = area_dict["status"]
            # The isinstance() check keeps unhashable values (e.g. a list)
            # from reaching the membership test
            if not isinstance(status, str) or status not in ok_status:
                ferr("bad 'status' key on area '{}', should be one of {}"
                     .format(area_name, ok_status_s))

        if not area_dict.keys() & {"files", "files-regex"}:
            ferr("either 'files' or 'files-regex' (or both) must be specified "
                 "for area '{}'".format(area_name))

        for list_name in list_keys:
            if list_name in area_dict:
                lst = area_dict[list_name]
                if not (isinstance(lst, list) and
                        all(isinstance(elm, str) for elm in lst)):
                    ferr("malformed '{}' value for area '{}' -- should "
                         "be a list of strings".format(list_name, area_name))

        for files_key in "files", "files-exclude":
            if files_key in area_dict:
                for glob_pattern in area_dict[files_key]:
                    # This could be changed if it turns out to be too slow,
                    # e.g. to only check non-globbing filenames. The tuple() is
                    # needed due to pathlib's glob() returning a generator.
                    paths = tuple(root.glob(glob_pattern))
                    if not paths:
                        ferr("glob pattern '{}' in '{}' in area '{}' does not "
                             "match any files".format(glob_pattern, files_key,
                                                      area_name))
                    if not glob_pattern.endswith("/"):
                        for path in paths:
                            if path.is_dir():
                                ferr("glob pattern '{}' in '{}' in area '{}' "
                                     "matches a directory, but has no "
                                     "trailing '/'"
                                     .format(glob_pattern, files_key,
                                             area_name))

        for files_regex_key in "files-regex", "files-regex-exclude":
            if files_regex_key in area_dict:
                for regex in area_dict[files_regex_key]:
                    try:
                        re.compile(regex)
                    except re.error as e:
                        ferr("bad regular expression '{}' in '{}' in "
                             "'{}': {}".format(regex, files_regex_key,
                                               area_name, e.msg))

        if "description" in area_dict and \
           not isinstance(area_dict["description"], str):
            ferr("malformed 'description' value for area '{}' -- should be a "
                 "string".format(area_name))
549
550
def _git(*args):
    # Helper for running a Git command. Returns the rstrip()ed stdout output,
    # decoded as UTF-8. Called like _git("diff").
    #
    # Raises GitError (via _giterr()) if Git can't be started or exits with a
    # non-zero status.

    git_cmd = ("git",) + args
    git_cmd_s = " ".join(shlex.quote(word) for word in git_cmd)  # For errors

    try:
        git_process = subprocess.Popen(
            git_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except FileNotFoundError:
        _giterr("git executable not found (when running '{}'). Check that "
                "it's listed in the PATH environment variable"
                .format(git_cmd_s))
    except OSError as e:
        _giterr("error running '{}': {}".format(git_cmd_s, e))

    # communicate() reads both pipes until EOF and waits for Git to exit,
    # which also sets 'returncode'
    stdout, stderr = git_process.communicate()
    if git_process.returncode:
        _giterr("error running '{}'\n\nstdout:\n{}\nstderr:\n{}".format(
            git_cmd_s, stdout.decode("utf-8"), stderr.decode("utf-8")))

    return stdout.decode("utf-8").rstrip()
575
576
def _ls_files(path=None):
    # Returns a list of the files tracked by Git, limited to the files under
    # 'path' if it is given. Like plain 'git ls-files', only files below the
    # current directory are listed when 'path' is None.
    #
    # --full-name makes Git print the paths relative to the top-level
    # directory instead of the current directory, which is the form that
    # Area._contains() expects. The output is unchanged when running from the
    # top-level directory.

    cmd = ["ls-files", "--full-name"]
    if path is not None:
        # '--' keeps a path that starts with '-' from being taken as an option
        cmd.extend(["--", path])
    return _git(*cmd).splitlines()
582
583
def _err(msg):
    # Reports a problem with the maintainers file by raising MaintainersError
    # with 'msg' as its message
    error = MaintainersError(msg)
    raise error
586
587
def _giterr(msg):
    # Reports a failed Git invocation by raising GitError with 'msg' as its
    # message
    error = GitError(msg)
    raise error
590
591
def _serr(msg):
    # Terminates the script with an error message prefixed by the program
    # name. Only meant for the command-line interface: library code raises
    # exceptions instead of calling sys.exit().
    program = sys.argv[0]
    report = "{}: error: {}".format(program, msg)
    sys.exit(report)
596
597
class MaintainersError(Exception):
    """
    Raised when the maintainers file can't be parsed or fails validation
    """
600
601
class GitError(Exception):
    """
    Raised when Git can't be run or a Git command fails
    """
604
605
# Run the command-line interface only when executed as a script, so that
# importing this file as a library has no side effects
if __name__ == "__main__":
    _main()