blob: d51a2738a26aab2bf982d268c280ab0a7cb4a6b0 [file] [log] [blame]
Anas Nashif8d8875b2020-04-27 14:16:59 -04001#!/usr/bin/env python3
2
3# Copyright (c) 2019 Nordic Semiconductor ASA
4# SPDX-License-Identifier: Apache-2.0
5
"""
Lists maintainers for files or commits. Similar in function to
scripts/get_maintainer.pl from Linux, but geared towards GitHub. The mapping is
in MAINTAINERS.yml.

The comment at the top of MAINTAINERS.yml in Zephyr documents the file format.

See the help texts for the various subcommands for more information. They can
be viewed with e.g.

    ./get_maintainer.py path --help

This executable doubles as a Python library. Identifiers not prefixed with '_'
are part of the library API. The library documentation can be viewed with this
command:

    $ pydoc get_maintainer
"""
24
25import argparse
26import operator
27import os
28import pathlib
29import re
30import shlex
31import subprocess
32import sys
33
34from yaml import load, YAMLError
35try:
36 # Use the speedier C LibYAML parser if available
37 from yaml import CLoader as Loader
38except ImportError:
39 from yaml import Loader
40
41
def _main():
    # Script entry point: parses the command line, loads the maintainers
    # file, and dispatches to the handler of the selected subcommand.
    #
    # Maintainers-file and Git problems are turned into a short error message
    # on exit instead of a traceback.

    cli_args = _parse_args()

    try:
        maintainers = Maintainers(cli_args.maintainers)
        cli_args.cmd_fn(maintainers, cli_args)
    except (MaintainersError, GitError) as error:
        _serr(error)
50
51
def _parse_args():
    # Parses arguments when run as an executable.
    #
    # Returns the argparse namespace. Besides the options of the selected
    # subcommand, it carries 'maintainers' (path to the maintainers file, or
    # None) and 'cmd_fn' (the Maintainers method implementing the subcommand).
    #
    # Exits with the usage text if no subcommand is given.

    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=__doc__)

    parser.add_argument(
        "-m", "--maintainers",
        metavar="MAINTAINERS_FILE",
        help="Maintainers file to load. If not specified, MAINTAINERS.yml in "
             "the top-level repository directory is used, and must exist. "
             "Paths in the maintainers file will always be taken as relative "
             "to the top-level directory.")

    subparsers = parser.add_subparsers(
        help="Available commands (each has a separate --help text)")

    id_parser = subparsers.add_parser(
        "path",
        help="List area(s) for paths")
    id_parser.add_argument(
        "paths",
        metavar="PATH",
        nargs="*",
        help="Path to list areas for")
    id_parser.set_defaults(cmd_fn=Maintainers._path_cmd)

    commits_parser = subparsers.add_parser(
        "commits",
        help="List area(s) for commit range")
    commits_parser.add_argument(
        "commits",
        metavar="COMMIT_RANGE",
        nargs="*",
        help="Commit range to list areas for (default: HEAD~..)")
    commits_parser.set_defaults(cmd_fn=Maintainers._commits_cmd)

    list_parser = subparsers.add_parser(
        "list",
        help="List files in areas")
    list_parser.add_argument(
        "area",
        metavar="AREA",
        nargs="?",
        # Orphaned files are the ones that appear in no area, so the files
        # listed by default are those that appear in at least one area
        help="Name of area to list files in. If not specified, all "
             "non-orphaned files are listed (all files that appear in some "
             "area).")
    list_parser.set_defaults(cmd_fn=Maintainers._list_cmd)

    areas_parser = subparsers.add_parser(
        "areas",
        help="List areas and maintainers")
    areas_parser.add_argument(
        "maintainer",
        metavar="MAINTAINER",
        nargs="?",
        help="List only the areas maintained by MAINTAINER. If not "
             "specified, all areas are listed.")
    areas_parser.set_defaults(cmd_fn=Maintainers._areas_cmd)

    orphaned_parser = subparsers.add_parser(
        "orphaned",
        help="List orphaned files (files that do not appear in any area)")
    orphaned_parser.add_argument(
        "path",
        metavar="PATH",
        nargs="?",
        help="Limit to files under PATH")
    orphaned_parser.set_defaults(cmd_fn=Maintainers._orphaned_cmd)

    args = parser.parse_args()
    if not hasattr(args, "cmd_fn"):
        # Called without a subcommand
        sys.exit(parser.format_usage().rstrip())

    return args
129
130
class Maintainers:
    """
    Represents the contents of a maintainers YAML file.

    These attributes are available:

    areas:
        A dictionary that maps area names to Area instances, for all areas
        defined in the maintainers file

    filename:
        The path to the maintainers file
    """
    def __init__(self, filename=None):
        """
        Creates a Maintainers instance.

        filename (default: None):
            Path to the maintainers file to parse. If None, MAINTAINERS.yml in
            the top-level directory of the Git repository is used, and must
            exist.
        """
        self._toplevel = pathlib.Path(_git("rev-parse", "--show-toplevel"))

        if filename is None:
            self.filename = self._toplevel / "MAINTAINERS.yml"
        else:
            self.filename = pathlib.Path(filename)

        self.areas = {}
        for area_name, area_dict in _load_maintainers(self.filename).items():
            area = Area()
            area.name = area_name
            area.status = area_dict.get("status")
            area.maintainers = area_dict.get("maintainers", [])
            area.collaborators = area_dict.get("collaborators", [])
            area.inform = area_dict.get("inform", [])
            area.labels = area_dict.get("labels", [])
            area.description = area_dict.get("description")

            # area._match_fn(path) tests if the path matches files and/or
            # files-regex
            area._match_fn = \
                _get_match_fn(area_dict.get("files"),
                              area_dict.get("files-regex"))

            # Like area._match_fn(path), but for files-exclude and
            # files-regex-exclude
            area._exclude_match_fn = \
                _get_match_fn(area_dict.get("files-exclude"),
                              area_dict.get("files-regex-exclude"))

            self.areas[area_name] = area

    def path2areas(self, path):
        """
        Returns a list of Area instances for the areas that contain 'path'.
        A relative 'path' is taken as relative to the current directory. An
        absolute 'path' is accepted as well.
        """
        # Make directory paths end in '/' so that foo/bar matches foo/bar/.
        # Skip this check in _contains() itself, because the isdir() makes it
        # twice as slow in cases where it's not needed.
        is_dir = os.path.isdir(path)

        # Make 'path' relative to the repository root and normalize it. Going
        # through abspath() makes this work for absolute paths too (a plain
        # join() with the current directory would leave them absolute, and
        # they would then never match). relpath() removes a trailing '/', so
        # we add it afterwards.
        path = os.path.relpath(os.path.abspath(path), self._toplevel)

        if is_dir:
            path += "/"

        return self._toplevel_path2areas(path)

    def commits2areas(self, commits):
        """
        Returns a set() of Area instances for the areas that contain files that
        are modified by the commit range in 'commits'. 'commits' could be e.g.
        "HEAD~..", to inspect the tip commit
        """
        res = set()
        # Final '--' is to make sure 'commits' is interpreted as a commit range
        # rather than a path. That might give better error messages.
        for path in _git("diff", "--name-only", commits, "--").splitlines():
            # 'git diff --name-only' prints paths relative to the top-level
            # directory by default, no matter what the current directory is,
            # so they must not go through path2areas(), which would take them
            # as relative to the current directory
            if os.path.isdir(self._toplevel / path):
                path += "/"
            res.update(self._toplevel_path2areas(path))
        return res

    def __repr__(self):
        return "<Maintainers for '{}'>".format(self.filename)

    #
    # Private helpers
    #

    def _toplevel_path2areas(self, path):
        # Returns a list of the areas that contain 'path', which must already
        # be normalized and relative to the top-level directory (with a
        # trailing '/' for directories)

        return [area for area in self.areas.values()
                if area._contains(path)]

    def _ls_files_from_toplevel(self, path=None):
        # Generates (listed path, top-level path) tuples for the files printed
        # by 'git ls-files [path]'. The listed path is what Git printed
        # (relative to the current directory), and the top-level path is the
        # same file relative to the top-level directory, which is what the
        # patterns in the maintainers file are matched against.

        cwd_prefix = os.path.relpath(os.getcwd(), self._toplevel)
        for listed in _ls_files(path):
            yield listed, os.path.normpath(os.path.join(cwd_prefix, listed))

    #
    # Command-line subcommands
    #

    def _path_cmd(self, args):
        # 'path' subcommand implementation

        for path in args.paths:
            if not os.path.exists(path):
                _serr("'{}': no such file or directory".format(path))

        res = set()
        orphaned = []
        for path in args.paths:
            areas = self.path2areas(path)
            res.update(areas)
            if not areas:
                orphaned.append(path)

        _print_areas(res)
        if orphaned:
            if res:
                print()
            print("Orphaned paths (not in any area):\n" + "\n".join(orphaned))

    def _commits_cmd(self, args):
        # 'commits' subcommand implementation

        commits = args.commits or ("HEAD~..",)
        _print_areas({area for commit_range in commits
                           for area in self.commits2areas(commit_range)})

    def _areas_cmd(self, args):
        # 'areas' subcommand implementation. Prints all areas, or just the
        # ones maintained by args.maintainer if it is given.

        for area in self.areas.values():
            if args.maintainer and args.maintainer not in area.maintainers:
                continue
            print("{:25}\t{}".format(area.name, ",".join(area.maintainers)))

    def _list_cmd(self, args):
        # 'list' subcommand implementation

        if args.area is None:
            # List all files that appear in some area
            areas = tuple(self.areas.values())
        else:
            # List all files that appear in the given area
            area = self.areas.get(args.area)
            if area is None:
                _serr("'{}': no such area defined in '{}'"
                      .format(args.area, self.filename))
            areas = (area,)

        for listed, toplevel_path in self._ls_files_from_toplevel():
            if any(area._contains(toplevel_path) for area in areas):
                print(listed)

    def _orphaned_cmd(self, args):
        # 'orphaned' subcommand implementation

        if args.path is not None and not os.path.exists(args.path):
            _serr("'{}': no such file or directory".format(args.path))

        for listed, toplevel_path in self._ls_files_from_toplevel(args.path):
            if not any(area._contains(toplevel_path)
                       for area in self.areas.values()):
                print(listed)
297
298
class Area:
    """
    Represents an entry for an area in MAINTAINERS.yml.

    Instances are created without arguments. Their attributes are assigned by
    the code that builds them (see Maintainers.__init__()).

    These attributes are available:

    name:
        The name of the area, as a string. Used by __repr__().

    status:
        The status of the area, as a string. None if the area has no 'status'
        key. See MAINTAINERS.yml.

    maintainers:
        List of maintainers. Empty if the area has no 'maintainers' key.

    collaborators:
        List of collaborators. Empty if the area has no 'collaborators' key.

    inform:
        List of people to inform on pull requests. Empty if the area has no
        'inform' key.

    labels:
        List of GitHub labels for the area. Empty if the area has no 'labels'
        key.

    description:
        Text from 'description' key, or None if the area has no 'description'
        key
    """
    def _contains(self, path):
        # Returns True if the area contains 'path', and False otherwise.
        #
        # self._match_fn and self._exclude_match_fn are either None (meaning
        # "matches nothing") or functions that return a truthy value for
        # matching paths. The result is always a real bool, never None or a
        # match object.

        if not (self._match_fn and self._match_fn(path)):
            return False

        return not (self._exclude_match_fn and self._exclude_match_fn(path))

    def __repr__(self):
        return "<Area {}>".format(self.name)
335
336
337def _print_areas(areas):
338 first = True
339 for area in sorted(areas, key=operator.attrgetter("name")):
340 if not first:
341 print()
342 first = False
343
344 print("""\
345{}
346\tstatus: {}
347\tmaintainers: {}
348\tcollaborators: {}
349\tinform: {}
350\tlabels: {}
351\tdescription: {}""".format(area.name,
352 area.status,
353 ", ".join(area.maintainers),
354 ", ".join(area.collaborators),
355 ", ".join(area.inform),
356 ", ".join(area.labels),
357 area.description or ""))
358
359
360def _get_match_fn(globs, regexes):
361 # Constructs a single regex that tests for matches against the globs in
362 # 'globs' and the regexes in 'regexes'. Parts are joined with '|' (OR).
363 # Returns the search() method of the compiled regex.
364 #
365 # Returns None if there are neither globs nor regexes, which should be
366 # interpreted as no match.
367
368 if not (globs or regexes):
369 return None
370
371 regex = ""
372
373 if globs:
374 glob_regexes = []
375 for glob in globs:
376 # Construct a regex equivalent to the glob
377 glob_regex = glob.replace(".", "\\.").replace("*", "[^/]*") \
378 .replace("?", "[^/]")
379
380 if not glob.endswith("/"):
381 # Require a full match for globs that don't end in /
382 glob_regex += "$"
383
384 glob_regexes.append(glob_regex)
385
386 # The glob regexes must anchor to the beginning of the path, since we
387 # return search(). (?:) is a non-capturing group.
388 regex += "^(?:{})".format("|".join(glob_regexes))
389
390 if regexes:
391 if regex:
392 regex += "|"
393 regex += "|".join(regexes)
394
395 return re.compile(regex).search
396
397
def _load_maintainers(path):
    # Returns the parsed contents of the maintainers file at 'path' (a
    # pathlib.Path instance, since it is passed on to _check_maintainers()),
    # also running checks on the contents. The returned format is plain
    # Python dicts/lists/etc., mirroring the structure of the file.
    #
    # Raises MaintainersError if the file is not valid YAML or fails the
    # checks. Errors from opening the file are not translated.

    with open(path, encoding="utf-8") as f:
        try:
            # NOTE(review): load() with the full Loader/CLoader can construct
            # arbitrary Python objects from tagged YAML, so this is only safe
            # for trusted maintainers files. Consider SafeLoader/CSafeLoader
            # if the file can ever come from an untrusted source.
            yaml = load(f, Loader=Loader)
        except YAMLError as e:
            # Chain the original error so the parser details are kept
            raise MaintainersError(
                "{}: YAML error: {}".format(path, e)) from e

    _check_maintainers(path, yaml)
    return yaml
411
412
413def _check_maintainers(maints_path, yaml):
414 # Checks the maintainers data in 'yaml', which comes from the maintainers
415 # file at maints_path, which is a pathlib.Path instance
416
417 root = maints_path.parent
418
419 def ferr(msg):
420 _err("{}: {}".format(maints_path, msg)) # Prepend the filename
421
422 if not isinstance(yaml, dict):
423 ferr("empty or malformed YAML (not a dict)")
424
425 ok_keys = {"status", "maintainers", "collaborators", "inform", "files",
426 "files-exclude", "files-regex", "files-regex-exclude",
427 "labels", "description"}
428
429 ok_status = {"maintained", "odd fixes", "orphaned", "obsolete"}
430 ok_status_s = ", ".join('"' + s + '"' for s in ok_status) # For messages
431
432 for area_name, area_dict in yaml.items():
433 if not isinstance(area_dict, dict):
434 ferr("malformed entry for area '{}' (not a dict)"
435 .format(area_name))
436
437 for key in area_dict:
438 if key not in ok_keys:
439 ferr("unknown key '{}' in area '{}'"
440 .format(key, area_name))
441
442 if "status" in area_dict and \
443 area_dict["status"] not in ok_status:
444 ferr("bad 'status' key on area '{}', should be one of {}"
445 .format(area_name, ok_status_s))
446
447 if not area_dict.keys() & {"files", "files-regex"}:
448 ferr("either 'files' or 'files-regex' (or both) must be specified "
449 "for area '{}'".format(area_name))
450
451 for list_name in "maintainers", "collaborators", "inform", "files", \
452 "files-regex", "labels":
453 if list_name in area_dict:
454 lst = area_dict[list_name]
455 if not (isinstance(lst, list) and
456 all(isinstance(elm, str) for elm in lst)):
457 ferr("malformed '{}' value for area '{}' -- should "
458 "be a list of strings".format(list_name, area_name))
459
460 for files_key in "files", "files-exclude":
461 if files_key in area_dict:
462 for glob_pattern in area_dict[files_key]:
463 # This could be changed if it turns out to be too slow,
464 # e.g. to only check non-globbing filenames. The tuple() is
465 # needed due to pathlib's glob() returning a generator.
466 paths = tuple(root.glob(glob_pattern))
467 if not paths:
468 ferr("glob pattern '{}' in '{}' in area '{}' does not "
469 "match any files".format(glob_pattern, files_key,
470 area_name))
471 if not glob_pattern.endswith("/"):
472 for path in paths:
473 if path.is_dir():
474 ferr("glob pattern '{}' in '{}' in area '{}' "
475 "matches a directory, but has no "
476 "trailing '/'"
477 .format(glob_pattern, files_key,
478 area_name))
479
480 for files_regex_key in "files-regex", "files-regex-exclude":
481 if files_regex_key in area_dict:
482 for regex in area_dict[files_regex_key]:
483 try:
484 re.compile(regex)
485 except re.error as e:
486 ferr("bad regular expression '{}' in '{}' in "
487 "'{}': {}".format(regex, files_regex_key,
488 area_name, e.msg))
489
490 if "description" in area_dict and \
491 not isinstance(area_dict["description"], str):
492 ferr("malformed 'description' value for area '{}' -- should be a "
493 "string".format(area_name))
494
495
def _git(*args):
    # Helper for running a Git command. Returns the rstrip()ed stdout output,
    # decoded as UTF-8. Called like _git("diff").
    #
    # Raises GitError (via _giterr()) if Git can't be started or exits with a
    # non-zero status.

    git_cmd = ("git",) + args
    git_cmd_s = " ".join(shlex.quote(word) for word in git_cmd)  # For errors

    try:
        git_process = subprocess.Popen(
            git_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except FileNotFoundError:
        _giterr("git executable not found (when running '{}'). Check that "
                "it's listed in the PATH environment variable"
                .format(git_cmd_s))
    except OSError as e:
        _giterr("error running '{}': {}".format(git_cmd_s, e))

    stdout, stderr = git_process.communicate()
    if git_process.returncode:
        # Decode leniently here, so that undecodable output can't hide the
        # actual Git failure behind a UnicodeDecodeError
        _giterr("error running '{}'\n\nstdout:\n{}\nstderr:\n{}".format(
            git_cmd_s,
            stdout.decode("utf-8", errors="replace"),
            stderr.decode("utf-8", errors="replace")))

    return stdout.decode("utf-8").rstrip()
520
521
def _ls_files(path=None):
    # Returns the output lines of 'git ls-files' as a list of strings. If
    # 'path' is given, it is passed on to 'git ls-files' as its only argument.

    git_args = ("ls-files",) if path is None else ("ls-files", path)
    listing = _git(*git_args)
    return listing.splitlines()
527
528
def _err(msg):
    # Reports a problem with the maintainers file by raising
    # MaintainersError with 'msg' as its message. Never returns.
    error = MaintainersError(msg)
    raise error
531
532
def _giterr(msg):
    # Reports a Git-related problem by raising GitError with 'msg' as its
    # message. Never returns.
    error = GitError(msg)
    raise error
535
536
537def _serr(msg):
538 # For reporting errors when get_maintainer.py is run as a script.
539 # sys.exit() shouldn't be used otherwise.
540 sys.exit("{}: error: {}".format(sys.argv[0], msg))
541
542
class MaintainersError(Exception):
    """Exception raised for MAINTAINERS.yml-related errors"""
545
546
class GitError(Exception):
    """Exception raised for Git-related errors"""
549
550
# Run the command-line interface only when executed as a script, so that
# importing the module as a library has no side effects
if __name__ == "__main__":
    _main()