Anas Nashif | 8d8875b | 2020-04-27 14:16:59 -0400 | [diff] [blame] | 1 | #!/usr/bin/env python3 |
| 2 | |
| 3 | # Copyright (c) 2019 Nordic Semiconductor ASA |
| 4 | # SPDX-License-Identifier: Apache-2.0 |
| 5 | |
| 6 | """ |
| 7 | Lists maintainers for files or commits. Similar in function to |
| 8 | scripts/get_maintainer.pl from Linux, but geared towards GitHub. The mapping is |
| 9 | in MAINTAINERS.yml. |
| 10 | |
| 11 | The comment at the top of MAINTAINERS.yml in Zephyr documents the file format. |
| 12 | |
| 13 | See the help texts for the various subcommands for more information. They can |
| 14 | be viewed with e.g. |
| 15 | |
| 16 | ./get_maintainer.py path --help |
| 17 | |
| 18 | This executable doubles as a Python library. Identifiers not prefixed with '_' |
| 19 | are part of the library API. The library documentation can be viewed with this |
| 20 | command: |
| 21 | |
| 22 | $ pydoc get_maintainer |
| 23 | """ |
| 24 | |
| 25 | import argparse |
| 26 | import operator |
| 27 | import os |
| 28 | import pathlib |
| 29 | import re |
| 30 | import shlex |
| 31 | import subprocess |
| 32 | import sys |
| 33 | |
| 34 | from yaml import load, YAMLError |
| 35 | try: |
| 36 | # Use the speedier C LibYAML parser if available |
| 37 | from yaml import CLoader as Loader |
| 38 | except ImportError: |
| 39 | from yaml import Loader |
| 40 | |
| 41 | |
def _main():
    # Script entry point: parses the command line, loads the maintainers file,
    # and dispatches to the handler of the selected subcommand. Library-level
    # errors are turned into a short message and a non-zero exit status instead
    # of a traceback.

    args = _parse_args()
    try:
        # Constructing Maintainers can itself fail (Git or YAML problems), so
        # it has to happen inside the 'try'
        maintainers = Maintainers(args.maintainers)
        args.cmd_fn(maintainers, args)
    except (GitError, MaintainersError) as e:
        _serr(e)
| 50 | |
| 51 | |
def _parse_args():
    # Parses arguments when run as an executable.
    #
    # Returns the argparse namespace. Besides the parsed options, it always
    # carries a 'cmd_fn' attribute holding the Maintainers method that
    # implements the selected subcommand. Exits with a usage message if no
    # subcommand was given.

    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=__doc__)

    parser.add_argument(
        "-m", "--maintainers",
        metavar="MAINTAINERS_FILE",
        help="Maintainers file to load. If not specified, MAINTAINERS.yml in "
             "the top-level repository directory is used, and must exist. "
             "Paths in the maintainers file will always be taken as relative "
             "to the top-level directory.")

    subparsers = parser.add_subparsers(
        help="Available commands (each has a separate --help text)")

    path_parser = subparsers.add_parser(
        "path",
        help="List area(s) for paths")
    path_parser.add_argument(
        "paths",
        metavar="PATH",
        nargs="*",
        help="Path to list areas for")
    path_parser.set_defaults(cmd_fn=Maintainers._path_cmd)

    commits_parser = subparsers.add_parser(
        "commits",
        help="List area(s) for commit range")
    commits_parser.add_argument(
        "commits",
        metavar="COMMIT_RANGE",
        nargs="*",
        help="Commit range to list areas for (default: HEAD~..)")
    commits_parser.set_defaults(cmd_fn=Maintainers._commits_cmd)

    list_parser = subparsers.add_parser(
        "list",
        help="List files in areas")
    list_parser.add_argument(
        "area",
        metavar="AREA",
        nargs="?",
        # Non-orphaned files are the ones that DO appear in some area
        help="Name of area to list files in. If not specified, all "
             "non-orphaned files are listed (all files that appear in some "
             "area).")
    list_parser.set_defaults(cmd_fn=Maintainers._list_cmd)

    areas_parser = subparsers.add_parser(
        "areas",
        help="List areas and maintainers")
    areas_parser.add_argument(
        "maintainer",
        metavar="MAINTAINER",
        nargs="?",
        help="List all areas maintained by MAINTAINER. If not specified, all "
             "areas are listed.")
    areas_parser.set_defaults(cmd_fn=Maintainers._areas_cmd)

    orphaned_parser = subparsers.add_parser(
        "orphaned",
        help="List orphaned files (files that do not appear in any area)")
    orphaned_parser.add_argument(
        "path",
        metavar="PATH",
        nargs="?",
        help="Limit to files under PATH")
    orphaned_parser.set_defaults(cmd_fn=Maintainers._orphaned_cmd)

    args = parser.parse_args()
    if not hasattr(args, "cmd_fn"):
        # Called without a subcommand
        sys.exit(parser.format_usage().rstrip())

    return args
| 129 | |
| 130 | |
class Maintainers:
    """
    Represents the contents of a maintainers YAML file.

    These attributes are available:

    areas:
        A dictionary that maps area names to Area instances, for all areas
        defined in the maintainers file

    filename:
        The path to the maintainers file
    """
    def __init__(self, filename=None):
        """
        Creates a Maintainers instance.

        filename (default: None):
            Path to the maintainers file to parse. If None, MAINTAINERS.yml in
            the top-level directory of the Git repository is used, and must
            exist.

        Raises GitError if the top-level directory can't be determined, and
        MaintainersError if the maintainers file is malformed.
        """
        self._toplevel = pathlib.Path(_git("rev-parse", "--show-toplevel"))

        if filename is None:
            self.filename = self._toplevel / "MAINTAINERS.yml"
        else:
            self.filename = pathlib.Path(filename)

        self.areas = {}
        for area_name, area_dict in _load_maintainers(self.filename).items():
            area = Area()
            area.name = area_name
            area.status = area_dict.get("status")
            area.maintainers = area_dict.get("maintainers", [])
            area.collaborators = area_dict.get("collaborators", [])
            area.inform = area_dict.get("inform", [])
            area.labels = area_dict.get("labels", [])
            area.description = area_dict.get("description")

            # area._match_fn(path) tests if the path matches files and/or
            # files-regex
            area._match_fn = \
                _get_match_fn(area_dict.get("files"),
                              area_dict.get("files-regex"))

            # Like area._match_fn(path), but for files-exclude and
            # files-regex-exclude
            area._exclude_match_fn = \
                _get_match_fn(area_dict.get("files-exclude"),
                              area_dict.get("files-regex-exclude"))

            self.areas[area_name] = area

    def path2areas(self, path):
        """
        Returns a list of Area instances for the areas that contain 'path',
        taken as relative to the current directory
        """
        # Skip the isdir() check in _contains() itself, because it makes it
        # twice as slow in cases where it's not needed.
        is_dir = os.path.isdir(path)

        # Make 'path' relative to the repository root and normalize it
        path = os.path.normpath(os.path.join(
            os.path.relpath(os.getcwd(), self._toplevel),
            path))

        return self._toplevel_path2areas(path, is_dir)

    def commits2areas(self, commits):
        """
        Returns a set() of Area instances for the areas that contain files that
        are modified by the commit range in 'commits'. 'commits' could be e.g.
        "HEAD~..", to inspect the tip commit
        """
        res = set()
        # Final '--' is to make sure 'commits' is interpreted as a commit range
        # rather than a path. That might give better error messages.
        for path in _git("diff", "--name-only", commits, "--").splitlines():
            # 'git diff --name-only' prints paths relative to the top-level
            # directory, not to the current directory, so they must not go
            # through path2areas(). Doing so gave wrong results whenever the
            # current directory wasn't the repository root.
            is_dir = os.path.isdir(os.path.join(self._toplevel, path))
            res.update(
                self._toplevel_path2areas(os.path.normpath(path), is_dir))
        return res

    def _toplevel_path2areas(self, path, is_dir):
        # Returns a list of the areas that contain the normalized path 'path',
        # which is relative to the top-level directory. 'is_dir' tells whether
        # 'path' is a directory.

        # Make directory paths end in '/' so that foo/bar matches foo/bar/.
        # normpath() removes any trailing '/', so it is added here, afterwards.
        if is_dir:
            path += "/"

        return [area for area in self.areas.values()
                if area._contains(path)]

    def __repr__(self):
        return "<Maintainers for '{}'>".format(self.filename)

    #
    # Command-line subcommands
    #

    def _path_cmd(self, args):
        # 'path' subcommand implementation

        # Validate all paths up front, so that nothing is printed if any of
        # them is bad
        for path in args.paths:
            if not os.path.exists(path):
                _serr("'{}': no such file or directory".format(path))

        res = set()
        orphaned = []
        for path in args.paths:
            areas = self.path2areas(path)
            res.update(areas)
            if not areas:
                orphaned.append(path)

        _print_areas(res)
        if orphaned:
            if res:
                print()  # Blank line between the areas and the orphans
            print("Orphaned paths (not in any area):\n" + "\n".join(orphaned))

    def _commits_cmd(self, args):
        # 'commits' subcommand implementation

        commits = args.commits or ("HEAD~..",)
        _print_areas({area for commit_range in commits
                           for area in self.commits2areas(commit_range)})

    def _areas_cmd(self, args):
        # 'areas' subcommand implementation. Prints one line per area, limited
        # to the areas maintained by args.maintainer if it is given.

        for area in self.areas.values():
            if args.maintainer and args.maintainer not in area.maintainers:
                continue
            print("{:25}\t{}".format(area.name, ",".join(area.maintainers)))

    def _list_cmd(self, args):
        # 'list' subcommand implementation

        if args.area is None:
            # List all files that appear in some area
            for path in _ls_files():
                if any(area._contains(path) for area in self.areas.values()):
                    print(path)
        else:
            # List all files that appear in the given area
            area = self.areas.get(args.area)
            if area is None:
                _serr("'{}': no such area defined in '{}'"
                      .format(args.area, self.filename))

            for path in _ls_files():
                if area._contains(path):
                    print(path)

    def _orphaned_cmd(self, args):
        # 'orphaned' subcommand implementation

        if args.path is not None and not os.path.exists(args.path):
            _serr("'{}': no such file or directory".format(args.path))

        for path in _ls_files(args.path):
            # Orphaned means that no area at all contains the file
            if not any(area._contains(path) for area in self.areas.values()):
                print(path)
| 297 | |
| 298 | |
class Area:
    """
    Represents an entry for an area in MAINTAINERS.yml.

    These attributes are available:

    status:
        The status of the area, as a string. None if the area has no 'status'
        key. See MAINTAINERS.yml.

    maintainers:
        List of maintainers. Empty if the area has no 'maintainers' key.

    collaborators:
        List of collaborators. Empty if the area has no 'collaborators' key.

    inform:
        List of people to inform on pull requests. Empty if the area has no
        'inform' key.

    labels:
        List of GitHub labels for the area. Empty if the area has no 'labels'
        key.

    description:
        Text from 'description' key, or None if the area has no 'description'
        key
    """
    def _contains(self, path):
        # Returns True if the area contains 'path', and False otherwise.
        #
        # self._match_fn and self._exclude_match_fn are either None (nothing
        # to match against) or functions returning a truthy value on a match.
        # The bool() makes sure a plain True/False is returned, rather than
        # None or a regex match object.

        return bool(
            self._match_fn and self._match_fn(path) and not
            (self._exclude_match_fn and self._exclude_match_fn(path)))

    def __repr__(self):
        return "<Area {}>".format(self.name)
| 335 | |
| 336 | |
def _print_areas(areas):
    # Prints a summary of each area in the iterable 'areas' to stdout, ordered
    # by area name and with a blank line between consecutive areas. A missing
    # description is shown as an empty string.

    template = ("{}\n"
                "\tstatus: {}\n"
                "\tmaintainers: {}\n"
                "\tcollaborators: {}\n"
                "\tinform: {}\n"
                "\tlabels: {}\n"
                "\tdescription: {}")

    by_name = sorted(areas, key=operator.attrgetter("name"))
    for index, area in enumerate(by_name):
        if index:
            # Separator before every area except the first one
            print()

        fields = (
            area.name,
            area.status,
            ", ".join(area.maintainers),
            ", ".join(area.collaborators),
            ", ".join(area.inform),
            ", ".join(area.labels),
            area.description or "",
        )
        print(template.format(*fields))
| 358 | |
| 359 | |
# Maps glob characters to the equivalent regex text. '*' and '?' never match
# '/'. Regex metacharacters that have no special meaning in globs are escaped,
# so that e.g. the '+' in 'c++/' matches literally instead of acting as a
# quantifier. '[', ']' and '\' are deliberately passed through unchanged.
_GLOB_TO_REGEX = str.maketrans({
    "*": "[^/]*",
    "?": "[^/]",
    **{char: "\\" + char for char in ".+(){}^$|"},
})


def _get_match_fn(globs, regexes):
    # Constructs a single regex that tests for matches against the globs in
    # 'globs' and the regexes in 'regexes'. Parts are joined with '|' (OR).
    # Returns the search() method of the compiled regex.
    #
    # Returns None if there are neither globs nor regexes, which should be
    # interpreted as no match.
    #
    # Raises re.error if the combined regex does not compile.

    if not (globs or regexes):
        return None

    regex = ""

    if globs:
        glob_regexes = []
        for glob in globs:
            # Construct a regex equivalent to the glob
            glob_regex = glob.translate(_GLOB_TO_REGEX)

            if not glob.endswith("/"):
                # Require a full match for globs that don't end in /
                glob_regex += "$"

            glob_regexes.append(glob_regex)

        # The glob regexes must anchor to the beginning of the path, since we
        # return search(). (?:) is a non-capturing group.
        regex += "^(?:{})".format("|".join(glob_regexes))

    if regexes:
        if regex:
            regex += "|"
        regex += "|".join(regexes)

    return re.compile(regex).search
| 396 | |
| 397 | |
def _load_maintainers(path):
    # Returns the parsed contents of the maintainers file 'path', also running
    # checks on the contents. The returned format is plain Python
    # dicts/lists/etc., mirroring the structure of the file.
    #
    # 'path' must be a pathlib.Path, as it is passed on to
    # _check_maintainers().
    #
    # Raises MaintainersError if the file is not valid YAML or fails the
    # checks. Errors from opening the file are passed through unchanged.

    with open(path, encoding="utf-8") as f:
        try:
            # NOTE(review): Loader is PyYAML's full (C)Loader, which can
            # construct arbitrary Python objects. Only load trusted
            # maintainers files, or consider switching to the safe loader.
            contents = load(f, Loader=Loader)
        except YAMLError as e:
            # Chain explicitly, so that the YAML error is kept as the cause
            raise MaintainersError(
                "{}: YAML error: {}".format(path, e)) from e

    _check_maintainers(path, contents)
    return contents
| 411 | |
| 412 | |
def _check_maintainers(maints_path, yaml):
    # Checks the maintainers data in 'yaml', which comes from the maintainers
    # file at maints_path, which is a pathlib.Path instance.
    #
    # Glob patterns are checked against the files in the directory that
    # contains the maintainers file.
    #
    # Raises MaintainersError on the first problem found. Returns None if
    # everything checks out.

    root = maints_path.parent

    def ferr(msg):
        _err("{}: {}".format(maints_path, msg))  # Prepend the filename

    if not isinstance(yaml, dict):
        ferr("empty or malformed YAML (not a dict)")

    ok_keys = {"status", "maintainers", "collaborators", "inform", "files",
               "files-exclude", "files-regex", "files-regex-exclude",
               "labels", "description"}

    # A tuple rather than a set, so that the order in messages is stable
    ok_status = ("maintained", "odd fixes", "orphaned", "obsolete")
    ok_status_s = ", ".join('"' + s + '"' for s in ok_status)  # For messages

    # Keys whose values must be lists of strings. The exclude keys must be
    # included: a bare string there would otherwise be iterated character by
    # character by the glob and regex checks below.
    list_keys = ("maintainers", "collaborators", "inform", "files",
                 "files-exclude", "files-regex", "files-regex-exclude",
                 "labels")

    for area_name, area_dict in yaml.items():
        if not isinstance(area_dict, dict):
            ferr("malformed entry for area '{}' (not a dict)"
                 .format(area_name))

        for key in area_dict:
            if key not in ok_keys:
                ferr("unknown key '{}' in area '{}'"
                     .format(key, area_name))

        if "status" in area_dict and \
           area_dict["status"] not in ok_status:
            ferr("bad 'status' key on area '{}', should be one of {}"
                 .format(area_name, ok_status_s))

        if not area_dict.keys() & {"files", "files-regex"}:
            ferr("either 'files' or 'files-regex' (or both) must be specified "
                 "for area '{}'".format(area_name))

        for list_name in list_keys:
            if list_name in area_dict:
                lst = area_dict[list_name]
                if not (isinstance(lst, list) and
                        all(isinstance(elm, str) for elm in lst)):
                    ferr("malformed '{}' value for area '{}' -- should "
                         "be a list of strings".format(list_name, area_name))

        for files_key in "files", "files-exclude":
            if files_key in area_dict:
                for glob_pattern in area_dict[files_key]:
                    # This could be changed if it turns out to be too slow,
                    # e.g. to only check non-globbing filenames. The tuple() is
                    # needed due to pathlib's glob() returning a generator.
                    paths = tuple(root.glob(glob_pattern))
                    if not paths:
                        ferr("glob pattern '{}' in '{}' in area '{}' does not "
                             "match any files".format(glob_pattern, files_key,
                                                      area_name))
                    if not glob_pattern.endswith("/"):
                        for path in paths:
                            if path.is_dir():
                                ferr("glob pattern '{}' in '{}' in area '{}' "
                                     "matches a directory, but has no "
                                     "trailing '/'"
                                     .format(glob_pattern, files_key,
                                             area_name))

        for files_regex_key in "files-regex", "files-regex-exclude":
            if files_regex_key in area_dict:
                for regex in area_dict[files_regex_key]:
                    try:
                        re.compile(regex)
                    except re.error as e:
                        ferr("bad regular expression '{}' in '{}' in "
                             "'{}': {}".format(regex, files_regex_key,
                                               area_name, e.msg))

        if "description" in area_dict and \
           not isinstance(area_dict["description"], str):
            ferr("malformed 'description' value for area '{}' -- should be a "
                 "string".format(area_name))
| 494 | |
| 495 | |
def _git(*args):
    # Helper for running a Git command. Returns the rstrip()ed stdout output,
    # decoded as UTF-8. Called like _git("diff").
    #
    # Raises GitError if the git executable can't be run or exits with a
    # non-zero status.

    git_cmd = ("git",) + args
    git_cmd_s = " ".join(shlex.quote(word) for word in git_cmd)  # For errors

    try:
        git_process = subprocess.Popen(
            git_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except FileNotFoundError:
        _giterr("git executable not found (when running '{}'). Check that "
                "it's listed in the PATH environment variable"
                .format(git_cmd_s))
    except OSError as e:
        _giterr("error running '{}': {}".format(git_cmd_s, e))

    # communicate() also waits for the process, which sets 'returncode'
    stdout, stderr = git_process.communicate()
    if git_process.returncode:
        _giterr("error running '{}'\n\nstdout:\n{}\nstderr:\n{}".format(
            git_cmd_s, stdout.decode("utf-8"), stderr.decode("utf-8")))

    return stdout.decode("utf-8").rstrip()
| 520 | |
| 521 | |
def _ls_files(path=None):
    # Returns the output of 'git ls-files' as a list of lines. If 'path' is
    # not None, it is passed on to 'git ls-files' as its only argument.

    extra_args = () if path is None else (path,)
    output = _git("ls-files", *extra_args)
    return output.splitlines()
| 527 | |
| 528 | |
def _err(msg):
    # Reports a problem with the maintainers file by raising MaintainersError
    # with 'msg' as the message. Never returns.

    error = MaintainersError(msg)
    raise error
| 531 | |
| 532 | |
def _giterr(msg):
    # Reports a Git-related problem by raising GitError with 'msg' as the
    # message. Never returns.

    error = GitError(msg)
    raise error
| 535 | |
| 536 | |
def _serr(msg):
    # Exits with an error message that is prefixed with the name the script
    # was invoked under. Only meant for when get_maintainer.py is run as a
    # script; library code shouldn't call sys.exit().

    script_name = sys.argv[0]
    message = "{}: error: {}".format(script_name, msg)
    sys.exit(message)
| 541 | |
| 542 | |
class MaintainersError(Exception):
    """
    Exception raised for MAINTAINERS.yml-related errors, such as invalid YAML
    or contents that fail the checks run when the file is loaded
    """
| 545 | |
| 546 | |
class GitError(Exception):
    """
    Exception raised for Git-related errors, such as the git executable
    missing or a Git command exiting with a non-zero status
    """
| 549 | |
| 550 | |
# Only run the command-line interface when executed as a script, so that the
# module can also be imported as a library without side effects
if __name__ == "__main__":
    _main()