| # Copyright 2023 The Bazel Authors. All rights reserved. |
| # |
| # Licensed under the Apache License, Version 2.0 (the "License"); |
| # you may not use this file except in compliance with the License. |
| # You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| |
| import argparse |
| import json |
| import pathlib |
| import re |
| import sys |
| import zipfile |
| |
| |
# Generator is the modules_mapping.json file generator.
#
# It inspects wheel archives, records which importable module names each wheel
# provides in `mapping` (module name -> wheel name), and writes that mapping as
# JSON to `output_file`.
class Generator:
    stderr = None
    output_file = None
    excluded_patterns = None
    # Deliberately not a dict literal: a class-level dict would be shared (and
    # mutated) by every Generator instance. Each instance gets its own dict in
    # __init__.
    mapping = None

    # stderr: file-like object that run() prints errors to.
    # output_file: path the JSON mapping is written to by run().
    # excluded_patterns: iterable of regular expression strings; module names
    #   matching any of them (via re.search) are left out of the mapping.
    def __init__(self, stderr, output_file, excluded_patterns):
        self.stderr = stderr
        self.output_file = output_file
        self.excluded_patterns = [re.compile(pattern) for pattern in excluded_patterns]
        self.mapping = {}

    # dig_wheel analyses the wheel .whl file determining the modules it provides
    # by looking at the directory structure.
    def dig_wheel(self, whl):
        with zipfile.ZipFile(whl, "r") as zip_file:
            for path in zip_file.namelist():
                # Metadata directories are skipped, except for the purelib and
                # platlib trees, which still get indexed.
                if is_metadata(path) and not data_has_purelib_or_platlib(path):
                    continue
                self.module_for_path(path, whl)

    # simplify drops every module whose nearest already-kept ancestor package
    # maps to the same wheel, since the ancestor entry already covers it.
    # Modules whose nearest kept ancestor belongs to a different wheel, or that
    # have no kept ancestor, are retained. Entries with an empty wheel name are
    # always dropped.
    def simplify(self):
        simplified = {}
        # Sorting by module name guarantees that "a" is visited before "a.b",
        # so ancestors are already in `simplified` when a child is examined.
        for module, wheel_name in sorted(self.mapping.items(), key=lambda item: item[0]):
            ancestor = module
            keep = True
            while True:
                if ancestor in simplified:
                    # Only the nearest kept ancestor decides.
                    keep = simplified[ancestor] != wheel_name
                    break
                if "." not in ancestor:
                    break
                ancestor = ancestor.rsplit(".", 1)[0]
            if keep and wheel_name:
                simplified[module] = wheel_name
        self.mapping = simplified

    # module_for_path records the module (and, for __init__.py files, the
    # package) that the archive member `path` of wheel `whl` provides. Only
    # ".py" and ".so" files are considered.
    def module_for_path(self, path, whl):
        ext = pathlib.Path(path).suffix
        if ext not in (".py", ".so"):
            return

        # Strip the two leading components only when the file really sits in a
        # metadata directory's purelib/platlib tree. A plain substring test
        # would also mangle ordinary paths that merely contain "purelib" or
        # "platlib" somewhere in a file or directory name.
        if is_metadata(path) and data_has_purelib_or_platlib(path):
            root = "/".join(path.split("/")[2:])
        else:
            root = path

        wheel_name = get_wheel_name(whl)

        init_suffix = "/__init__.py"
        if root.endswith(init_suffix):
            # Note the '/' here means that the __init__.py is not in the
            # root of the wheel, therefore we can index the directory
            # where this file is as an importable package.
            module = root[: -len(init_suffix)].replace("/", ".")
            if not self.is_excluded(module):
                self.mapping[module] = wheel_name

        # Always index the module file.
        if ext == ".so":
            # Also remove extra metadata that is embedded as part of
            # the file name as an extra extension.
            ext = "".join(pathlib.Path(root).suffixes)
        module = root[: -len(ext)].replace("/", ".")
        if not self.is_excluded(module):
            self.mapping[module] = wheel_name

    # is_excluded reports whether `module` matches any of the excluded patterns.
    def is_excluded(self, module):
        return any(pattern.search(module) for pattern in self.excluded_patterns)

    # run is the entrypoint for the generator. It indexes every wheel in
    # `wheels`, simplifies the mapping and writes it as JSON to output_file.
    # Returns 0 on success, or 1 after printing the error to stderr when an
    # AssertionError is raised while inspecting a wheel.
    def run(self, wheels):
        for whl in wheels:
            try:
                self.dig_wheel(whl)
            except AssertionError as error:
                # NOTE(review): get_wheel_name raises RuntimeError, which is not
                # caught here and therefore propagates to the caller.
                print(error, file=self.stderr)
                return 1
        self.simplify()
        mapping_json = json.dumps(self.mapping)
        with open(self.output_file, "w") as f:
            f.write(mapping_json)
        return 0
| |
| |
# get_wheel_name returns the distribution name of the wheel at `path`, i.e. the
# part of the file name before the first "-".
# Raises RuntimeError when the file name does not end in ".whl" or contains no
# "-" separator, since it then cannot follow the wheel file name convention.
def get_wheel_name(path):
    pp = pathlib.PurePath(path)
    distribution, separator, _ = pp.name.partition("-")
    # Without a "-" there is no distribution/version split; slicing on a failed
    # find() would silently chop the last character off the file name instead.
    if pp.suffix != ".whl" or not separator:
        raise RuntimeError(
            "{} is not a valid wheel file name: the wheel doesn't follow ".format(
                pp.name
            )
            + "https://www.python.org/dev/peps/pep-0427/#file-name-convention"
        )
    return distribution
| |
| |
# is_metadata reports whether the path lives in a metadata directory, i.e. its
# first path component ends in ".dist-info" or ".data" (case-insensitive).
# Ref: https://www.python.org/dev/peps/pep-0427/#file-contents.
def is_metadata(path):
    first_component, _, _ = path.partition("/")
    return first_component.lower().endswith((".dist-info", ".data"))
| |
| |
# The .data is allowed to contain a full purelib or platlib directory
# These get unpacked into site-packages, so require indexing too.
# This is the same if "Root-Is-Purelib: true" is set and the files are at the root.
# Ref: https://peps.python.org/pep-0427/#what-s-the-deal-with-purelib-vs-platlib
#
# Returns True when the path is inside a metadata directory and its second
# component is "purelib" or "platlib" (case-insensitive). A path without a
# second component returns False.
def data_has_purelib_or_platlib(path):
    parts = path.split("/")
    # A path with no "/" has no second component to inspect; indexing it
    # unconditionally would raise IndexError.
    if len(parts) < 2:
        return False
    return parts[1].lower() in ("purelib", "platlib") and is_metadata(path)
| |
| |
# Command-line entry point: parses the arguments, runs the generator over the
# given wheels and exits with the status code returned by Generator.run.
if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        prog="generator",
        description="Generates the modules mapping used by the Gazelle manifest.",
    )
    # Required: without it run() would only fail at the very end, when trying
    # to open None for writing.
    parser.add_argument("--output_file", type=str, required=True)
    parser.add_argument("--exclude_patterns", nargs="+", default=[])
    parser.add_argument("--wheels", nargs="+", default=[])
    args = parser.parse_args()
    generator = Generator(sys.stderr, args.output_file, args.exclude_patterns)
    # sys.exit rather than the bare exit() helper, which is injected by the
    # site module and is not guaranteed to exist in every interpreter setup.
    sys.exit(generator.run(args.wheels))