| # Copyright 2023 The Bazel Authors. All rights reserved. |
| # |
| # Licensed under the Apache License, Version 2.0 (the "License"); |
| # you may not use this file except in compliance with the License. |
| # You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| |
| """ |
| Regenerate a whl file after patching and cleanup the patched contents. |
| |
| This script will take contents of the current directory and create a new wheel |
| out of it and will remove all files that were written to the wheel. |
| """ |
| |
| from __future__ import annotations |
| |
| import argparse |
| import csv |
| import difflib |
| import logging |
| import pathlib |
| import sys |
| import tempfile |
| |
| from tools.wheelmaker import _WhlFile |
| |
| # NOTE: Implement the following matching of what goes into the RECORD |
| # https://peps.python.org/pep-0491/#the-dist-info-directory |
| _EXCLUDES = [ |
| "RECORD", |
| "INSTALLER", |
| "RECORD.jws", |
| "RECORD.p7s", |
| "REQUESTED", |
| ] |
| |
| _DISTINFO = "dist-info" |
| |
| |
| def _unidiff_output(expected, actual, record): |
| """ |
| Helper function. Returns a string containing the unified diff of two |
| multiline strings. |
| """ |
| |
| expected = expected.splitlines(1) |
| actual = actual.splitlines(1) |
| |
| diff = difflib.unified_diff( |
| expected, actual, fromfile=f"a/{record}", tofile=f"b/{record}" |
| ) |
| |
| return "".join(diff) |
| |
| |
| def _files_to_pack(dir: pathlib.Path, want_record: str) -> list[pathlib.Path]: |
| """Check that the RECORD file entries are correct and print a unified diff on failure.""" |
| |
| # First get existing files by using the RECORD file |
| got_files = [] |
| got_distinfos = [] |
| for row in csv.reader(want_record.splitlines()): |
| rec = row[0] |
| path = dir / rec |
| |
| if not path.exists(): |
| # skip files that do not exist as they won't be present in the final |
| # RECORD file. |
| continue |
| |
| if not path.parent.name.endswith(_DISTINFO): |
| got_files.append(path) |
| elif path.name not in _EXCLUDES: |
| got_distinfos.append(path) |
| |
| # Then get extra files present in the directory but not in the RECORD file |
| extra_files = [] |
| extra_distinfos = [] |
| for path in dir.rglob("*"): |
| if path.is_dir(): |
| continue |
| |
| elif path.parent.name.endswith(_DISTINFO): |
| if path.name in _EXCLUDES: |
| # NOTE: we implement the following matching of what goes into the RECORD |
| # https://peps.python.org/pep-0491/#the-dist-info-directory |
| continue |
| elif path not in got_distinfos: |
| extra_distinfos.append(path) |
| |
| elif path not in got_files: |
| extra_files.append(path) |
| |
| # sort the extra files for reproducibility |
| extra_files.sort() |
| extra_distinfos.sort() |
| |
| # This order ensures that the structure of the RECORD file is always the |
| # same and ensures smaller patchsets to the RECORD file in general |
| return got_files + extra_files + got_distinfos + extra_distinfos |
| |
| |
| def main(sys_argv): |
| parser = argparse.ArgumentParser(description=__doc__) |
| parser.add_argument( |
| "whl_path", |
| type=pathlib.Path, |
| help="The original wheel file that we have patched.", |
| ) |
| parser.add_argument( |
| "--record-patch", |
| type=pathlib.Path, |
| help="The output path that we are going to write the RECORD file patch to.", |
| ) |
| parser.add_argument( |
| "output", |
| type=pathlib.Path, |
| help="The output path that we are going to write a new file to.", |
| ) |
| args = parser.parse_args(sys_argv) |
| |
| cwd = pathlib.Path.cwd() |
| logging.debug("=" * 80) |
| logging.debug("Repackaging the wheel") |
| logging.debug("=" * 80) |
| |
| with tempfile.TemporaryDirectory(dir=cwd) as tmpdir: |
| patched_wheel_dir = cwd / tmpdir |
| logging.debug(f"Created a tmpdir: {patched_wheel_dir}") |
| |
| excludes = [args.whl_path, patched_wheel_dir] |
| |
| logging.debug("Moving whl contents to the newly created tmpdir") |
| for p in cwd.glob("*"): |
| if p in excludes: |
| logging.debug(f"Ignoring: {p}") |
| continue |
| |
| rel_path = p.relative_to(cwd) |
| dst = p.rename(patched_wheel_dir / rel_path) |
| logging.debug(f"mv {p} -> {dst}") |
| |
| distinfo_dir = next(iter(patched_wheel_dir.glob("*dist-info"))) |
| logging.debug(f"Found dist-info dir: {distinfo_dir}") |
| record_path = distinfo_dir / "RECORD" |
| record_contents = record_path.read_text() if record_path.exists() else "" |
| distribution_prefix = distinfo_dir.with_suffix("").name |
| |
| with _WhlFile( |
| args.output, mode="w", distribution_prefix=distribution_prefix |
| ) as out: |
| for p in _files_to_pack(patched_wheel_dir, record_contents): |
| rel_path = p.relative_to(patched_wheel_dir) |
| out.add_file(str(rel_path), p) |
| |
| logging.debug(f"Writing RECORD file") |
| got_record = out.add_recordfile().decode("utf-8", "surrogateescape") |
| |
| if got_record == record_contents: |
| logging.info(f"Created a whl file: {args.output}") |
| return |
| |
| record_diff = _unidiff_output( |
| record_contents, |
| got_record, |
| out.distinfo_path("RECORD"), |
| ) |
| args.record_patch.write_text(record_diff) |
| logging.warning( |
| f"Please apply patch to the RECORD file ({args.record_patch}):\n{record_diff}" |
| ) |
| |
| |
| if __name__ == "__main__": |
| logging.basicConfig( |
| format="%(module)s: %(levelname)s: %(message)s", level=logging.DEBUG |
| ) |
| |
| sys.exit(main(sys.argv[1:])) |