blob: 9052ac39c611330803acfe53aa832a38501ec6d2 [file] [log] [blame]
# Copyright 2023 The Bazel Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
14
15"""
Regenerate a whl file after patching and clean up the patched contents.

This script will take the contents of the current directory, create a new wheel
out of them, and remove all files that were written to the wheel.
20"""
21
22from __future__ import annotations
23
24import argparse
25import difflib
26import logging
27import pathlib
28import sys
29import tempfile
30
31from tools.wheelmaker import _WhlFile
32
# NOTE: Implement the following matching of what goes into the RECORD
# https://peps.python.org/pep-0491/#the-dist-info-directory
#
# File names that `_files_to_pack` never returns when they are found directly
# inside a directory whose name ends with `_DISTINFO`.
_EXCLUDES = [
    "RECORD",
    "INSTALLER",
    "RECORD.jws",
    "RECORD.p7s",
    "REQUESTED",
]

# Name suffix used to recognise a wheel's metadata directory, e.g.
# "<name>-<version>.dist-info".
_DISTINFO = "dist-info"
44
45
def _unidiff_output(expected, actual, record):
    """Return the unified diff between two multiline strings.

    Args:
        expected: The text used for the "before" side of the diff.
        actual: The text used for the "after" side of the diff.
        record: The path shown in the diff headers, which read
            ``a/<record>`` and ``b/<record>``.

    Returns:
        The diff as a single string; empty when both texts are equal.
    """
    # Keep the line endings so that joining the diff lines yields valid output.
    before_lines = expected.splitlines(keepends=True)
    after_lines = actual.splitlines(keepends=True)

    return "".join(
        difflib.unified_diff(
            before_lines,
            after_lines,
            fromfile=f"a/{record}",
            tofile=f"b/{record}",
        )
    )
60
61
def _files_to_pack(dir: pathlib.Path, want_record: str) -> list[pathlib.Path]:
    """Collect the files under `dir` that should be packed, in a stable order.

    The result is the concatenation of four groups:

    1. files listed in the RECORD, in RECORD order;
    2. files not listed in the RECORD, sorted;
    3. dist-info files listed in the RECORD, in RECORD order;
    4. dist-info files not listed in the RECORD, sorted.

    A "dist-info file" is a file whose parent directory name ends with
    `_DISTINFO`. Dist-info files whose name is in `_EXCLUDES` are never
    returned.

    Args:
        dir: The root directory of the unpacked (and patched) wheel.
        want_record: The contents of the existing RECORD file; may be empty.

    Returns:
        The paths to pack, each one located under `dir`, without duplicates.
    """

    # First get existing files by using the RECORD file
    got_files = []
    got_distinfos = []
    # Set of RECORD-listed paths, so that the directory walk below does a
    # constant-time lookup per file instead of scanning a list.
    seen = set()
    for line in want_record.splitlines():
        rec, _, _ = line.partition(",")
        if not rec:
            # A blank line would otherwise resolve to `dir` itself, which
            # exists and would be returned as if it were a file.
            continue

        path = dir / rec
        if path in seen or not path.is_file():
            # Skip duplicate entries and anything that is not a file on disk
            # (missing files and directories), as those won't be present in
            # the final RECORD file.
            continue
        seen.add(path)

        if not path.parent.name.endswith(_DISTINFO):
            got_files.append(path)
        elif path.name not in _EXCLUDES:
            got_distinfos.append(path)

    # Then get extra files present in the directory but not in the RECORD file
    extra_files = []
    extra_distinfos = []
    for path in dir.rglob("*"):
        if path.is_dir() or path in seen:
            continue

        if not path.parent.name.endswith(_DISTINFO):
            extra_files.append(path)
        elif path.name not in _EXCLUDES:
            # NOTE: we implement the following matching of what goes into the RECORD
            # https://peps.python.org/pep-0491/#the-dist-info-directory
            extra_distinfos.append(path)

    # sort the extra files for reproducibility
    extra_files.sort()
    extra_distinfos.sort()

    # This order ensures that the structure of the RECORD file is always the
    # same and ensures smaller patchsets to the RECORD file in general
    return got_files + extra_files + got_distinfos + extra_distinfos
107
108
def main(sys_argv):
    """Repack the patched wheel contents found in the current directory.

    Every entry of the current working directory except the original wheel is
    moved into a temporary directory created inside it, packed into a new
    wheel at `output`, and removed together with the temporary directory.

    If the regenerated RECORD differs from the RECORD found in the dist-info
    directory, a unified diff is logged as a warning and, when
    `--record-patch` was given, written to that path.

    Args:
        sys_argv: The command line arguments, without the program name.

    Raises:
        FileNotFoundError: If the directory has no `*dist-info` entry.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "whl_path",
        type=pathlib.Path,
        help="The original wheel file that we have patched.",
    )
    parser.add_argument(
        "--record-patch",
        type=pathlib.Path,
        help="The output path that we are going to write the RECORD file patch to.",
    )
    parser.add_argument(
        "output",
        type=pathlib.Path,
        help="The output path that we are going to write a new file to.",
    )
    args = parser.parse_args(sys_argv)

    cwd = pathlib.Path.cwd()
    logging.debug("=" * 80)
    logging.debug("Repackaging the wheel")
    logging.debug("=" * 80)

    with tempfile.TemporaryDirectory(dir=cwd) as tmpdir:
        patched_wheel_dir = cwd / tmpdir
        logging.debug(f"Created a tmpdir: {patched_wheel_dir}")

        # `cwd.glob` yields absolute paths, so anchor a relative `whl_path` to
        # the cwd (an absolute one is left untouched by the join). Otherwise
        # the original wheel would be moved into the tmpdir and packed.
        excludes = [cwd / args.whl_path, patched_wheel_dir]

        logging.debug("Moving whl contents to the newly created tmpdir")
        for p in cwd.glob("*"):
            if p in excludes:
                logging.debug(f"Ignoring: {p}")
                continue

            rel_path = p.relative_to(cwd)
            dst = p.rename(patched_wheel_dir / rel_path)
            logging.debug(f"mv {p} -> {dst}")

        distinfo_dir = next(iter(patched_wheel_dir.glob("*dist-info")), None)
        if distinfo_dir is None:
            # Fail with an explicit message instead of a bare StopIteration.
            raise FileNotFoundError(
                f"Could not find a '*dist-info' directory in: {cwd}"
            )
        logging.debug(f"Found dist-info dir: {distinfo_dir}")
        record_path = distinfo_dir / "RECORD"
        record_contents = record_path.read_text() if record_path.exists() else ""
        # The dist-info directory name without its final ".dist-info" suffix.
        distribution_prefix = distinfo_dir.with_suffix("").name

        with _WhlFile(
            args.output, mode="w", distribution_prefix=distribution_prefix
        ) as out:
            for p in _files_to_pack(patched_wheel_dir, record_contents):
                rel_path = p.relative_to(patched_wheel_dir)
                out.add_file(str(rel_path), p)

            logging.debug("Writing RECORD file")
            got_record = out.add_recordfile().decode("utf-8", "surrogateescape")

        if got_record == record_contents:
            logging.info(f"Created a whl file: {args.output}")
            return

        record_diff = _unidiff_output(
            record_contents,
            got_record,
            out.distinfo_path("RECORD"),
        )
        # `--record-patch` is optional, so only write the patch when a
        # destination was given; the diff is logged either way.
        if args.record_patch is not None:
            args.record_patch.write_text(record_diff)
        logging.warning(
            f"Please apply patch to the RECORD file ({args.record_patch}):\n{record_diff}"
        )
Ignas Anikeviciusc0e18ed2023-10-20 09:35:37 +0900178
179
if __name__ == "__main__":
    # Script entry point: log everything at DEBUG level, then exit with
    # whatever `main` returns.
    logging.basicConfig(
        level=logging.DEBUG,
        format="%(module)s: %(levelname)s: %(message)s",
    )

    exit_status = main(sys.argv[1:])
    sys.exit(exit_status)
185 sys.exit(main(sys.argv[1:]))