blob: 519631f272b458b3e982c78d7cca7b633a9ca432 [file] [log] [blame]
# Copyright 2023 The Bazel Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Regenerate a whl file after patching and cleanup the patched contents.
This script will take contents of the current directory and create a new wheel
out of it and will remove all files that were written to the wheel.
"""
from __future__ import annotations
import argparse
import csv
import difflib
import logging
import pathlib
import sys
import tempfile
from tools.wheelmaker import _WhlFile
# NOTE: Implement the following matching of what goes into the RECORD
# https://peps.python.org/pep-0491/#the-dist-info-directory
_EXCLUDES = [
"RECORD",
"INSTALLER",
"RECORD.jws",
"RECORD.p7s",
"REQUESTED",
]
_DISTINFO = "dist-info"
def _unidiff_output(expected, actual, record):
"""
Helper function. Returns a string containing the unified diff of two
multiline strings.
"""
expected = expected.splitlines(1)
actual = actual.splitlines(1)
diff = difflib.unified_diff(
expected, actual, fromfile=f"a/{record}", tofile=f"b/{record}"
)
return "".join(diff)
def _files_to_pack(dir: pathlib.Path, want_record: str) -> list[pathlib.Path]:
    """Return the files under ``dir`` in the order they should be packed.

    Files that are listed in the RECORD contents and still exist come first, in
    RECORD order, followed by files found on disk that the RECORD does not
    mention (sorted). Files whose parent directory name ends with "dist-info"
    are placed after all other files, and the names in ``_EXCLUDES`` are left
    out of the result.

    Args:
        dir: The directory holding the unpacked (and patched) wheel contents.
        want_record: The text of the existing RECORD file; may be empty.

    Returns:
        The paths to pack: RECORD-listed files, extra files, RECORD-listed
        dist-info files and extra dist-info files, in that order.
    """
    # First get existing files by using the RECORD file
    got_files = []
    got_distinfos = []
    for row in csv.reader(want_record.splitlines()):
        if not row:
            # csv.reader yields an empty row for a blank line; there is no
            # path to look at, so skip it instead of failing on row[0].
            continue

        path = dir / row[0]

        if not path.exists():
            # skip files that do not exist as they won't be present in the final
            # RECORD file.
            continue

        if not path.parent.name.endswith(_DISTINFO):
            got_files.append(path)
        elif path.name not in _EXCLUDES:
            got_distinfos.append(path)

    # Set copies keep the membership checks below constant-time; the lists
    # above are kept because they carry the RECORD ordering.
    known_files = set(got_files)
    known_distinfos = set(got_distinfos)

    # Then get extra files present in the directory but not in the RECORD file
    extra_files = []
    extra_distinfos = []
    for path in dir.rglob("*"):
        if path.is_dir():
            continue

        if path.parent.name.endswith(_DISTINFO):
            if path.name in _EXCLUDES:
                # NOTE: we implement the following matching of what goes into the RECORD
                # https://peps.python.org/pep-0491/#the-dist-info-directory
                continue
            if path not in known_distinfos:
                extra_distinfos.append(path)
        elif path not in known_files:
            extra_files.append(path)

    # sort the extra files for reproducibility
    extra_files.sort()
    extra_distinfos.sort()

    # This order ensures that the structure of the RECORD file is always the
    # same and ensures smaller patchsets to the RECORD file in general
    return got_files + extra_files + got_distinfos + extra_distinfos
def main(sys_argv):
    """Repackage the patched wheel contents of the current directory.

    Everything in the current working directory apart from the original wheel
    is moved into a temporary directory, written to a new wheel at ``output``
    and then removed together with the temporary directory. If the regenerated
    RECORD differs from the RECORD found on disk, a unified diff is logged and,
    when ``--record-patch`` was given, also written to that path.

    Args:
        sys_argv: The command line arguments, without the program name.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "whl_path",
        type=pathlib.Path,
        help="The original wheel file that we have patched.",
    )
    parser.add_argument(
        "--record-patch",
        type=pathlib.Path,
        help="The output path that we are going to write the RECORD file patch to.",
    )
    parser.add_argument(
        "output",
        type=pathlib.Path,
        help="The output path that we are going to write a new file to.",
    )
    args = parser.parse_args(sys_argv)

    cwd = pathlib.Path.cwd()
    logging.debug("=" * 80)
    logging.debug("Repackaging the wheel")
    logging.debug("=" * 80)

    with tempfile.TemporaryDirectory(dir=cwd) as tmpdir:
        patched_wheel_dir = cwd / tmpdir
        logging.debug(f"Created a tmpdir: {patched_wheel_dir}")

        # cwd.glob() yields absolute paths, so anchor the wheel path at cwd;
        # otherwise a relative whl_path would never match and the original
        # wheel would be moved and packed too. An absolute whl_path is left
        # unchanged by the join.
        excludes = [cwd / args.whl_path, patched_wheel_dir]

        logging.debug("Moving whl contents to the newly created tmpdir")
        for p in cwd.glob("*"):
            if p in excludes:
                logging.debug(f"Ignoring: {p}")
                continue

            rel_path = p.relative_to(cwd)
            dst = p.rename(patched_wheel_dir / rel_path)
            logging.debug(f"mv {p} -> {dst}")

        # Takes the first match; raises StopIteration when the directory has
        # no entry whose name ends with "dist-info".
        distinfo_dir = next(iter(patched_wheel_dir.glob("*dist-info")))
        logging.debug(f"Found dist-info dir: {distinfo_dir}")
        record_path = distinfo_dir / "RECORD"
        record_contents = record_path.read_text() if record_path.exists() else ""
        distribution_prefix = distinfo_dir.with_suffix("").name

        with _WhlFile(
            args.output, mode="w", distribution_prefix=distribution_prefix
        ) as out:
            for p in _files_to_pack(patched_wheel_dir, record_contents):
                rel_path = p.relative_to(patched_wheel_dir)
                out.add_file(str(rel_path), p)

            logging.debug("Writing RECORD file")
            got_record = out.add_recordfile().decode("utf-8", "surrogateescape")

    if got_record == record_contents:
        logging.info(f"Created a whl file: {args.output}")
        return

    record_diff = _unidiff_output(
        record_contents,
        got_record,
        out.distinfo_path("RECORD"),
    )

    if args.record_patch is None:
        # --record-patch is optional; without it there is nowhere to write the
        # patch, so only report the difference.
        logging.warning(f"The RECORD file needs to be patched:\n{record_diff}")
        return

    args.record_patch.write_text(record_diff)
    logging.warning(
        f"Please apply patch to the RECORD file ({args.record_patch}):\n{record_diff}"
    )
if __name__ == "__main__":
    # Script entry point: log everything down to DEBUG, then run main() with
    # the arguments that follow the program name and exit with its result.
    logging.basicConfig(
        level=logging.DEBUG,
        format="%(module)s: %(levelname)s: %(message)s",
    )
    exit_code = main(sys.argv[1:])
    sys.exit(exit_code)