feat(whlmaker): introduce an internal _WhlFile class and stop sorting RECORD (#1488)
This class makes it easier to recreate a wheel file after extracting
it. It is not intended for use outside the rules_python project. Also
stop sorting the entries when writing the RECORD file, so that the
order of the RECORD entries matches the order in which the files are
added to the zip archive.
Towards #1076
diff --git a/CHANGELOG.md b/CHANGELOG.md
index ddfed3f..0e01615 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -31,8 +31,12 @@
* Skip aliases for unloaded toolchains. Some Python versions that don't have full
platform support, and referencing their undefined repositories can break operations
like `bazel query rdeps(...)`.
+
* Python code generated from `proto_library` with `strip_import_prefix` can be imported now.
+* (py_wheel) Produce deterministic wheel files and make `RECORD` file entries
+ follow the order of files written to the `.whl` archive.
+
## [0.26.0] - 2023-10-06
### Changed
@@ -106,8 +110,6 @@
* (gazelle) Improve runfiles lookup hermeticity.
-* (py_wheel) Produce deterministic wheel files
-
## [0.25.0] - 2023-08-22
### Changed
diff --git a/examples/wheel/wheel_test.py b/examples/wheel/wheel_test.py
index 23b1c8a..ab7b59d 100644
--- a/examples/wheel/wheel_test.py
+++ b/examples/wheel/wheel_test.py
@@ -44,7 +44,7 @@
else:
return path
- def assertFileSha256Equal(self, filename, sha):
+ def assertFileSha256Equal(self, filename, want):
hash = hashlib.sha256()
with open(filename, "rb") as f:
while True:
@@ -52,7 +52,7 @@
if not buf:
break
hash.update(buf)
- self.assertEqual(hash.hexdigest(), sha)
+ self.assertEqual(want, hash.hexdigest())
def assertAllEntriesHasReproducibleMetadata(self, zf):
for zinfo in zf.infolist():
@@ -78,7 +78,7 @@
],
)
self.assertFileSha256Equal(
- filename, "6da8e06a3fdd9ae5ee9fa8f796610723c05a4b0d7fde0ec5179401e956204139"
+ filename, "2818e70fdebd148934f41820f8c54d5d7676d783c0d66c7c8af2ee9141e7ddc7"
)
def test_py_package_wheel(self):
@@ -100,7 +100,7 @@
],
)
self.assertFileSha256Equal(
- filename, "2948b0b5e0aa421e0b40f78b74018bbc2f218165f211da0a4609e431e8e52bee"
+ filename, "273e27adf9bf90287a42ac911dcece8aa95f2905c37d786725477b26de23627c"
)
def test_customized_wheel(self):
@@ -135,16 +135,16 @@
record_contents,
# The entries are guaranteed to be sorted.
b"""\
-example_customized-0.0.1.dist-info/METADATA,sha256=QYQcDJFQSIqan8eiXqL67bqsUfgEAwf2hoK_Lgi1S-0,559
-example_customized-0.0.1.dist-info/NOTICE,sha256=Xpdw-FXET1IRgZ_wTkx1YQfo1-alET0FVf6V1LXO4js,76
-example_customized-0.0.1.dist-info/README,sha256=WmOFwZ3Jga1bHG3JiGRsUheb4UbLffUxyTdHczS27-o,40
-example_customized-0.0.1.dist-info/RECORD,,
-example_customized-0.0.1.dist-info/WHEEL,sha256=sobxWSyDDkdg_rinUth-jxhXHqoNqlmNMJY3aTZn2Us,91
-example_customized-0.0.1.dist-info/entry_points.txt,sha256=pqzpbQ8MMorrJ3Jp0ntmpZcuvfByyqzMXXi2UujuXD0,137
examples/wheel/lib/data.txt,sha256=9vJKEdfLu8bZRArKLroPZJh1XKkK3qFMXiM79MBL2Sg,12
examples/wheel/lib/module_with_data.py,sha256=8s0Khhcqz3yVsBKv2IB5u4l4TMKh7-c_V6p65WVHPms,637
examples/wheel/lib/simple_module.py,sha256=z2hwciab_XPNIBNH8B1Q5fYgnJvQTeYf0ZQJpY8yLLY,637
examples/wheel/main.py,sha256=sgg5iWN_9inYBjm6_Zw27hYdmo-l24fA-2rfphT-IlY,909
+example_customized-0.0.1.dist-info/WHEEL,sha256=sobxWSyDDkdg_rinUth-jxhXHqoNqlmNMJY3aTZn2Us,91
+example_customized-0.0.1.dist-info/METADATA,sha256=QYQcDJFQSIqan8eiXqL67bqsUfgEAwf2hoK_Lgi1S-0,559
+example_customized-0.0.1.dist-info/entry_points.txt,sha256=pqzpbQ8MMorrJ3Jp0ntmpZcuvfByyqzMXXi2UujuXD0,137
+example_customized-0.0.1.dist-info/NOTICE,sha256=Xpdw-FXET1IRgZ_wTkx1YQfo1-alET0FVf6V1LXO4js,76
+example_customized-0.0.1.dist-info/README,sha256=WmOFwZ3Jga1bHG3JiGRsUheb4UbLffUxyTdHczS27-o,40
+example_customized-0.0.1.dist-info/RECORD,,
""",
)
self.assertEqual(
@@ -189,7 +189,7 @@
second = second.main:s""",
)
self.assertFileSha256Equal(
- filename, "66f0c1bfe2cedb2f4cf08d4fe955096860186c0a2f3524e0cb02387a55ac3e63"
+ filename, "48eed93258bba0bb366c879b77917d947267d89e7e60005d1766d844fb909118"
)
def test_legacy_filename_escaping(self):
@@ -227,7 +227,7 @@
""",
)
self.assertFileSha256Equal(
- filename, "593c6ab58627f2446d0f1ef2956fd6d42104eedce4493c72d462f7ebf8cb74fa"
+ filename, "ace5fab6458f8c3b4b50801b8e8214288bba786472e81547fced743a67531312"
)
def test_filename_escaping(self):
@@ -293,7 +293,7 @@
for line in record_contents.splitlines():
self.assertFalse(line.startswith("/"))
self.assertFileSha256Equal(
- filename, "1b1fa3a4e840211084ef80049d07947b845c99bedb2778496d30e0c1524686ac"
+ filename, "16e0345c102c6866fed34999d8de5aed7f351adbf372b27adef3bc15161db65e"
)
def test_custom_package_root_multi_prefix_wheel(self):
@@ -324,7 +324,7 @@
for line in record_contents.splitlines():
self.assertFalse(line.startswith("/"))
self.assertFileSha256Equal(
- filename, "f0422d7a338de3c76bf2525927fd93c0f47f2e9c60ecc0944e3e32b642c28137"
+ filename, "d2031eb21c69e290db5eac76b0dc026858e9dbdb3da2dc0314e4e9f69eab2e1a"
)
def test_custom_package_root_multi_prefix_reverse_order_wheel(self):
@@ -355,7 +355,7 @@
for line in record_contents.splitlines():
self.assertFalse(line.startswith("/"))
self.assertFileSha256Equal(
- filename, "4f9e8c917b4050f121ac81e9a2bb65723ef09a1b90b35d93792ac3a62a60efa3"
+ filename, "a37b90685600ccfa56cc5405d1e9a3729ed21dfb31c76fd356e491e2af989566"
)
def test_python_requires_wheel(self):
@@ -380,7 +380,7 @@
""",
)
self.assertFileSha256Equal(
- filename, "9bfe8197d379f88715458a75e45c1f521a8b9d3cc43fe19b407c4ab207228b7c"
+ filename, "529afa454113572e6cd91f069cc9cfe5c28369f29cd495fff19d0ecce389d8e4"
)
def test_python_abi3_binary_wheel(self):
@@ -445,7 +445,7 @@
],
)
self.assertFileSha256Equal(
- filename, "8ad5f639cc41ac6ac67eb70f6553a7fdecabaf3a1b952c3134eaea59610c2a64"
+ filename, "cc9484d527075f07651ca0e7dff4a185c1314020726bcad55fe28d1bba0fec2e"
)
def test_rule_expands_workspace_status_keys_in_wheel_metadata(self):
diff --git a/tools/wheelmaker.py b/tools/wheelmaker.py
index f2ecbaf..b051564 100644
--- a/tools/wheelmaker.py
+++ b/tools/wheelmaker.py
@@ -84,118 +84,38 @@
except packaging.version.InvalidVersion:
pass
- sanitized = re.sub(r'[^a-z0-9]+', '.', version.lower()).strip('.')
- substituted = re.sub(r'\{\w+\}', '0', version)
- delimiter = '.' if '+' in substituted else '+'
+ sanitized = re.sub(r"[^a-z0-9]+", ".", version.lower()).strip(".")
+ substituted = re.sub(r"\{\w+\}", "0", version)
+ delimiter = "." if "+" in substituted else "+"
try:
- return str(
- packaging.version.Version(f'{substituted}{delimiter}{sanitized}')
- )
+ return str(packaging.version.Version(f"{substituted}{delimiter}{sanitized}"))
except packaging.version.InvalidVersion:
- return str(packaging.version.Version(f'0+{sanitized}'))
+ return str(packaging.version.Version(f"0+{sanitized}"))
-class WheelMaker(object):
+class _WhlFile(zipfile.ZipFile):
def __init__(
self,
- name,
- version,
- build_tag,
- python_tag,
- abi,
- platform,
- outfile=None,
+ filename,
+ *,
+ mode,
+ distinfo_dir,
strip_path_prefixes=None,
- incompatible_normalize_name=False,
- incompatible_normalize_version=False,
+ compression=zipfile.ZIP_DEFLATED,
+ **kwargs,
):
- self._name = name
- self._version = version
- self._build_tag = build_tag
- self._python_tag = python_tag
- self._abi = abi
- self._platform = platform
- self._outfile = outfile
- self._strip_path_prefixes = (
- strip_path_prefixes if strip_path_prefixes is not None else []
- )
-
- if incompatible_normalize_version:
- self._version = normalize_pep440(self._version)
- self._escaped_version = self._version
- else:
- self._escaped_version = escape_filename_segment(self._version)
-
- if incompatible_normalize_name:
- escaped_name = escape_filename_distribution_name(self._name)
- self._distinfo_dir = (
- escaped_name + "-" + self._escaped_version + ".dist-info/"
- )
- self._wheelname_fragment_distribution_name = escaped_name
- else:
- # The legacy behavior escapes the distinfo dir but not the
- # wheel name. Enable incompatible_normalize_name to fix it.
- # https://github.com/bazelbuild/rules_python/issues/1132
- self._distinfo_dir = (
- escape_filename_segment(self._name)
- + "-"
- + self._escaped_version
- + ".dist-info/"
- )
- self._wheelname_fragment_distribution_name = self._name
-
- self._zipfile = None
+ self._distinfo_dir = distinfo_dir
+ if not self._distinfo_dir.endswith("/"):
+ self._distinfo_dir += "/"
+ self._strip_path_prefixes = strip_path_prefixes or []
# Entries for the RECORD file as (filename, hash, size) tuples.
self._record = []
- def __enter__(self):
- self._zipfile = zipfile.ZipFile(
- self.filename(), mode="w", compression=zipfile.ZIP_DEFLATED
- )
- return self
-
- def __exit__(self, type, value, traceback):
- self._zipfile.close()
- self._zipfile = None
-
- def wheelname(self) -> str:
- components = [
- self._wheelname_fragment_distribution_name,
- self._version,
- ]
- if self._build_tag:
- components.append(self._build_tag)
- components += [self._python_tag, self._abi, self._platform]
- return "-".join(components) + ".whl"
-
- def filename(self) -> str:
- if self._outfile:
- return self._outfile
- return self.wheelname()
-
- def disttags(self):
- return ["-".join([self._python_tag, self._abi, self._platform])]
+ super().__init__(filename, mode=mode, compression=compression, **kwargs)
def distinfo_path(self, basename):
return self._distinfo_dir + basename
- def _serialize_digest(self, hash):
- # https://www.python.org/dev/peps/pep-0376/#record
- # "base64.urlsafe_b64encode(digest) with trailing = removed"
- digest = base64.urlsafe_b64encode(hash.digest())
- digest = b"sha256=" + digest.rstrip(b"=")
- return digest
-
- def add_string(self, filename, contents):
- """Add given 'contents' as filename to the distribution."""
- if sys.version_info[0] > 2 and isinstance(contents, str):
- contents = contents.encode("utf-8", "surrogateescape")
- zinfo = self._zipinfo(filename)
- self._zipfile.writestr(zinfo, contents)
- hash = hashlib.sha256()
- hash.update(contents)
- self._add_to_record(filename, self._serialize_digest(hash), len(contents))
-
def add_file(self, package_filename, real_filename):
"""Add given file to the distribution."""
@@ -227,7 +147,7 @@
hash = hashlib.sha256()
size = 0
with open(real_filename, "rb") as fsrc:
- with self._zipfile.open(zinfo, "w") as fdst:
+ with self.open(zinfo, "w") as fdst:
while True:
block = fsrc.read(2**20)
if not block:
@@ -237,6 +157,27 @@
size += len(block)
self._add_to_record(arcname, self._serialize_digest(hash), size)
+ def add_string(self, filename, contents):
+ """Add given 'contents' as filename to the distribution."""
+ if sys.version_info[0] > 2 and isinstance(contents, str):
+ contents = contents.encode("utf-8", "surrogateescape")
+ zinfo = self._zipinfo(filename)
+ self.writestr(zinfo, contents)
+ hash = hashlib.sha256()
+ hash.update(contents)
+ self._add_to_record(filename, self._serialize_digest(hash), len(contents))
+
+ def _serialize_digest(self, hash):
+ # https://www.python.org/dev/peps/pep-0376/#record
+ # "base64.urlsafe_b64encode(digest) with trailing = removed"
+ digest = base64.urlsafe_b64encode(hash.digest())
+ digest = b"sha256=" + digest.rstrip(b"=")
+ return digest
+
+ def _add_to_record(self, filename, hash, size):
+ size = str(size).encode("ascii")
+ self._record.append((filename, hash, size))
+
def _zipinfo(self, filename):
"""Construct deterministic ZipInfo entry for a file named filename"""
# Strip leading path separators to mirror ZipInfo.from_file behavior
@@ -248,9 +189,110 @@
zinfo = zipfile.ZipInfo(filename=arcname, date_time=_ZIP_EPOCH)
zinfo.create_system = 3 # ZipInfo entry created on a unix-y system
zinfo.external_attr = 0o777 << 16 # permissions: rwxrwxrwx
- zinfo.compress_type = self._zipfile.compression
+ zinfo.compress_type = self.compression
return zinfo
+ def add_recordfile(self):
+ """Write RECORD file to the distribution."""
+ record_path = self.distinfo_path("RECORD")
+ entries = self._record + [(record_path, b"", b"")]
+ contents = b""
+ for filename, digest, size in entries:
+ if sys.version_info[0] > 2 and isinstance(filename, str):
+ filename = filename.lstrip("/").encode("utf-8", "surrogateescape")
+ contents += b"%s,%s,%s\n" % (filename, digest, size)
+
+ self.add_string(record_path, contents)
+ return contents
+
+
+class WheelMaker(object):
+ def __init__(
+ self,
+ name,
+ version,
+ build_tag,
+ python_tag,
+ abi,
+ platform,
+ outfile=None,
+ strip_path_prefixes=None,
+ incompatible_normalize_name=False,
+ incompatible_normalize_version=False,
+ ):
+ self._name = name
+ self._version = version
+ self._build_tag = build_tag
+ self._python_tag = python_tag
+ self._abi = abi
+ self._platform = platform
+ self._outfile = outfile
+ self._strip_path_prefixes = strip_path_prefixes
+
+ if incompatible_normalize_version:
+ self._version = normalize_pep440(self._version)
+ self._escaped_version = self._version
+ else:
+ self._escaped_version = escape_filename_segment(self._version)
+
+ if incompatible_normalize_name:
+ escaped_name = escape_filename_distribution_name(self._name)
+ self._distinfo_dir = (
+ escaped_name + "-" + self._escaped_version + ".dist-info/"
+ )
+ self._wheelname_fragment_distribution_name = escaped_name
+ else:
+ # The legacy behavior escapes the distinfo dir but not the
+ # wheel name. Enable incompatible_normalize_name to fix it.
+ # https://github.com/bazelbuild/rules_python/issues/1132
+ self._distinfo_dir = (
+ escape_filename_segment(self._name)
+ + "-"
+ + self._escaped_version
+ + ".dist-info/"
+ )
+ self._wheelname_fragment_distribution_name = self._name
+
+ self._whlfile = None
+
+ def __enter__(self):
+ self._whlfile = _WhlFile(
+ self.filename(),
+ mode="w",
+ distinfo_dir=self._distinfo_dir,
+ strip_path_prefixes=self._strip_path_prefixes,
+ )
+ return self
+
+ def __exit__(self, type, value, traceback):
+ self._whlfile.close()
+ self._whlfile = None
+
+ def wheelname(self) -> str:
+ components = [
+ self._wheelname_fragment_distribution_name,
+ self._version,
+ ]
+ if self._build_tag:
+ components.append(self._build_tag)
+ components += [self._python_tag, self._abi, self._platform]
+ return "-".join(components) + ".whl"
+
+ def filename(self) -> str:
+ if self._outfile:
+ return self._outfile
+ return self.wheelname()
+
+ def disttags(self):
+ return ["-".join([self._python_tag, self._abi, self._platform])]
+
+ def distinfo_path(self, basename):
+ return self._whlfile.distinfo_path(basename)
+
+ def add_file(self, package_filename, real_filename):
+ """Add given file to the distribution."""
+ self._whlfile.add_file(package_filename, real_filename)
+
def add_wheelfile(self):
"""Write WHEEL file to the distribution"""
# TODO(pstradomski): Support non-purelib wheels.
@@ -263,7 +305,7 @@
)
for tag in self.disttags():
wheel_contents += "Tag: %s\n" % tag
- self.add_string(self.distinfo_path("WHEEL"), wheel_contents)
+ self._whlfile.add_string(self.distinfo_path("WHEEL"), wheel_contents)
def add_metadata(self, metadata, name, description, version):
"""Write METADATA file to the distribution."""
@@ -275,23 +317,11 @@
# provided.
metadata += description if description else "UNKNOWN"
metadata += "\n"
- self.add_string(self.distinfo_path("METADATA"), metadata)
+ self._whlfile.add_string(self.distinfo_path("METADATA"), metadata)
def add_recordfile(self):
"""Write RECORD file to the distribution."""
- record_path = self.distinfo_path("RECORD")
- entries = self._record + [(record_path, b"", b"")]
- entries.sort()
- contents = b""
- for filename, digest, size in entries:
- if sys.version_info[0] > 2 and isinstance(filename, str):
- filename = filename.lstrip("/").encode("utf-8", "surrogateescape")
- contents += b"%s,%s,%s\n" % (filename, digest, size)
- self.add_string(record_path, contents)
-
- def _add_to_record(self, filename, hash, size):
- size = str(size).encode("ascii")
- self._record.append((filename, hash, size))
+ self._whlfile.add_recordfile()
def get_files_to_package(input_files):