fix(py_wheel): produce deterministic wheel files (#1453)
The current implementation does not produce deterministic output because:
- `ZipFile.writestr()` leaks the current date and time
- `ZipFile.write()` leaks the source file's mtime and mode bits
(permissions) into the resulting zip archive.
By manually creating our own `ZipInfo` objects, we can explicitly set the
date and time fields to `Jan 1, 1980, 00:00` (the minimum value allowed by
the zip file format) and ensure that other file attributes are
uniform across all entries in a zip file.
---------
Co-authored-by: Ignas Anikevicius <240938+aignas@users.noreply.github.com>
diff --git a/CHANGELOG.md b/CHANGELOG.md
index be69f5e..e13868a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -105,6 +105,8 @@
* (gazelle) Improve runfiles lookup hermeticity.
+* (py_wheel) Produce deterministic wheel files.
+
## [0.25.0] - 2023-08-22
### Changed
diff --git a/examples/wheel/wheel_test.py b/examples/wheel/wheel_test.py
index 8c0f53e..23b1c8a 100644
--- a/examples/wheel/wheel_test.py
+++ b/examples/wheel/wheel_test.py
@@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+import hashlib
import os
import platform
import subprocess
@@ -43,9 +44,29 @@
else:
return path
+ def assertFileSha256Equal(self, filename, sha):
+ hash = hashlib.sha256()
+ with open(filename, "rb") as f:
+ while True:
+ buf = f.read(2**20)
+ if not buf:
+ break
+ hash.update(buf)
+ self.assertEqual(hash.hexdigest(), sha)
+
+ def assertAllEntriesHasReproducibleMetadata(self, zf):
+ for zinfo in zf.infolist():
+ self.assertEqual(zinfo.date_time, (1980, 1, 1, 0, 0, 0), msg=zinfo.filename)
+ self.assertEqual(zinfo.create_system, 3, msg=zinfo.filename)
+ self.assertEqual(zinfo.external_attr, 0o777 << 16, msg=zinfo.filename)
+ self.assertEqual(
+ zinfo.compress_type, zipfile.ZIP_DEFLATED, msg=zinfo.filename
+ )
+
def test_py_library_wheel(self):
filename = self._get_path("example_minimal_library-0.0.1-py3-none-any.whl")
with zipfile.ZipFile(filename) as zf:
+ self.assertAllEntriesHasReproducibleMetadata(zf)
self.assertEqual(
zf.namelist(),
[
@@ -56,12 +77,16 @@
"example_minimal_library-0.0.1.dist-info/RECORD",
],
)
+ self.assertFileSha256Equal(
+ filename, "6da8e06a3fdd9ae5ee9fa8f796610723c05a4b0d7fde0ec5179401e956204139"
+ )
def test_py_package_wheel(self):
filename = self._get_path(
"example_minimal_package-0.0.1-py3-none-any.whl",
)
with zipfile.ZipFile(filename) as zf:
+ self.assertAllEntriesHasReproducibleMetadata(zf)
self.assertEqual(
zf.namelist(),
[
@@ -74,12 +99,16 @@
"example_minimal_package-0.0.1.dist-info/RECORD",
],
)
+ self.assertFileSha256Equal(
+ filename, "2948b0b5e0aa421e0b40f78b74018bbc2f218165f211da0a4609e431e8e52bee"
+ )
def test_customized_wheel(self):
filename = self._get_path(
"example_customized-0.0.1-py3-none-any.whl",
)
with zipfile.ZipFile(filename) as zf:
+ self.assertAllEntriesHasReproducibleMetadata(zf)
self.assertEqual(
zf.namelist(),
[
@@ -159,12 +188,16 @@
first = first.main:f
second = second.main:s""",
)
+ self.assertFileSha256Equal(
+ filename, "66f0c1bfe2cedb2f4cf08d4fe955096860186c0a2f3524e0cb02387a55ac3e63"
+ )
def test_legacy_filename_escaping(self):
filename = self._get_path(
"file_name_escaping-0.0.1_r7-py3-none-any.whl",
)
with zipfile.ZipFile(filename) as zf:
+ self.assertAllEntriesHasReproducibleMetadata(zf)
self.assertEquals(
zf.namelist(),
[
@@ -193,6 +226,9 @@
UNKNOWN
""",
)
+ self.assertFileSha256Equal(
+ filename, "593c6ab58627f2446d0f1ef2956fd6d42104eedce4493c72d462f7ebf8cb74fa"
+ )
def test_filename_escaping(self):
filename = self._get_path(
@@ -234,6 +270,7 @@
)
with zipfile.ZipFile(filename) as zf:
+ self.assertAllEntriesHasReproducibleMetadata(zf)
self.assertEqual(
zf.namelist(),
[
@@ -255,6 +292,9 @@
# Ensure RECORD files do not have leading forward slashes
for line in record_contents.splitlines():
self.assertFalse(line.startswith("/"))
+ self.assertFileSha256Equal(
+ filename, "1b1fa3a4e840211084ef80049d07947b845c99bedb2778496d30e0c1524686ac"
+ )
def test_custom_package_root_multi_prefix_wheel(self):
filename = self._get_path(
@@ -262,6 +302,7 @@
)
with zipfile.ZipFile(filename) as zf:
+ self.assertAllEntriesHasReproducibleMetadata(zf)
self.assertEqual(
zf.namelist(),
[
@@ -282,6 +323,9 @@
# Ensure RECORD files do not have leading forward slashes
for line in record_contents.splitlines():
self.assertFalse(line.startswith("/"))
+ self.assertFileSha256Equal(
+ filename, "f0422d7a338de3c76bf2525927fd93c0f47f2e9c60ecc0944e3e32b642c28137"
+ )
def test_custom_package_root_multi_prefix_reverse_order_wheel(self):
filename = self._get_path(
@@ -289,6 +333,7 @@
)
with zipfile.ZipFile(filename) as zf:
+ self.assertAllEntriesHasReproducibleMetadata(zf)
self.assertEqual(
zf.namelist(),
[
@@ -309,12 +354,16 @@
# Ensure RECORD files do not have leading forward slashes
for line in record_contents.splitlines():
self.assertFalse(line.startswith("/"))
+ self.assertFileSha256Equal(
+ filename, "4f9e8c917b4050f121ac81e9a2bb65723ef09a1b90b35d93792ac3a62a60efa3"
+ )
def test_python_requires_wheel(self):
filename = self._get_path(
"example_python_requires_in_a_package-0.0.1-py3-none-any.whl",
)
with zipfile.ZipFile(filename) as zf:
+ self.assertAllEntriesHasReproducibleMetadata(zf)
metadata_contents = zf.read(
"example_python_requires_in_a_package-0.0.1.dist-info/METADATA"
)
@@ -330,6 +379,9 @@
UNKNOWN
""",
)
+ self.assertFileSha256Equal(
+ filename, "9bfe8197d379f88715458a75e45c1f521a8b9d3cc43fe19b407c4ab207228b7c"
+ )
def test_python_abi3_binary_wheel(self):
arch = "amd64"
@@ -346,6 +398,7 @@
f"example_python_abi3_binary_wheel-0.0.1-cp38-abi3-{os_string}_{arch}.whl",
)
with zipfile.ZipFile(filename) as zf:
+ self.assertAllEntriesHasReproducibleMetadata(zf)
metadata_contents = zf.read(
"example_python_abi3_binary_wheel-0.0.1.dist-info/METADATA"
)
@@ -380,6 +433,7 @@
)
with zipfile.ZipFile(filename) as zf:
+ self.assertAllEntriesHasReproducibleMetadata(zf)
self.assertEqual(
zf.namelist(),
[
@@ -390,6 +444,9 @@
"use_rule_with_dir_in_outs-0.0.1.dist-info/RECORD",
],
)
+ self.assertFileSha256Equal(
+ filename, "8ad5f639cc41ac6ac67eb70f6553a7fdecabaf3a1b952c3134eaea59610c2a64"
+ )
def test_rule_expands_workspace_status_keys_in_wheel_metadata(self):
filename = self._get_path(
@@ -397,6 +454,7 @@
)
with zipfile.ZipFile(filename) as zf:
+ self.assertAllEntriesHasReproducibleMetadata(zf)
metadata_file = None
for f in zf.namelist():
self.assertNotIn("_BUILD_TIMESTAMP_", f)
diff --git a/tools/wheelmaker.py b/tools/wheelmaker.py
index dce5406..f2ecbaf 100644
--- a/tools/wheelmaker.py
+++ b/tools/wheelmaker.py
@@ -14,7 +14,6 @@
import argparse
import base64
-import collections
import hashlib
import os
import re
@@ -22,6 +21,8 @@
import zipfile
from pathlib import Path
+_ZIP_EPOCH = (1980, 1, 1, 0, 0, 0)
+
def commonpath(path1, path2):
ret = []
@@ -189,7 +190,8 @@
"""Add given 'contents' as filename to the distribution."""
if sys.version_info[0] > 2 and isinstance(contents, str):
contents = contents.encode("utf-8", "surrogateescape")
- self._zipfile.writestr(filename, contents)
+ zinfo = self._zipinfo(filename)
+ self._zipfile.writestr(zinfo, contents)
hash = hashlib.sha256()
hash.update(contents)
self._add_to_record(filename, self._serialize_digest(hash), len(contents))
@@ -219,20 +221,36 @@
return
arcname = arcname_from(package_filename)
+ zinfo = self._zipinfo(arcname)
- self._zipfile.write(real_filename, arcname=arcname)
- # Find the hash and length
+ # Write file to the zip archive while computing the hash and length
hash = hashlib.sha256()
size = 0
- with open(real_filename, "rb") as f:
- while True:
- block = f.read(2**20)
- if not block:
- break
- hash.update(block)
- size += len(block)
+ with open(real_filename, "rb") as fsrc:
+ with self._zipfile.open(zinfo, "w") as fdst:
+ while True:
+ block = fsrc.read(2**20)
+ if not block:
+ break
+ fdst.write(block)
+ hash.update(block)
+ size += len(block)
self._add_to_record(arcname, self._serialize_digest(hash), size)
+ def _zipinfo(self, filename):
+ """Construct deterministic ZipInfo entry for a file named filename"""
+ # Strip leading path separators to mirror ZipInfo.from_file behavior
+ separators = os.path.sep
+ if os.path.altsep is not None:
+ separators += os.path.altsep
+ arcname = filename.lstrip(separators)
+
+ zinfo = zipfile.ZipInfo(filename=arcname, date_time=_ZIP_EPOCH)
+ zinfo.create_system = 3 # ZipInfo entry created on a unix-y system
+ zinfo.external_attr = 0o777 << 16 # permissions: rwxrwxrwx
+ zinfo.compress_type = self._zipfile.compression
+ return zinfo
+
def add_wheelfile(self):
"""Write WHEEL file to the distribution"""
# TODO(pstradomski): Support non-purelib wheels.