fix(py_wheel): produce deterministic wheel files (#1453)

The current implementation does not produce deterministic output because:
- `ZipFile.writestr()` leaks the current date and time, and
- `ZipFile.write()` leaks the source file's mtime and mode bits
  (permissions) into the resulting zip archive.

By manually creating our own `ZipInfo` objects we can explicitly set
date and time fields to `Jan 1, 1980, 00:00` (minimum value allowed by
the zip file standard), and ensure that other file attributes are
uniform across all entries in a zip file.

---------

Co-authored-by: Ignas Anikevicius <240938+aignas@users.noreply.github.com>
diff --git a/CHANGELOG.md b/CHANGELOG.md
index be69f5e..e13868a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -105,6 +105,8 @@
 
 * (gazelle) Improve runfiles lookup hermeticity.
 
+* (py_wheel) Produce deterministic wheel files.
+
 ## [0.25.0] - 2023-08-22
 
 ### Changed
diff --git a/examples/wheel/wheel_test.py b/examples/wheel/wheel_test.py
index 8c0f53e..23b1c8a 100644
--- a/examples/wheel/wheel_test.py
+++ b/examples/wheel/wheel_test.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import hashlib
 import os
 import platform
 import subprocess
@@ -43,9 +44,29 @@
         else:
             return path
 
+    def assertFileSha256Equal(self, filename, sha):
+        """Assert that the SHA-256 hex digest of the file `filename` equals `sha`."""
+        digest = hashlib.sha256()
+        with open(filename, "rb") as stream:
+            # Hash in 1 MiB chunks so the whole file is never held in memory.
+            for chunk in iter(lambda: stream.read(2**20), b""):
+                digest.update(chunk)
+        actual = digest.hexdigest()
+        self.assertEqual(actual, sha)
+
+    def assertAllEntriesHasReproducibleMetadata(self, zf):
+        """Assert each entry in `zf` has the fixed date, OS, mode and compression."""
+        for entry in zf.infolist():
+            name = entry.filename
+            self.assertEqual(entry.date_time, (1980, 1, 1, 0, 0, 0), msg=name)
+            self.assertEqual(entry.create_system, 3, msg=name)
+            self.assertEqual(entry.external_attr, 0o777 << 16, msg=name)
+            self.assertEqual(entry.compress_type, zipfile.ZIP_DEFLATED, msg=name)
+
     def test_py_library_wheel(self):
         filename = self._get_path("example_minimal_library-0.0.1-py3-none-any.whl")
         with zipfile.ZipFile(filename) as zf:
+            self.assertAllEntriesHasReproducibleMetadata(zf)
             self.assertEqual(
                 zf.namelist(),
                 [
@@ -56,12 +77,16 @@
                     "example_minimal_library-0.0.1.dist-info/RECORD",
                 ],
             )
+        self.assertFileSha256Equal(
+            filename, "6da8e06a3fdd9ae5ee9fa8f796610723c05a4b0d7fde0ec5179401e956204139"
+        )
 
     def test_py_package_wheel(self):
         filename = self._get_path(
             "example_minimal_package-0.0.1-py3-none-any.whl",
         )
         with zipfile.ZipFile(filename) as zf:
+            self.assertAllEntriesHasReproducibleMetadata(zf)
             self.assertEqual(
                 zf.namelist(),
                 [
@@ -74,12 +99,16 @@
                     "example_minimal_package-0.0.1.dist-info/RECORD",
                 ],
             )
+        self.assertFileSha256Equal(
+            filename, "2948b0b5e0aa421e0b40f78b74018bbc2f218165f211da0a4609e431e8e52bee"
+        )
 
     def test_customized_wheel(self):
         filename = self._get_path(
             "example_customized-0.0.1-py3-none-any.whl",
         )
         with zipfile.ZipFile(filename) as zf:
+            self.assertAllEntriesHasReproducibleMetadata(zf)
             self.assertEqual(
                 zf.namelist(),
                 [
@@ -159,12 +188,16 @@
 first = first.main:f
 second = second.main:s""",
             )
+        self.assertFileSha256Equal(
+            filename, "66f0c1bfe2cedb2f4cf08d4fe955096860186c0a2f3524e0cb02387a55ac3e63"
+        )
 
     def test_legacy_filename_escaping(self):
         filename = self._get_path(
             "file_name_escaping-0.0.1_r7-py3-none-any.whl",
         )
         with zipfile.ZipFile(filename) as zf:
+            self.assertAllEntriesHasReproducibleMetadata(zf)
             self.assertEquals(
                 zf.namelist(),
                 [
@@ -193,6 +226,9 @@
 UNKNOWN
 """,
             )
+        self.assertFileSha256Equal(
+            filename, "593c6ab58627f2446d0f1ef2956fd6d42104eedce4493c72d462f7ebf8cb74fa"
+        )
 
     def test_filename_escaping(self):
         filename = self._get_path(
@@ -234,6 +270,7 @@
         )
 
         with zipfile.ZipFile(filename) as zf:
+            self.assertAllEntriesHasReproducibleMetadata(zf)
             self.assertEqual(
                 zf.namelist(),
                 [
@@ -255,6 +292,9 @@
             # Ensure RECORD files do not have leading forward slashes
             for line in record_contents.splitlines():
                 self.assertFalse(line.startswith("/"))
+        self.assertFileSha256Equal(
+            filename, "1b1fa3a4e840211084ef80049d07947b845c99bedb2778496d30e0c1524686ac"
+        )
 
     def test_custom_package_root_multi_prefix_wheel(self):
         filename = self._get_path(
@@ -262,6 +302,7 @@
         )
 
         with zipfile.ZipFile(filename) as zf:
+            self.assertAllEntriesHasReproducibleMetadata(zf)
             self.assertEqual(
                 zf.namelist(),
                 [
@@ -282,6 +323,9 @@
             # Ensure RECORD files do not have leading forward slashes
             for line in record_contents.splitlines():
                 self.assertFalse(line.startswith("/"))
+        self.assertFileSha256Equal(
+            filename, "f0422d7a338de3c76bf2525927fd93c0f47f2e9c60ecc0944e3e32b642c28137"
+        )
 
     def test_custom_package_root_multi_prefix_reverse_order_wheel(self):
         filename = self._get_path(
@@ -289,6 +333,7 @@
         )
 
         with zipfile.ZipFile(filename) as zf:
+            self.assertAllEntriesHasReproducibleMetadata(zf)
             self.assertEqual(
                 zf.namelist(),
                 [
@@ -309,12 +354,16 @@
             # Ensure RECORD files do not have leading forward slashes
             for line in record_contents.splitlines():
                 self.assertFalse(line.startswith("/"))
+        self.assertFileSha256Equal(
+            filename, "4f9e8c917b4050f121ac81e9a2bb65723ef09a1b90b35d93792ac3a62a60efa3"
+        )
 
     def test_python_requires_wheel(self):
         filename = self._get_path(
             "example_python_requires_in_a_package-0.0.1-py3-none-any.whl",
         )
         with zipfile.ZipFile(filename) as zf:
+            self.assertAllEntriesHasReproducibleMetadata(zf)
             metadata_contents = zf.read(
                 "example_python_requires_in_a_package-0.0.1.dist-info/METADATA"
             )
@@ -330,6 +379,9 @@
 UNKNOWN
 """,
             )
+        self.assertFileSha256Equal(
+            filename, "9bfe8197d379f88715458a75e45c1f521a8b9d3cc43fe19b407c4ab207228b7c"
+        )
 
     def test_python_abi3_binary_wheel(self):
         arch = "amd64"
@@ -346,6 +398,7 @@
             f"example_python_abi3_binary_wheel-0.0.1-cp38-abi3-{os_string}_{arch}.whl",
         )
         with zipfile.ZipFile(filename) as zf:
+            self.assertAllEntriesHasReproducibleMetadata(zf)
             metadata_contents = zf.read(
                 "example_python_abi3_binary_wheel-0.0.1.dist-info/METADATA"
             )
@@ -380,6 +433,7 @@
         )
 
         with zipfile.ZipFile(filename) as zf:
+            self.assertAllEntriesHasReproducibleMetadata(zf)
             self.assertEqual(
                 zf.namelist(),
                 [
@@ -390,6 +444,9 @@
                     "use_rule_with_dir_in_outs-0.0.1.dist-info/RECORD",
                 ],
             )
+        self.assertFileSha256Equal(
+            filename, "8ad5f639cc41ac6ac67eb70f6553a7fdecabaf3a1b952c3134eaea59610c2a64"
+        )
 
     def test_rule_expands_workspace_status_keys_in_wheel_metadata(self):
         filename = self._get_path(
@@ -397,6 +454,7 @@
         )
 
         with zipfile.ZipFile(filename) as zf:
+            self.assertAllEntriesHasReproducibleMetadata(zf)
             metadata_file = None
             for f in zf.namelist():
                 self.assertNotIn("_BUILD_TIMESTAMP_", f)
diff --git a/tools/wheelmaker.py b/tools/wheelmaker.py
index dce5406..f2ecbaf 100644
--- a/tools/wheelmaker.py
+++ b/tools/wheelmaker.py
@@ -14,7 +14,6 @@
 
 import argparse
 import base64
-import collections
 import hashlib
 import os
 import re
@@ -22,6 +21,8 @@
 import zipfile
 from pathlib import Path
 
+_ZIP_EPOCH = (1980, 1, 1, 0, 0, 0)
+
 
 def commonpath(path1, path2):
     ret = []
@@ -189,7 +190,8 @@
         """Add given 'contents' as filename to the distribution."""
         if sys.version_info[0] > 2 and isinstance(contents, str):
             contents = contents.encode("utf-8", "surrogateescape")
-        self._zipfile.writestr(filename, contents)
+        zinfo = self._zipinfo(filename)
+        self._zipfile.writestr(zinfo, contents)
         hash = hashlib.sha256()
         hash.update(contents)
         self._add_to_record(filename, self._serialize_digest(hash), len(contents))
@@ -219,20 +221,36 @@
             return
 
         arcname = arcname_from(package_filename)
+        zinfo = self._zipinfo(arcname)
 
-        self._zipfile.write(real_filename, arcname=arcname)
-        # Find the hash and length
+        # Write file to the zip archive while computing the hash and length
         hash = hashlib.sha256()
         size = 0
-        with open(real_filename, "rb") as f:
-            while True:
-                block = f.read(2**20)
-                if not block:
-                    break
-                hash.update(block)
-                size += len(block)
+        with open(real_filename, "rb") as fsrc:
+            with self._zipfile.open(zinfo, "w") as fdst:
+                while True:
+                    block = fsrc.read(2**20)
+                    if not block:
+                        break
+                    fdst.write(block)
+                    hash.update(block)
+                    size += len(block)
         self._add_to_record(arcname, self._serialize_digest(hash), size)
 
+    def _zipinfo(self, filename):
+        """Build a ZipInfo for `filename` whose metadata is fixed across builds."""
+        # ZipInfo.from_file drops leading path separators; do the same here.
+        strip_chars = os.path.sep + (os.path.altsep or "")
+        entry = zipfile.ZipInfo(
+            filename=filename.lstrip(strip_chars),
+            date_time=_ZIP_EPOCH,
+        )
+        entry.create_system = 3  # mark the entry as created on a unix-y system
+        entry.external_attr = 0o777 << 16  # permissions: rwxrwxrwx
+        # Inherit the compression method configured on the open archive.
+        entry.compress_type = self._zipfile.compression
+        return entry
+
     def add_wheelfile(self):
         """Write WHEEL file to the distribution"""
         # TODO(pstradomski): Support non-purelib wheels.