fix(py_wheel): Quote wheel RECORD file entry elements if needed (#2269)
The `RECORD` file written when wheels are patched using `pip.override()`
is not quoting filenames as needed, so wheels that (unfortunately)
include files whose name contain commas would be written in such a way
that https://pypi.org/project/installer/ would fail to parse them,
resulting in an error like:
```
installer.records.InvalidRecordEntry: Row Index 360: expected 3 elements, got 5
```
This PR fixes that by using `csv` to read and write `RECORD` file
entries which takes care of quoting elements of record entries as
needed. See PEP376 for more info about the `RECORD` file format here:
https://peps.python.org/pep-0376/#record
Fixes #2261
diff --git a/CHANGELOG.md b/CHANGELOG.md
index d58351a..c850d18 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -40,6 +40,8 @@
{attr}`pip.parse.experimental_index_url`. See
[#2239](https://github.com/bazelbuild/rules_python/issues/2239).
* (whl_filegroup): Provide per default also the `RECORD` file
+* (py_wheel): `RECORD` file entry elements are now quoted if necessary when a
+ wheel is created
### Added
* (py_wheel) Now supports `compress = (True|False)` to allow disabling
diff --git a/examples/wheel/lib/BUILD.bazel b/examples/wheel/lib/BUILD.bazel
index 3b59662..755818d 100644
--- a/examples/wheel/lib/BUILD.bazel
+++ b/examples/wheel/lib/BUILD.bazel
@@ -26,7 +26,10 @@
py_library(
name = "module_with_data",
srcs = ["module_with_data.py"],
- data = [":data.txt"],
+ data = [
+ "data,with,commas.txt",
+ ":data.txt",
+ ],
)
genrule(
@@ -34,3 +37,9 @@
outs = ["data.txt"],
cmd = "echo foo bar baz > $@",
)
+
+genrule(
+ name = "make_data_with_commas_in_name",
+ outs = ["data,with,commas.txt"],
+ cmd = "echo foo bar baz > $@",
+)
diff --git a/examples/wheel/wheel_test.py b/examples/wheel/wheel_test.py
index 7212423..4494ee1 100644
--- a/examples/wheel/wheel_test.py
+++ b/examples/wheel/wheel_test.py
@@ -95,6 +95,7 @@
self.assertEqual(
zf.namelist(),
[
+ "examples/wheel/lib/data,with,commas.txt",
"examples/wheel/lib/data.txt",
"examples/wheel/lib/module_with_data.py",
"examples/wheel/lib/simple_module.py",
@@ -105,7 +106,7 @@
],
)
self.assertFileSha256Equal(
- filename, "b4815a1d3a17cc6a5ce717ed42b940fa7788cb5168f5c1de02f5f50abed7083e"
+ filename, "82370bf61310e2d3c7b1218368457dc7e161bf5dc1a280d7d45102b5e56acf43"
)
def test_customized_wheel(self):
@@ -117,6 +118,7 @@
self.assertEqual(
zf.namelist(),
[
+ "examples/wheel/lib/data,with,commas.txt",
"examples/wheel/lib/data.txt",
"examples/wheel/lib/module_with_data.py",
"examples/wheel/lib/simple_module.py",
@@ -140,6 +142,7 @@
record_contents,
# The entries are guaranteed to be sorted.
b"""\
+"examples/wheel/lib/data,with,commas.txt",sha256=9vJKEdfLu8bZRArKLroPZJh1XKkK3qFMXiM79MBL2Sg,12
examples/wheel/lib/data.txt,sha256=9vJKEdfLu8bZRArKLroPZJh1XKkK3qFMXiM79MBL2Sg,12
examples/wheel/lib/module_with_data.py,sha256=8s0Khhcqz3yVsBKv2IB5u4l4TMKh7-c_V6p65WVHPms,637
examples/wheel/lib/simple_module.py,sha256=z2hwciab_XPNIBNH8B1Q5fYgnJvQTeYf0ZQJpY8yLLY,637
@@ -194,7 +197,7 @@
second = second.main:s""",
)
self.assertFileSha256Equal(
- filename, "27f3038be6e768d28735441a1bc567eca2213bd3568d18b22a414e6399a2d48e"
+ filename, "706e8dd45884d8cb26e92869f7d29ab7ed9f683b4e2d08f06c03dbdaa12191b8"
)
def test_filename_escaping(self):
@@ -205,6 +208,7 @@
self.assertEqual(
zf.namelist(),
[
+ "examples/wheel/lib/data,with,commas.txt",
"examples/wheel/lib/data.txt",
"examples/wheel/lib/module_with_data.py",
"examples/wheel/lib/simple_module.py",
@@ -241,6 +245,7 @@
self.assertEqual(
zf.namelist(),
[
+ "wheel/lib/data,with,commas.txt",
"wheel/lib/data.txt",
"wheel/lib/module_with_data.py",
"wheel/lib/simple_module.py",
@@ -260,7 +265,7 @@
for line in record_contents.splitlines():
self.assertFalse(line.startswith("/"))
self.assertFileSha256Equal(
- filename, "f034b3278781f4df32a33df70d794bb94170b450e477c8bd9cd42d2d922476ae"
+ filename, "568922541703f6edf4b090a8413991f9fa625df2844e644dd30bdbe9deb660be"
)
def test_custom_package_root_multi_prefix_wheel(self):
@@ -273,6 +278,7 @@
self.assertEqual(
zf.namelist(),
[
+ "data,with,commas.txt",
"data.txt",
"module_with_data.py",
"simple_module.py",
@@ -291,7 +297,7 @@
for line in record_contents.splitlines():
self.assertFalse(line.startswith("/"))
self.assertFileSha256Equal(
- filename, "ff19f5e4540948247742716338bb4194d619cb56df409045d1a99f265ce8e36c"
+ filename, "a8b91ce9d6f570e97b40a357a292a6f595d3470f07c479cb08550257cc9c8306"
)
def test_custom_package_root_multi_prefix_reverse_order_wheel(self):
@@ -304,6 +310,7 @@
self.assertEqual(
zf.namelist(),
[
+ "lib/data,with,commas.txt",
"lib/data.txt",
"lib/module_with_data.py",
"lib/simple_module.py",
@@ -322,7 +329,7 @@
for line in record_contents.splitlines():
self.assertFalse(line.startswith("/"))
self.assertFileSha256Equal(
- filename, "4331e378ea8b8148409ae7c02177e4eb24d151a85ef937bb44b79ff5258d634b"
+ filename, "8f44e940731757c186079a42cfe7ea3d43cd96b526e3fb2ca2a3ea3048a9d489"
)
def test_python_requires_wheel(self):
@@ -347,7 +354,7 @@
""",
)
self.assertFileSha256Equal(
- filename, "b34676828f93da8cd898d50dcd4f36e02fe273150e213aacb999310a05f5f38c"
+ filename, "ba32493f5e43e481346384aaab9e8fa09c23884276ad057c5f432096a0350101"
)
def test_python_abi3_binary_wheel(self):
diff --git a/python/private/pypi/repack_whl.py b/python/private/pypi/repack_whl.py
index 9052ac3..519631f 100644
--- a/python/private/pypi/repack_whl.py
+++ b/python/private/pypi/repack_whl.py
@@ -22,6 +22,7 @@
from __future__ import annotations
import argparse
+import csv
import difflib
import logging
import pathlib
@@ -65,8 +66,8 @@
# First get existing files by using the RECORD file
got_files = []
got_distinfos = []
- for line in want_record.splitlines():
- rec, _, _ = line.partition(",")
+ for row in csv.reader(want_record.splitlines()):
+ rec = row[0]
path = dir / rec
if not path.exists():
diff --git a/python/private/whl_filegroup/extract_wheel_files.py b/python/private/whl_filegroup/extract_wheel_files.py
index 01d0bc6..5b799c9 100644
--- a/python/private/whl_filegroup/extract_wheel_files.py
+++ b/python/private/whl_filegroup/extract_wheel_files.py
@@ -1,5 +1,6 @@
"""Extract files from a wheel's RECORD."""
+import csv
import re
import sys
import zipfile
@@ -20,10 +21,7 @@
except ValueError:
raise RuntimeError(f"{whl_path} doesn't contain exactly one .dist-info/RECORD")
record_lines = zipf.read(record_file).decode().splitlines()
- return (
- line.split(",")[0]
- for line in record_lines
- )
+ return (row[0] for row in csv.reader(record_lines))
def get_files(whl_record: WhlRecord, regex_pattern: str) -> list[str]:
diff --git a/tests/whl_filegroup/extract_wheel_files_test.py b/tests/whl_filegroup/extract_wheel_files_test.py
index 387e56c..434899d 100644
--- a/tests/whl_filegroup/extract_wheel_files_test.py
+++ b/tests/whl_filegroup/extract_wheel_files_test.py
@@ -11,6 +11,7 @@
def test_get_wheel_record(self) -> None:
record = extract_wheel_files.get_record(_WHEEL)
expected = (
+ "examples/wheel/lib/data,with,commas.txt",
"examples/wheel/lib/data.txt",
"examples/wheel/lib/module_with_data.py",
"examples/wheel/lib/simple_module.py",
@@ -26,11 +27,19 @@
pattern = "(examples/wheel/lib/.*\.txt$|.*main)"
record = extract_wheel_files.get_record(_WHEEL)
files = extract_wheel_files.get_files(record, pattern)
- expected = ["examples/wheel/lib/data.txt", "examples/wheel/main.py"]
+ expected = [
+ "examples/wheel/lib/data,with,commas.txt",
+ "examples/wheel/lib/data.txt",
+ "examples/wheel/main.py",
+ ]
self.assertEqual(files, expected)
def test_extract(self) -> None:
- files = {"examples/wheel/lib/data.txt", "examples/wheel/main.py"}
+ files = {
+ "examples/wheel/lib/data,with,commas.txt",
+ "examples/wheel/lib/data.txt",
+ "examples/wheel/main.py",
+ }
with tempfile.TemporaryDirectory() as tmpdir:
outdir = Path(tmpdir)
extract_wheel_files.extract_files(_WHEEL, files, outdir)
diff --git a/tools/wheelmaker.py b/tools/wheelmaker.py
index db287eb..23b18ec 100644
--- a/tools/wheelmaker.py
+++ b/tools/wheelmaker.py
@@ -16,7 +16,9 @@
import argparse
import base64
+import csv
import hashlib
+import io
import os
import re
import stat
@@ -208,14 +210,23 @@
"""Write RECORD file to the distribution."""
record_path = self.distinfo_path("RECORD")
entries = self._record + [(record_path, b"", b"")]
- contents = b""
- for filename, digest, size in entries:
- if isinstance(filename, str):
- filename = filename.lstrip("/").encode("utf-8", "surrogateescape")
- contents += b"%s,%s,%s\n" % (filename, digest, size)
+ with io.StringIO() as contents_io:
+ writer = csv.writer(contents_io, lineterminator="\n")
+ for filename, digest, size in entries:
+ if isinstance(filename, str):
+ filename = filename.lstrip("/")
+ writer.writerow(
+ (
+ c
+ if isinstance(c, str)
+ else c.decode("utf-8", "surrogateescape")
+ for c in (filename, digest, size)
+ )
+ )
- self.add_string(record_path, contents)
- return contents
+ contents = contents_io.getvalue()
+ self.add_string(record_path, contents)
+ return contents.encode("utf-8", "surrogateescape")
class WheelMaker(object):