feat: Add support for python-wheel data directory (#1801)
Fixes #1777
* Adds `data_files` attribute to `py_wheel` rule.
* Minimal validation of the data-files target directories per
[specification](https://packaging.python.org/en/latest/specifications/binary-distribution-format/#installing-a-wheel-distribution-1-0-py32-none-any-whl)
* Added two tests.
* Added example
diff --git a/CHANGELOG.md b/CHANGELOG.md
index af4c108..ff61b2b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -40,6 +40,9 @@
_default_ visibility of generated targets. See the [docs][python_default_visibility]
for details.
+* (wheel) Add support for the `data_files` attribute in the `py_wheel` rule
+ ([#1777](https://github.com/bazelbuild/rules_python/issues/1777))
+
[0.XX.0]: https://github.com/bazelbuild/rules_python/releases/tag/0.XX.0
[python_default_visibility]: gazelle/README.md#directive-python_default_visibility
diff --git a/examples/wheel/BUILD.bazel b/examples/wheel/BUILD.bazel
index 699bf68..2e45d7d 100644
--- a/examples/wheel/BUILD.bazel
+++ b/examples/wheel/BUILD.bazel
@@ -312,6 +312,20 @@
deps = [":example_pkg"],
)
+# Package only data files, which are placed in the wheel's .data directory
+py_wheel(
+ name = "minimal_data_files",
+ testonly = True, # Set this to verify the generated .dist target doesn't break things
+
+ # Re-using some files already checked into the repo.
+ data_files = {
+ "//examples/wheel:NOTICE": "scripts/NOTICE",
+ "README.md": "data/target/path/README.md",
+ },
+ distribution = "minimal_data_files",
+ version = "0.0.1",
+)
+
py_test(
name = "wheel_test",
srcs = ["wheel_test.py"],
@@ -321,6 +335,7 @@
":custom_package_root_multi_prefix_reverse_order",
":customized",
":filename_escaping",
+ ":minimal_data_files",
":minimal_with_py_library",
":minimal_with_py_library_with_stamp",
":minimal_with_py_package",
diff --git a/examples/wheel/wheel_test.py b/examples/wheel/wheel_test.py
index 0c3e87b..e135eaa 100644
--- a/examples/wheel/wheel_test.py
+++ b/examples/wheel/wheel_test.py
@@ -472,6 +472,23 @@
requires,
)
+ def test_minimal_data_files(self):
+ filename = self._get_path("minimal_data_files-0.0.1-py3-none-any.whl")
+
+ with zipfile.ZipFile(filename) as zf:
+ self.assertAllEntriesHasReproducibleMetadata(zf)
+ metadata_file = None
+ self.assertEqual(
+ zf.namelist(),
+ [
+ "minimal_data_files-0.0.1.dist-info/WHEEL",
+ "minimal_data_files-0.0.1.dist-info/METADATA",
+ "minimal_data_files-0.0.1.data/data/target/path/README.md",
+ "minimal_data_files-0.0.1.data/scripts/NOTICE",
+ "minimal_data_files-0.0.1.dist-info/RECORD",
+ ]
+ )
+
if __name__ == "__main__":
unittest.main()
diff --git a/python/private/py_wheel.bzl b/python/private/py_wheel.bzl
index 5919abe..2aed9b9 100644
--- a/python/private/py_wheel.bzl
+++ b/python/private/py_wheel.bzl
@@ -120,6 +120,7 @@
_feature_flags = {}
+ALLOWED_DATA_FILE_PREFIX = ("purelib", "platlib", "headers", "scripts", "data")
_requirement_attrs = {
"extra_requires": attr.string_list_dict(
doc = ("A mapping of [extras](https://peps.python.org/pep-0508/#extras) options to lists of requirements (similar to `requires`). This attribute " +
@@ -172,6 +173,11 @@
"classifiers": attr.string_list(
doc = "A list of strings describing the categories for the package. For valid classifiers see https://pypi.org/classifiers",
),
+ "data_files": attr.label_keyed_string_dict(
+ doc = ("Any file that is not normally installed inside site-packages goes into the .data directory, named " +
+ "as the .dist-info directory but with the .data/ extension. Allowed paths: {prefixes}".format(prefixes = ALLOWED_DATA_FILE_PREFIX)),
+ allow_files = True,
+ ),
"description_content_type": attr.string(
doc = ("The type of contents in description_file. " +
"If not provided, the type will be inferred from the extension of description_file. " +
@@ -473,6 +479,28 @@
filename + ";" + target_files[0].path,
)
+ for target, filename in ctx.attr.data_files.items():
+ target_files = target.files.to_list()
+ if len(target_files) != 1:
+ fail(
+ "Multi-file target listed in data_files %s",
+ filename,
+ )
+
+ if filename.partition("/")[0] not in ALLOWED_DATA_FILE_PREFIX:
+ fail(
+ "The target data file must start with one of these prefixes: '%s'. Target filepath: '%s'" %
+ (
+ ",".join(ALLOWED_DATA_FILE_PREFIX),
+ filename,
+ ),
+ )
+ other_inputs.extend(target_files)
+ args.add(
+ "--data_files",
+ filename + ";" + target_files[0].path,
+ )
+
ctx.actions.run(
mnemonic = "PyWheel",
inputs = depset(direct = other_inputs, transitive = [inputs_to_package]),
diff --git a/python/private/repack_whl.py b/python/private/repack_whl.py
index be113ef..ea9c01f 100644
--- a/python/private/repack_whl.py
+++ b/python/private/repack_whl.py
@@ -150,8 +150,9 @@
logging.debug(f"Found dist-info dir: {distinfo_dir}")
record_path = distinfo_dir / "RECORD"
record_contents = record_path.read_text() if record_path.exists() else ""
+ distribution_prefix = distinfo_dir.with_suffix("").name
- with _WhlFile(args.output, mode="w", distinfo_dir=distinfo_dir) as out:
+ with _WhlFile(args.output, mode="w", distribution_prefix=distribution_prefix) as out:
for p in _files_to_pack(patched_wheel_dir, record_contents):
rel_path = p.relative_to(patched_wheel_dir)
out.add_file(str(rel_path), p)
diff --git a/tests/py_wheel/py_wheel_tests.bzl b/tests/py_wheel/py_wheel_tests.bzl
index 3c03a1b..091e01c 100644
--- a/tests/py_wheel/py_wheel_tests.bzl
+++ b/tests/py_wheel/py_wheel_tests.bzl
@@ -14,6 +14,7 @@
"""Test for py_wheel."""
load("@rules_testing//lib:analysis_test.bzl", "analysis_test", "test_suite")
+load("@rules_testing//lib:truth.bzl", "matching")
load("@rules_testing//lib:util.bzl", rt_util = "util")
load("//python:packaging.bzl", "py_wheel")
load("//python/private:py_wheel_normalize_pep440.bzl", "normalize_pep440") # buildifier: disable=bzl-visibility
@@ -46,6 +47,79 @@
_tests.append(_test_metadata)
+def _test_data(name):
+ rt_util.helper_target(
+ py_wheel,
+ name = name + "_data",
+ distribution = "mydist_" + name,
+ version = "0.0.0",
+ data_files = {
+ "source_name": "scripts/wheel_name",
+ },
+ )
+ analysis_test(
+ name = name,
+ impl = _test_data_impl,
+ target = name + "_data",
+ )
+
+def _test_data_impl(env, target):
+ action = env.expect.that_target(target).action_named(
+ "PyWheel",
+ )
+ action.contains_at_least_args(["--data_files", "scripts/wheel_name;tests/py_wheel/source_name"])
+ action.contains_at_least_inputs(["tests/py_wheel/source_name"])
+
+_tests.append(_test_data)
+
+def _test_data_bad_path(name):
+ rt_util.helper_target(
+ py_wheel,
+ name = name + "_data",
+ distribution = "mydist_" + name,
+ version = "0.0.0",
+ data_files = {
+ "source_name": "unsupported_path/wheel_name",
+ },
+ )
+ analysis_test(
+ name = name,
+ impl = _test_data_bad_path_impl,
+ target = name + "_data",
+ expect_failure = True,
+ )
+
+def _test_data_bad_path_impl(env, target):
+ env.expect.that_target(target).failures().contains_predicate(
+ matching.str_matches("target data file must start with"),
+ )
+
+_tests.append(_test_data_bad_path)
+
+def _test_data_bad_path_but_right_prefix(name):
+ rt_util.helper_target(
+ py_wheel,
+ name = name + "_data",
+ distribution = "mydist_" + name,
+ version = "0.0.0",
+ data_files = {
+ "source_name": "scripts2/wheel_name",
+ },
+ )
+ analysis_test(
+ name = name,
+ impl = _test_data_bad_path_but_right_prefix_impl,
+ target = name + "_data",
+ expect_failure = True,
+ )
+
+def _test_data_bad_path_but_right_prefix_impl(env, target):
+ env.expect.that_target(target).failures().contains_predicate(
+ matching.str_matches("target data file must start with"),
+ )
+
+_tests.append(_test_data_bad_path_but_right_prefix)
+
def _test_content_type_from_attr(name):
rt_util.helper_target(
py_wheel,
diff --git a/tools/wheelmaker.py b/tools/wheelmaker.py
index 26153f6..8fa3e02 100644
--- a/tools/wheelmaker.py
+++ b/tools/wheelmaker.py
@@ -102,12 +102,13 @@
filename,
*,
mode,
- distinfo_dir: str | Path,
+ distribution_prefix: str,
strip_path_prefixes=None,
compression=zipfile.ZIP_DEFLATED,
**kwargs,
):
- self._distinfo_dir: str = Path(distinfo_dir).name
+ self._distribution_prefix = distribution_prefix
+
self._strip_path_prefixes = strip_path_prefixes or []
# Entries for the RECORD file as (filename, hash, size) tuples.
self._record = []
@@ -115,7 +116,10 @@
super().__init__(filename, mode=mode, compression=compression, **kwargs)
def distinfo_path(self, basename):
- return f"{self._distinfo_dir}/{basename}"
+ return f"{self._distribution_prefix}.dist-info/{basename}"
+
+ def data_path(self, basename):
+ return f"{self._distribution_prefix}.data/{basename}"
def add_file(self, package_filename, real_filename):
"""Add given file to the distribution."""
@@ -123,8 +127,8 @@
def arcname_from(name):
# Always use unix path separators.
normalized_arcname = name.replace(os.path.sep, "/")
- # Don't manipulate names filenames in the .distinfo directory.
- if normalized_arcname.startswith(self._distinfo_dir):
+ # Don't manipulate filenames in the .dist-info or .data directories.
+ if normalized_arcname.startswith(self._distribution_prefix):
return normalized_arcname
for prefix in self._strip_path_prefixes:
if normalized_arcname.startswith(prefix):
@@ -237,11 +241,9 @@
self._wheelname_fragment_distribution_name = escape_filename_distribution_name(
self._name
)
- self._distinfo_dir = (
- self._wheelname_fragment_distribution_name
- + "-"
- + self._version
- + ".dist-info/"
+
+ self._distribution_prefix = (
+ self._wheelname_fragment_distribution_name + "-" + self._version
)
self._whlfile = None
@@ -250,7 +252,7 @@
self._whlfile = _WhlFile(
self.filename(),
mode="w",
- distinfo_dir=self._distinfo_dir,
+ distribution_prefix=self._distribution_prefix,
strip_path_prefixes=self._strip_path_prefixes,
)
return self
@@ -280,6 +282,9 @@
def distinfo_path(self, basename):
return self._whlfile.distinfo_path(basename)
+ def data_path(self, basename):
+ return self._whlfile.data_path(basename)
+
def add_file(self, package_filename, real_filename):
"""Add given file to the distribution."""
self._whlfile.add_file(package_filename, real_filename)
@@ -436,6 +441,12 @@
help="'filename;real_path' pairs listing extra files to include in"
"dist-info directory. Can be supplied multiple times.",
)
+ contents_group.add_argument(
+ "--data_files",
+ action="append",
+ help="'filename;real_path' pairs listing data files to include in"
+ "data directory. Can be supplied multiple times.",
+ )
build_group = parser.add_argument_group("Building requirements")
build_group.add_argument(
@@ -452,25 +463,25 @@
return parser.parse_args(sys.argv[1:])
+def _parse_file_pairs(content: List[str]) -> List[List[str]]:
+ """
+ Parse 'filename;real_path' strings into [filename, real_path] pairs, splitting on the first ';' only.
+ """
+ return [i.split(";", maxsplit=1) for i in content or []]
+
+
def main() -> None:
arguments = parse_args()
- if arguments.input_file:
- input_files = [i.split(";") for i in arguments.input_file]
- else:
- input_files = []
+ input_files = _parse_file_pairs(arguments.input_file)
+ extra_distinfo_file = _parse_file_pairs(arguments.extra_distinfo_file)
+ data_files = _parse_file_pairs(arguments.data_files)
- if arguments.extra_distinfo_file:
- extra_distinfo_file = [i.split(";") for i in arguments.extra_distinfo_file]
- else:
- extra_distinfo_file = []
-
- if arguments.input_file_list:
- for input_file in arguments.input_file_list:
- with open(input_file) as _file:
- input_file_list = _file.read().splitlines()
- for _input_file in input_file_list:
- input_files.append(_input_file.split(";"))
+ for input_file in arguments.input_file_list:
+ with open(input_file) as _file:
+ input_file_list = _file.read().splitlines()
+ for _input_file in input_file_list:
+ input_files.append(_input_file.split(";"))
all_files = get_files_to_package(input_files)
# Sort the files for reproducible order in the archive.
@@ -570,6 +581,8 @@
)
# Sort the files for reproducible order in the archive.
+ for filename, real_path in sorted(data_files):
+ maker.add_file(maker.data_path(filename), real_path)
for filename, real_path in sorted(extra_distinfo_file):
maker.add_file(maker.distinfo_path(filename), real_path)