feat: Add support for python-wheel data directory (#1801)

Fixes #1777

* Adds `data_files` attribute to `py_wheel` rule. 
* Minimal validation of the data-files target directories per
[specification](https://packaging.python.org/en/latest/specifications/binary-distribution-format/#installing-a-wheel-distribution-1-0-py32-none-any-whl)
* Added two tests.  
* Added example
diff --git a/CHANGELOG.md b/CHANGELOG.md
index af4c108..ff61b2b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -40,6 +40,9 @@
   _default_ visibility of generated targets. See the [docs][python_default_visibility]
   for details.
 
+* (wheel) Add support for `data_files` attributes in py_wheel rule
+  ([#1777](https://github.com/bazelbuild/rules_python/issues/1777))
+
 [0.XX.0]: https://github.com/bazelbuild/rules_python/releases/tag/0.XX.0
 [python_default_visibility]: gazelle/README.md#directive-python_default_visibility
 
diff --git a/examples/wheel/BUILD.bazel b/examples/wheel/BUILD.bazel
index 699bf68..2e45d7d 100644
--- a/examples/wheel/BUILD.bazel
+++ b/examples/wheel/BUILD.bazel
@@ -312,6 +312,20 @@
     deps = [":example_pkg"],
 )
 
+# Package just a specific py_libraries, without their dependencies
+py_wheel(
+    name = "minimal_data_files",
+    testonly = True,  # Set this to verify the generated .dist target doesn't break things
+
+    # Re-using some files already checked into the repo.
+    data_files = {
+        "//examples/wheel:NOTICE": "scripts/NOTICE",
+        "README.md": "data/target/path/README.md",
+    },
+    distribution = "minimal_data_files",
+    version = "0.0.1",
+)
+
 py_test(
     name = "wheel_test",
     srcs = ["wheel_test.py"],
@@ -321,6 +335,7 @@
         ":custom_package_root_multi_prefix_reverse_order",
         ":customized",
         ":filename_escaping",
+        ":minimal_data_files",
         ":minimal_with_py_library",
         ":minimal_with_py_library_with_stamp",
         ":minimal_with_py_package",
diff --git a/examples/wheel/wheel_test.py b/examples/wheel/wheel_test.py
index 0c3e87b..e135eaa 100644
--- a/examples/wheel/wheel_test.py
+++ b/examples/wheel/wheel_test.py
@@ -472,6 +472,23 @@
                 requires,
             )
 
+    def test_minimal_data_files(self):
+        filename = self._get_path("minimal_data_files-0.0.1-py3-none-any.whl")
+
+        with zipfile.ZipFile(filename) as zf:
+            self.assertAllEntriesHasReproducibleMetadata(zf)
+            metadata_file = None
+            self.assertEqual(
+                zf.namelist(),
+                [
+                    "minimal_data_files-0.0.1.dist-info/WHEEL",
+                    "minimal_data_files-0.0.1.dist-info/METADATA",
+                    "minimal_data_files-0.0.1.data/data/target/path/README.md",
+                    "minimal_data_files-0.0.1.data/scripts/NOTICE",
+                    "minimal_data_files-0.0.1.dist-info/RECORD",
+                ]
+            )
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/python/private/py_wheel.bzl b/python/private/py_wheel.bzl
index 5919abe..2aed9b9 100644
--- a/python/private/py_wheel.bzl
+++ b/python/private/py_wheel.bzl
@@ -120,6 +120,7 @@
 
 _feature_flags = {}
 
+ALLOWED_DATA_FILE_PREFIX = ("purelib", "platlib", "headers", "scripts", "data")
 _requirement_attrs = {
     "extra_requires": attr.string_list_dict(
         doc = ("A mapping of [extras](https://peps.python.org/pep-0508/#extras) options to lists of requirements (similar to `requires`). This attribute " +
@@ -172,6 +173,11 @@
     "classifiers": attr.string_list(
         doc = "A list of strings describing the categories for the package. For valid classifiers see https://pypi.org/classifiers",
     ),
+    "data_files": attr.label_keyed_string_dict(
+        doc = ("Any file that is not normally installed inside site-packages goes into the .data directory, named " +
+               "as the .dist-info directory but with the .data/ extension.  Allowed paths: {prefixes}".format(prefixes = ALLOWED_DATA_FILE_PREFIX)),
+        allow_files = True,
+    ),
     "description_content_type": attr.string(
         doc = ("The type of contents in description_file. " +
                "If not provided, the type will be inferred from the extension of description_file. " +
@@ -473,6 +479,28 @@
             filename + ";" + target_files[0].path,
         )
 
+    for target, filename in ctx.attr.data_files.items():
+        target_files = target.files.to_list()
+        if len(target_files) != 1:
+            fail(
+                "Multi-file target listed in data_files %s",
+                filename,
+            )
+
+        if filename.partition("/")[0] not in ALLOWED_DATA_FILE_PREFIX:
+            fail(
+                "The target data file must start with one of these prefixes: '%s'.  Target filepath: '%s'" %
+                (
+                    ",".join(ALLOWED_DATA_FILE_PREFIX),
+                    filename,
+                ),
+            )
+        other_inputs.extend(target_files)
+        args.add(
+            "--data_files",
+            filename + ";" + target_files[0].path,
+        )
+
     ctx.actions.run(
         mnemonic = "PyWheel",
         inputs = depset(direct = other_inputs, transitive = [inputs_to_package]),
diff --git a/python/private/repack_whl.py b/python/private/repack_whl.py
index be113ef..ea9c01f 100644
--- a/python/private/repack_whl.py
+++ b/python/private/repack_whl.py
@@ -150,8 +150,9 @@
         logging.debug(f"Found dist-info dir: {distinfo_dir}")
         record_path = distinfo_dir / "RECORD"
         record_contents = record_path.read_text() if record_path.exists() else ""
+        distribution_prefix = distinfo_dir.with_suffix("").name
 
-        with _WhlFile(args.output, mode="w", distinfo_dir=distinfo_dir) as out:
+        with _WhlFile(args.output, mode="w", distribution_prefix=distribution_prefix) as out:
             for p in _files_to_pack(patched_wheel_dir, record_contents):
                 rel_path = p.relative_to(patched_wheel_dir)
                 out.add_file(str(rel_path), p)
diff --git a/tests/py_wheel/py_wheel_tests.bzl b/tests/py_wheel/py_wheel_tests.bzl
index 3c03a1b..091e01c 100644
--- a/tests/py_wheel/py_wheel_tests.bzl
+++ b/tests/py_wheel/py_wheel_tests.bzl
@@ -14,6 +14,7 @@
 """Test for py_wheel."""
 
 load("@rules_testing//lib:analysis_test.bzl", "analysis_test", "test_suite")
+load("@rules_testing//lib:truth.bzl", "matching")
 load("@rules_testing//lib:util.bzl", rt_util = "util")
 load("//python:packaging.bzl", "py_wheel")
 load("//python/private:py_wheel_normalize_pep440.bzl", "normalize_pep440")  # buildifier: disable=bzl-visibility
@@ -46,6 +47,79 @@
 
 _tests.append(_test_metadata)
 
+def _test_data(name):
+    rt_util.helper_target(
+        py_wheel,
+        name = name + "_data",
+        distribution = "mydist_" + name,
+        version = "0.0.0",
+        data_files = {
+            "source_name": "scripts/wheel_name",
+        },
+    )
+    analysis_test(
+        name = name,
+        impl = _test_data_impl,
+        target = name + "_data",
+    )
+
+def _test_data_impl(env, target):
+    action = env.expect.that_target(target).action_named(
+        "PyWheel",
+    )
+    action.contains_at_least_args(["--data_files", "scripts/wheel_name;tests/py_wheel/source_name"])
+    action.contains_at_least_inputs(["tests/py_wheel/source_name"])
+
+_tests.append(_test_data)
+
+def _test_data_bad_path(name):
+    rt_util.helper_target(
+        py_wheel,
+        name = name + "_data",
+        distribution = "mydist_" + name,
+        version = "0.0.0",
+        data_files = {
+            "source_name": "unsupported_path/wheel_name",
+        },
+    )
+    analysis_test(
+        name = name,
+        impl = _test_data_bad_path_impl,
+        target = name + "_data",
+        expect_failure = True,
+    )
+
+def _test_data_bad_path_impl(env, target):
+    env.expect.that_target(target).failures().contains_predicate(
+        matching.str_matches("target data file must start with"),
+    )
+
+_tests.append(_test_data_bad_path)
+
+def _test_data_bad_path_but_right_prefix(name):
+    rt_util.helper_target(
+        py_wheel,
+        name = name + "_data",
+        distribution = "mydist_" + name,
+        version = "0.0.0",
+        data_files = {
+            "source_name": "scripts2/wheel_name",
+        },
+    )
+    analysis_test(
+        name = name,
+        impl = _test_data_bad_path_but_right_prefix_impl,
+        target = name + "_data",
+        expect_failure = True,
+    )
+
+def _test_data_bad_path_but_right_prefix_impl(env, target):
+    env.expect.that_target(target).failures().contains_predicate(
+        matching.str_matches("target data file must start with"),
+    )
+
+_tests.append(_test_data_bad_path_but_right_prefix)
+
 def _test_content_type_from_attr(name):
     rt_util.helper_target(
         py_wheel,
diff --git a/tools/wheelmaker.py b/tools/wheelmaker.py
index 26153f6..8fa3e02 100644
--- a/tools/wheelmaker.py
+++ b/tools/wheelmaker.py
@@ -102,12 +102,13 @@
         filename,
         *,
         mode,
-        distinfo_dir: str | Path,
+        distribution_prefix: str,
         strip_path_prefixes=None,
         compression=zipfile.ZIP_DEFLATED,
         **kwargs,
     ):
-        self._distinfo_dir: str = Path(distinfo_dir).name
+        self._distribution_prefix = distribution_prefix
+
         self._strip_path_prefixes = strip_path_prefixes or []
         # Entries for the RECORD file as (filename, hash, size) tuples.
         self._record = []
@@ -115,7 +116,10 @@
         super().__init__(filename, mode=mode, compression=compression, **kwargs)
 
     def distinfo_path(self, basename):
-        return f"{self._distinfo_dir}/{basename}"
+        return f"{self._distribution_prefix}.dist-info/{basename}"
+
+    def data_path(self, basename):
+        return f"{self._distribution_prefix}.data/{basename}"
 
     def add_file(self, package_filename, real_filename):
         """Add given file to the distribution."""
@@ -123,8 +127,8 @@
         def arcname_from(name):
             # Always use unix path separators.
             normalized_arcname = name.replace(os.path.sep, "/")
-            # Don't manipulate names filenames in the .distinfo directory.
-            if normalized_arcname.startswith(self._distinfo_dir):
+            # Don't manipulate names filenames in the .distinfo or .data directories.
+            if normalized_arcname.startswith(self._distribution_prefix):
                 return normalized_arcname
             for prefix in self._strip_path_prefixes:
                 if normalized_arcname.startswith(prefix):
@@ -237,11 +241,9 @@
         self._wheelname_fragment_distribution_name = escape_filename_distribution_name(
             self._name
         )
-        self._distinfo_dir = (
-            self._wheelname_fragment_distribution_name
-            + "-"
-            + self._version
-            + ".dist-info/"
+
+        self._distribution_prefix = (
+            self._wheelname_fragment_distribution_name + "-" + self._version
         )
 
         self._whlfile = None
@@ -250,7 +252,7 @@
         self._whlfile = _WhlFile(
             self.filename(),
             mode="w",
-            distinfo_dir=self._distinfo_dir,
+            distribution_prefix=self._distribution_prefix,
             strip_path_prefixes=self._strip_path_prefixes,
         )
         return self
@@ -280,6 +282,9 @@
     def distinfo_path(self, basename):
         return self._whlfile.distinfo_path(basename)
 
+    def data_path(self, basename):
+        return self._whlfile.data_path(basename)
+
     def add_file(self, package_filename, real_filename):
         """Add given file to the distribution."""
         self._whlfile.add_file(package_filename, real_filename)
@@ -436,6 +441,12 @@
         help="'filename;real_path' pairs listing extra files to include in"
         "dist-info directory. Can be supplied multiple times.",
     )
+    contents_group.add_argument(
+        "--data_files",
+        action="append",
+        help="'filename;real_path' pairs listing data files to include in"
+        "data directory. Can be supplied multiple times.",
+    )
 
     build_group = parser.add_argument_group("Building requirements")
     build_group.add_argument(
@@ -452,25 +463,25 @@
     return parser.parse_args(sys.argv[1:])
 
 
+def _parse_file_pairs(content: List[str]) -> List[List[str]]:
+    """
+    Parse ; delimited lists of files into a 2D list.
+    """
+    return [i.split(";", maxsplit=1) for i in content or []]
+
+
 def main() -> None:
     arguments = parse_args()
 
-    if arguments.input_file:
-        input_files = [i.split(";") for i in arguments.input_file]
-    else:
-        input_files = []
+    input_files = _parse_file_pairs(arguments.input_file)
+    extra_distinfo_file = _parse_file_pairs(arguments.extra_distinfo_file)
+    data_files = _parse_file_pairs(arguments.data_files)
 
-    if arguments.extra_distinfo_file:
-        extra_distinfo_file = [i.split(";") for i in arguments.extra_distinfo_file]
-    else:
-        extra_distinfo_file = []
-
-    if arguments.input_file_list:
-        for input_file in arguments.input_file_list:
-            with open(input_file) as _file:
-                input_file_list = _file.read().splitlines()
-            for _input_file in input_file_list:
-                input_files.append(_input_file.split(";"))
+    for input_file in arguments.input_file_list:
+        with open(input_file) as _file:
+            input_file_list = _file.read().splitlines()
+        for _input_file in input_file_list:
+            input_files.append(_input_file.split(";"))
 
     all_files = get_files_to_package(input_files)
     # Sort the files for reproducible order in the archive.
@@ -570,6 +581,8 @@
             )
 
         # Sort the files for reproducible order in the archive.
+        for filename, real_path in sorted(data_files):
+            maker.add_file(maker.data_path(filename), real_path)
         for filename, real_path in sorted(extra_distinfo_file):
             maker.add_file(maker.distinfo_path(filename), real_path)