feat(bzlmod): support patching 'whl' distributions (#1393)

Previously, users had to rely on patching the actual wheel files and
uploading them as different versions to internal artifact stores if they
needed to modify the wheel dependencies. This is very common when
breaking dependency cycles in `pytorch` or `apache-airflow` packages.
With this feature we can support patching external PyPI dependencies via
the `pip.override` tag class to fix package dependencies and/or a broken
`RECORD` metadata file.

Overall design:
* Split the `whl_installer` CLI into two parts - downloading and
extracting.
  Merged in #1487.
* Add a Starlark function which extracts the downloaded wheel, applies
patches,
  and repackages the wheel (so that the extraction part works as before).
* Add an `override` tag_class to the `pip` extension and allow users to
pass patches
  to be applied to specific wheel files.
* Only the root module is allowed to apply patches. This is to avoid far
away modules
modifying the code of other modules and conflicts between modules and
their patches.

Patches have to be in `unified-diff` format.

Related #1076, #1166, #1120
diff --git a/.bazelrc b/.bazelrc
index 67f2973..2935f27 100644
--- a/.bazelrc
+++ b/.bazelrc
@@ -3,8 +3,8 @@
 # This lets us glob() up all the files inside the examples to make them inputs to tests
 # (Note, we cannot use `common --deleted_packages` because the bazel version command doesn't support it)
 # To update these lines, run tools/bazel_integration_test/update_deleted_packages.sh
-build --deleted_packages=examples/build_file_generation,examples/build_file_generation/random_number_generator,examples/bzlmod,examples/bzlmod_build_file_generation,examples/bzlmod_build_file_generation/other_module/other_module/pkg,examples/bzlmod_build_file_generation/runfiles,examples/bzlmod/entry_points,examples/bzlmod/entry_points/tests,examples/bzlmod/libs/my_lib,examples/bzlmod/other_module,examples/bzlmod/other_module/other_module/pkg,examples/bzlmod/runfiles,examples/bzlmod/tests,examples/bzlmod/tests/other_module,examples/bzlmod/whl_mods,examples/multi_python_versions/libs/my_lib,examples/multi_python_versions/requirements,examples/multi_python_versions/tests,examples/pip_parse,examples/pip_parse_vendored,examples/pip_repository_annotations,examples/py_proto_library,examples/py_proto_library/example.com/proto,tests/compile_pip_requirements,tests/compile_pip_requirements_test_from_external_workspace,tests/ignore_root_user_error,tests/pip_repository_entry_points
-query --deleted_packages=examples/build_file_generation,examples/build_file_generation/random_number_generator,examples/bzlmod,examples/bzlmod_build_file_generation,examples/bzlmod_build_file_generation/other_module/other_module/pkg,examples/bzlmod_build_file_generation/runfiles,examples/bzlmod/entry_points,examples/bzlmod/entry_points/tests,examples/bzlmod/libs/my_lib,examples/bzlmod/other_module,examples/bzlmod/other_module/other_module/pkg,examples/bzlmod/runfiles,examples/bzlmod/tests,examples/bzlmod/tests/other_module,examples/bzlmod/whl_mods,examples/multi_python_versions/libs/my_lib,examples/multi_python_versions/requirements,examples/multi_python_versions/tests,examples/pip_parse,examples/pip_parse_vendored,examples/pip_repository_annotations,examples/py_proto_library,examples/py_proto_library/example.com/proto,tests/compile_pip_requirements,tests/compile_pip_requirements_test_from_external_workspace,tests/ignore_root_user_error,tests/pip_repository_entry_points
+build --deleted_packages=examples/build_file_generation,examples/build_file_generation/random_number_generator,examples/bzlmod,examples/bzlmod_build_file_generation,examples/bzlmod_build_file_generation/other_module/other_module/pkg,examples/bzlmod_build_file_generation/runfiles,examples/bzlmod/entry_points,examples/bzlmod/entry_points/tests,examples/bzlmod/libs/my_lib,examples/bzlmod/other_module,examples/bzlmod/other_module/other_module/pkg,examples/bzlmod/patches,examples/bzlmod/runfiles,examples/bzlmod/tests,examples/bzlmod/tests/other_module,examples/bzlmod/whl_mods,examples/multi_python_versions/libs/my_lib,examples/multi_python_versions/requirements,examples/multi_python_versions/tests,examples/pip_parse,examples/pip_parse_vendored,examples/pip_repository_annotations,examples/py_proto_library,examples/py_proto_library/example.com/proto,tests/compile_pip_requirements,tests/compile_pip_requirements_test_from_external_workspace,tests/ignore_root_user_error,tests/pip_repository_entry_points
+query --deleted_packages=examples/build_file_generation,examples/build_file_generation/random_number_generator,examples/bzlmod,examples/bzlmod_build_file_generation,examples/bzlmod_build_file_generation/other_module/other_module/pkg,examples/bzlmod_build_file_generation/runfiles,examples/bzlmod/entry_points,examples/bzlmod/entry_points/tests,examples/bzlmod/libs/my_lib,examples/bzlmod/other_module,examples/bzlmod/other_module/other_module/pkg,examples/bzlmod/patches,examples/bzlmod/runfiles,examples/bzlmod/tests,examples/bzlmod/tests/other_module,examples/bzlmod/whl_mods,examples/multi_python_versions/libs/my_lib,examples/multi_python_versions/requirements,examples/multi_python_versions/tests,examples/pip_parse,examples/pip_parse_vendored,examples/pip_repository_annotations,examples/py_proto_library,examples/py_proto_library/example.com/proto,tests/compile_pip_requirements,tests/compile_pip_requirements_test_from_external_workspace,tests/ignore_root_user_error,tests/pip_repository_entry_points
 
 test --test_output=errors
 
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5540bae..62a372d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -46,6 +46,11 @@
 * (py_wheel) Produce deterministic wheel files and make `RECORD` file entries
   follow the order of files written to the `.whl` archive.
 
+### Added
+
+* (bzlmod) Added `.whl` patching support via `patches` and `patch_strip`
+  arguments to the new `pip.override` tag class.
+
 ## [0.26.0] - 2023-10-06
 
 ### Changed
diff --git a/examples/bzlmod/MODULE.bazel b/examples/bzlmod/MODULE.bazel
index 0d1c7a7..5824280 100644
--- a/examples/bzlmod/MODULE.bazel
+++ b/examples/bzlmod/MODULE.bazel
@@ -113,6 +113,19 @@
         "@whl_mods_hub//:wheel.json": "wheel",
     },
 )
+
+# You can add patches that will be applied on the whl contents.
+#
+# The patches have to be in the unified-diff format.
+pip.override(
+    file = "requests-2.25.1-py2.py3-none-any.whl",
+    patch_strip = 1,
+    patches = [
+        "@//patches:empty.patch",
+        "@//patches:requests_metadata.patch",
+        "@//patches:requests_record.patch",
+    ],
+)
 use_repo(pip, "pip")
 
 bazel_dep(name = "other_module", version = "", repo_name = "our_other_module")
diff --git a/examples/bzlmod/patches/BUILD.bazel b/examples/bzlmod/patches/BUILD.bazel
new file mode 100644
index 0000000..ed2af79
--- /dev/null
+++ b/examples/bzlmod/patches/BUILD.bazel
@@ -0,0 +1,4 @@
+exports_files(
+    srcs = glob(["*.patch"]),
+    visibility = ["//visibility:public"],
+)
diff --git a/examples/bzlmod/patches/empty.patch b/examples/bzlmod/patches/empty.patch
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/examples/bzlmod/patches/empty.patch
diff --git a/examples/bzlmod/patches/requests_metadata.patch b/examples/bzlmod/patches/requests_metadata.patch
new file mode 100644
index 0000000..3a52410
--- /dev/null
+++ b/examples/bzlmod/patches/requests_metadata.patch
@@ -0,0 +1,12 @@
+diff --unified --recursive a/requests-2.25.1.dist-info/METADATA b/requests-2.25.1.dist-info/METADATA
+--- a/requests-2.25.1.dist-info/METADATA	2020-12-16 19:37:50.000000000 +0900
++++ b/requests-2.25.1.dist-info/METADATA	2023-09-30 20:31:50.079863410 +0900
+@@ -1,7 +1,7 @@
+ Metadata-Version: 2.1
+ Name: requests
+ Version: 2.25.1
+-Summary: Python HTTP for Humans.
++Summary: Python HTTP for Humans. Patched.
+ Home-page: https://requests.readthedocs.io
+ Author: Kenneth Reitz
+ Author-email: me@kennethreitz.org
diff --git a/examples/bzlmod/patches/requests_record.patch b/examples/bzlmod/patches/requests_record.patch
new file mode 100644
index 0000000..0167510
--- /dev/null
+++ b/examples/bzlmod/patches/requests_record.patch
@@ -0,0 +1,11 @@
+--- a/requests-2.25.1.dist-info/RECORD
++++ b/requests-2.25.1.dist-info/RECORD
+@@ -17,7 +17,7 @@
+ requests/structures.py,sha256=msAtr9mq1JxHd-JRyiILfdFlpbJwvvFuP3rfUQT_QxE,3005
+ requests/utils.py,sha256=_K9AgkN6efPe-a-zgZurXzds5PBC0CzDkyjAE2oCQFQ,30529
+ requests-2.25.1.dist-info/LICENSE,sha256=CeipvOyAZxBGUsFoaFqwkx54aPnIKEtm9a5u2uXxEws,10142
+-requests-2.25.1.dist-info/METADATA,sha256=RuNh38uN0IMsRT3OwaTNB_WyGx6RMwwQoMwujXfkUVM,4168
++requests-2.25.1.dist-info/METADATA,sha256=fRSAA0u0Bi0heD4zYq91wdNUTJlbzhK6_iDOcRRNDx4,4177
+ requests-2.25.1.dist-info/WHEEL,sha256=Z-nyYpwrcSqxfdux5Mbn_DQ525iP7J2DG3JgGvOYyTQ,110
+ requests-2.25.1.dist-info/top_level.txt,sha256=fMSVmHfb5rbGOo6xv-O_tUX6j-WyixssE-SnwcDRxNQ,9
+ requests-2.25.1.dist-info/RECORD,,
diff --git a/examples/bzlmod/whl_mods/appended_build_content.BUILD b/examples/bzlmod/whl_mods/appended_build_content.BUILD
index 7a9f3a2..0ca118d 100644
--- a/examples/bzlmod/whl_mods/appended_build_content.BUILD
+++ b/examples/bzlmod/whl_mods/appended_build_content.BUILD
@@ -5,3 +5,12 @@
     out = "generated_file.txt",
     content = ["Hello world from requests"],
 )
+
+filegroup(
+    name = "whl_orig",
+    srcs = glob(
+        ["*.whl"],
+        allow_empty = False,
+        exclude = ["*-patched-*.whl"],
+    ),
+)
diff --git a/python/pip_install/BUILD.bazel b/python/pip_install/BUILD.bazel
index 4159905..4304fb5 100644
--- a/python/pip_install/BUILD.bazel
+++ b/python/pip_install/BUILD.bazel
@@ -30,6 +30,7 @@
         "//python/pip_install/private:srcs_bzl",
         "//python/private:bzlmod_enabled_bzl",
         "//python/private:normalize_name_bzl",
+        "//python/private:patch_whl_bzl",
         "//python/private:render_pkg_aliases_bzl",
         "//python/private:toolchains_repo_bzl",
         "//python/private:which_bzl",
@@ -97,6 +98,8 @@
     srcs = [
         "//python/pip_install/tools/dependency_resolver:py_srcs",
         "//python/pip_install/tools/wheel_installer:py_srcs",
+        "//python/private:repack_whl.py",
+        "//tools:wheelmaker.py",
     ],
     visibility = ["//python/pip_install/private:__pkg__"],
 )
diff --git a/python/pip_install/pip_repository.bzl b/python/pip_install/pip_repository.bzl
index 207c47a..f9d3676 100644
--- a/python/pip_install/pip_repository.bzl
+++ b/python/pip_install/pip_repository.bzl
@@ -22,6 +22,7 @@
 load("//python/pip_install/private:srcs.bzl", "PIP_INSTALL_PY_SRCS")
 load("//python/private:bzlmod_enabled.bzl", "BZLMOD_ENABLED")
 load("//python/private:normalize_name.bzl", "normalize_name")
+load("//python/private:patch_whl.bzl", "patch_whl")
 load("//python/private:render_pkg_aliases.bzl", "render_pkg_aliases")
 load("//python/private:toolchains_repo.bzl", "get_host_os_arch")
 load("//python/private:which.bzl", "which_with_fail")
@@ -44,6 +45,7 @@
 
     Args:
         rctx: Handle to the repository_context.
+
     Returns: String of the PYTHONPATH.
     """
 
@@ -542,6 +544,22 @@
     if not rctx.delete("whl_file.json"):
         fail("failed to delete the whl_file.json file")
 
+    if rctx.attr.whl_patches:
+        patches = {}
+        for patch_file, json_args in rctx.attr.whl_patches.items():
+            patch_dst = struct(**json.decode(json_args))
+            if whl_path.basename in patch_dst.whls:
+                patches[patch_file] = patch_dst.patch_strip
+
+        whl_path = patch_whl(
+            rctx,
+            python_interpreter = python_interpreter,
+            whl_path = whl_path,
+            patches = patches,
+            quiet = rctx.attr.quiet,
+            timeout = rctx.attr.timeout,
+        )
+
     result = rctx.execute(
         args + ["--whl-file", whl_path],
         environment = environment,
@@ -635,6 +653,13 @@
         mandatory = True,
         doc = "Python requirement string describing the package to make available",
     ),
+    "whl_patches": attr.label_keyed_string_dict(
+        doc = """A label-keyed-string dict that has
+            json.encode(struct(patch_strip = N, whls = [whl_file])) as values. This
+            is to maintain flexibility and correct bzlmod extension interface
+            until we have a better way to define whl_library and move whl
+            patching to a separate place. INTERNAL USE ONLY.""",
+    ),
     "_python_path_entries": attr.label_list(
         # Get the root directory of these rules and keep them as a default attribute
         # in order to avoid unnecessary repository fetching restarts.
diff --git a/python/pip_install/private/srcs.bzl b/python/pip_install/private/srcs.bzl
index e342d90..e92e49f 100644
--- a/python/pip_install/private/srcs.bzl
+++ b/python/pip_install/private/srcs.bzl
@@ -13,4 +13,6 @@
     "@rules_python//python/pip_install/tools/wheel_installer:namespace_pkgs.py",
     "@rules_python//python/pip_install/tools/wheel_installer:wheel.py",
     "@rules_python//python/pip_install/tools/wheel_installer:wheel_installer.py",
+    "@rules_python//python/private:repack_whl.py",
+    "@rules_python//tools:wheelmaker.py",
 ]
diff --git a/python/private/BUILD.bazel b/python/private/BUILD.bazel
index 4388594..d5b170e 100644
--- a/python/private/BUILD.bazel
+++ b/python/private/BUILD.bazel
@@ -89,6 +89,17 @@
 )
 
 bzl_library(
+    name = "patch_whl_bzl",
+    srcs = ["patch_whl.bzl"],
+    deps = [":parse_whl_name_bzl"],
+)
+
+bzl_library(
+    name = "parse_whl_name_bzl",
+    srcs = ["parse_whl_name.bzl"],
+)
+
+bzl_library(
     name = "py_cc_toolchain_bzl",
     srcs = [
         "py_cc_toolchain_macro.bzl",
@@ -239,13 +250,14 @@
 exports_files(
     [
         "coverage.patch",
+        "repack_whl.py",
+        "py_cc_toolchain_rule.bzl",
         "py_package.bzl",
         "py_wheel.bzl",
         "py_wheel_normalize_pep440.bzl",
         "reexports.bzl",
         "stamp.bzl",
         "util.bzl",
-        "py_cc_toolchain_rule.bzl",
     ],
     visibility = ["//:__subpackages__"],
 )
diff --git a/python/private/bzlmod/pip.bzl b/python/private/bzlmod/pip.bzl
index 3630648..166f213 100644
--- a/python/private/bzlmod/pip.bzl
+++ b/python/private/bzlmod/pip.bzl
@@ -26,6 +26,7 @@
 load("//python/pip_install:requirements_parser.bzl", parse_requirements = "parse")
 load("//python/private:full_version.bzl", "full_version")
 load("//python/private:normalize_name.bzl", "normalize_name")
+load("//python/private:parse_whl_name.bzl", "parse_whl_name")
 load("//python/private:version_label.bzl", "version_label")
 load(":pip_repository.bzl", "pip_repository")
 
@@ -78,7 +79,7 @@
             whl_mods = whl_mods,
         )
 
-def _create_whl_repos(module_ctx, pip_attr, whl_map):
+def _create_whl_repos(module_ctx, pip_attr, whl_map, whl_overrides):
     python_interpreter_target = pip_attr.python_interpreter_target
 
     # if we do not have the python_interpreter set in the attributes
@@ -131,6 +132,10 @@
             repo = pip_name,
             repo_prefix = pip_name + "_",
             annotation = annotation,
+            whl_patches = {
+                p: json.encode(args)
+                for p, args in whl_overrides.get(whl_name, {}).items()
+            },
             python_interpreter = pip_attr.python_interpreter,
             python_interpreter_target = python_interpreter_target,
             quiet = pip_attr.quiet,
@@ -217,6 +222,35 @@
     # Build all of the wheel modifications if the tag class is called.
     _whl_mods_impl(module_ctx)
 
+    _overriden_whl_set = {}
+    whl_overrides = {}
+
+    for module in module_ctx.modules:
+        for attr in module.tags.override:
+            if not module.is_root:
+                fail("overrides are only supported in root modules")
+
+            if not attr.file.endswith(".whl"):
+                fail("Only whl overrides are supported at this time")
+
+            whl_name = normalize_name(parse_whl_name(attr.file).distribution)
+
+            if attr.file in _overriden_whl_set:
+                fail("Duplicate module overrides for '{}'".format(attr.file))
+            _overriden_whl_set[attr.file] = None
+
+            for patch in attr.patches:
+                if whl_name not in whl_overrides:
+                    whl_overrides[whl_name] = {}
+
+                if patch not in whl_overrides[whl_name]:
+                    whl_overrides[whl_name][patch] = struct(
+                        patch_strip = attr.patch_strip,
+                        whls = [],
+                    )
+
+                whl_overrides[whl_name][patch].whls.append(attr.file)
+
     # Used to track all the different pip hubs and the spoke pip Python
     # versions.
     pip_hub_map = {}
@@ -261,7 +295,7 @@
             else:
                 pip_hub_map[pip_attr.hub_name].python_versions.append(pip_attr.python_version)
 
-            _create_whl_repos(module_ctx, pip_attr, hub_whl_map)
+            _create_whl_repos(module_ctx, pip_attr, hub_whl_map, whl_overrides)
 
     for hub_name, whl_map in hub_whl_map.items():
         pip_repository(
@@ -381,6 +415,35 @@
     }
     return attrs
 
+# NOTE: the naming of 'override' is taken from the bzlmod native
+# 'archive_override', 'git_override' bzlmod functions.
+_override_tag = tag_class(
+    attrs = {
+        "file": attr.string(
+            doc = """\
+The Python distribution file name which needs to be patched. This will be
+applied to all repositories that setup this distribution via the pip.parse tag
+class.""",
+            mandatory = True,
+        ),
+        "patch_strip": attr.int(
+            default = 0,
+            doc = """\
+The number of leading path segments to be stripped from the file name in the
+patches.""",
+        ),
+        "patches": attr.label_list(
+            doc = """\
+A list of patches to apply to the wheel contents *before* the wheel is repacked
+and extracted by 'whl_library'. The patches must be in the unified-diff format.""",
+            mandatory = True,
+        ),
+    },
+    doc = """\
+Apply any overrides (e.g. patches) to a given Python distribution defined by
+other tags in this extension.""",
+)
+
 def _extension_extra_args():
     args = {}
 
@@ -412,6 +475,7 @@
 """,
     implementation = _pip_impl,
     tag_classes = {
+        "override": _override_tag,
         "parse": tag_class(
             attrs = _pip_parse_ext_attrs(),
             doc = """\
diff --git a/python/private/parse_whl_name.bzl b/python/private/parse_whl_name.bzl
new file mode 100644
index 0000000..9c7866e
--- /dev/null
+++ b/python/private/parse_whl_name.bzl
@@ -0,0 +1,72 @@
+# Copyright 2023 The Bazel Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+A starlark implementation of a Wheel filename parsing.
+"""
+
+def parse_whl_name(file):
+    """Parse whl file name into a struct of constituents.
+
+    Args:
+        file (str): The file name of a wheel
+
+    Returns:
+        A struct with the following attributes:
+            distribution: the distribution name
+            version: the version of the distribution
+            build_tag: the build tag for the wheel. None if there was no
+              build_tag in the given string.
+            python_tag: the python tag for the wheel
+            abi_tag: the ABI tag for the wheel
+            platform_tag: the platform tag
+    """
+    if not file.endswith(".whl"):
+        fail("not a valid wheel: {}".format(file))
+
+    file = file[:-len(".whl")]
+
+    # Parse the following
+    # {distribution}-{version}(-{build tag})?-{python tag}-{abi tag}-{platform tag}.whl
+    #
+    # For more info, see the following standards:
+    # https://packaging.python.org/en/latest/specifications/binary-distribution-format/#binary-distribution-format
+    # https://packaging.python.org/en/latest/specifications/platform-compatibility-tags/
+    head, _, platform_tag = file.rpartition("-")
+    if not platform_tag:
+        fail("cannot extract platform tag from the whl filename: {}".format(file))
+    head, _, abi_tag = head.rpartition("-")
+    if not abi_tag:
+        fail("cannot extract abi tag from the whl filename: {}".format(file))
+    head, _, python_tag = head.rpartition("-")
+    if not python_tag:
+        fail("cannot extract python tag from the whl filename: {}".format(file))
+    head, _, version = head.rpartition("-")
+    if not version:
+        fail("cannot extract version from the whl filename: {}".format(file))
+    distribution, _, maybe_version = head.partition("-")
+
+    if maybe_version:
+        version, build_tag = maybe_version, version
+    else:
+        build_tag = None
+
+    return struct(
+        distribution = distribution,
+        version = version,
+        build_tag = build_tag,
+        python_tag = python_tag,
+        abi_tag = abi_tag,
+        platform_tag = platform_tag,
+    )
diff --git a/python/private/patch_whl.bzl b/python/private/patch_whl.bzl
new file mode 100644
index 0000000..24b8a0b
--- /dev/null
+++ b/python/private/patch_whl.bzl
@@ -0,0 +1,100 @@
+# Copyright 2023 The Bazel Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""A small utility to patch a file in the repository context and repackage it using a Python interpreter
+
+Note, because we are patching a wheel file and we need a new RECORD file, this
+function will print a diff of the RECORD and will ask the user to include a
+RECORD patch in their patches that they maintain. This is to ensure that we can
+satisfy the following usecases:
+* Patch an invalid RECORD file.
+* Patch files within a wheel.
+
+If we were silently regenerating the RECORD file, we may be vulnerable to supply chain
+attacks (it is a very small chance) and keeping the RECORD patches next to the
+other patches ensures that the users have overview on exactly what has changed
+within the wheel.
+"""
+
+load("//python/private:parse_whl_name.bzl", "parse_whl_name")
+
+_rules_python_root = Label("//:BUILD.bazel")
+
+def patch_whl(rctx, *, python_interpreter, whl_path, patches, **kwargs):
+    """Patch a whl file and repack it to ensure that the RECORD metadata stays correct.
+
+    Args:
+        rctx: repository_ctx
+        python_interpreter: the python interpreter to use.
+        whl_path: The whl file name to be patched.
+        patches: a label-keyed-int dict that has the patch files as keys and
+            the patch_strip as the value.
+        **kwargs: extras passed to rctx.execute.
+
+    Returns:
+        value of the repackaging action.
+    """
+
+    # extract files into the current directory for patching as rctx.patch
+    # does not support patching in another directory.
+    whl_input = rctx.path(whl_path)
+
+    # symlink to a zip file to use bazel's extract so that we can use bazel's
+    # repository_ctx patch implementation. The whl file may be in a different
+    # external repository.
+    whl_file_zip = whl_input.basename + ".zip"
+    rctx.symlink(whl_input, whl_file_zip)
+    rctx.extract(whl_file_zip)
+    if not rctx.delete(whl_file_zip):
+        fail("Failed to remove the symlink after extracting")
+
+    for patch_file, patch_strip in patches.items():
+        rctx.patch(patch_file, strip = patch_strip)
+
+    # Generate an output filename, which we will be returning
+    parsed_whl = parse_whl_name(whl_input.basename)
+    whl_patched = "{}.whl".format("-".join([
+        parsed_whl.distribution,
+        parsed_whl.version,
+        (parsed_whl.build_tag or "") + "patched",
+        parsed_whl.python_tag,
+        parsed_whl.abi_tag,
+        parsed_whl.platform_tag,
+    ]))
+
+    result = rctx.execute(
+        [
+            python_interpreter,
+            "-m",
+            "python.private.repack_whl",
+            whl_input,
+            whl_patched,
+        ],
+        environment = {
+            "PYTHONPATH": str(rctx.path(_rules_python_root).dirname),
+        },
+        **kwargs
+    )
+
+    if result.return_code:
+        fail(
+            "repackaging .whl {whl} failed: with exit code '{return_code}':\n{stdout}\n\nstderr:\n{stderr}".format(
+                whl = whl_input.basename,
+                stdout = result.stdout,
+                stderr = result.stderr,
+                return_code = result.return_code,
+            ),
+        )
+
+    return rctx.path(whl_patched)
diff --git a/python/private/repack_whl.py b/python/private/repack_whl.py
new file mode 100644
index 0000000..074e30d
--- /dev/null
+++ b/python/private/repack_whl.py
@@ -0,0 +1,175 @@
+# Copyright 2023 The Bazel Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Regenerate a whl file after patching and cleanup the patched contents.
+
+This script will take contents of the current directory and create a new wheel
+out of it and will remove all files that were written to the wheel.
+"""
+
+from __future__ import annotations
+
+import argparse
+import difflib
+import logging
+import pathlib
+import sys
+import tempfile
+
+from tools.wheelmaker import _WhlFile
+
+# NOTE: Implement the following matching of what goes into the RECORD
+# https://peps.python.org/pep-0491/#the-dist-info-directory
+_EXCLUDES = [
+    "RECORD",
+    "INSTALLER",
+    "RECORD.jws",
+    "RECORD.p7s",
+    "REQUESTED",
+]
+
+_DISTINFO = "dist-info"
+
+
+def _unidiff_output(expected, actual, record):
+    """
+    Helper function. Returns a string containing the unified diff of two
+    multiline strings.
+    """
+
+    expected = expected.splitlines(1)
+    actual = actual.splitlines(1)
+
+    diff = difflib.unified_diff(
+        expected, actual, fromfile=f"a/{record}", tofile=f"b/{record}"
+    )
+
+    return "".join(diff)
+
+
+def _files_to_pack(dir: pathlib.Path, want_record: str) -> list[pathlib.Path]:
+    """Check that the RECORD file entries are correct and print a unified diff on failure."""
+
+    # First get existing files by using the RECORD file
+    got_files = []
+    got_distinfos = []
+    for line in want_record.splitlines():
+        rec, _, _ = line.partition(",")
+        path = dir / rec
+
+        if not path.exists():
+            # skip files that do not exist as they won't be present in the final
+            # RECORD file.
+            continue
+
+        if not path.parent.name.endswith(_DISTINFO):
+            got_files.append(path)
+        elif path.name not in _EXCLUDES:
+            got_distinfos.append(path)
+
+    # Then get extra files present in the directory but not in the RECORD file
+    extra_files = []
+    extra_distinfos = []
+    for path in dir.rglob("*"):
+        if path.is_dir():
+            continue
+
+        elif path.parent.name.endswith(_DISTINFO):
+            if path.name in _EXCLUDES:
+                # NOTE: we implement the following matching of what goes into the RECORD
+                # https://peps.python.org/pep-0491/#the-dist-info-directory
+                continue
+            elif path not in got_distinfos:
+                extra_distinfos.append(path)
+
+        elif path not in got_files:
+            extra_files.append(path)
+
+    # sort the extra files for reproducibility
+    extra_files.sort()
+    extra_distinfos.sort()
+
+    # This order ensures that the structure of the RECORD file is always the
+    # same and ensures smaller patchsets to the RECORD file in general
+    return got_files + extra_files + got_distinfos + extra_distinfos
+
+
+def main(sys_argv):
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument(
+        "whl_path",
+        type=pathlib.Path,
+        help="The original wheel file that we have patched.",
+    )
+    parser.add_argument(
+        "output",
+        type=pathlib.Path,
+        help="The output path that we are going to write a new file to.",
+    )
+    args = parser.parse_args(sys_argv)
+
+    cwd = pathlib.Path.cwd()
+    logging.debug("=" * 80)
+    logging.debug("Repackaging the wheel")
+    logging.debug("=" * 80)
+
+    with tempfile.TemporaryDirectory(dir=cwd) as tmpdir:
+        patched_wheel_dir = cwd / tmpdir
+        logging.debug(f"Created a tmpdir: {patched_wheel_dir}")
+
+        excludes = [args.whl_path, patched_wheel_dir]
+
+        logging.debug("Moving whl contents to the newly created tmpdir")
+        for p in cwd.glob("*"):
+            if p in excludes:
+                logging.debug(f"Ignoring: {p}")
+                continue
+
+            rel_path = p.relative_to(cwd)
+            dst = p.rename(patched_wheel_dir / rel_path)
+            logging.debug(f"mv {p} -> {dst}")
+
+        distinfo_dir = next(iter(patched_wheel_dir.glob("*dist-info")))
+        logging.debug(f"Found dist-info dir: {distinfo_dir}")
+        record_path = distinfo_dir / "RECORD"
+        record_contents = record_path.read_text() if record_path.exists() else ""
+
+        with _WhlFile(args.output, mode="w", distinfo_dir=distinfo_dir) as out:
+            for p in _files_to_pack(patched_wheel_dir, record_contents):
+                rel_path = p.relative_to(patched_wheel_dir)
+                out.add_file(str(rel_path), p)
+
+            logging.debug("Writing RECORD file")
+            got_record = out.add_recordfile().decode("utf-8", "surrogateescape")
+
+    if got_record == record_contents:
+        logging.info(f"Created a whl file: {args.output}")
+        return
+
+    record_diff = _unidiff_output(
+        record_contents,
+        got_record,
+        out.distinfo_path("RECORD"),
+    )
+    logging.error(f"Please also patch the RECORD file with:\n{record_diff}")
+    return 1
+
+
+if __name__ == "__main__":
+    logging.basicConfig(
+        format="%(module)s: %(levelname)s: %(message)s", level=logging.DEBUG
+    )
+
+    sys.exit(main(sys.argv[1:]))
diff --git a/tests/private/parse_whl_name/BUILD.bazel b/tests/private/parse_whl_name/BUILD.bazel
new file mode 100644
index 0000000..c2fb365
--- /dev/null
+++ b/tests/private/parse_whl_name/BUILD.bazel
@@ -0,0 +1,3 @@
+load(":parse_whl_name_tests.bzl", "parse_whl_name_test_suite")
+
+parse_whl_name_test_suite(name = "parse_whl_name_tests")
diff --git a/tests/private/parse_whl_name/parse_whl_name_tests.bzl b/tests/private/parse_whl_name/parse_whl_name_tests.bzl
new file mode 100644
index 0000000..c249f9f
--- /dev/null
+++ b/tests/private/parse_whl_name/parse_whl_name_tests.bzl
@@ -0,0 +1,72 @@
+# Copyright 2023 The Bazel Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Tests for parse_whl_name."""
+
+load("@rules_testing//lib:test_suite.bzl", "test_suite")
+load("//python/private:parse_whl_name.bzl", "parse_whl_name")  # buildifier: disable=bzl-visibility
+
+_tests = []  # each test function below registers itself here via _tests.append()
+
+def _test_simple(env):  # plain wheel name: no build tag present
+    parsed = parse_whl_name("foo-1.2.3-py3-none-any.whl")
+    env.expect.that_str(parsed.distribution).equals("foo")
+    env.expect.that_str(parsed.version).equals("1.2.3")
+    env.expect.that_str(parsed.build_tag).equals(None)
+    env.expect.that_str(parsed.python_tag).equals("py3")
+    env.expect.that_str(parsed.abi_tag).equals("none")
+    env.expect.that_str(parsed.platform_tag).equals("any")
+
+_tests.append(_test_simple)
+
+def _test_with_build_tag(env):  # build tag "9999" sits between version and python tag
+    parsed = parse_whl_name("foo-3.2.1-9999-py2.py3-none-any.whl")
+    env.expect.that_str(parsed.distribution).equals("foo")
+    env.expect.that_str(parsed.version).equals("3.2.1")
+    env.expect.that_str(parsed.build_tag).equals("9999")
+    env.expect.that_str(parsed.python_tag).equals("py2.py3")
+    env.expect.that_str(parsed.abi_tag).equals("none")
+    env.expect.that_str(parsed.platform_tag).equals("any")
+
+_tests.append(_test_with_build_tag)
+
+def _test_multiple_platforms(env):  # dotted platform tag is returned as one unsplit string
+    parsed = parse_whl_name("bar-3.2.1-py3-abi3-manylinux1.manylinux2.whl")
+    env.expect.that_str(parsed.distribution).equals("bar")
+    env.expect.that_str(parsed.version).equals("3.2.1")
+    env.expect.that_str(parsed.build_tag).equals(None)
+    env.expect.that_str(parsed.python_tag).equals("py3")
+    env.expect.that_str(parsed.abi_tag).equals("abi3")
+    env.expect.that_str(parsed.platform_tag).equals("manylinux1.manylinux2")
+
+_tests.append(_test_multiple_platforms)
+
+def _test_real_numpy_wheel(env):  # abi and platform tags contain underscores but no dashes
+    parsed = parse_whl_name("numpy-1.26.1-pp39-pypy39_pp73-macosx_10_9_x86_64.whl")
+    env.expect.that_str(parsed.distribution).equals("numpy")
+    env.expect.that_str(parsed.version).equals("1.26.1")
+    env.expect.that_str(parsed.build_tag).equals(None)
+    env.expect.that_str(parsed.python_tag).equals("pp39")
+    env.expect.that_str(parsed.abi_tag).equals("pypy39_pp73")
+    env.expect.that_str(parsed.platform_tag).equals("macosx_10_9_x86_64")
+
+_tests.append(_test_real_numpy_wheel)
+
+def parse_whl_name_test_suite(name):
+    """Declare the suite containing every test registered in `_tests`.
+
+    Args:
+        name: the name to give the test suite
+    """
+    test_suite(basic_tests = _tests, name = name)
diff --git a/tools/wheelmaker.py b/tools/wheelmaker.py
index b051564..66e86fb 100644
--- a/tools/wheelmaker.py
+++ b/tools/wheelmaker.py
@@ -12,6 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from __future__ import annotations
+
 import argparse
 import base64
 import hashlib
@@ -99,14 +101,12 @@
         filename,
         *,
         mode,
-        distinfo_dir,
+        distinfo_dir: str | Path,
         strip_path_prefixes=None,
         compression=zipfile.ZIP_DEFLATED,
         **kwargs,
     ):
-        self._distinfo_dir = distinfo_dir
-        if not self._distinfo_dir.endswith("/"):
-            self._distinfo_dir += "/"
+        self._distinfo_dir: str = Path(distinfo_dir).name
         self._strip_path_prefixes = strip_path_prefixes or []
         # Entries for the RECORD file as (filename, hash, size) tuples.
         self._record = []
@@ -114,7 +114,7 @@
         super().__init__(filename, mode=mode, compression=compression, **kwargs)
 
     def distinfo_path(self, basename):
-        return self._distinfo_dir + basename
+        return f"{self._distinfo_dir}/{basename}"  # _distinfo_dir is now a bare directory name, so the "/" is added here
 
     def add_file(self, package_filename, real_filename):
         """Add given file to the distribution."""
@@ -155,6 +155,7 @@
                     fdst.write(block)
                     hash.update(block)
                     size += len(block)
+
         self._add_to_record(arcname, self._serialize_digest(hash), size)
 
     def add_string(self, filename, contents):