refactor: move PyPI related extension and repository_rule setup to its own dir (#2003)

This is to ensure that future work is easier and it's clear where to add
tests.
Now all of the unit tests can be run by just `bazel test
//tests/pypi/...`.

Refactor summary:
- chore: add aignas to CODEOWNERS
- chore: add a new directory for storing PyPI related code
- move pypi_index_sources.bzl to private/pypi
- chore: move parse_requirements_txt to private/pypi
- move parse_whl_name to private/pypi
- move whl_target_platforms to private/pypi
- move parse_requirements to private/pypi
- move pip_repo_name to private/pypi
- remove unused file
- move pip_config_settings to private/pypi
- move pypi_index to pypi/private and rename
- move labels.bzl to private/pypi
- move generate_build_bazel to private/pypi
- move render_pkg_aliases.bzl to private/pypi
- move patch_whl.bzl to private/pypi
- Move envsubst and render_tests to top level of tests
- move pip_install_srcs to private/pypi
- split and move pip_install/pip_repository.bzl to private/pypi
- move the bzlmod extension to private/pypi
diff --git a/python/private/pypi/bzlmod.bzl b/python/private/pypi/bzlmod.bzl
new file mode 100644
index 0000000..e98208a
--- /dev/null
+++ b/python/private/pypi/bzlmod.bzl
@@ -0,0 +1,818 @@
+# Copyright 2023 The Bazel Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"pip module extension for use with bzlmod"
+
+load("@bazel_features//:features.bzl", "bazel_features")
+load("@pythons_hub//:interpreters.bzl", "DEFAULT_PYTHON_VERSION", "INTERPRETER_LABELS")
+load("//python/private:auth.bzl", "AUTH_ATTRS")
+load("//python/private:normalize_name.bzl", "normalize_name")
+load("//python/private:repo_utils.bzl", "repo_utils")
+load("//python/private:version_label.bzl", "version_label")
+load(":attrs.bzl", "use_isolated")
+load(":hub_repository.bzl", "hub_repository")
+load(":parse_requirements.bzl", "host_platform", "parse_requirements", "select_requirement")
+load(":parse_whl_name.bzl", "parse_whl_name")
+load(":pip_repository_attrs.bzl", "ATTRS")
+load(":render_pkg_aliases.bzl", "whl_alias")
+load(":simpleapi_download.bzl", "simpleapi_download")
+load(":whl_library.bzl", "whl_library")
+load(":whl_repo_name.bzl", "whl_repo_name")
+
+def _parse_version(version):
+    # Split off up to four dot-separated components, left to right. A missing
+    # component comes back as "" because `partition` pads with empty strings.
+    major, _, remainder = version.partition(".")
+    minor, _, remainder = remainder.partition(".")
+    patch, _, remainder = remainder.partition(".")
+    return struct(
+        # use semver vocabulary here
+        major = major,
+        minor = minor,
+        patch = patch,  # this is called `micro` in the Python interpreter versioning scheme
+        build = remainder.partition(".")[0],
+    )
+
+def _major_minor_version(version):
+    parsed = _parse_version(version)  # only the first two components are used
+    return "{}.{}".format(parsed.major, parsed.minor)
+
+def _whl_mods_impl(mctx):
+    """Implementation of the pip.whl_mods tag class.
+
+    Writes one JSON file per annotated wheel, grouped into one repo per hub.
+
+    Args:
+        mctx: the module extension context whose `whl_mods` tags are read.
+    """
+    mods_by_hub = {}
+    for mod in mctx.modules:
+        for tag in mod.tags.whl_mods:
+            hub_mods = mods_by_hub.setdefault(tag.hub_name, {})
+            if tag.whl_name in hub_mods:
+                # We cannot have the same wheel name in the same hub, as we
+                # will create the same JSON file name.
+                fail("""\
+Found same whl_name '{}' in the same hub '{}', please use a different hub_name.""".format(
+                    tag.whl_name,
+                    tag.hub_name,
+                ))
+            hub_mods[tag.whl_name] = tag
+
+    for hub_name, whl_maps in mods_by_hub.items():
+        # Encode each wheel's modifications as a JSON string so that they can
+        # be passed to the _whl_mods_repo rule, which writes them out as files.
+        encoded = {}
+        for whl_name, mods in whl_maps.items():
+            content_file = mods.additive_build_content_file
+            if content_file != None and mods.additive_build_content != "":
+                fail("""\
+You cannot use both the additive_build_content and additive_build_content_file arguments at the same time.
+""")
+
+            if content_file != None:
+                build_content = mctx.read(content_file)
+            else:
+                build_content = mods.additive_build_content
+
+            encoded[whl_name] = json.encode(struct(
+                additive_build_content = build_content,
+                copy_files = mods.copy_files,
+                copy_executables = mods.copy_executables,
+                data = mods.data,
+                data_exclude_glob = mods.data_exclude_glob,
+                srcs_exclude_glob = mods.srcs_exclude_glob,
+            ))
+
+        _whl_mods_repo(name = hub_name, whl_mods = encoded)
+
+def _create_whl_repos(module_ctx, pip_attr, whl_map, whl_overrides, group_map, simpleapi_cache):
+    """Create the `whl_library` spoke repos for a single `pip.parse` tag.
+
+    Args:
+        module_ctx: the module extension context.
+        pip_attr: the `pip.parse` tag to process.
+        whl_map: dict, updated in place with the `whl_alias` values per hub.
+        whl_overrides: dict of patches keyed by normalized whl name.
+        group_map: dict, updated in place with the requirement cycles per hub.
+        simpleapi_cache: dict passed to `simpleapi_download` as its cache.
+
+    Returns:
+        False if any repo was created from index URL metadata, True otherwise.
+    """
+    logger = repo_utils.logger(module_ctx)
+    python_interpreter_target = pip_attr.python_interpreter_target
+    is_hub_reproducible = True
+
+    # If no interpreter is set in the attributes, we programmatically find it.
+    hub_name = pip_attr.hub_name
+    if python_interpreter_target == None and not pip_attr.python_interpreter:
+        python_name = "python_{}_host".format(pip_attr.python_version.replace(".", "_"))
+        if python_name not in INTERPRETER_LABELS:
+            fail((
+                "Unable to find interpreter for pip hub '{hub_name}' for " +
+                "python_version={version}: Make sure a corresponding " +
+                '`python.toolchain(python_version="{version}")` call exists. ' +
+                "Expected to find {python_name} among registered versions:\n  {labels}"
+            ).format(
+                hub_name = hub_name,
+                version = pip_attr.python_version,
+                python_name = python_name,
+                labels = "\n  ".join(INTERPRETER_LABELS),  # one entry per indented line
+            ))
+        python_interpreter_target = INTERPRETER_LABELS[python_name]
+
+    pip_name = "{}_{}".format(hub_name, version_label(pip_attr.python_version))
+    major_minor = _major_minor_version(pip_attr.python_version)
+
+    whl_map.setdefault(hub_name, {})
+
+    whl_modifications = {}
+    if pip_attr.whl_modifications != None:
+        for mod, whl_name in pip_attr.whl_modifications.items():
+            whl_modifications[whl_name] = mod
+
+    if pip_attr.experimental_requirement_cycles:
+        requirement_cycles = {
+            name: [normalize_name(whl_name) for whl_name in whls]
+            for name, whls in pip_attr.experimental_requirement_cycles.items()
+        }
+
+        whl_group_mapping = {
+            whl_name: group_name
+            for group_name, group_whls in requirement_cycles.items()
+            for whl_name in group_whls
+        }
+
+        # TODO @aignas 2024-04-05: how do we support different requirement
+        # cycles for different abis/oses? For now we will need the users to
+        # assume the same groups across all versions/platforms until we start
+        # using an alternative cycle resolution strategy.
+        group_map[hub_name] = pip_attr.experimental_requirement_cycles
+    else:
+        whl_group_mapping = {}
+        requirement_cycles = {}
+
+    get_index_urls = None
+    if pip_attr.experimental_index_url:
+        if pip_attr.download_only:
+            fail("Currently unsupported to use `download_only` and `experimental_index_url`")
+
+        get_index_urls = lambda ctx, distributions: simpleapi_download(
+            ctx,
+            attr = struct(
+                index_url = pip_attr.experimental_index_url,
+                extra_index_urls = pip_attr.experimental_extra_index_urls or [],
+                index_url_overrides = pip_attr.experimental_index_url_overrides or {},
+                sources = distributions,
+                envsubst = pip_attr.envsubst,
+                # Auth related info
+                netrc = pip_attr.netrc,
+                auth_patterns = pip_attr.auth_patterns,
+            ),
+            cache = simpleapi_cache,
+            parallel_download = pip_attr.parallel_download,
+        )
+
+    requirements_by_platform = parse_requirements(
+        module_ctx,
+        requirements_by_platform = pip_attr.requirements_by_platform,
+        requirements_linux = pip_attr.requirements_linux,
+        requirements_lock = pip_attr.requirements_lock,
+        requirements_osx = pip_attr.requirements_darwin,
+        requirements_windows = pip_attr.requirements_windows,
+        extra_pip_args = pip_attr.extra_pip_args,
+        get_index_urls = get_index_urls,
+        python_version = major_minor,
+        logger = logger,
+    )
+
+    # Create a new wheel library for each of the different whls
+    repository_platform = host_platform(module_ctx.os)
+    for whl_name, requirements in requirements_by_platform.items():
+        # We are not using the "sanitized name" because the user
+        # would need to guess what name we modified the whl name
+        # to.
+        annotation = whl_modifications.get(whl_name)
+        whl_name = normalize_name(whl_name)
+
+        group_name = whl_group_mapping.get(whl_name)
+        group_deps = requirement_cycles.get(group_name, [])
+
+        # Construct args separately so that the lock file can be smaller and does not include unused
+        # attrs.
+        whl_library_args = dict(
+            repo = pip_name,
+            dep_template = "@{}//{{name}}:{{target}}".format(hub_name),
+        )
+        maybe_args = dict(
+            # The following values are safe to omit if they have false like values
+            annotation = annotation,
+            download_only = pip_attr.download_only,
+            enable_implicit_namespace_pkgs = pip_attr.enable_implicit_namespace_pkgs,
+            environment = pip_attr.environment,
+            envsubst = pip_attr.envsubst,
+            experimental_target_platforms = pip_attr.experimental_target_platforms,
+            group_deps = group_deps,
+            group_name = group_name,
+            pip_data_exclude = pip_attr.pip_data_exclude,
+            python_interpreter = pip_attr.python_interpreter,
+            python_interpreter_target = python_interpreter_target,
+            whl_patches = {
+                p: json.encode(args)
+                for p, args in whl_overrides.get(whl_name, {}).items()
+            },
+        )
+        whl_library_args.update({k: v for k, v in maybe_args.items() if v})
+        maybe_args_with_default = dict(
+            # The following values have defaults next to them
+            isolated = (use_isolated(module_ctx, pip_attr), True),
+            quiet = (pip_attr.quiet, True),
+            timeout = (pip_attr.timeout, 600),
+        )
+        whl_library_args.update({
+            k: v
+            for k, (v, default) in maybe_args_with_default.items()
+            if v != default
+        })
+
+        if get_index_urls:
+            # TODO @aignas 2024-05-26: move to a separate function
+            found_something = False
+            for requirement in requirements:
+                for distribution in requirement.whls + [requirement.sdist]:
+                    if not distribution:
+                        # sdist may be None
+                        continue
+
+                    found_something = True
+                    is_hub_reproducible = False
+
+                    if pip_attr.netrc:
+                        whl_library_args["netrc"] = pip_attr.netrc
+                    if pip_attr.auth_patterns:
+                        whl_library_args["auth_patterns"] = pip_attr.auth_patterns
+
+                    # pip is not used to download wheels and the python `whl_library` helpers are only extracting things
+                    whl_library_args.pop("extra_pip_args", None)
+
+                    # This is no-op because pip is not used to download the wheel.
+                    whl_library_args.pop("download_only", None)
+
+                    repo_name = whl_repo_name(pip_name, distribution.filename, distribution.sha256)
+                    whl_library_args["requirement"] = requirement.srcs.requirement
+                    whl_library_args["urls"] = [distribution.url]
+                    whl_library_args["sha256"] = distribution.sha256
+                    whl_library_args["filename"] = distribution.filename
+                    whl_library_args["experimental_target_platforms"] = requirement.target_platforms
+
+                    # Pure python wheels or sdists may need to have a platform here
+                    target_platforms = None
+                    if distribution.filename.endswith("-any.whl") or not distribution.filename.endswith(".whl"):
+                        if len(requirements) > 1:
+                            target_platforms = requirement.target_platforms
+
+                    whl_library(name = repo_name, **dict(sorted(whl_library_args.items())))
+
+                    whl_map[hub_name].setdefault(whl_name, []).append(
+                        whl_alias(
+                            repo = repo_name,
+                            version = major_minor,
+                            filename = distribution.filename,
+                            target_platforms = target_platforms,
+                        ),
+                    )
+
+            if found_something:
+                continue
+
+        requirement = select_requirement(
+            requirements,
+            platform = repository_platform,
+        )
+        if not requirement:
+            # Sometimes the package is not present for host platform if there
+            # are whls specified only in particular requirements files, in that
+            # case just continue, however, if the download_only flag is set up,
+            # then the user can also specify the target platform of the wheel
+            # packages they want to download, in that case there will be always
+            # a requirement here, so we will not be in this code branch.
+            continue
+        elif get_index_urls:
+            logger.warn(lambda: "falling back to pip for installing the right file for {}".format(requirement.requirement_line))
+
+        whl_library_args["requirement"] = requirement.requirement_line
+        if requirement.extra_pip_args:
+            whl_library_args["extra_pip_args"] = requirement.extra_pip_args
+
+        # We sort so that the lock-file remains the same no matter the order of how the
+        # args are manipulated in the code going before.
+        repo_name = "{}_{}".format(pip_name, whl_name)
+        whl_library(name = repo_name, **dict(sorted(whl_library_args.items())))
+        whl_map[hub_name].setdefault(whl_name, []).append(whl_alias(repo = repo_name, version = major_minor))
+
+    return is_hub_reproducible
+
+def _pip_impl(module_ctx):
+    """Implementation of a class tag that creates the pip hub and corresponding pip spoke whl repositories.
+
+    This implementation iterates through all of the `pip.parse` calls and creates
+    different pip hub repositories based on the "hub_name".  Each of the
+    pip calls creates spoke repos that use a specific Python interpreter.
+
+    In a MODULE.bazel file we have:
+
+    pip.parse(
+        hub_name = "pip",
+        python_version = "3.9",
+        requirements_lock = "//:requirements_lock_3_9.txt",
+        requirements_windows = "//:requirements_windows_3_9.txt",
+    )
+    pip.parse(
+        hub_name = "pip",
+        python_version = "3.10",
+        requirements_lock = "//:requirements_lock_3_10.txt",
+        requirements_windows = "//:requirements_windows_3_10.txt",
+    )
+
+    For instance, we have a hub with the name of "pip". A repository named the following
+    is created. It is actually called last when all of the pip spokes are collected.
+
+    - @@rules_python~override~pip~pip
+
+    As shown in the example code above, two different pip.parse statements in
+    MODULE.bazel provide the hub_name "pip". These definitions create two different
+    pip spoke repositories that are related to the hub "pip". One spoke uses Python 3.9
+    and the other uses Python 3.10. This code automatically determines the Python
+    version and the interpreter. Both of these pip spokes contain requirements files
+    that include websockets and its dependencies.
+
+    We also need repositories for the wheels that the different pip spokes contain.
+    For each Python version a different wheel repository is created. In our example
+    each pip spoke had a requirements file that contained websockets. We
+    then create two different wheel repositories that are named the following.
+
+    - @@rules_python~override~pip~pip_39_websockets
+    - @@rules_python~override~pip~pip_310_websockets
+
+    And if the wheel has any other dependencies subsequent wheels are created in the same fashion.
+
+    The hub repository has aliases for `pkg`, `data`, etc, which have a select that resolves to
+    a spoke repository depending on the Python version.
+
+    Also we may have more than one hub as defined in a MODULE.bazel file.  So we could have multiple
+    hubs pointing to various different pip spokes.
+
+    Some other business rules notes. A hub can only have one spoke per Python version.  We cannot
+    have a hub named "pip" that has two spokes that use the Python 3.9 interpreter.  Second
+    we cannot have the same hub name used in sub-modules.  The hub name has to be globally
+    unique.
+
+    This implementation also handles the creation of whl_modification JSON files that are used
+    during the creation of wheel libraries. These JSON files are used via the annotations argument
+    when calling wheel_installer.py.
+
+    Args:
+        module_ctx: the module extension context; its `modules` and their tags are read.
+
+    Returns:
+        Extension metadata carrying the `reproducible` flag when Bazel supports it, else None.
+    """
+
+    # Build all of the wheel modifications if the tag class is called.
+    _whl_mods_impl(module_ctx)
+
+    _overriden_whl_set = {}
+    whl_overrides = {}
+
+    for module in module_ctx.modules:
+        for attr in module.tags.override:
+            if not module.is_root:
+                fail("overrides are only supported in root modules")
+
+            if not attr.file.endswith(".whl"):
+                fail("Only whl overrides are supported at this time")
+
+            whl_name = normalize_name(parse_whl_name(attr.file).distribution)
+
+            if attr.file in _overriden_whl_set:
+                fail("Duplicate module overrides for '{}'".format(attr.file))
+            _overriden_whl_set[attr.file] = None
+
+            for patch in attr.patches:
+                if whl_name not in whl_overrides:
+                    whl_overrides[whl_name] = {}
+
+                if patch not in whl_overrides[whl_name]:
+                    whl_overrides[whl_name][patch] = struct(
+                        patch_strip = attr.patch_strip,
+                        whls = [],
+                    )
+
+                whl_overrides[whl_name][patch].whls.append(attr.file)
+
+    # Used to track all the different pip hubs and the spoke pip Python
+    # versions.
+    pip_hub_map = {}
+
+    # Keeps track of all the hub's whl repos across the different versions.
+    # dict[hub, dict[whl, list[whl_alias]]]
+    # Where hub is the hub repo name and whl is the normalized whl name
+    hub_whl_map = {}
+    hub_group_map = {}
+
+    simpleapi_cache = {}
+    is_extension_reproducible = True
+
+    for mod in module_ctx.modules:
+        for pip_attr in mod.tags.parse:
+            hub_name = pip_attr.hub_name
+            if hub_name not in pip_hub_map:
+                pip_hub_map[pip_attr.hub_name] = struct(
+                    module_name = mod.name,
+                    python_versions = [pip_attr.python_version],
+                )
+            elif pip_hub_map[hub_name].module_name != mod.name:
+                # We cannot have two hubs with the same name in different
+                # modules.
+                fail((
+                    "Duplicate cross-module pip hub named '{hub}': pip hub " +
+                    "names must be unique across modules. First defined " +
+                    "by module '{first_module}', second attempted by " +
+                    "module '{second_module}'"
+                ).format(
+                    hub = hub_name,
+                    first_module = pip_hub_map[hub_name].module_name,
+                    second_module = mod.name,
+                ))
+
+            elif pip_attr.python_version in pip_hub_map[hub_name].python_versions:
+                fail((
+                    "Duplicate pip python version '{version}' for hub " +
+                    "'{hub}' in module '{module}': the Python versions " +
+                    "used for a hub must be unique"
+                ).format(
+                    hub = hub_name,
+                    module = mod.name,
+                    version = pip_attr.python_version,
+                ))
+            else:
+                pip_hub_map[pip_attr.hub_name].python_versions.append(pip_attr.python_version)
+
+            is_hub_reproducible = _create_whl_repos(module_ctx, pip_attr, hub_whl_map, whl_overrides, hub_group_map, simpleapi_cache)
+            is_extension_reproducible = is_extension_reproducible and is_hub_reproducible
+
+    for hub_name, whl_map in hub_whl_map.items():
+        hub_repository(
+            name = hub_name,
+            repo_name = hub_name,
+            whl_map = {
+                key: json.encode(value)
+                for key, value in whl_map.items()
+            },
+            default_version = _major_minor_version(DEFAULT_PYTHON_VERSION),
+            groups = hub_group_map.get(hub_name),
+        )
+
+    if bazel_features.external_deps.extension_metadata_has_reproducible:
+        # If we are not using the `experimental_index_url` feature, the extension is fully
+        # deterministic and we don't need to create a lock entry for it.
+        #
+        # In order to be able to dogfood the `experimental_index_url` feature before it gets
+        # stabilized, we have created the `_pip_non_reproducible` function, that will result
+        # in extra entries in the lock file.
+        return module_ctx.extension_metadata(reproducible = is_extension_reproducible)
+    else:
+        return None
+
+def _pip_non_reproducible(module_ctx):
+    # Run the regular implementation for the repos it creates, but drop the
+    # extension metadata it returns: we default to calling the PyPI index and
+    # that has to go into the MODULE.bazel.lock file, hence return nothing.
+    _pip_impl(module_ctx)
+    return None
+
+def _pip_parse_ext_attrs(**kwargs):
+    """Get the attributes for the pip extension.
+
+    Args:
+        **kwargs: Defaults for specific attributes, keyed by the attribute
+        name. Only `experimental_index_url` is currently read.
+
+    Returns:
+        A dict of attributes.
+    """
+    attrs = dict({
+        "experimental_extra_index_urls": attr.string_list(
+            doc = """\
+The extra index URLs to use for downloading wheels using bazel downloader.
+Each value is going to be subject to `envsubst` substitutions if necessary.
+
+The indexes must support Simple API as described here:
+https://packaging.python.org/en/latest/specifications/simple-repository-api/
+
+This is equivalent to `--extra-index-url` `pip` option.
+""",
+            default = [],
+        ),
+        "experimental_index_url": attr.string(
+            default = kwargs.get("experimental_index_url", ""),
+            doc = """\
+The index URL to use for downloading wheels using bazel downloader. This value is going
+to be subject to `envsubst` substitutions if necessary.
+
+The indexes must support Simple API as described here:
+https://packaging.python.org/en/latest/specifications/simple-repository-api/
+
+In the future this could be defaulted to `https://pypi.org` when this feature becomes
+stable.
+
+This is equivalent to `--index-url` `pip` option.
+""",
+        ),
+        "experimental_index_url_overrides": attr.string_dict(
+            doc = """\
+The index URL overrides for each package to use for downloading wheels using
+bazel downloader. This value is going to be subject to `envsubst` substitutions
+if necessary.
+
+The key is the package name (will be normalized before usage) and the value is the
+index URL.
+
+This design pattern has been chosen in order to be fully deterministic about which
+packages come from which source. We want to avoid issues similar to what happened in
+https://pytorch.org/blog/compromised-nightly-dependency/.
+
+The indexes must support Simple API as described here:
+https://packaging.python.org/en/latest/specifications/simple-repository-api/
+""",
+        ),
+        "hub_name": attr.string(
+            mandatory = True,
+            doc = """
+The name of the repo pip dependencies will be accessible from.
+
+This name must be unique between modules; unless your module is guaranteed to
+always be the root module, it's highly recommended to include your module name
+in the hub name. Repo mapping, `use_repo(..., pip="my_modules_pip_deps")`, can
+be used for shorter local names within your module.
+
+Within a module, the same `hub_name` can be specified to group different Python
+versions of pip dependencies under one repository name. This allows using a
+Python version-agnostic name when referring to pip dependencies; the
+correct version will be automatically selected.
+
+Typically, a module will only have a single hub of pip dependencies, but this
+is not required. Each hub is a separate resolution of pip dependencies. This
+means if different programs need different versions of some library, separate
+hubs can be created, and each program can use its respective hub's targets.
+Targets from different hubs should not be used together.
+""",
+        ),
+        "parallel_download": attr.bool(
+            doc = """\
+The flag allows to make use of parallel downloading feature in bazel 7.1 and above
+when the bazel downloader is used. This is by default enabled as it improves the
+performance by a lot, but in case the queries to the simple API are very expensive
+or when debugging authentication issues one may want to disable this feature.
+
+NOTE, This will download (potentially duplicate) data for multiple packages if
+there is more than one index available, but in general this should be negligible
+because the simple API calls are very cheap and the user should not notice any
+extra overhead.
+
+If we are in synchronous mode, then we will use the first result that we
+find in case extra indexes are specified.
+""",
+            default = True,
+        ),
+        "python_version": attr.string(
+            mandatory = True,
+            doc = """
+The Python version the dependencies are targeting, in Major.Minor format
+(e.g., "3.11") or patch level granularity (e.g. "3.11.1").
+
+If an interpreter isn't explicitly provided (using `python_interpreter` or
+`python_interpreter_target`), then the version specified here must have
+a corresponding `python.toolchain()` configured.
+""",
+        ),
+        "whl_modifications": attr.label_keyed_string_dict(
+            mandatory = False,
+            doc = """\
+A dict of labels to wheel names that is typically generated by the whl_modifications.
+The labels are JSON config files describing the modifications.
+""",
+        ),
+    }, **ATTRS)
+    attrs.update(AUTH_ATTRS)
+
+    return attrs
+
+def _whl_mod_attrs():
+    """Get the attributes for the `whl_mods` tag class as a dict."""
+    return {
+        "additive_build_content": attr.string(
+            doc = "(str, optional): Raw text to add to the generated `BUILD` file of a package.",
+        ),
+        "additive_build_content_file": attr.label(
+            doc = """\
+(label, optional): path to a BUILD file to add to the generated
+`BUILD` file of a package. You cannot use both additive_build_content and additive_build_content_file
+arguments at the same time.""",
+        ),
+        "copy_executables": attr.string_dict(
+            doc = """\
+(dict, optional): A mapping of `src` and `out` files for
+[@bazel_skylib//rules:copy_file.bzl][cf]. Targets generated here will also be flagged as
+executable.""",
+        ),
+        "copy_files": attr.string_dict(
+            doc = """\
+(dict, optional): A mapping of `src` and `out` files for
+[@bazel_skylib//rules:copy_file.bzl][cf]""",
+        ),
+        "data": attr.string_list(
+            doc = """\
+(list, optional): A list of labels to add as `data` dependencies to
+the generated `py_library` target.""",
+        ),
+        "data_exclude_glob": attr.string_list(
+            doc = """\
+(list, optional): A list of exclude glob patterns to add as `data` to
+the generated `py_library` target.""",
+        ),
+        "hub_name": attr.string(
+            doc = """\
+Name of the whl modification hub; we use this name to set the modifications for
+pip.parse. If you have different pip hubs you can use a different name,
+otherwise it is best practice to just use one.
+
+You cannot have the same `hub_name` in different modules.  You can reuse the same
+name in the same module for different wheels that you put in the same hub, but you
+cannot have a child module that uses the same `hub_name`.
+""",
+            mandatory = True,
+        ),
+        "srcs_exclude_glob": attr.string_list(
+            doc = """\
+(list, optional): A list of exclude glob patterns to add as `srcs` to the generated
+`py_library` target.""",
+        ),
+        "whl_name": attr.string(
+            doc = "The whl name that the modifications are used for.",
+            mandatory = True,
+        ),
+    }
+
+# NOTE: the 'override' name mirrors the native bzlmod override functions such
+# as 'archive_override' and 'git_override'.
+_override_tag = tag_class(
+    doc = """\
+Apply any overrides (e.g. patches) to a given Python distribution defined by
+other tags in this extension.""",
+    attrs = {
+        "file": attr.string(
+            mandatory = True,
+            doc = """\
+The Python distribution file name which needs to be patched. This will be
+applied to all repositories that setup this distribution via the pip.parse tag
+class.""",
+        ),
+        "patch_strip": attr.int(
+            doc = """\
+The number of leading path segments to be stripped from the file name in the
+patches.""",
+            default = 0,
+        ),
+        "patches": attr.label_list(
+            mandatory = True,
+            doc = """\
+A list of patches to apply to the repository *after* 'whl_library' is extracted
+and BUILD.bazel file is generated.""",
+        ),
+    },
+)
+
+pypi = module_extension(
+    doc = """\
+This extension is used to make dependencies from pip available.
+
+pip.parse:
+To use, call `pip.parse()` and specify `hub_name` and your requirements file.
+Dependencies will be downloaded and made available in a repo named after the
+`hub_name` argument.
+
+Each `pip.parse()` call configures a particular Python version. Multiple calls
+can be made to configure different Python versions, and will be grouped by
+the `hub_name` argument. This allows the same logical name, e.g. `@pip//numpy`
+to automatically resolve to different, Python version-specific, libraries.
+
+pip.whl_mods:
+This tag class is used to help create JSON files to describe modifications to
+the BUILD files for wheels.
+""",
+    implementation = _pip_impl,
+    tag_classes = {
+        "override": _override_tag,
+        "parse": tag_class(
+            attrs = _pip_parse_ext_attrs(),
+            doc = """\
+This tag class is used to create a pip hub and all of the spokes that are part of that hub.
+This tag class reuses most of the pip attributes that are found in
+@rules_python//python/pip_install:pip_repository.bzl.
+The exception is it does not use the arg 'repo_prefix'.  We set the repository
+prefix for the user and the alias arg is always True in bzlmod.
+""",
+        ),
+        "whl_mods": tag_class(
+            attrs = _whl_mod_attrs(),
+            doc = """\
+This tag class is used to create JSON files that are used when calling wheel_builder.py.  These
+JSON files contain instructions on how to modify a wheel's project.  Each of the attributes
+create different modifications based on the type of attribute. Prior to bzlmod these
+JSON files were referred to as annotations, and were renamed to whl_modifications in this
+extension.
+""",
+        ),
+    },
+)
+
+pypi_internal = module_extension(
+    doc = """\
+This extension is used to make dependencies from pypi available.
+
+For now this is intended to be used internally so that usage of the `pip`
+extension in `rules_python` does not affect the evaluations of the extension
+for the consumers.
+
+pip.parse:
+To use, call `pip.parse()` and specify `hub_name` and your requirements file.
+Dependencies will be downloaded and made available in a repo named after the
+`hub_name` argument.
+
+Each `pip.parse()` call configures a particular Python version. Multiple calls
+can be made to configure different Python versions, and will be grouped by
+the `hub_name` argument. This allows the same logical name, e.g. `@pypi//numpy`
+to automatically resolve to different, Python version-specific, libraries.
+
+pip.whl_mods:
+This tag class is used to help create JSON files to describe modifications to
+the BUILD files for wheels.
+""",
+    implementation = _pip_non_reproducible,
+    tag_classes = {
+        "override": _override_tag,
+        "parse": tag_class(
+            attrs = _pip_parse_ext_attrs(
+                experimental_index_url = "https://pypi.org/simple",
+            ),
+            doc = """\
+This tag class is used to create a pypi hub and all of the spokes that are part of that hub.
+This tag class reuses most of the pypi attributes that are found in
+@rules_python//python/pip_install:pip_repository.bzl.
+The exception is it does not use the arg 'repo_prefix'.  We set the repository
+prefix for the user and the alias arg is always True in bzlmod.
+""",
+        ),
+        "whl_mods": tag_class(
+            attrs = _whl_mod_attrs(),
+            doc = """\
+This tag class is used to create JSON files that are used when calling wheel_builder.py.  These
+JSON files contain instructions on how to modify a wheel's project.  Each of the attributes
+create different modifications based on the type of attribute. Prior to bzlmod these
+JSON files were referred to as annotations, and were renamed to whl_modifications in this
+extension.
+""",
+        ),
+    },
+)
+
+def _whl_mods_repo_impl(rctx):
+    rctx.file("BUILD.bazel", "")  # empty top-level BUILD file
+    for name, encoded_mods in rctx.attr.whl_mods.items():
+        rctx.file(name + ".json", encoded_mods)  # one JSON file per whl_mods entry
+
+_whl_mods_repo = repository_rule(
+    doc = """\
+This rule creates json files based on the whl_mods attribute.
+""",
+    implementation = _whl_mods_repo_impl,
+    attrs = {
+        "whl_mods": attr.string_dict(
+            mandatory = True,
+            doc = "JSON encoded string that is provided to wheel_builder.py",
+        ),
+    },
+)