fix(pipstar): fix whl extraction and flip pipstar=true (#3461)

Attempt number 2. This should be smoother this time and should not cause
any breakage because we are not enabling any cross-building by default and
only the host wheels will be present.

Because we also started extracting using starlark APIs, some extra fixups
were needed because some wheels require extracting `.data` files into
correct paths. This also adds the `INSTALLER` file after extracting files
to signify that `pipstar` has performed the installation.

Because we have stopped passing the hermetic interpreter to the
`whl_library` if pipstar is enabled, we also needed to ensure that the code
path is only enabled if the extraction with pipstar is supported
(i.e. bazel >= 8).

Fixes #2949

---------

Co-authored-by: Richard Levasseur <richardlev@gmail.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 69e159c..fcf5634 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -81,6 +81,10 @@
 * (pip) `pipstar` has been enabled for all `whl_library` instances where the whl
   is passed through a label or downloaded using the bazel downloader
   ([#2949](https://github.com/bazel-contrib/rules_python/issues/2949)).
+* (pypi) The `pipstar` flag is now enabled by default.
+  It can be disabled by setting the `RULES_PYTHON_ENABLE_PIPSTAR=0` environment variable.
+  If you do need to disable it, please add a comment to
+  [#2949](https://github.com/bazel-contrib/rules_python/issues/2949).
 * (gazelle deps) rules_go bumped from 0.55.1 to 0.59.0
 * (gazelle deps) gazelle bumped from 0.36.0 to 0.47.0
 
diff --git a/python/private/internal_config_repo.bzl b/python/private/internal_config_repo.bzl
index fc1f8d3..9fc301c 100644
--- a/python/private/internal_config_repo.bzl
+++ b/python/private/internal_config_repo.bzl
@@ -22,7 +22,7 @@
 load(":repo_utils.bzl", "repo_utils")
 
 _ENABLE_PIPSTAR_ENVVAR_NAME = "RULES_PYTHON_ENABLE_PIPSTAR"
-_ENABLE_PIPSTAR_DEFAULT = "0"
+_ENABLE_PIPSTAR_DEFAULT = "1"
 _ENABLE_DEPRECATION_WARNINGS_ENVVAR_NAME = "RULES_PYTHON_DEPRECATION_WARNINGS"
 _ENABLE_DEPRECATION_WARNINGS_DEFAULT = "0"
 
diff --git a/python/private/pypi/extension.bzl b/python/private/pypi/extension.bzl
index 3d7985a..e903399 100644
--- a/python/private/pypi/extension.bzl
+++ b/python/private/pypi/extension.bzl
@@ -70,13 +70,16 @@
 def build_config(
         *,
         module_ctx,
-        enable_pipstar):
+        enable_pipstar,
+        enable_pipstar_extract):
     """Parse 'configure' and 'default' extension tags
 
     Args:
         module_ctx: {type}`module_ctx` module context.
         enable_pipstar: {type}`bool` a flag to enable dropping Python dependency for
             evaluation of the extension.
+        enable_pipstar_extract: {type}`bool` a flag to also avoid passing the Python
+            interpreter to `whl_library` when possible.
 
     Returns:
         A struct with the configuration.
@@ -127,6 +130,7 @@
             for name, values in defaults["platforms"].items()
         },
         enable_pipstar = enable_pipstar,
+        enable_pipstar_extract = enable_pipstar_extract,
     )
 
 def parse_modules(
@@ -134,6 +138,7 @@
         _fail = fail,
         simpleapi_download = simpleapi_download,
         enable_pipstar = False,
+        enable_pipstar_extract = False,
         **kwargs):
     """Implementation of parsing the tag classes for the extension and return a struct for registering repositories.
 
@@ -142,6 +147,8 @@
         simpleapi_download: Used for testing overrides
         enable_pipstar: {type}`bool` a flag to enable dropping Python dependency for
             evaluation of the extension.
+        enable_pipstar_extract: {type}`bool` a flag to enable dropping Python dependency for
+            extracting wheels.
         _fail: {type}`function` the failure function, mainly for testing.
         **kwargs: Extra arguments passed to the hub_builder.
 
@@ -179,7 +186,7 @@
                 srcs_exclude_glob = whl_mod.srcs_exclude_glob,
             )
 
-    config = build_config(module_ctx = module_ctx, enable_pipstar = enable_pipstar)
+    config = build_config(module_ctx = module_ctx, enable_pipstar = enable_pipstar, enable_pipstar_extract = enable_pipstar_extract)
 
     # TODO @aignas 2025-06-03: Merge override API with the builder?
     _overriden_whl_set = {}
@@ -362,7 +369,7 @@
         module_ctx: module contents
     """
 
-    mods = parse_modules(module_ctx, enable_pipstar = rp_config.enable_pipstar)
+    mods = parse_modules(module_ctx, enable_pipstar = rp_config.enable_pipstar, enable_pipstar_extract = rp_config.enable_pipstar and rp_config.bazel_8_or_later)
 
     # Build all of the wheel modifications if the tag class is called.
     _whl_mods_impl(mods.whl_mods)
diff --git a/python/private/pypi/hub_builder.bzl b/python/private/pypi/hub_builder.bzl
index 97e0a11..95e007a 100644
--- a/python/private/pypi/hub_builder.bzl
+++ b/python/private/pypi/hub_builder.bzl
@@ -153,6 +153,7 @@
         module_ctx,
         pip_attr = pip_attr,
         enable_pipstar = self._config.enable_pipstar or self._get_index_urls.get(pip_attr.python_version),
+        enable_pipstar_extract = self._config.enable_pipstar_extract or self._get_index_urls.get(pip_attr.python_version),
     )
 
 ### end of PUBLIC methods
@@ -407,7 +408,8 @@
         module_ctx,
         *,
         pip_attr,
-        enable_pipstar = False):
+        enable_pipstar = False,
+        enable_pipstar_extract = False):
     """create all of the whl repositories
 
     Args:
@@ -415,6 +417,7 @@
         module_ctx: {type}`module_ctx`.
         pip_attr: {type}`struct` - the struct that comes from the tag class iteration.
         enable_pipstar: {type}`bool` - enable the pipstar or not.
+        enable_pipstar_extract: {type}`bool` - enable the pipstar extraction or not.
     """
     logger = self._logger
     platforms = self._platforms[pip_attr.python_version]
@@ -479,6 +482,7 @@
                 is_multiple_versions = whl.is_multiple_versions,
                 interpreter = interpreter,
                 enable_pipstar = enable_pipstar,
+                enable_pipstar_extract = enable_pipstar_extract,
             )
             _add_whl_library(
                 self,
@@ -555,7 +559,8 @@
         python_version,
         use_downloader,
         interpreter,
-        enable_pipstar = False):
+        enable_pipstar = False,
+        enable_pipstar_extract = False):
     args = dict(whl_library_args)
     args["requirement"] = src.requirement_line
     is_whl = src.filename.endswith(".whl")
@@ -567,7 +572,7 @@
         # need to pass the extra args there, so only pop this for whls
         args["extra_pip_args"] = src.extra_pip_args
 
-    if "whl_patches" in args or not (enable_pipstar and is_whl):
+    if "whl_patches" in args or not (enable_pipstar_extract and is_whl):
         if interpreter.path:
             args["python_interpreter"] = interpreter.path
         if interpreter.target:
diff --git a/python/private/pypi/whl_library.bzl b/python/private/pypi/whl_library.bzl
index 9f04252..c368dea 100644
--- a/python/private/pypi/whl_library.bzl
+++ b/python/private/pypi/whl_library.bzl
@@ -26,7 +26,7 @@
 load(":patch_whl.bzl", "patch_whl")
 load(":pep508_requirement.bzl", "requirement")
 load(":pypi_repo_utils.bzl", "pypi_repo_utils")
-load(":whl_metadata.bzl", "whl_metadata")
+load(":whl_metadata.bzl", "find_whl_metadata", "whl_metadata")
 load(":whl_target_platforms.bzl", "whl_target_platforms")
 
 _CPPFLAGS = "CPPFLAGS"
@@ -265,6 +265,79 @@
         env[_CPPFLAGS] = " ".join(cppflags)
     return env
 
+def _extract_whl_star(rctx, *, whl_path, logger):
+    install_dir_path = whl_path.dirname.get_child("site-packages")
+    repo_utils.extract(
+        rctx,
+        archive = whl_path,
+        output = install_dir_path,
+        supports_whl_extraction = rp_config.supports_whl_extraction,
+    )
+    metadata_file = find_whl_metadata(
+        install_dir = install_dir_path,
+        logger = logger,
+    )
+
+    # Get the <prefix>.dist-info dir name
+    dist_info_dir = metadata_file.dirname
+    rctx.file(
+        dist_info_dir.get_child("INSTALLER"),
+        "https://github.com/bazel-contrib/rules_python#pipstar",
+    )
+    repo_root_dir = whl_path.dirname
+
+    # Get the sibling <prefix>.data dir, if the wheel ships one
+    data_dir = dist_info_dir.dirname.get_child(dist_info_dir.basename[:-len(".dist-info")] + ".data")
+    if data_dir.exists:
+        for prefix, dest in {
+            # https://docs.python.org/3/library/sysconfig.html#posix-prefix
+            # We are taking this from the legacy whl installer config
+            "data": "data",
+            "headers": "include",
+            "platlib": "site-packages",
+            "purelib": "site-packages",
+            "scripts": "bin",
+        }.items():
+            src = data_dir.get_child(prefix)
+            dest = repo_root_dir.get_child(dest)
+            if src.exists:
+                rctx.rename(src, dest)
+
+            # TODO @aignas 2025-12-16: when moving scripts to `bin`, rewrite the #!python
+            # shebang to be something else, for inspiration look at the hermetic
+            # toolchain wrappers
+
+def _extract_whl_py(rctx, *, python_interpreter, args, whl_path, environment, logger):
+    target_platforms = rctx.attr.experimental_target_platforms or []
+    if target_platforms:
+        parsed_whl = parse_whl_name(whl_path.basename)
+
+        # NOTE @aignas 2023-12-04: if the wheel is a platform specific wheel, we
+        # only include deps for that target platform
+        if parsed_whl.platform_tag != "any":
+            target_platforms = [
+                p.target_platform
+                for p in whl_target_platforms(
+                    platform_tag = parsed_whl.platform_tag,
+                    abi_tag = parsed_whl.abi_tag.strip("tm"),
+                )
+            ]
+
+    pypi_repo_utils.execute_checked(
+        rctx,
+        op = "whl_library.ExtractWheel({}, {})".format(rctx.attr.name, whl_path),
+        python = python_interpreter,
+        arguments = args + [
+            "--whl-file",
+            whl_path,
+        ] + ["--platform={}".format(p) for p in target_platforms],
+        srcs = rctx.attr._python_srcs,
+        environment = environment,
+        quiet = rctx.attr.quiet,
+        timeout = rctx.attr.timeout,
+        logger = logger,
+    )
+
 def _whl_library_impl(rctx):
     logger = repo_utils.logger(rctx)
     python_interpreter = pypi_repo_utils.resolve_python_interpreter(
@@ -327,6 +400,8 @@
 
     # also enable pipstar for any whls that are downloaded without `pip`
     enable_pipstar = (rp_config.enable_pipstar or whl_path) and rctx.attr.config_load
+    enable_pipstar_extract = (rp_config.enable_pipstar and rp_config.bazel_8_or_later) and rctx.attr.config_load
+
     if not whl_path:
         if rctx.attr.urls:
             op_tmpl = "whl_library.BuildWheelFromSource({name}, {requirement})"
@@ -372,19 +447,24 @@
                 timeout = rctx.attr.timeout,
             )
 
+    if enable_pipstar_extract:
+        _extract_whl_star(rctx, whl_path = whl_path, logger = logger)
+    else:
+        _extract_whl_py(
+            rctx,
+            python_interpreter = python_interpreter,
+            args = args,
+            whl_path = whl_path,
+            environment = environment,
+            logger = logger,
+        )
+
     # NOTE @aignas 2025-09-28: if someone has an old vendored file that does not have the
     # dep_template set or the packages is not set either, we should still not break, best to
     # disable pipstar for that particular case.
     #
     # Remove non-pipstar and config_load check when we release rules_python 2.
     if enable_pipstar:
-        repo_utils.extract(
-            rctx,
-            archive = whl_path,
-            output = "site-packages",
-            supports_whl_extraction = rp_config.supports_whl_extraction,
-        )
-
         install_dir_path = whl_path.dirname.get_child("site-packages")
         metadata = whl_metadata(
             install_dir = install_dir_path,
@@ -439,36 +519,6 @@
             extras = requirement(rctx.attr.requirement).extras,
         )
     else:
-        target_platforms = rctx.attr.experimental_target_platforms or []
-        if target_platforms:
-            parsed_whl = parse_whl_name(whl_path.basename)
-
-            # NOTE @aignas 2023-12-04: if the wheel is a platform specific wheel, we
-            # only include deps for that target platform
-            if parsed_whl.platform_tag != "any":
-                target_platforms = [
-                    p.target_platform
-                    for p in whl_target_platforms(
-                        platform_tag = parsed_whl.platform_tag,
-                        abi_tag = parsed_whl.abi_tag.strip("tm"),
-                    )
-                ]
-
-        pypi_repo_utils.execute_checked(
-            rctx,
-            op = "whl_library.ExtractWheel({}, {})".format(rctx.attr.name, whl_path),
-            python = python_interpreter,
-            arguments = args + [
-                "--whl-file",
-                whl_path,
-            ] + ["--platform={}".format(p) for p in target_platforms],
-            srcs = rctx.attr._python_srcs,
-            environment = environment,
-            quiet = rctx.attr.quiet,
-            timeout = rctx.attr.timeout,
-            logger = logger,
-        )
-
         metadata = json.decode(rctx.read("metadata.json"))
         rctx.delete("metadata.json")
 
diff --git a/tests/pypi/extension/extension_tests.bzl b/tests/pypi/extension/extension_tests.bzl
index 924796c..90723c4 100644
--- a/tests/pypi/extension/extension_tests.bzl
+++ b/tests/pypi/extension/extension_tests.bzl
@@ -99,6 +99,7 @@
     return env.expect.that_struct(
         build_config(
             enable_pipstar = enable_pipstar,
+            enable_pipstar_extract = True,
             **kwargs
         ),
         attrs = dict(
diff --git a/tests/pypi/hub_builder/hub_builder_tests.bzl b/tests/pypi/hub_builder/hub_builder_tests.bzl
index bf21dca..42c65ae 100644
--- a/tests/pypi/hub_builder/hub_builder_tests.bzl
+++ b/tests/pypi/hub_builder/hub_builder_tests.bzl
@@ -41,6 +41,7 @@
 def hub_builder(
         env,
         enable_pipstar = True,
+        enable_pipstar_extract = True,
         debug = False,
         config = None,
         minor_mapping = {},
@@ -54,6 +55,7 @@
         config = config or struct(
             # no need to evaluate the markers with the interpreter
             enable_pipstar = enable_pipstar,
+            enable_pipstar_extract = enable_pipstar_extract,
             platforms = {
                 "{}_{}{}".format(os, cpu, freethreaded): _plat(
                     name = "{}_{}{}".format(os, cpu, freethreaded),
@@ -512,6 +514,7 @@
         config = struct(
             netrc = None,
             enable_pipstar = True,
+            enable_pipstar_extract = True,
             auth_patterns = {},
             platforms = {
                 "{}_{}".format(os, cpu): _plat(
@@ -1095,6 +1098,7 @@
         enable_pipstar = True,
         config = struct(
             enable_pipstar = True,
+            enable_pipstar_extract = True,
             netrc = None,
             auth_patterns = {},
             platforms = {
@@ -1179,6 +1183,7 @@
         enable_pipstar = True,
         config = struct(
             enable_pipstar = True,
+            enable_pipstar_extract = True,
             netrc = None,
             auth_patterns = {},
             platforms = {