fix(pypi): fix the whl selection algorithm after #2069 (#2078) It seems that a few things broke in recent commits: - We are not using the `MODULE.bazel.lock` file, so it is easy to miss when the components in the PyPI extension stop integrating well together. This happened during the switch to passing `{abi}_{os}_{plat}` target platforms within the code. - The logger code stopped working in the extension after the recent additions to add the `rule_name`. - `repo_utils.getenv` was always getting the `PATH` env var on bazel `6.x`. This PR fixes these issues and updates the docs to serve as a better reminder. By fixing the `select_whls` code, we can now rely solely on target platform triples (ABI, OS, CPU). This gets us one step closer to maybe supporting an optional `python_version`, which would address #1708. Whilst at it, we are also adding different status messages for building the wheel from `sdist` vs just extracting or downloading the wheel. Tests: - Added more unit tests and brought them in line with the rest of the code. - Checked manually for differences between the `MODULE.bazel.lock` files in our `rules_python` extension before #2069 and after this PR; there are no differences except in the `experimental_target_platforms` attribute in `whl_library`. Before this PR you would see that we do not select any wheels for e.g. `MarkupSafe` and we are always building from `sdist`. Work towards #260.

commit: 4a262fae88bad2c4f59c7c60a97360c316e64946 [log] [tgz]
author: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Sat Jul 20 14:04:16 2024 +0900
committer: GitHub <noreply@github.com> Sat Jul 20 05:04:16 2024 +0000
tree: 5a05e726f3032c17a1596013f07a908938a618f6
parent: ae2eb7075fe12585c55d6bd6522cc688d20db94c [diff]
diff --git a/python/private/pypi/extension.bzl b/python/private/pypi/extension.bzl
index 95526e4..82e580d 100644
--- a/python/private/pypi/extension.bzl
+++ b/python/private/pypi/extension.bzl

@@ -99,7 +99,7 @@
         )
 
 def _create_whl_repos(module_ctx, pip_attr, whl_map, whl_overrides, group_map, simpleapi_cache, exposed_packages):
-    logger = repo_utils.logger(module_ctx)
+    logger = repo_utils.logger(module_ctx, "pypi:create_whl_repos")
     python_interpreter_target = pip_attr.python_interpreter_target
     is_hub_reproducible = True
 
@@ -195,7 +195,6 @@
             logger = logger,
         ),
         get_index_urls = get_index_urls,
-        python_version = major_minor,
         logger = logger,
     )
 

diff --git a/python/private/pypi/parse_requirements.bzl b/python/private/pypi/parse_requirements.bzl
index 968c486..0cab1d7 100644
--- a/python/private/pypi/parse_requirements.bzl
+++ b/python/private/pypi/parse_requirements.bzl

@@ -38,9 +38,7 @@
         requirements_by_platform = {},
         extra_pip_args = [],
         get_index_urls = None,
-        python_version = None,
-        logger = None,
-        fail_fn = fail):
+        logger = None):
     """Get the requirements with platforms that the requirements apply to.
 
     Args:
@@ -53,10 +51,7 @@
         get_index_urls: Callable[[ctx, list[str]], dict], a callable to get all
             of the distribution URLs from a PyPI index. Accepts ctx and
             distribution names to query.
-        python_version: str or None. This is needed when the get_index_urls is
-            specified. It should be of the form "3.x.x",
         logger: repo_utils.logger or None, a simple struct to log diagnostic messages.
-        fail_fn (Callable[[str], None]): A failure function used in testing failure cases.
 
     Returns:
         A tuple where the first element a dict of dicts where the first key is
@@ -137,10 +132,6 @@
 
     index_urls = {}
     if get_index_urls:
-        if not python_version:
-            fail_fn("'python_version' must be provided")
-            return None
-
         index_urls = get_index_urls(
             ctx,
             # Use list({}) as a way to have a set
@@ -168,9 +159,8 @@
 
         for r in sorted(reqs.values(), key = lambda r: r.requirement_line):
             whls, sdist = _add_dists(
-                r,
-                index_urls.get(whl_name),
-                python_version = python_version,
+                requirement = r,
+                index_urls = index_urls.get(whl_name),
                 logger = logger,
             )
 
@@ -238,7 +228,7 @@
         repo_utils.get_platforms_cpu_name(ctx),
     )
 
-def _add_dists(requirement, index_urls, python_version, logger = None):
+def _add_dists(*, requirement, index_urls, logger = None):
     """Populate dists based on the information from the PyPI index.
 
     This function will modify the given requirements_by_platform data structure.
@@ -246,7 +236,6 @@
     Args:
         requirement: The result of parse_requirements function.
         index_urls: The result of simpleapi_download.
-        python_version: The version of the python interpreter.
         logger: A logger for printing diagnostic info.
     """
     if not index_urls:
@@ -289,18 +278,6 @@
         ]))
 
     # Filter out the wheels that are incompatible with the target_platforms.
-    whls = select_whls(
-        whls = whls,
-        want_abis = [
-            "none",
-            "abi3",
-            "cp" + python_version.replace(".", ""),
-            # Older python versions have wheels for the `*m` ABI.
-            "cp" + python_version.replace(".", "") + "m",
-        ],
-        want_platforms = requirement.target_platforms,
-        want_python_version = python_version,
-        logger = logger,
-    )
+    whls = select_whls(whls = whls, want_platforms = requirement.target_platforms, logger = logger)
 
     return whls, sdist

diff --git a/python/private/pypi/whl_library.bzl b/python/private/pypi/whl_library.bzl
index f453f92..0419926 100644
--- a/python/private/pypi/whl_library.bzl
+++ b/python/private/pypi/whl_library.bzl

@@ -231,9 +231,16 @@
     args = _parse_optional_attrs(rctx, args, extra_pip_args)
 
     if not whl_path:
+        if rctx.attr.urls:
+            op_tmpl = "whl_library.BuildWheelFromSource({name}, {requirement})"
+        elif rctx.attr.download_only:
+            op_tmpl = "whl_library.DownloadWheel({name}, {requirement})"
+        else:
+            op_tmpl = "whl_library.ResolveRequirement({name}, {requirement})"
+
         repo_utils.execute_checked(
             rctx,
-            op = "whl_library.ResolveRequirement({}, {})".format(rctx.attr.name, rctx.attr.requirement),
+            op = op_tmpl.format(name = rctx.attr.name, requirement = rctx.attr.requirement),
             arguments = args,
             environment = environment,
             quiet = rctx.attr.quiet,

diff --git a/python/private/pypi/whl_target_platforms.bzl b/python/private/pypi/whl_target_platforms.bzl
index bee7957..bdc44c6 100644
--- a/python/private/pypi/whl_target_platforms.bzl
+++ b/python/private/pypi/whl_target_platforms.bzl

@@ -46,15 +46,13 @@
     "win": "windows",
 }  # buildifier: disable=unsorted-dict-items
 
-def select_whls(*, whls, want_python_version = "3.0", want_abis = [], want_platforms = [], logger = None):
+def select_whls(*, whls, want_platforms = [], logger = None):
     """Select a subset of wheels suitable for target platforms from a list.
 
     Args:
         whls(list[struct]): A list of candidates which have a `filename`
             attribute containing the `whl` filename.
-        want_python_version(str): An optional parameter to filter whls by python version. Defaults to '3.0'.
-        want_abis(list[str]): A list of ABIs that are supported.
-        want_platforms(str): The platforms
+        want_platforms(str): The platforms in "{abi}_{os}_{cpu}" or "{os}_{cpu}" format.
         logger: A logger for printing diagnostic messages.
 
     Returns:
@@ -64,9 +62,34 @@
     if not whls:
         return []
 
-    version_limit = -1
-    if want_python_version:
-        version_limit = int(want_python_version.split(".")[1])
+    want_abis = {
+        "abi3": None,
+        "none": None,
+    }
+
+    _want_platforms = {}
+    version_limit = None
+
+    for p in want_platforms:
+        if not p.startswith("cp3"):
+            fail("expected all platforms to start with ABI, but got: {}".format(p))
+
+        abi, _, os_cpu = p.partition("_")
+        _want_platforms[os_cpu] = None
+        _want_platforms[p] = None
+
+        version_limit_candidate = int(abi[3:])
+        if not version_limit:
+            version_limit = version_limit_candidate
+        if version_limit and version_limit != version_limit_candidate:
+            fail("Only a single python version is supported for now")
+
+        # For some legacy implementations the wheels may target the `cp3xm` ABI
+        _want_platforms["{}m_{}".format(abi, os_cpu)] = None
+        want_abis[abi] = None
+        want_abis[abi + "m"] = None
+
+    want_platforms = sorted(_want_platforms)
 
     candidates = {}
     for whl in whls:
@@ -101,7 +124,7 @@
                 logger.trace(lambda: "Discarding the whl because the whl abi did not match")
             continue
 
-        if version_limit != -1 and whl_version_min > version_limit:
+        if whl_version_min > version_limit:
             if logger:
                 logger.trace(lambda: "Discarding the whl because the whl supported python version is too high")
             continue
@@ -110,7 +133,7 @@
         if parsed.platform_tag == "any":
             compatible = True
         else:
-            for p in whl_target_platforms(parsed.platform_tag):
+            for p in whl_target_platforms(parsed.platform_tag, abi_tag = parsed.abi_tag.strip("m") if parsed.abi_tag.startswith("cp") else None):
                 if p.target_platform in want_platforms:
                     compatible = True
                     break

diff --git a/python/private/repo_utils.bzl b/python/private/repo_utils.bzl
index 1893789..3c07027 100644
--- a/python/private/repo_utils.bzl
+++ b/python/private/repo_utils.bzl

@@ -42,22 +42,23 @@
     if _is_repo_debug_enabled(rctx):
         print(message_cb())  # buildifier: disable=print
 
-def _logger(rctx):
+def _logger(ctx, name = None):
     """Creates a logger instance for printing messages.
 
     Args:
-        rctx: repository_ctx object. If the attribute `_rule_name` is
-            present, it will be included in log messages.
+        ctx: repository_ctx or module_ctx object. If the attribute
+            `_rule_name` is present, it will be included in log messages.
+        name: name for the logger. Optional for repository_ctx usage.
 
     Returns:
         A struct with attributes logging: trace, debug, info, warn, fail.
     """
-    if _is_repo_debug_enabled(rctx):
+    if _is_repo_debug_enabled(ctx):
         verbosity_level = "DEBUG"
     else:
         verbosity_level = "WARN"
 
-    env_var_verbosity = rctx.os.environ.get(REPO_VERBOSITY_ENV_VAR)
+    env_var_verbosity = _getenv(ctx, REPO_VERBOSITY_ENV_VAR)
     verbosity_level = env_var_verbosity or verbosity_level
 
     verbosity = {
@@ -66,18 +67,23 @@
         "TRACE": 3,
     }.get(verbosity_level, 0)
 
+    if hasattr(ctx, "attr"):
+        # This is `repository_ctx`.
+        name = name or "{}(@@{})".format(getattr(ctx.attr, "_rule_name", "?"), ctx.name)
+    elif not name:
+        fail("The name has to be specified when using the logger with `module_ctx`")
+
     def _log(enabled_on_verbosity, level, message_cb_or_str):
         if verbosity < enabled_on_verbosity:
             return
-        rule_name = getattr(rctx.attr, "_rule_name", "?")
+
         if type(message_cb_or_str) == "string":
             message = message_cb_or_str
         else:
             message = message_cb_or_str()
 
-        print("\nrules_python:{}(@@{}) {}:".format(
-            rule_name,
-            rctx.name,
+        print("\nrules_python:{} {}:".format(
+            name,
             level.upper(),
         ), message)  # buildifier: disable=print
 
@@ -278,12 +284,9 @@
         path = path,
     )
 
-def _getenv(rctx, name, default = None):
-    # Bazel 7+ API
-    if hasattr(rctx, "getenv"):
-        return rctx.getenv(name, default)
-    else:
-        return rctx.os.environ.get("PATH", default)
+def _getenv(ctx, name, default = None):
+    # Bazel 7+ API has ctx.getenv
+    return getattr(ctx, "getenv", ctx.os.environ.get)(name, default)
 
 def _args_to_str(arguments):
     return " ".join([_arg_repr(a) for a in arguments])
commit	4a262fae88bad2c4f59c7c60a97360c316e64946	[log] [tgz]
author	Ignas Anikevicius <240938+aignas@users.noreply.github.com>	Sat Jul 20 14:04:16 2024 +0900
committer	GitHub <noreply@github.com>	Sat Jul 20 05:04:16 2024 +0000
tree	5a05e726f3032c17a1596013f07a908938a618f6
parent	ae2eb7075fe12585c55d6bd6522cc688d20db94c [diff]