fix(pypi): fix the whl selection algorithm after #2069 (#2078)
It seems that a few things broke in recent commits:
- We are not using the `MODULE.bazel.lock` file and it seems that it is
easy to
miss when the components in the PyPI extension stop integrating well
together. This happened during the switch to `{abi}_{os}_{plat}` target
platform passing within the code.
- The logger code stopped working in the extension after the recent
additions
to add the `rule_name`.
- `repo_utils.getenv` was always getting `PATH` env var on bazel `6.x`.
This PR fixes both cases and updates docs to serve as a better reminder.
By
fixing the `select_whls` code and we can just rely on target platform
triples
(ABI, OS, CPU). This gets one step closer to maybe supporting optional
`python_version` which would address #1708. Whilst at it we are also
adding
different status messages for building the wheel from `sdist` vs just
extracting or downloading the wheel.
Tests:
- Added more unit tests and brought them in line with the rest of the
code.
- Checked manually for differences between the `MODULE.bazel.lock` files
in our
`rules_python` extension before #2069 and after this PR and there are no
differences except in the `experimental_target_platforms` attribute in
`whl_library`. Before this PR you would see that we do not select any
wheels
for e.g. `MarkupSafe` and we are always building from `sdist`.
Work towards #260.
diff --git a/python/private/pypi/extension.bzl b/python/private/pypi/extension.bzl
index 95526e4..82e580d 100644
--- a/python/private/pypi/extension.bzl
+++ b/python/private/pypi/extension.bzl
@@ -99,7 +99,7 @@
)
def _create_whl_repos(module_ctx, pip_attr, whl_map, whl_overrides, group_map, simpleapi_cache, exposed_packages):
- logger = repo_utils.logger(module_ctx)
+ logger = repo_utils.logger(module_ctx, "pypi:create_whl_repos")
python_interpreter_target = pip_attr.python_interpreter_target
is_hub_reproducible = True
@@ -195,7 +195,6 @@
logger = logger,
),
get_index_urls = get_index_urls,
- python_version = major_minor,
logger = logger,
)
diff --git a/python/private/pypi/parse_requirements.bzl b/python/private/pypi/parse_requirements.bzl
index 968c486..0cab1d7 100644
--- a/python/private/pypi/parse_requirements.bzl
+++ b/python/private/pypi/parse_requirements.bzl
@@ -38,9 +38,7 @@
requirements_by_platform = {},
extra_pip_args = [],
get_index_urls = None,
- python_version = None,
- logger = None,
- fail_fn = fail):
+ logger = None):
"""Get the requirements with platforms that the requirements apply to.
Args:
@@ -53,10 +51,7 @@
get_index_urls: Callable[[ctx, list[str]], dict], a callable to get all
of the distribution URLs from a PyPI index. Accepts ctx and
distribution names to query.
- python_version: str or None. This is needed when the get_index_urls is
- specified. It should be of the form "3.x.x",
logger: repo_utils.logger or None, a simple struct to log diagnostic messages.
- fail_fn (Callable[[str], None]): A failure function used in testing failure cases.
Returns:
A tuple where the first element a dict of dicts where the first key is
@@ -137,10 +132,6 @@
index_urls = {}
if get_index_urls:
- if not python_version:
- fail_fn("'python_version' must be provided")
- return None
-
index_urls = get_index_urls(
ctx,
# Use list({}) as a way to have a set
@@ -168,9 +159,8 @@
for r in sorted(reqs.values(), key = lambda r: r.requirement_line):
whls, sdist = _add_dists(
- r,
- index_urls.get(whl_name),
- python_version = python_version,
+ requirement = r,
+ index_urls = index_urls.get(whl_name),
logger = logger,
)
@@ -238,7 +228,7 @@
repo_utils.get_platforms_cpu_name(ctx),
)
-def _add_dists(requirement, index_urls, python_version, logger = None):
+def _add_dists(*, requirement, index_urls, logger = None):
"""Populate dists based on the information from the PyPI index.
This function will modify the given requirements_by_platform data structure.
@@ -246,7 +236,6 @@
Args:
requirement: The result of parse_requirements function.
index_urls: The result of simpleapi_download.
- python_version: The version of the python interpreter.
logger: A logger for printing diagnostic info.
"""
if not index_urls:
@@ -289,18 +278,6 @@
]))
# Filter out the wheels that are incompatible with the target_platforms.
- whls = select_whls(
- whls = whls,
- want_abis = [
- "none",
- "abi3",
- "cp" + python_version.replace(".", ""),
- # Older python versions have wheels for the `*m` ABI.
- "cp" + python_version.replace(".", "") + "m",
- ],
- want_platforms = requirement.target_platforms,
- want_python_version = python_version,
- logger = logger,
- )
+ whls = select_whls(whls = whls, want_platforms = requirement.target_platforms, logger = logger)
return whls, sdist
diff --git a/python/private/pypi/whl_library.bzl b/python/private/pypi/whl_library.bzl
index f453f92..0419926 100644
--- a/python/private/pypi/whl_library.bzl
+++ b/python/private/pypi/whl_library.bzl
@@ -231,9 +231,16 @@
args = _parse_optional_attrs(rctx, args, extra_pip_args)
if not whl_path:
+ if rctx.attr.urls:
+ op_tmpl = "whl_library.BuildWheelFromSource({name}, {requirement})"
+ elif rctx.attr.download_only:
+ op_tmpl = "whl_library.DownloadWheel({name}, {requirement})"
+ else:
+ op_tmpl = "whl_library.ResolveRequirement({name}, {requirement})"
+
repo_utils.execute_checked(
rctx,
- op = "whl_library.ResolveRequirement({}, {})".format(rctx.attr.name, rctx.attr.requirement),
+ op = op_tmpl.format(name = rctx.attr.name, requirement = rctx.attr.requirement),
arguments = args,
environment = environment,
quiet = rctx.attr.quiet,
diff --git a/python/private/pypi/whl_target_platforms.bzl b/python/private/pypi/whl_target_platforms.bzl
index bee7957..bdc44c6 100644
--- a/python/private/pypi/whl_target_platforms.bzl
+++ b/python/private/pypi/whl_target_platforms.bzl
@@ -46,15 +46,13 @@
"win": "windows",
} # buildifier: disable=unsorted-dict-items
-def select_whls(*, whls, want_python_version = "3.0", want_abis = [], want_platforms = [], logger = None):
+def select_whls(*, whls, want_platforms = [], logger = None):
"""Select a subset of wheels suitable for target platforms from a list.
Args:
whls(list[struct]): A list of candidates which have a `filename`
attribute containing the `whl` filename.
- want_python_version(str): An optional parameter to filter whls by python version. Defaults to '3.0'.
- want_abis(list[str]): A list of ABIs that are supported.
- want_platforms(str): The platforms
+ want_platforms(str): The platforms in "{abi}_{os}_{cpu}" or "{os}_{cpu}" format.
logger: A logger for printing diagnostic messages.
Returns:
@@ -64,9 +62,34 @@
if not whls:
return []
- version_limit = -1
- if want_python_version:
- version_limit = int(want_python_version.split(".")[1])
+ want_abis = {
+ "abi3": None,
+ "none": None,
+ }
+
+ _want_platforms = {}
+ version_limit = None
+
+ for p in want_platforms:
+ if not p.startswith("cp3"):
+ fail("expected all platforms to start with ABI, but got: {}".format(p))
+
+ abi, _, os_cpu = p.partition("_")
+ _want_platforms[os_cpu] = None
+ _want_platforms[p] = None
+
+ version_limit_candidate = int(abi[3:])
+ if not version_limit:
+ version_limit = version_limit_candidate
+ if version_limit and version_limit != version_limit_candidate:
+ fail("Only a single python version is supported for now")
+
+ # For some legacy implementations the wheels may target the `cp3xm` ABI
+ _want_platforms["{}m_{}".format(abi, os_cpu)] = None
+ want_abis[abi] = None
+ want_abis[abi + "m"] = None
+
+ want_platforms = sorted(_want_platforms)
candidates = {}
for whl in whls:
@@ -101,7 +124,7 @@
logger.trace(lambda: "Discarding the whl because the whl abi did not match")
continue
- if version_limit != -1 and whl_version_min > version_limit:
+ if whl_version_min > version_limit:
if logger:
logger.trace(lambda: "Discarding the whl because the whl supported python version is too high")
continue
@@ -110,7 +133,7 @@
if parsed.platform_tag == "any":
compatible = True
else:
- for p in whl_target_platforms(parsed.platform_tag):
+ for p in whl_target_platforms(parsed.platform_tag, abi_tag = parsed.abi_tag.strip("m") if parsed.abi_tag.startswith("cp") else None):
if p.target_platform in want_platforms:
compatible = True
break
diff --git a/python/private/repo_utils.bzl b/python/private/repo_utils.bzl
index 1893789..3c07027 100644
--- a/python/private/repo_utils.bzl
+++ b/python/private/repo_utils.bzl
@@ -42,22 +42,23 @@
if _is_repo_debug_enabled(rctx):
print(message_cb()) # buildifier: disable=print
-def _logger(rctx):
+def _logger(ctx, name = None):
"""Creates a logger instance for printing messages.
Args:
- rctx: repository_ctx object. If the attribute `_rule_name` is
- present, it will be included in log messages.
+ ctx: repository_ctx or module_ctx object. If the attribute
+ `_rule_name` is present, it will be included in log messages.
+ name: name for the logger. Optional for repository_ctx usage.
Returns:
A struct with attributes logging: trace, debug, info, warn, fail.
"""
- if _is_repo_debug_enabled(rctx):
+ if _is_repo_debug_enabled(ctx):
verbosity_level = "DEBUG"
else:
verbosity_level = "WARN"
- env_var_verbosity = rctx.os.environ.get(REPO_VERBOSITY_ENV_VAR)
+ env_var_verbosity = _getenv(ctx, REPO_VERBOSITY_ENV_VAR)
verbosity_level = env_var_verbosity or verbosity_level
verbosity = {
@@ -66,18 +67,23 @@
"TRACE": 3,
}.get(verbosity_level, 0)
+ if hasattr(ctx, "attr"):
+ # This is `repository_ctx`.
+ name = name or "{}(@@{})".format(getattr(ctx.attr, "_rule_name", "?"), ctx.name)
+ elif not name:
+ fail("The name has to be specified when using the logger with `module_ctx`")
+
def _log(enabled_on_verbosity, level, message_cb_or_str):
if verbosity < enabled_on_verbosity:
return
- rule_name = getattr(rctx.attr, "_rule_name", "?")
+
if type(message_cb_or_str) == "string":
message = message_cb_or_str
else:
message = message_cb_or_str()
- print("\nrules_python:{}(@@{}) {}:".format(
- rule_name,
- rctx.name,
+ print("\nrules_python:{} {}:".format(
+ name,
level.upper(),
), message) # buildifier: disable=print
@@ -278,12 +284,9 @@
path = path,
)
-def _getenv(rctx, name, default = None):
- # Bazel 7+ API
- if hasattr(rctx, "getenv"):
- return rctx.getenv(name, default)
- else:
- return rctx.os.environ.get("PATH", default)
+def _getenv(ctx, name, default = None):
+ # Bazel 7+ API has ctx.getenv
+ return getattr(ctx, "getenv", ctx.os.environ.get)(name, default)
def _args_to_str(arguments):
return " ".join([_arg_repr(a) for a in arguments])