feat(pypi): support env markers in requirements files (#2059) Before this change the `all_requirements` and related constants would include packages that need to be installed only on specific platforms, which meant that users relying on those constants (e.g. `gazelle`) needed to do extra work to exclude platform-specific packages. The package managers that support outputting such files now include `uv` and `pdm`. This might also be useful in cases where we attempt to handle non-requirements lock files. Note that the best way to handle this would be to move all of the requirements parsing code to Python, but that may cause regressions as it is a much bigger change. This is only changing the code so that we are doing extra processing for the requirement lines that have env markers. The lines that have no markers will not see any change in the code execution paths and the python interpreter will not be downloaded. We also use the `*_ctx.watch` API where available to correctly re-evaluate the markers if the `packaging` Python sources used for this change are modified. Extra changes that are included in this PR: - Extend `repo_utils` to have a method for getting the `arch` from the `ctx`. - Change the `local_runtime_repo` to perform the validation without relying on the implementation detail of `get_platforms_os_name`. - Add `$(UV)` make variable for the `uv:current_toolchain` so that we can generate the requirements for `sphinx` using `uv`. - Swap the requirement generation using `genrule` and `uv` for `sphinx` and co so that we can test the `requirement` marker code. Note, the `requirement` markers are not working well with the `requirement_cycles`. Fixes #1105. Fixes #1868. Work towards #260, #1975. Related #1663. --------- Co-authored-by: Richard Levasseur <rlevasseur@google.com>

commit: 519574ca7fb9892524d86b6acaac5b550a7b6df5 [log] [tgz]
author: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Thu Aug 15 22:11:50 2024 +0300
committer: GitHub <noreply@github.com> Thu Aug 15 19:11:50 2024 +0000
tree: 377553b0b3650bb44743da4703be148b36e90bcf
parent: 905af697d68e4a87d037a721b385f4d26f8ba848 [diff]
diff --git a/python/private/pypi/BUILD.bazel b/python/private/pypi/BUILD.bazel
index f444287..3b11dbe 100644
--- a/python/private/pypi/BUILD.bazel
+++ b/python/private/pypi/BUILD.bazel

@@ -61,6 +61,7 @@
         ":attrs_bzl",
         ":hub_repository_bzl",
         ":parse_requirements_bzl",
+        ":evaluate_markers_bzl",
         ":parse_whl_name_bzl",
         ":pip_repository_attrs_bzl",
         ":simpleapi_download_bzl",
@@ -90,6 +91,14 @@
 )
 
 bzl_library(
+    name = "evaluate_markers_bzl",
+    srcs = ["evaluate_markers.bzl"],
+    deps = [
+        ":pypi_repo_utils_bzl",
+    ],
+)
+
+bzl_library(
     name = "flags_bzl",
     srcs = ["flags.bzl"],
     deps = [
@@ -215,6 +224,7 @@
     srcs = ["pip_repository.bzl"],
     deps = [
         ":attrs_bzl",
+        ":evaluate_markers_bzl",
         ":parse_requirements_bzl",
         ":pip_repository_attrs_bzl",
         ":render_pkg_aliases_bzl",
@@ -235,6 +245,7 @@
     srcs = ["pypi_repo_utils.bzl"],
     deps = [
         "//python/private:repo_utils_bzl",
+        "@bazel_skylib//lib:types",
     ],
 )
 

diff --git a/python/private/pypi/evaluate_markers.bzl b/python/private/pypi/evaluate_markers.bzl
new file mode 100644
index 0000000..c805fd7
--- /dev/null
+++ b/python/private/pypi/evaluate_markers.bzl

@@ -0,0 +1,77 @@
+# Copyright 2024 The Bazel Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""A simple function that evaluates markers using a python interpreter."""
+
+load(":pypi_repo_utils.bzl", "pypi_repo_utils")
+
+# Used as a default value in a rule to ensure we fetch the dependencies.
+SRCS = [
+    # When the version, or any of the files in the `packaging` package changes,
+    # this file will change as well.
+    Label("@pypi__packaging//:packaging-24.0.dist-info/RECORD"),
+    Label("//python/private/pypi/requirements_parser:resolve_target_platforms.py"),
+    Label("//python/private/pypi/whl_installer:platform.py"),
+]
+
+def evaluate_markers(mrctx, *, requirements, python_interpreter, python_interpreter_target, srcs, logger = None):
+    """Return the list of supported platforms per requirements line.
+
+    Args:
+        mrctx: repository_ctx or module_ctx.
+        requirements: dict[str, list[str]] of requirement lines to target platforms to evaluate.
+        python_interpreter: str, path to the python_interpreter to use to
+            evaluate the env markers in the given requirements files. It will
+            be only called if the requirements files have env markers. This
+            should be something that is in your PATH or an absolute path.
+        python_interpreter_target: Label, same as python_interpreter, but in a
+            label format.
+        srcs: list[Label], the value of SRCS passed from the `rctx` or `mctx` to this function.
+        logger: repo_utils.logger or None, a simple struct to log diagnostic
+            messages. Defaults to None.
+
+    Returns:
+        dict of string lists with target platforms
+    """
+    if not requirements:
+        return {}
+
+    in_file = mrctx.path("requirements_with_markers.in.json")
+    out_file = mrctx.path("requirements_with_markers.out.json")
+    mrctx.file(in_file, json.encode(requirements))
+
+    pypi_repo_utils.execute_checked(
+        mrctx,
+        op = "ResolveRequirementEnvMarkers({})".format(in_file),
+        arguments = [
+            pypi_repo_utils.resolve_python_interpreter(
+                mrctx,
+                python_interpreter = python_interpreter,
+                python_interpreter_target = python_interpreter_target,
+            ),
+            "-m",
+            "python.private.pypi.requirements_parser.resolve_target_platforms",
+            in_file,
+            out_file,
+        ],
+        srcs = srcs,
+        environment = {
+            "PYTHONPATH": [
+                Label("@pypi__packaging//:BUILD.bazel"),
+                Label("//:BUILD.bazel"),
+            ],
+        },
+        logger = logger,
+    )
+    return json.decode(mrctx.read(out_file))

diff --git a/python/private/pypi/extension.bzl b/python/private/pypi/extension.bzl
index 82e580d..1bc8f15 100644
--- a/python/private/pypi/extension.bzl
+++ b/python/private/pypi/extension.bzl

@@ -21,6 +21,7 @@
 load("//python/private:repo_utils.bzl", "repo_utils")
 load("//python/private:version_label.bzl", "version_label")
 load(":attrs.bzl", "use_isolated")
+load(":evaluate_markers.bzl", "evaluate_markers", EVALUATE_MARKERS_SRCS = "SRCS")
 load(":hub_repository.bzl", "hub_repository")
 load(":parse_requirements.bzl", "host_platform", "parse_requirements", "select_requirement")
 load(":parse_whl_name.bzl", "parse_whl_name")
@@ -195,6 +196,28 @@
             logger = logger,
         ),
         get_index_urls = get_index_urls,
+        # NOTE @aignas 2024-08-02: we will execute any interpreter that we find either
+        # in the PATH or if specified as a label. We will configure the env
+        # markers when evaluating the requirement lines based on the output
+        # from the `requirements_files_by_platform` which should have something
+        # similar to:
+        # {
+        #    "//:requirements.txt": ["cp311_linux_x86_64", ...]
+        # }
+        #
+        # We know the target python versions that we need to evaluate the
+        # markers for and thus we don't need to use multiple python interpreter
+        # instances to perform this manipulation. This function should be executed
+        # only once by the underlying code to minimize the overhead needed to
+        # spin up a Python interpreter.
+        evaluate_markers = lambda module_ctx, requirements: evaluate_markers(
+            module_ctx,
+            requirements = requirements,
+            python_interpreter = pip_attr.python_interpreter,
+            python_interpreter_target = python_interpreter_target,
+            srcs = pip_attr._evaluate_markers_srcs,
+            logger = logger,
+        ),
         logger = logger,
     )
 
@@ -625,6 +648,13 @@
 The labels are JSON config files describing the modifications.
 """,
         ),
+        "_evaluate_markers_srcs": attr.label_list(
+            default = EVALUATE_MARKERS_SRCS,
+            doc = """\
+The list of labels to use as SRCS for the marker evaluation code. This ensures that the
+code will be re-evaluated when any of files in the default changes.
+""",
+        ),
     }, **ATTRS)
     attrs.update(AUTH_ATTRS)
 

diff --git a/python/private/pypi/parse_requirements.bzl b/python/private/pypi/parse_requirements.bzl
index 0cab1d7..eee97d7 100644
--- a/python/private/pypi/parse_requirements.bzl
+++ b/python/private/pypi/parse_requirements.bzl

@@ -38,6 +38,7 @@
         requirements_by_platform = {},
         extra_pip_args = [],
         get_index_urls = None,
+        evaluate_markers = lambda *_: {},
         logger = None):
     """Get the requirements with platforms that the requirements apply to.
 
@@ -51,6 +52,11 @@
         get_index_urls: Callable[[ctx, list[str]], dict], a callable to get all
             of the distribution URLs from a PyPI index. Accepts ctx and
             distribution names to query.
+        evaluate_markers: A function to use to evaluate the requirements.
+            Accepts the ctx and a dict where keys are requirement lines to
+            evaluate against the platforms stored as values in the input dict.
+            Returns the same dict, but with values being platforms that are
+            compatible with the requirements line.
         logger: repo_utils.logger or None, a simple struct to log diagnostic messages.
 
     Returns:
@@ -109,6 +115,7 @@
             options[plat] = pip_args
 
     requirements_by_platform = {}
+    reqs_with_env_markers = {}
     for target_platform, reqs_ in requirements.items():
         extra_pip_args = options[target_platform]
 
@@ -118,6 +125,9 @@
                 {},
             )
 
+            if ";" in requirement_line:
+                reqs_with_env_markers.setdefault(requirement_line, []).append(target_platform)
+
             for_req = for_whl.setdefault(
                 (requirement_line, ",".join(extra_pip_args)),
                 struct(
@@ -130,6 +140,20 @@
             )
             for_req.target_platforms.append(target_platform)
 
+    # This may call to Python, so execute it early (before calling to the
+    # internet below) and ensure that we call it only once.
+    #
+    # NOTE @aignas 2024-07-13: in the future, if this is something that we want
+    # to do, we could use Python to parse the requirement lines and infer the
+    # URL of the files to download things from. This should be important for
+    # VCS package references.
+    env_marker_target_platforms = evaluate_markers(ctx, reqs_with_env_markers)
+    if logger:
+        logger.debug(lambda: "Evaluated env markers from:\n{}\n\nTo:\n{}".format(
+            reqs_with_env_markers,
+            env_marker_target_platforms,
+        ))
+
     index_urls = {}
     if get_index_urls:
         index_urls = get_index_urls(
@@ -146,7 +170,8 @@
     for whl_name, reqs in requirements_by_platform.items():
         requirement_target_platforms = {}
         for r in reqs.values():
-            for p in r.target_platforms:
+            target_platforms = env_marker_target_platforms.get(r.requirement_line, r.target_platforms)
+            for p in target_platforms:
                 requirement_target_platforms[p] = None
 
         is_exposed = len(requirement_target_platforms) == len(requirements)
@@ -164,12 +189,13 @@
                 logger = logger,
             )
 
+            target_platforms = env_marker_target_platforms.get(r.requirement_line, r.target_platforms)
             ret.setdefault(whl_name, []).append(
                 struct(
                     distribution = r.distribution,
                     srcs = r.srcs,
                     requirement_line = r.requirement_line,
-                    target_platforms = sorted(r.target_platforms),
+                    target_platforms = sorted(target_platforms),
                     extra_pip_args = r.extra_pip_args,
                     whls = whls,
                     sdist = sdist,

diff --git a/python/private/pypi/pip_repository.bzl b/python/private/pypi/pip_repository.bzl
index 137c524..0c9e300 100644
--- a/python/private/pypi/pip_repository.bzl
+++ b/python/private/pypi/pip_repository.bzl

@@ -18,6 +18,7 @@
 load("//python/private:normalize_name.bzl", "normalize_name")
 load("//python/private:repo_utils.bzl", "REPO_DEBUG_ENV_VAR")
 load("//python/private:text_util.bzl", "render")
+load(":evaluate_markers.bzl", "evaluate_markers", EVALUATE_MARKERS_SRCS = "SRCS")
 load(":parse_requirements.bzl", "host_platform", "parse_requirements", "select_requirement")
 load(":pip_repository_attrs.bzl", "ATTRS")
 load(":render_pkg_aliases.bzl", "render_pkg_aliases", "whl_alias")
@@ -81,6 +82,13 @@
             extra_pip_args = rctx.attr.extra_pip_args,
         ),
         extra_pip_args = rctx.attr.extra_pip_args,
+        evaluate_markers = lambda rctx, requirements: evaluate_markers(
+            rctx,
+            requirements = requirements,
+            python_interpreter = rctx.attr.python_interpreter,
+            python_interpreter_target = rctx.attr.python_interpreter_target,
+            srcs = rctx.attr._evaluate_markers_srcs,
+        ),
     )
     selected_requirements = {}
     options = None
@@ -224,6 +232,13 @@
         _template = attr.label(
             default = ":requirements.bzl.tmpl.workspace",
         ),
+        _evaluate_markers_srcs = attr.label_list(
+            default = EVALUATE_MARKERS_SRCS,
+            doc = """\
+The list of labels to use as SRCS for the marker evaluation code. This ensures that the
+code will be re-evaluated when any of files in the default changes.
+""",
+        ),
         **ATTRS
     ),
     doc = """Accepts a locked/compiled requirements file and installs the dependencies listed within.

diff --git a/python/private/pypi/pypi_repo_utils.bzl b/python/private/pypi/pypi_repo_utils.bzl
index 1f9f050..da449b4 100644
--- a/python/private/pypi/pypi_repo_utils.bzl
+++ b/python/private/pypi/pypi_repo_utils.bzl

@@ -14,13 +14,14 @@
 
 ""
 
+load("@bazel_skylib//lib:types.bzl", "types")
 load("//python/private:repo_utils.bzl", "repo_utils")
 
-def _get_python_interpreter_attr(ctx, *, python_interpreter = None):
+def _get_python_interpreter_attr(mrctx, *, python_interpreter = None):
     """A helper function for getting the `python_interpreter` attribute or it's default
 
     Args:
-        ctx (repository_ctx): Handle to the rule repository context.
+        mrctx (module_ctx or repository_ctx): Handle to the rule repository context.
         python_interpreter (str): The python interpreter override.
 
     Returns:
@@ -29,29 +30,30 @@
     if python_interpreter:
         return python_interpreter
 
-    os = repo_utils.get_platforms_os_name(ctx)
+    os = repo_utils.get_platforms_os_name(mrctx)
     if "windows" in os:
         return "python.exe"
     else:
         return "python3"
 
-def _resolve_python_interpreter(ctx, *, python_interpreter = None, python_interpreter_target = None):
+def _resolve_python_interpreter(mrctx, *, python_interpreter = None, python_interpreter_target = None):
     """Helper function to find the python interpreter from the common attributes
 
     Args:
-        ctx: Handle to the rule module_ctx or repository_ctx.
-        python_interpreter: The python interpreter to use.
-        python_interpreter_target: The python interpreter to use after downloading the label.
+        mrctx: Handle to the module_ctx or repository_ctx.
+        python_interpreter: str, the python interpreter to use.
+        python_interpreter_target: Label, the python interpreter to use after
+            downloading the label.
 
     Returns:
         `path` object, for the resolved path to the Python interpreter.
     """
-    python_interpreter = _get_python_interpreter_attr(ctx, python_interpreter = python_interpreter)
+    python_interpreter = _get_python_interpreter_attr(mrctx, python_interpreter = python_interpreter)
 
     if python_interpreter_target != None:
-        python_interpreter = ctx.path(python_interpreter_target)
+        python_interpreter = mrctx.path(python_interpreter_target)
 
-        os = repo_utils.get_platforms_os_name(ctx)
+        os = repo_utils.get_platforms_os_name(mrctx)
 
         # On Windows, the symlink doesn't work because Windows attempts to find
         # Python DLLs where the symlink is, not where the symlink points.
@@ -59,37 +61,70 @@
             python_interpreter = python_interpreter.realpath
     elif "/" not in python_interpreter:
         # It's a plain command, e.g. "python3", to look up in the environment.
-        found_python_interpreter = ctx.which(python_interpreter)
-        if not found_python_interpreter:
-            fail("python interpreter `{}` not found in PATH".format(python_interpreter))
-        python_interpreter = found_python_interpreter
+        python_interpreter = repo_utils.which_checked(mrctx, python_interpreter)
     else:
-        python_interpreter = ctx.path(python_interpreter)
+        python_interpreter = mrctx.path(python_interpreter)
     return python_interpreter
 
-def _construct_pypath(ctx, *, entries):
+def _construct_pypath(mrctx, *, entries):
     """Helper function to construct a PYTHONPATH.
 
     Contains entries for code in this repo as well as packages downloaded from //python/pip_install:repositories.bzl.
     This allows us to run python code inside repository rule implementations.
 
     Args:
-        ctx: Handle to the module_ctx or repository_ctx.
+        mrctx: Handle to the module_ctx or repository_ctx.
         entries: The list of entries to add to PYTHONPATH.
 
     Returns: String of the PYTHONPATH.
     """
 
-    os = repo_utils.get_platforms_os_name(ctx)
+    if not entries:
+        return None
+
+    os = repo_utils.get_platforms_os_name(mrctx)
     separator = ";" if "windows" in os else ":"
     pypath = separator.join([
-        str(ctx.path(entry).dirname)
+        str(mrctx.path(entry).dirname)
         # Use a dict as a way to remove duplicates and then sort it.
         for entry in sorted({x: None for x in entries})
     ])
     return pypath
 
+def _execute_checked(mrctx, *, srcs, **kwargs):
+    """Helper function to run a python script and modify the PYTHONPATH to include external deps.
+
+    Args:
+        mrctx: Handle to the module_ctx or repository_ctx.
+        srcs: The src files that the script depends on. This is important to
+            ensure that the Bazel repository cache or the bzlmod lock file gets
+            invalidated when any one file changes. It is advisable to use
+            `RECORD` files for external deps and the list of srcs from the
+            rules_python repo for any scripts.
+        **kwargs: Arguments forwarded to `repo_utils.execute_checked`. If
+            the `environment` has a value `PYTHONPATH` and it is a list, then
+            it will be passed to `construct_pythonpath` function.
+    """
+
+    for src in srcs:
+        # Re-evaluate the bzlmod extension or refetch the repository_rule when
+        # the srcs change; should also work on Bazel without `mrctx.watch`.
+        # NOTE(review): `mrctx` is not passed to `watch` below - confirm this.
+        repo_utils.watch(mrctx.path(src))
+
+    env = kwargs.pop("environment", {})
+    pythonpath = env.get("PYTHONPATH", "")
+    if pythonpath and not types.is_string(pythonpath):
+        env["PYTHONPATH"] = _construct_pypath(mrctx, entries = pythonpath)
+
+    return repo_utils.execute_checked(
+        mrctx,
+        environment = env,
+        **kwargs
+    )
+
 pypi_repo_utils = struct(
-    resolve_python_interpreter = _resolve_python_interpreter,
     construct_pythonpath = _construct_pypath,
+    execute_checked = _execute_checked,
+    resolve_python_interpreter = _resolve_python_interpreter,
 )

diff --git a/python/private/pypi/requirements_parser/BUILD.bazel b/python/private/pypi/requirements_parser/BUILD.bazel
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/python/private/pypi/requirements_parser/BUILD.bazel


diff --git a/python/private/pypi/requirements_parser/resolve_target_platforms.py b/python/private/pypi/requirements_parser/resolve_target_platforms.py
new file mode 100755
index 0000000..c899a94
--- /dev/null
+++ b/python/private/pypi/requirements_parser/resolve_target_platforms.py

@@ -0,0 +1,63 @@
+"""A CLI to evaluate env markers for requirements files.
+
+A simple script to evaluate the `requirements.txt` files. Currently it is only
+handling environment markers in the requirements files, but in the future it
+may handle more things. We require a `python` interpreter that can run on the
+host platform and then we depend on the [packaging] PyPI wheel.
+
+In order to be able to resolve requirements files for any platform, we are
+re-using the same code that is used in the `whl_library` installer. See
+[here](../whl_installer/wheel.py).
+
+Requirements for the code are:
+- Depends only on `packaging` and core Python.
+- Produces the same result irrespective of the Python interpreter platform or version.
+
+[packaging]: https://packaging.pypa.io/en/stable/
+"""
+
+import argparse
+import json
+import pathlib
+
+from packaging.requirements import Requirement
+
+from python.private.pypi.whl_installer.platform import Platform
+
+INPUT_HELP = """\
+Input path to read the requirements as a json file, the keys in the dictionary
+are the requirements lines and the values are strings of target platforms.
+"""
+OUTPUT_HELP = """\
+Output to write the requirements as a json filepath, the keys in the dictionary
+are the requirements lines and the values are strings of target platforms, which
+got changed based on the evaluated markers.
+"""
+
+
+def main():
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument("input_path", type=pathlib.Path, help=INPUT_HELP.strip())
+    parser.add_argument("output_path", type=pathlib.Path, help=OUTPUT_HELP.strip())
+    args = parser.parse_args()
+
+    with args.input_path.open() as f:
+        reqs = json.load(f)
+
+    response = {}
+    for requirement_line, target_platforms in reqs.items():
+        entry, prefix, hashes = requirement_line.partition("--hash")
+        hashes = prefix + hashes
+
+        req = Requirement(entry)
+        for p in target_platforms:
+            (platform,) = Platform.from_string(p)
+            if not req.marker or req.marker.evaluate(platform.env_markers("")):
+                response.setdefault(requirement_line, []).append(p)
+
+    with args.output_path.open("w") as f:
+        json.dump(response, f)
+
+
+if __name__ == "__main__":
+    main()
commit	519574ca7fb9892524d86b6acaac5b550a7b6df5	[log] [tgz]
author	Ignas Anikevicius <240938+aignas@users.noreply.github.com>	Thu Aug 15 22:11:50 2024 +0300
committer	GitHub <noreply@github.com>	Thu Aug 15 19:11:50 2024 +0000
tree	377553b0b3650bb44743da4703be148b36e90bcf
parent	905af697d68e4a87d037a721b385f4d26f8ba848 [diff]