feat(toolchain, pip.parse): introduce a new '_host' toolchain repo (#1644)
This is for passing it in repository_rules and relies on the canonical
label representation introduced in bazel 6.0 and symlink support (needs
to be present on Windows) to work.
This allows the users to not need to `load` the interpreter label from
a `.bzl` file but instead specify the label in the form of
`@<mytoolchainrepo>_host//:python`.
In order to make it work robustly on Windows, we do
`repository_ctx.path(python_interpreter_label).realpath` to get the
actual
path of the Python interpreter on both, bzlmod and non-bzlmod setups on
Windows within the `whl_library` repository rule.
Work towards #1643.
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8bd56f1..37163b0 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -25,8 +25,21 @@
### Fixed
+* (bzlmod pip.parse) Use a platform-independent reference to the interpreter
+ pip uses. This reduces (but doesn't eliminate) the amount of
+ platform-specific content in `MODULE.bazel.lock` files; Follow
+ [#1643](https://github.com/bazelbuild/rules_python/issues/1643) for removing
+ platform-specific content in `MODULE.bazel.lock` files.
+
### Added
+* (toolchains) `python_register_toolchains` now also generates a repository
+ that is suffixed with `_host`, that has a single label `:python` that is a
+ symlink to the python interpreter for the host platform. The intended use is
+ mainly in `repository_rule`, which are always run using `host` platform
+ Python. This means that `WORKSPACE` users can now copy the `requirements.bzl`
+ file for vendoring as seen in the updated `pip_parse_vendored` example.
+
## [0.28.0] - 2024-01-07
[0.28.0]: https://github.com/bazelbuild/rules_python/releases/tag/0.28.0
diff --git a/examples/build_file_generation/WORKSPACE b/examples/build_file_generation/WORKSPACE
index e283260..3f1fad8 100644
--- a/examples/build_file_generation/WORKSPACE
+++ b/examples/build_file_generation/WORKSPACE
@@ -84,8 +84,6 @@
python_version = "3.9",
)
-# Load the interpreter and pip_parse rules.
-load("@python39//:defs.bzl", "interpreter")
load("@rules_python//python:pip.bzl", "pip_parse")
# This macro wraps the `pip_repository` rule that invokes `pip`, with `incremental` set.
@@ -114,7 +112,7 @@
# 3. Wrapper script, like in the autodetecting python toolchain.
#
# Here, we use the interpreter constant that resolves to the host interpreter from the default Python toolchain.
- python_interpreter_target = interpreter,
+ python_interpreter_target = "@python39_host//:python",
# Set the location of the lock file.
requirements_lock = "//:requirements_lock.txt",
requirements_windows = "//:requirements_windows.txt",
diff --git a/examples/multi_python_versions/WORKSPACE b/examples/multi_python_versions/WORKSPACE
index 35855ca..f3a69ce 100644
--- a/examples/multi_python_versions/WORKSPACE
+++ b/examples/multi_python_versions/WORKSPACE
@@ -28,19 +28,15 @@
)
load("@python//:pip.bzl", "multi_pip_parse")
-load("@python//3.10:defs.bzl", interpreter_3_10 = "interpreter")
-load("@python//3.11:defs.bzl", interpreter_3_11 = "interpreter")
-load("@python//3.8:defs.bzl", interpreter_3_8 = "interpreter")
-load("@python//3.9:defs.bzl", interpreter_3_9 = "interpreter")
multi_pip_parse(
name = "pypi",
default_version = default_python_version,
python_interpreter_target = {
- "3.10": interpreter_3_10,
- "3.11": interpreter_3_11,
- "3.8": interpreter_3_8,
- "3.9": interpreter_3_9,
+ "3.10": "@python_3_10_host//:python",
+ "3.11": "@python_3_11_host//:python",
+ "3.8": "@python_3_8_host//:python",
+ "3.9": "@python_3_9_host//:python",
},
requirements_lock = {
"3.10": "//requirements:requirements_lock_3_10.txt",
diff --git a/examples/pip_parse/WORKSPACE b/examples/pip_parse/WORKSPACE
index 415d064..1a3a6b0 100644
--- a/examples/pip_parse/WORKSPACE
+++ b/examples/pip_parse/WORKSPACE
@@ -14,7 +14,6 @@
python_version = "3.9",
)
-load("@python39//:defs.bzl", "interpreter")
load("@rules_python//python:pip.bzl", "pip_parse")
pip_parse(
@@ -52,7 +51,7 @@
# 3. Wrapper script, like in the autodetecting python toolchain.
#
# Here, we use the interpreter constant that resolves to the host interpreter from the default Python toolchain.
- python_interpreter_target = interpreter,
+ python_interpreter_target = "@python39_host//:python",
# (Optional) You can set quiet to False if you want to see pip output.
#quiet = False,
diff --git a/examples/pip_parse_vendored/BUILD.bazel b/examples/pip_parse_vendored/BUILD.bazel
index ddf3281..b856c94 100644
--- a/examples/pip_parse_vendored/BUILD.bazel
+++ b/examples/pip_parse_vendored/BUILD.bazel
@@ -9,24 +9,6 @@
src = "requirements.in",
)
-# The requirements.bzl file is generated with a reference to the interpreter for the host platform.
-# In order to check in a platform-agnostic file, we have to replace that reference with the symbol
-# loaded from our python toolchain.
-genrule(
- name = "make_platform_agnostic",
- srcs = ["@pip//:requirements.bzl"],
- outs = ["requirements.clean.bzl"],
- cmd = " | ".join([
- "cat $<",
- # Insert our load statement after the existing one so we don't produce a file with buildifier warnings
- """sed -e '/^load.*.pip.bzl/i\\'$$'\\n''load("@python39//:defs.bzl", "interpreter")'""",
- # Replace the bazel 6.0.0 specific comment with something that bazel 5.4.0 would produce.
- # This enables this example to be run as a test under bazel 5.4.0.
- """sed -e 's#@//#//#'""",
- """sed 's#"@python39_.*//:bin/python3"#interpreter#' >$@""",
- ]),
-)
-
write_file(
name = "gen_update",
out = "update.sh",
@@ -35,14 +17,14 @@
"#!/usr/bin/env bash",
# Bazel gives us a way to access the source folder!
"cd $BUILD_WORKSPACE_DIRECTORY",
- "cp -fv bazel-bin/requirements.clean.bzl requirements.bzl",
+ "cp -fv bazel-pip_parse_vendored/external/pip/requirements.bzl requirements.bzl",
],
)
sh_binary(
name = "vendor_requirements",
srcs = ["update.sh"],
- data = [":make_platform_agnostic"],
+ data = ["@pip//:requirements.bzl"],
)
# Similarly ensures that the requirements.bzl file is updated
@@ -51,5 +33,5 @@
name = "test_vendored",
failure_message = "Please run: bazel run //:vendor_requirements",
file1 = "requirements.bzl",
- file2 = ":make_platform_agnostic",
+ file2 = "@pip//:requirements.bzl",
)
diff --git a/examples/pip_parse_vendored/WORKSPACE b/examples/pip_parse_vendored/WORKSPACE
index 157f70a..e0b7c86 100644
--- a/examples/pip_parse_vendored/WORKSPACE
+++ b/examples/pip_parse_vendored/WORKSPACE
@@ -14,14 +14,13 @@
python_version = "3.9",
)
-load("@python39//:defs.bzl", "interpreter")
load("@rules_python//python:pip.bzl", "pip_parse")
# This repository isn't referenced, except by our test that asserts the requirements.bzl is updated.
# It also wouldn't be needed by users of this ruleset.
pip_parse(
name = "pip",
- python_interpreter_target = interpreter,
+ python_interpreter_target = "@python39_host//:python",
requirements_lock = "//:requirements.txt",
)
diff --git a/examples/pip_parse_vendored/requirements.bzl b/examples/pip_parse_vendored/requirements.bzl
index adbee66..206f8bb 100644
--- a/examples/pip_parse_vendored/requirements.bzl
+++ b/examples/pip_parse_vendored/requirements.bzl
@@ -1,10 +1,9 @@
"""Starlark representation of locked requirements.
@generated by rules_python pip_parse repository rule
-from //:requirements.txt
+from @//:requirements.txt
"""
-load("@python39//:defs.bzl", "interpreter")
load("@rules_python//python:pip.bzl", "pip_utils")
load("@rules_python//python/pip_install:pip_repository.bzl", "group_library", "whl_library")
@@ -17,7 +16,7 @@
all_data_requirements = ["@pip//certifi:data", "@pip//charset_normalizer:data", "@pip//idna:data", "@pip//requests:data", "@pip//urllib3:data"]
_packages = [("pip_certifi", "certifi==2023.7.22 --hash=sha256:539cc1d13202e33ca466e88b2807e29f4c13049d6d87031a3c110744495cb082 --hash=sha256:92d6037539857d8206b8f6ae472e8b77db8058fec5937a1ef3f54304089edbb9"), ("pip_charset_normalizer", "charset-normalizer==2.1.1 --hash=sha256:5a3d016c7c547f69d6f81fb0db9449ce888b418b5b9952cc5e6e66843e9dd845 --hash=sha256:83e9a75d1911279afd89352c68b45348559d1fc0506b054b346651b5e7fee29f"), ("pip_idna", "idna==3.4 --hash=sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4 --hash=sha256:90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2"), ("pip_requests", "requests==2.28.1 --hash=sha256:7c5599b102feddaa661c826c56ab4fee28bfd17f5abca1ebbe3e7f19d7c97983 --hash=sha256:8fefa2a1a1365bf5520aac41836fbee479da67864514bdb821f31ce07ce65349"), ("pip_urllib3", "urllib3==1.26.13 --hash=sha256:47cc05d99aaa09c9e72ed5809b60e7ba354e64b59c9c173ac3018642d8bb41fc --hash=sha256:c083dd0dce68dbfbe1129d5271cb90f9447dea7d52097c6e0126120c521ddea8")]
-_config = {"download_only": False, "enable_implicit_namespace_pkgs": False, "environment": {}, "extra_pip_args": [], "isolated": True, "pip_data_exclude": [], "python_interpreter": "python3", "python_interpreter_target": interpreter, "quiet": True, "repo": "pip", "repo_prefix": "pip_", "timeout": 600}
+_config = {"download_only": False, "enable_implicit_namespace_pkgs": False, "environment": {}, "extra_pip_args": [], "isolated": True, "pip_data_exclude": [], "python_interpreter": "python3", "python_interpreter_target": "@python39_host//:python", "quiet": True, "repo": "pip", "repo_prefix": "pip_", "timeout": 600}
_annotations = {}
def requirement(name):
diff --git a/examples/pip_repository_annotations/WORKSPACE b/examples/pip_repository_annotations/WORKSPACE
index 3535055..8540555 100644
--- a/examples/pip_repository_annotations/WORKSPACE
+++ b/examples/pip_repository_annotations/WORKSPACE
@@ -14,7 +14,6 @@
python_version = "3.9",
)
-load("@python39//:defs.bzl", "interpreter")
load("@rules_python//python:pip.bzl", "package_annotation", "pip_parse")
# Here we can see an example of annotations being applied to an arbitrary
@@ -54,7 +53,7 @@
pip_parse(
name = "pip",
annotations = ANNOTATIONS,
- python_interpreter_target = interpreter,
+ python_interpreter_target = "@python39_host//:python",
requirements_lock = "//:requirements.txt",
)
diff --git a/python/pip_install/pip_repository.bzl b/python/pip_install/pip_repository.bzl
index 3e4878b..a1e3de4 100644
--- a/python/pip_install/pip_repository.bzl
+++ b/python/pip_install/pip_repository.bzl
@@ -22,7 +22,6 @@
load("//python/pip_install/private:generate_group_library_build_bazel.bzl", "generate_group_library_build_bazel")
load("//python/pip_install/private:generate_whl_library_build_bazel.bzl", "generate_whl_library_build_bazel")
load("//python/pip_install/private:srcs.bzl", "PIP_INSTALL_PY_SRCS")
-load("//python/private:bzlmod_enabled.bzl", "BZLMOD_ENABLED")
load("//python/private:normalize_name.bzl", "normalize_name")
load("//python/private:parse_whl_name.bzl", "parse_whl_name")
load("//python/private:patch_whl.bzl", "patch_whl")
@@ -87,13 +86,12 @@
if rctx.attr.python_interpreter_target != None:
python_interpreter = rctx.path(rctx.attr.python_interpreter_target)
- if BZLMOD_ENABLED:
- (os, _) = get_host_os_arch(rctx)
+ (os, _) = get_host_os_arch(rctx)
- # On Windows, the symlink doesn't work because Windows attempts to find
- # Python DLLs where the symlink is, not where the symlink points.
- if os == WINDOWS_NAME:
- python_interpreter = python_interpreter.realpath
+ # On Windows, the symlink doesn't work because Windows attempts to find
+ # Python DLLs where the symlink is, not where the symlink points.
+ if os == WINDOWS_NAME:
+ python_interpreter = python_interpreter.realpath
elif "/" not in python_interpreter:
# It's a plain command, e.g. "python3", to look up in the environment.
found_python_interpreter = rctx.which(python_interpreter)
diff --git a/python/private/bzlmod/pip.bzl b/python/private/bzlmod/pip.bzl
index 6d45a26..f843a8f 100644
--- a/python/private/bzlmod/pip.bzl
+++ b/python/private/bzlmod/pip.bzl
@@ -87,8 +87,10 @@
# we programmatically find it.
hub_name = pip_attr.hub_name
if python_interpreter_target == None and not pip_attr.python_interpreter:
- python_name = "python_" + version_label(pip_attr.python_version, sep = "_")
- if python_name not in INTERPRETER_LABELS.keys():
+ python_name = "python_{}_host".format(
+ version_label(pip_attr.python_version, sep = "_"),
+ )
+ if python_name not in INTERPRETER_LABELS:
fail((
"Unable to find interpreter for pip hub '{hub_name}' for " +
"python_version={version}: Make sure a corresponding " +
diff --git a/python/private/bzlmod/pythons_hub.bzl b/python/private/bzlmod/pythons_hub.bzl
index 5f536f3..3889e13 100644
--- a/python/private/bzlmod/pythons_hub.bzl
+++ b/python/private/bzlmod/pythons_hub.bzl
@@ -78,7 +78,7 @@
"""
_line_for_hub_template = """\
- "{name}": Label("@{name}_{platform}//:{path}"),
+ "{key}": Label("@{name}_{platform}//:{path}"),
"""
def _hub_repo_impl(rctx):
@@ -103,11 +103,22 @@
# Create a dict that is later used to create
# a symlink to a interpreter.
- interpreter_labels = "".join([_line_for_hub_template.format(
- name = name,
- platform = platform,
- path = path,
- ) for name in rctx.attr.toolchain_user_repository_names])
+ interpreter_labels = "".join([
+ _line_for_hub_template.format(
+ key = name + ("" if platform_str != "host" else "_host"),
+ name = name,
+ platform = platform_str,
+ path = p,
+ )
+ for name in rctx.attr.toolchain_user_repository_names
+ for platform_str, p in {
+ # NOTE @aignas 2023-12-21: maintaining the `platform` specific key
+ # here may be unneeded in the long term, but I am not sure if there
+ # are other users that depend on it.
+ platform: path,
+ "host": "python",
+ }.items()
+ ])
rctx.file(
"interpreters.bzl",
diff --git a/python/private/toolchains_repo.bzl b/python/private/toolchains_repo.bzl
index 4b6bd11..c7b6178 100644
--- a/python/private/toolchains_repo.bzl
+++ b/python/private/toolchains_repo.bzl
@@ -240,8 +240,50 @@
toolchain_aliases = repository_rule(
_toolchain_aliases_impl,
- doc = """Creates a repository with a shorter name meant for the host platform, which contains
- a BUILD.bazel file declaring aliases to the host platform's targets.
+ doc = """\
+Creates a repository with a shorter name only referencing the python version,
+it contains a BUILD.bazel file declaring aliases to the host platform's targets
+and is a great fit for any usage related to setting up toolchains for build
+actions.""",
+ attrs = {
+ "platforms": attr.string_list(
+ doc = "List of platforms for which aliases shall be created",
+ ),
+ "python_version": attr.string(doc = "The Python version."),
+ "user_repository_name": attr.string(
+ mandatory = True,
+ doc = "The base name for all created repositories, like 'python38'.",
+ ),
+ "_rules_python_workspace": attr.label(default = Label("//:WORKSPACE")),
+ },
+)
+
+def _host_toolchain_impl(rctx):
+ rctx.file("BUILD.bazel", """\
+# Generated by python/private/toolchains_repo.bzl
+
+exports_files(["python"], visibility = ["//visibility:public"])
+""")
+
+ (os_name, arch) = get_host_os_arch(rctx)
+ host_platform = get_host_platform(os_name, arch)
+ host_python = rctx.path(
+ Label(
+ "@@{py_repository}_{host_platform}//:python".format(
+ py_repository = rctx.attr.name[:-len("_host")],
+ host_platform = host_platform,
+ ),
+ ),
+ )
+ rctx.symlink(host_python, "python")
+
+host_toolchain = repository_rule(
+ _host_toolchain_impl,
+ doc = """\
+Creates a repository with a shorter name meant to be used in the repository_ctx,
+which needs to have `symlinks` for the interpreter. This is separate from the
+toolchain_aliases repo because referencing the `python` interpreter target from
+this repo causes an eager fetch of the toolchain for the host platform.
""",
attrs = {
"platforms": attr.string_list(
diff --git a/python/repositories.bzl b/python/repositories.bzl
index 21becb5..7422a50 100644
--- a/python/repositories.bzl
+++ b/python/repositories.bzl
@@ -27,6 +27,7 @@
load("//python/private:internal_config_repo.bzl", "internal_config_repo")
load(
"//python/private:toolchains_repo.bzl",
+ "host_toolchain",
"multi_toolchain_aliases",
"toolchain_aliases",
"toolchains_repo",
@@ -589,6 +590,13 @@
platform = platform,
))
+ host_toolchain(
+ name = name + "_host",
+ python_version = python_version,
+ user_repository_name = name,
+ platforms = loaded_platforms,
+ )
+
toolchain_aliases(
name = name,
python_version = python_version,