refactor!(toolchain): remove uname dep in the repository_rule stage (#2406)
Before this PR we would shell out to `uname` on UNIX systems to get the
`arch`
of the toolchain - on Windows we would not need to do it because there
used to
be only a single Windows platform. With this change we can correctly
support
the resolution of the python interpreter on various platforms and I have
also
added an env variable to customize the selection, so that users can use
`musl`
or a `freethreaded` interpreter if they wish.
As part of this change, I have restricted visibility of the config
settings
used in the toolchain alias repo so that we are creating fewer targets.
This is
a very good time to do this before `1.0.0`.
Fixes #2145
Work towards #2276
Work towards #2386
Work towards #1211 to unblock #2402
Work towards #1361
---------
Co-authored-by: Richard Levasseur <richardlev@gmail.com>
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5eaa3fa..16ea38b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -52,16 +52,39 @@
{#v0-0-0-changed}
### Changed
+
+**Breaking**:
+* (toolchains) stop exposing config settings in python toolchain alias repos.
+ Please consider depending on the flags defined in
+ `//python/config_setting/...` and the `@platforms` package instead.
+* (toolchains) consumers who were depending on the `MACOS_NAME` and the `arch`
+ attribute in the `PLATFORMS` list, please update your code to respect the new
+ values. The values now correspond to the values available in the
+ `@platforms//` package constraint values.
+* (toolchains) `host_platform` and `interpreter` constants are no longer created
+ in the `toolchain` generated alias `.bzl` files. If you need to access the
+ host interpreter during the `repository_rule` evaluation, please use the
+ `@python_{version}_host//:python` targets created by
+ {bzl:obj}`python_register_toolchains` and
+ {bzl:obj}`python_register_multi_toolchains` macros or the {bzl:obj}`python`
+ bzlmod extension.
+
+Other changes:
* (python_repository) Start honoring the `strip_prefix` field for `zstd` archives.
{#v0-0-0-fixed}
### Fixed
-* Nothing fixed.
+* (toolchains) stop depending on `uname` to get the value of the host platform.
{#v0-0-0-added}
### Added
* (gazelle): Parser failures will now be logged to the terminal. Additional
details can be logged by setting `GAZELLE_VERBOSE=1`.
+* (toolchains) allow users to select which variant of the support host toolchain
+ they would like to use through
+ `RULES_PYTHON_REPO_TOOLCHAIN_{VERSION}_{OS}_{ARCH}` env variable setting. For
+ example, this allows one to use `freethreaded` python interpreter in the
+ `repository_rule` to build a wheel from `sdist`.
{#v0-0-0-removed}
### Removed
diff --git a/WORKSPACE b/WORKSPACE
index b77918f..46ebbc8 100644
--- a/WORKSPACE
+++ b/WORKSPACE
@@ -86,7 +86,7 @@
_py_gazelle_deps()
# This interpreter is used for various rules_python dev-time tools
-load("@python//3.11.9:defs.bzl", "interpreter")
+interpreter = "@python_3_11_9_host//:python"
#####################
# Install twine for our own runfiles wheel publishing.
diff --git a/docs/environment-variables.md b/docs/environment-variables.md
index 2a00529..906281d 100644
--- a/docs/environment-variables.md
+++ b/docs/environment-variables.md
@@ -15,6 +15,16 @@
* `TRACE`
:::
+:::{envvar} RULES_PYTHON_REPO_TOOLCHAIN_VERSION_OS_ARCH
+
+Determines the python interpreter platform to be used for a particular
+interpreter `(version, os, arch)` triple to be used in repository rules.
+Replace the `VERSION_OS_ARCH` part with actual values when using, e.g.
+`3_13_0_linux_x86_64`. The version values must have `_` instead of `.` and the
+os, arch values are the same as the ones mentioned in the
+`//python:versions.bzl` file.
+:::
+
:::{envvar} RULES_PYTHON_PIP_ISOLATED
Determines if `--isolated` is used with pip.
diff --git a/examples/bzlmod/MODULE.bazel.lock b/examples/bzlmod/MODULE.bazel.lock
index 5a546c2..8bad32a 100644
--- a/examples/bzlmod/MODULE.bazel.lock
+++ b/examples/bzlmod/MODULE.bazel.lock
@@ -1562,7 +1562,7 @@
},
"@@rules_python~//python/extensions:pip.bzl%pip": {
"general": {
- "bzlTransitiveDigest": "MwmpiMn2qoAVC+3E9MF3E98fB8v1utYBfMa0frXyi7g=",
+ "bzlTransitiveDigest": "mCwiXbsZmReVgs884fZHYfxaZaL9mFG+prEnH/lpE9g=",
"usagesDigest": "VmrNvB/4EhzsYieLDka9584M+pYKPpjNLl3Wcb5rx/c=",
"recordedFileInputs": {
"@@//requirements_lock_3_10.txt": "5e7083982a7e60f34998579a0ae83b520d46ab8f2552cc51337217f024e6def5",
@@ -7035,7 +7035,7 @@
},
"@@rules_python~//python/private/pypi:pip.bzl%pip_internal": {
"general": {
- "bzlTransitiveDigest": "Kx383BMHUpAHEjRiU5aWU4QTRQVg+Uu+Mgi7jVxuz0c=",
+ "bzlTransitiveDigest": "Xu1N6572iHVqGChH12PpMhprC21k3CpjRZVpm3FmE2c=",
"usagesDigest": "/lZXl/ZgP+u5PE8WkeWTyYBsvX9XQWFn1antj5qrBzQ=",
"recordedFileInputs": {
"@@rules_python~//tools/publish/requirements_linux.txt": "8175b4c8df50ae2f22d1706961884beeb54e7da27bd2447018314a175981997d",
diff --git a/python/private/python_register_toolchains.bzl b/python/private/python_register_toolchains.bzl
index 98c8e5b..cd3e9cb 100644
--- a/python/private/python_register_toolchains.bzl
+++ b/python/private/python_register_toolchains.bzl
@@ -160,7 +160,11 @@
platform = platform,
))
- host_toolchain(name = name + "_host")
+ host_toolchain(
+ name = name + "_host",
+ platforms = loaded_platforms,
+ python_version = python_version,
+ )
toolchain_aliases(
name = name,
diff --git a/python/private/repo_utils.bzl b/python/private/repo_utils.bzl
index e0bf69a..0e3f7b0 100644
--- a/python/private/repo_utils.bzl
+++ b/python/private/repo_utils.bzl
@@ -41,6 +41,10 @@
Returns:
A struct with attributes logging: trace, debug, info, warn, fail.
+ Please use `return logger.fail` when using the `fail` method, because
+ it makes `buildifier` happy and ensures that other implementation of
+ the logger injected into the function work as expected by terminating
+ on the given line.
"""
if _is_repo_debug_enabled(mrctx):
verbosity_level = "DEBUG"
@@ -140,7 +144,7 @@
result = mrctx.execute(arguments, environment = environment, **kwargs)
if fail_on_error and result.return_code != 0:
- logger.fail((
+ return logger.fail((
"repo.execute: {op}: end: failure:\n" +
" command: {cmd}\n" +
" return code: {return_code}\n" +
diff --git a/python/private/toolchain_aliases.bzl b/python/private/toolchain_aliases.bzl
new file mode 100644
index 0000000..31ac4a8
--- /dev/null
+++ b/python/private/toolchain_aliases.bzl
@@ -0,0 +1,74 @@
+# Copyright 2024 The Bazel Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Create toolchain alias targets."""
+
+load("@rules_python//python:versions.bzl", "PLATFORMS")
+
+def toolchain_aliases(*, name, platforms, visibility = None, native = native):
+ """Create toolchain aliases for the python toolchains.
+
+ Args:
+ name: {type}`str` The name of the current repository.
+ platforms: {type}`platforms` The list of platforms that are supported
+ for the current toolchain repository.
+ visibility: {type}`list[Target] | None` The visibility of the aliases.
+ native: The native struct used in the macro, useful for testing.
+ """
+ for platform in PLATFORMS.keys():
+ if platform not in platforms:
+ continue
+
+ native.config_setting(
+ name = platform,
+ flag_values = PLATFORMS[platform].flag_values,
+ constraint_values = PLATFORMS[platform].compatible_with,
+ visibility = ["//visibility:private"],
+ )
+
+ prefix = name
+ for name in [
+ "files",
+ "includes",
+ "libpython",
+ "py3_runtime",
+ "python_headers",
+ "python_runtimes",
+ ]:
+ native.alias(
+ name = name,
+ actual = select({
+ ":" + platform: "@{}_{}//:{}".format(prefix, platform, name)
+ for platform in platforms
+ }),
+ visibility = visibility,
+ )
+
+ native.alias(
+ name = "python3",
+ actual = select({
+ ":" + platform: "@{}_{}//:{}".format(prefix, platform, "python.exe" if "windows" in platform else "bin/python3")
+ for platform in platforms
+ }),
+ visibility = visibility,
+ )
+ native.alias(
+ name = "pip",
+ actual = select({
+ ":" + platform: "@{}_{}//:python_runtimes".format(prefix, platform)
+ for platform in platforms
+ if "windows" not in platform
+ }),
+ visibility = visibility,
+ )
diff --git a/python/private/toolchains_repo.bzl b/python/private/toolchains_repo.bzl
index d21fb53..7e9a0c7 100644
--- a/python/private/toolchains_repo.bzl
+++ b/python/private/toolchains_repo.bzl
@@ -25,8 +25,6 @@
load(
"//python:versions.bzl",
- "LINUX_NAME",
- "MACOS_NAME",
"PLATFORMS",
"WINDOWS_NAME",
)
@@ -126,43 +124,26 @@
)
def _toolchain_aliases_impl(rctx):
- logger = repo_utils.logger(rctx)
- (os_name, arch) = _get_host_os_arch(rctx, logger)
-
- host_platform = _get_host_platform(os_name, arch)
-
- is_windows = (os_name == WINDOWS_NAME)
- python3_binary_path = "python.exe" if is_windows else "bin/python3"
-
# Base BUILD file for this repository.
build_contents = """\
# Generated by python/private/toolchains_repo.bzl
+load("@rules_python//python/private:toolchain_aliases.bzl", "toolchain_aliases")
+
package(default_visibility = ["//visibility:public"])
-load("@rules_python//python:versions.bzl", "gen_python_config_settings")
-gen_python_config_settings()
+
exports_files(["defs.bzl"])
PLATFORMS = [
{loaded_platforms}
]
-alias(name = "files", actual = select({{":" + item: "@{py_repository}_" + item + "//:files" for item in PLATFORMS}}))
-alias(name = "includes", actual = select({{":" + item: "@{py_repository}_" + item + "//:includes" for item in PLATFORMS}}))
-alias(name = "libpython", actual = select({{":" + item: "@{py_repository}_" + item + "//:libpython" for item in PLATFORMS}}))
-alias(name = "py3_runtime", actual = select({{":" + item: "@{py_repository}_" + item + "//:py3_runtime" for item in PLATFORMS}}))
-alias(name = "python_headers", actual = select({{":" + item: "@{py_repository}_" + item + "//:python_headers" for item in PLATFORMS}}))
-alias(name = "python_runtimes", actual = select({{":" + item: "@{py_repository}_" + item + "//:python_runtimes" for item in PLATFORMS}}))
-alias(name = "python3", actual = select({{":" + item: "@{py_repository}_" + item + "//:" + ("python.exe" if "windows" in item else "bin/python3") for item in PLATFORMS}}))
+toolchain_aliases(
+ name = "{py_repository}",
+ platforms = PLATFORMS,
+)
""".format(
py_repository = rctx.attr.user_repository_name,
loaded_platforms = "\n".join([" \"{}\",".format(p) for p in rctx.attr.platforms]),
)
- if not is_windows:
- build_contents += """\
-alias(name = "pip", actual = select({{":" + item: "@{py_repository}_" + item + "//:python_runtimes" for item in PLATFORMS if "windows" not in item}}))
-""".format(
- py_repository = rctx.attr.user_repository_name,
- host_platform = host_platform,
- )
rctx.file("BUILD.bazel", build_contents)
# Expose a Starlark file so rules can know what host platform we used and where to find an interpreter
@@ -181,9 +162,6 @@
)
load("{rules_python}//python:pip.bzl", _compile_pip_requirements = "compile_pip_requirements")
-host_platform = "{host_platform}"
-interpreter = "@{py_repository}_{host_platform}//:{python3_binary_path}"
-
def py_binary(name, **kwargs):
return _py_binary(
name = name,
@@ -214,10 +192,7 @@
)
""".format(
- host_platform = host_platform,
- py_repository = rctx.attr.user_repository_name,
python_version = rctx.attr.python_version,
- python3_binary_path = python3_binary_path,
rules_python = get_repository_name(rctx.attr._rules_python_workspace),
))
@@ -243,15 +218,21 @@
)
def _host_toolchain_impl(rctx):
- logger = repo_utils.logger(rctx)
rctx.file("BUILD.bazel", """\
# Generated by python/private/toolchains_repo.bzl
exports_files(["python"], visibility = ["//visibility:public"])
""")
- (os_name, arch) = _get_host_os_arch(rctx, logger)
- host_platform = _get_host_platform(os_name, arch)
+ os_name = repo_utils.get_platforms_os_name(rctx)
+ host_platform = _get_host_platform(
+ rctx = rctx,
+ logger = repo_utils.logger(rctx),
+ python_version = rctx.attr.python_version,
+ os_name = os_name,
+ cpu_name = repo_utils.get_platforms_cpu_name(rctx),
+ platforms = rctx.attr.platforms,
+ )
repo = "@@{py_repository}_{host_platform}".format(
py_repository = rctx.attr.name[:-len("_host")],
host_platform = host_platform,
@@ -320,6 +301,8 @@
this repo causes an eager fetch of the toolchain for the host platform.
""",
attrs = {
+ "platforms": attr.string_list(mandatory = True),
+ "python_version": attr.string(mandatory = True),
"_rule_name": attr.string(default = "host_toolchain"),
"_rules_python_workspace": attr.label(default = Label("//:WORKSPACE")),
},
@@ -336,16 +319,12 @@
load(
"@{repository_name}//:defs.bzl",
_compile_pip_requirements = "compile_pip_requirements",
- _host_platform = "host_platform",
- _interpreter = "interpreter",
_py_binary = "py_binary",
_py_console_script_binary = "py_console_script_binary",
_py_test = "py_test",
)
compile_pip_requirements = _compile_pip_requirements
-host_platform = _host_platform
-interpreter = _interpreter
py_binary = _py_binary
py_console_script_binary = _py_console_script_binary
py_test = _py_test
@@ -388,57 +367,51 @@
def sanitize_platform_name(platform):
return platform.replace("-", "_")
-def _get_host_platform(os_name, arch):
+def _get_host_platform(*, rctx, logger, python_version, os_name, cpu_name, platforms):
"""Gets the host platform.
Args:
- os_name: the host OS name.
- arch: the host arch.
+ rctx: {type}`repository_ctx`.
+ logger: {type}`struct`.
+ python_version: {type}`string`.
+ os_name: {type}`str` the host OS name.
+ cpu_name: {type}`str` the host CPU name.
+ platforms: {type}`list[str]` the list of loaded platforms.
Returns:
The host platform.
"""
- host_platform = None
- for platform, meta in PLATFORMS.items():
- if "freethreaded" in platform:
- continue
+ candidates = []
+ for platform in platforms:
+ meta = PLATFORMS[platform]
- if meta.os_name == os_name and meta.arch == arch:
- host_platform = platform
- if not host_platform:
- fail("No platform declared for host OS {} on arch {}".format(os_name, arch))
- return host_platform
+ if meta.os_name == os_name and meta.arch == cpu_name:
+ candidates.append(platform)
-def _get_host_os_arch(rctx, logger):
- """Infer the host OS name and arch from a repository context.
+ if len(candidates) == 1:
+ return candidates[0]
- Args:
- rctx: Bazel's repository_ctx.
- logger: Logger to use for operations.
+ if candidates:
+ env_var = "RULES_PYTHON_REPO_TOOLCHAIN_{}_{}_{}".format(
+ python_version.replace(".", "_"),
+ os_name.upper(),
+ cpu_name.upper(),
+ )
+ preference = repo_utils.getenv(rctx, env_var)
+ if preference == None:
+ logger.info("Consider using '{}' to select from one of the platforms: {}".format(
+ env_var,
+ candidates,
+ ))
+ elif preference not in candidates:
+ return logger.fail("Please choose a preferred interpreter out of the following platforms: {}".format(candidates))
+ else:
+ candidates = [preference]
- Returns:
- A tuple with the host OS name and arch.
- """
- os_name = rctx.os.name
+ if candidates:
+ return candidates[0]
- # We assume the arch for Windows is always x86_64.
- if "windows" in os_name.lower():
- arch = "x86_64"
-
- # Normalize the os_name. E.g. os_name could be "OS windows server 2019".
- os_name = WINDOWS_NAME
- else:
- # This is not ideal, but bazel doesn't directly expose arch.
- arch = repo_utils.execute_unchecked(
- rctx,
- op = "GetUname",
- arguments = [repo_utils.which_checked(rctx, "uname"), "-m"],
- logger = logger,
- ).stdout.strip()
-
- # Normalize the os_name.
- if "mac" in os_name.lower():
- os_name = MACOS_NAME
- elif "linux" in os_name.lower():
- os_name = LINUX_NAME
-
- return (os_name, arch)
+ return logger.fail("Could not find a compatible 'host' python for '{os_name}', '{cpu_name}' from the loaded platforms: {platforms}".format(
+ os_name = os_name,
+ cpu_name = cpu_name,
+ platforms = platforms,
+ ))
diff --git a/python/versions.bzl b/python/versions.bzl
index 688c4e2..d229b9d 100644
--- a/python/versions.bzl
+++ b/python/versions.bzl
@@ -15,8 +15,8 @@
"""The Python versions we use for the toolchains.
"""
-# Values returned by https://bazel.build/rules/lib/repository_os.
-MACOS_NAME = "mac os"
+# Values present in the @platforms//os package
+MACOS_NAME = "osx"
LINUX_NAME = "linux"
WINDOWS_NAME = "windows"
FREETHREADED = "freethreaded"
@@ -620,9 +620,8 @@
],
flag_values = {},
os_name = MACOS_NAME,
- # Matches the value returned from:
- # repository_ctx.execute(["uname", "-m"]).stdout.strip()
- arch = "arm64",
+ # Matches the value in @platforms//cpu package
+ arch = "aarch64",
),
"aarch64-unknown-linux-gnu": struct(
compatible_with = [
@@ -633,9 +632,7 @@
libc: "glibc",
},
os_name = LINUX_NAME,
- # Note: this string differs between OSX and Linux
- # Matches the value returned from:
- # repository_ctx.execute(["uname", "-m"]).stdout.strip()
+ # Matches the value in @platforms//cpu package
arch = "aarch64",
),
"armv7-unknown-linux-gnu": struct(
@@ -647,7 +644,8 @@
libc: "glibc",
},
os_name = LINUX_NAME,
- arch = "armv7",
+ # Matches the value in @platforms//cpu package
+ arch = "arm",
),
"i386-unknown-linux-gnu": struct(
compatible_with = [
@@ -658,7 +656,8 @@
libc: "glibc",
},
os_name = LINUX_NAME,
- arch = "i386",
+ # Matches the value in @platforms//cpu package
+ arch = "x86_32",
),
"ppc64le-unknown-linux-gnu": struct(
compatible_with = [
@@ -669,10 +668,8 @@
libc: "glibc",
},
os_name = LINUX_NAME,
- # Note: this string differs between OSX and Linux
- # Matches the value returned from:
- # repository_ctx.execute(["uname", "-m"]).stdout.strip()
- arch = "ppc64le",
+ # Matches the value in @platforms//cpu package
+ arch = "ppc",
),
"riscv64-unknown-linux-gnu": struct(
compatible_with = [
@@ -683,6 +680,7 @@
Label("//python/config_settings:py_linux_libc"): "glibc",
},
os_name = LINUX_NAME,
+ # Matches the value in @platforms//cpu package
arch = "riscv64",
),
"s390x-unknown-linux-gnu": struct(
@@ -694,9 +692,7 @@
Label("//python/config_settings:py_linux_libc"): "glibc",
},
os_name = LINUX_NAME,
- # Note: this string differs between OSX and Linux
- # Matches the value returned from:
- # repository_ctx.execute(["uname", "-m"]).stdout.strip()
+ # Matches the value in @platforms//cpu package
arch = "s390x",
),
"x86_64-apple-darwin": struct(
@@ -706,6 +702,7 @@
],
flag_values = {},
os_name = MACOS_NAME,
+ # Matches the value in @platforms//cpu package
arch = "x86_64",
),
"x86_64-pc-windows-msvc": struct(
@@ -715,6 +712,7 @@
],
flag_values = {},
os_name = WINDOWS_NAME,
+ # Matches the value in @platforms//cpu package
arch = "x86_64",
),
"x86_64-unknown-linux-gnu": struct(
@@ -726,6 +724,7 @@
libc: "glibc",
},
os_name = LINUX_NAME,
+ # Matches the value in @platforms//cpu package
arch = "x86_64",
),
}
diff --git a/tests/integration/compile_pip_requirements_test_from_external_repo/WORKSPACE b/tests/integration/compile_pip_requirements_test_from_external_repo/WORKSPACE
index 48caeb4..7834000 100644
--- a/tests/integration/compile_pip_requirements_test_from_external_repo/WORKSPACE
+++ b/tests/integration/compile_pip_requirements_test_from_external_repo/WORKSPACE
@@ -12,7 +12,6 @@
python_version = "3.9",
)
-load("@python39//:defs.bzl", "interpreter")
load("@rules_python//python:pip.bzl", "pip_parse")
local_repository(
@@ -22,7 +21,7 @@
pip_parse(
name = "pypi",
- python_interpreter_target = interpreter,
+ python_interpreter_target = "@python39_host//:python",
requirements_lock = "@compile_pip_requirements//:requirements_lock.txt",
)