blob: 93bbb52108fa9b125a8ffa2ce5ea49e2c00581e8 [file] [log] [blame]
# Copyright 2022 The Bazel Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Create a repository to hold the toolchains.
This follows guidance here:
https://docs.bazel.build/versions/main/skylark/deploying.html#registering-toolchains
The "complex computation" in our case is simply downloading large artifacts.
This guidance tells us how to avoid that: we put the toolchain targets in the
alias repository with only the toolchain attribute pointing into the
platform-specific repositories.
"""
load(
"//python:versions.bzl",
"FREETHREADED",
"MUSL",
"PLATFORMS",
"WINDOWS_NAME",
)
load(":repo_utils.bzl", "REPO_DEBUG_ENV_VAR", "repo_utils")
load(":text_util.bzl", "render")
# Template for one `py_toolchain_suite(...)` call in a generated BUILD file.
# Every placeholder is filled with an already-rendered Starlark expression by
# `toolchain_suite_content()`.
_SUITE_TEMPLATE = """
py_toolchain_suite(
flag_values = {flag_values},
target_settings = {target_settings},
prefix = {prefix},
python_version = {python_version},
set_python_version_constraint = {set_python_version_constraint},
target_compatible_with = {target_compatible_with},
user_repository_name = {user_repository_name},
)
""".lstrip()
# Header of the BUILD.bazel written by `_toolchains_repo_impl`; the rendered
# toolchain suites are appended directly after it. `{rules_python}` is replaced
# with the repo name of rules_python so the load uses a canonical (`@@`) label.
_WORKSPACE_TOOLCHAINS_BUILD_TEMPLATE = """
# Generated by python/private/toolchains_repo.bzl
#
# These can be registered in the workspace file or passed to --extra_toolchains
# flag. By default all these toolchains are registered by the
# python_register_toolchains macro so you don't normally need to interact with
# these targets.
load("@@{rules_python}//python/private:py_toolchain_suite.bzl", "py_toolchain_suite")
""".lstrip()
# BUILD.bazel content for the `toolchain_aliases` repository.
# Placeholders (filled by `_toolchain_aliases_impl`):
#   {loaded_platforms}: newline-joined, quoted, comma-terminated platform names.
#   {py_repository}: the `user_repository_name` attribute of the rule.
_TOOLCHAIN_ALIASES_BUILD_TEMPLATE = """
# Generated by python/private/toolchains_repo.bzl
load("@rules_python//python/private:toolchain_aliases.bzl", "toolchain_aliases")
package(default_visibility = ["//visibility:public"])
exports_files(["defs.bzl"])
PLATFORMS = [
{loaded_platforms}
]
toolchain_aliases(
name = "{py_repository}",
platforms = PLATFORMS,
)
""".lstrip()
# defs.bzl content for the `toolchain_aliases` repository.
#
# The generated file wraps py_binary, py_test, py_console_script_binary and
# compile_pip_requirements: each wrapper forces `python_version` to
# `{python_version}` and routes its kwargs through `with_deprecation.symbol`.
#
# Placeholders (filled by `_toolchain_aliases_impl`): {rules_python}, {name},
# {python_version}. Doubled braces (`{{` / `}}`) are escapes that survive
# `str.format` as literal braces in the generated Starlark.
_TOOLCHAIN_ALIASES_DEFS_TEMPLATE = """
# Generated by python/private/toolchains_repo.bzl
load("@@{rules_python}//python:pip.bzl", _compile_pip_requirements = "compile_pip_requirements")
load("@@{rules_python}//python/private:deprecation.bzl", "with_deprecation")
load("@@{rules_python}//python/private:text_util.bzl", "render")
load("@@{rules_python}//python:py_binary.bzl", _py_binary = "py_binary")
load("@@{rules_python}//python:py_test.bzl", _py_test = "py_test")
load(
"@@{rules_python}//python/entry_points:py_console_script_binary.bzl",
_py_console_script_binary = "py_console_script_binary",
)
def _with_deprecation(kwargs, *, name):
kwargs["python_version"] = "{python_version}"
return with_deprecation.symbol(
kwargs,
symbol_name = name,
old_load = "@{name}//:defs.bzl",
new_load = "@rules_python//python:{{}}.bzl".format(name),
snippet = render.call(name, **{{k: repr(v) for k,v in kwargs.items()}})
)
def py_binary(**kwargs):
return _py_binary(**_with_deprecation(kwargs, name = "py_binary"))
def py_console_script_binary(**kwargs):
return _py_console_script_binary(**_with_deprecation(kwargs, name = "py_console_script_binary"))
def py_test(**kwargs):
return _py_test(**_with_deprecation(kwargs, name = "py_test"))
def compile_pip_requirements(**kwargs):
return _compile_pip_requirements(**_with_deprecation(kwargs, name = "compile_pip_requirements"))
""".lstrip()
# BUILD.bazel content written verbatim by `_host_compatible_python_repo_impl`;
# it only exports the `python` file with public visibility.
_HOST_TOOLCHAIN_BUILD_CONTENT = """
# Generated by python/private/toolchains_repo.bzl
exports_files(["python"], visibility = ["//visibility:public"])
""".lstrip()
# Python script executed by `_host_compatible_python_repo_impl` with the freshly
# symlinked interpreter. It asserts that `sys.executable`, once resolved, is the
# same path as the resolved `{python}` placeholder (the interpreter file name).
# Doubled braces are `str.format` escapes that become the script's own `{}`.
_HOST_PYTHON_TESTER_TEMPLATE = """
from pathlib import Path
import sys
python = Path(sys.executable)
want_python = str(Path("{python}").resolve())
got_python = str(Path(sys.executable).resolve())
assert want_python == got_python, \
"Expected to use a different interpreter:\\nwant: '{{}}'\\n got: '{{}}'".format(
want_python,
got_python,
)
""".lstrip()
# Per-version `<python_version>/defs.bzl` content for the
# `multi_toolchain_aliases` repository.
#
# Same wrappers as the single-version defs template, except the deprecation
# message's `old_load` points at the `{python_version}` package of `@{name}`.
#
# Placeholders (filled by `_multi_toolchain_aliases_impl`): {rules_python},
# {name}, {python_version}. Doubled braces are `str.format` escapes.
_MULTI_TOOLCHAIN_ALIASES_DEFS_TEMPLATE = """
# Generated by python/private/toolchains_repo.bzl
load("@@{rules_python}//python:pip.bzl", _compile_pip_requirements = "compile_pip_requirements")
load("@@{rules_python}//python/private:deprecation.bzl", "with_deprecation")
load("@@{rules_python}//python/private:text_util.bzl", "render")
load("@@{rules_python}//python:py_binary.bzl", _py_binary = "py_binary")
load("@@{rules_python}//python:py_test.bzl", _py_test = "py_test")
load(
"@@{rules_python}//python/entry_points:py_console_script_binary.bzl",
_py_console_script_binary = "py_console_script_binary",
)
def _with_deprecation(kwargs, *, name):
kwargs["python_version"] = "{python_version}"
return with_deprecation.symbol(
kwargs,
symbol_name = name,
old_load = "@{name}//{python_version}:defs.bzl",
new_load = "@rules_python//python:{{}}.bzl".format(name),
snippet = render.call(name, **{{k: repr(v) for k,v in kwargs.items()}})
)
def py_binary(**kwargs):
return _py_binary(**_with_deprecation(kwargs, name = "py_binary"))
def py_console_script_binary(**kwargs):
return _py_console_script_binary(**_with_deprecation(kwargs, name = "py_console_script_binary"))
def py_test(**kwargs):
return _py_test(**_with_deprecation(kwargs, name = "py_test"))
def compile_pip_requirements(**kwargs):
return _compile_pip_requirements(**_with_deprecation(kwargs, name = "compile_pip_requirements"))
""".lstrip()
# `pip.bzl` content for the `multi_toolchain_aliases` repository: a
# `multi_pip_parse` wrapper with `python_versions` and `minor_mapping` baked in.
# Placeholders (filled by `_multi_toolchain_aliases_impl`): {rules_python},
# {python_versions} (the keys of the rule's `python_versions` attribute) and
# {minor_mapping} (a rendered dict).
_MULTI_TOOLCHAIN_ALIASES_PIP_TEMPLATE = """
# Generated by python/private/toolchains_repo.bzl
load("@@{rules_python}//python:pip.bzl", "pip_parse", _multi_pip_parse = "multi_pip_parse")
def multi_pip_parse(name, requirements_lock, **kwargs):
return _multi_pip_parse(
name = name,
python_versions = {python_versions},
requirements_lock = requirements_lock,
minor_mapping = {minor_mapping},
**kwargs
)
""".lstrip()
def python_toolchain_build_file_content(
        prefix,
        python_version,
        set_python_version_constraint,
        user_repository_name,
        loaded_platforms):
    """Renders one `py_toolchain_suite` definition per loaded platform.

    Args:
        prefix: string prepended to each platform name to form the suite's
            `prefix`.
        python_version: the Python version passed to every suite.
        set_python_version_constraint: string, "True" if the toolchain should
            have the Python version constraint added as a requirement for
            matching the toolchain, "False" if not.
        user_repository_name: base repository name; each suite uses
            `<user_repository_name>_<platform>`.
        loaded_platforms: {type}`dict` mapping a platform name to its metadata
            struct, as defined in `//python:versions.bzl`. Each value must
            provide `compatible_with` and `flag_values`.

    Returns:
        {type}`str` the suite definitions, separated by blank lines.
    """
    suites = [
        toolchain_suite_content(
            flag_values = info.flag_values,
            prefix = "{}{}".format(prefix, platform_name),
            python_version = python_version,
            set_python_version_constraint = set_python_version_constraint,
            target_compatible_with = info.compatible_with,
            # No extra config settings are attached at this level.
            target_settings = [],
            user_repository_name = "{}_{}".format(user_repository_name, platform_name),
        )
        for platform_name, info in loaded_platforms.items()
    ]
    return "\n\n".join(suites)
def toolchain_suite_content(
        *,
        flag_values,
        prefix,
        python_version,
        set_python_version_constraint,
        target_compatible_with,
        target_settings,
        user_repository_name):
    """Renders a single `py_toolchain_suite(...)` call as BUILD file text.

    Args:
        flag_values: dict rendered as the suite's `flag_values`; keys are
            converted with `str()` before being quoted.
        prefix: value rendered as the quoted `prefix` string.
        python_version: value rendered as the quoted `python_version` string.
        set_python_version_constraint: value rendered as a quoted string.
        target_compatible_with: list rendered as `target_compatible_with`.
        target_settings: list rendered as `target_settings`.
        user_repository_name: value rendered as a quoted string.

    Returns:
        {type}`str` the filled-in `_SUITE_TEMPLATE`.
    """

    # Keys may be Label objects; stringify them first so they are rendered as
    # plain quoted label strings.
    flag_values_text = render.indent(render.dict(
        flag_values,
        key_repr = lambda key: repr(str(key)),
    )).lstrip()
    compatible_with_text = render.indent(render.list(target_compatible_with)).lstrip()
    target_settings_text = render.list(target_settings, hanging_indent = " ")

    return _SUITE_TEMPLATE.format(
        flag_values = flag_values_text,
        target_settings = target_settings_text,
        prefix = render.str(prefix),
        python_version = render.str(python_version),
        set_python_version_constraint = render.str(set_python_version_constraint),
        target_compatible_with = compatible_with_text,
        user_repository_name = render.str(user_repository_name),
    )
def _toolchains_repo_impl(rctx):
    """Writes the BUILD.bazel holding the toolchain suites for the requested platforms.

    Args:
        rctx: {type}`repository_ctx` of the `toolchains_repo` rule.
    """

    # Keep only the known platforms that were requested, in PLATFORMS order.
    requested = rctx.attr.platforms
    selected_platforms = {}
    for platform_name, info in PLATFORMS.items():
        if platform_name in requested:
            selected_platforms[platform_name] = info

    header = _WORKSPACE_TOOLCHAINS_BUILD_TEMPLATE.format(
        rules_python = rctx.attr._rules_python_workspace.repo_name,
    )
    suites = python_toolchain_build_file_content(
        prefix = "",
        python_version = rctx.attr.python_version,
        # The suite template expects the boolean in its string form.
        set_python_version_constraint = str(rctx.attr.set_python_version_constraint),
        user_repository_name = rctx.attr.user_repository_name,
        loaded_platforms = selected_platforms,
    )
    rctx.file("BUILD.bazel", header + suites)
# Repository rule implemented by `_toolchains_repo_impl`: it writes a single
# BUILD.bazel containing one `py_toolchain_suite` per entry of `platforms` that
# is also a key of `PLATFORMS`.
toolchains_repo = repository_rule(
_toolchains_repo_impl,
doc = "Creates a repository with toolchain definitions for all known platforms " +
"which can be registered or selected.",
attrs = {
"platforms": attr.string_list(doc = "List of platforms for which the toolchain definitions shall be created"),
"python_version": attr.string(doc = "The Python version."),
"set_python_version_constraint": attr.bool(doc = "if target_compatible_with for the toolchain should set the version constraint"),
"user_repository_name": attr.string(doc = "what the user chose for the base name"),
# Only used to obtain the repo name of rules_python (`.repo_name`).
"_rules_python_workspace": attr.label(default = Label("//:WORKSPACE")),
},
)
def _toolchain_aliases_impl(rctx):
    """Writes the BUILD.bazel and defs.bzl of a `toolchain_aliases` repository.

    Args:
        rctx: {type}`repository_ctx` of the `toolchain_aliases` rule.
    """

    # One quoted, comma-terminated list entry per platform for the PLATFORMS
    # list in the generated BUILD file.
    platform_entries = []
    for platform_name in rctx.attr.platforms:
        platform_entries.append(" \"{}\",".format(platform_name))

    # Base BUILD file for this repository.
    rctx.file("BUILD.bazel", _TOOLCHAIN_ALIASES_BUILD_TEMPLATE.format(
        py_repository = rctx.attr.user_repository_name,
        loaded_platforms = "\n".join(platform_entries),
    ))

    # Expose a Starlark file so rules can know what host platform we used and
    # where to find an interpreter when using repository_ctx.path, which
    # doesn't understand aliases.
    defs_content = _TOOLCHAIN_ALIASES_DEFS_TEMPLATE.format(
        name = rctx.attr.name,
        python_version = rctx.attr.python_version,
        rules_python = rctx.attr._rules_python_workspace.repo_name,
    )
    rctx.file("defs.bzl", content = defs_content)
# Repository rule implemented by `_toolchain_aliases_impl`: it writes a
# BUILD.bazel (platform list + `toolchain_aliases` call) and a `defs.bzl` with
# deprecated, version-pinned rule wrappers.
toolchain_aliases = repository_rule(
_toolchain_aliases_impl,
doc = """\
Creates a repository with a shorter name only referencing the python version,
it contains a BUILD.bazel file declaring aliases to the host platform's targets
and is a great fit for any usage related to setting up toolchains for build
actions.""",
attrs = {
"platforms": attr.string_list(
doc = "List of platforms for which aliases shall be created",
),
"python_version": attr.string(doc = "The Python version."),
"user_repository_name": attr.string(
mandatory = True,
doc = "The base name for all created repositories, like 'python38'.",
),
# Only used to obtain the repo name of rules_python (`.repo_name`).
"_rules_python_workspace": attr.label(default = Label("//:WORKSPACE")),
},
# Declares the repo-debug environment variable as an input of this rule.
environ = [REPO_DEBUG_ENV_VAR],
)
def _host_compatible_python_repo_impl(rctx):
    """Populates the host repo with links into the matching runtime repo.

    Picks the runtime repo compatible with the host OS/CPU, symlinks its
    contents into this repo, then runs the linked interpreter on a small
    script to verify that it is the interpreter actually being executed.

    Args:
        rctx: {type}`repository_ctx` of the `host_compatible_python_repo` rule.
    """
    rctx.file("BUILD.bazel", _HOST_TOOLCHAIN_BUILD_CONTENT)

    host_os = repo_utils.get_platforms_os_name(rctx)
    backing_repo_name = _get_host_impl_repo_name(
        rctx = rctx,
        logger = repo_utils.logger(rctx),
        python_version = rctx.attr.python_version,
        os_name = host_os,
        cpu_name = repo_utils.get_platforms_cpu_name(rctx),
        platforms = rctx.attr.platforms,
    )

    # Bzlmod quirk: a repository rule can't, in its **implementation
    # function**, resolve an apparent repo name referring to a repo created by
    # the same bzlmod extension. A canonical (`@@`) label works around that.
    canonical_repo = "@@{}".format(backing_repo_name)

    rctx.report_progress("Symlinking interpreter files to the target platform")
    backing_build_file = rctx.path(Label("{repo}//:BUILD.bazel".format(repo = canonical_repo)))

    # Special files that the repo rule machinery creates on its own; these
    # must not be linked over.
    auto_generated_files = [
        "BUILD.bazel",
        "MODULE.bazel",
        "REPO.bazel",
        "WORKSPACE",
        "WORKSPACE.bazel",
        "WORKSPACE.bzlmod",
    ]

    # Linking only the interpreter binary might not work on platforms without
    # symlink support. rctx.symlink falls back to copying in such cases, so
    # every top-level entry of the backing repo is linked instead, which
    # should still be faster than re-downloading it.
    #
    # rctx.symlink works on all platforms that bazel supports, with and
    # without symlink support. For better performance users should enable the
    # symlink startup option, however that requires admin privileges.
    for entry in backing_build_file.dirname.readdir():
        if entry.basename not in auto_generated_files:
            rctx.symlink(entry, entry.basename)

    python_binary = "python"
    if host_os == WINDOWS_NAME:
        python_binary = "python.exe"

    # Ensure that the interpreter can be run and that the interpreter being
    # executed is the one that was just linked.
    tester_path = rctx.path("python_tester.py")
    tester_source = _HOST_PYTHON_TESTER_TEMPLATE.format(
        repo = canonical_repo.strip("@"),
        python = python_binary,
    )
    rctx.file(tester_path, tester_source)

    repo_utils.execute_checked(
        rctx,
        op = "CheckHostInterpreter",
        arguments = [
            rctx.path(python_binary),
            # Isolated mode; this option implies -E, -P and -s, so that
            # user-set environment variables such as PYTHONPATH cannot
            # interfere with this invocation.
            "-I",
            tester_path,
        ],
    )

    tester_deleted = rctx.delete(tester_path)
    if not tester_deleted:
        fail("Failed to delete the python tester")
# NOTE: The term "toolchain" is a misnomer for this rule. This doesn't define
# a repo with toolchains or toolchain implementations.
#
# Implemented by `_host_compatible_python_repo_impl`. The dict attributes
# `arch_names`, `impl_repo_names`, `os_names` and `python_versions` are looked
# up with the stringified index (`"0"`, `"1"`, ...) of the corresponding entry
# in `platforms`; see `_get_host_impl_repo_name`.
host_compatible_python_repo = repository_rule(
implementation = _host_compatible_python_repo_impl,
doc = """\
Creates a repository with a shorter name meant to be used in the repository_ctx,
which needs to have `symlinks` for the interpreter. This is separate from the
toolchain_aliases repo because referencing the `python` interpreter target from
this repo causes an eager fetch of the toolchain for the host platform.
This repo has two ways in which is it called:
1. Workspace. The `platforms` attribute is set, which are keys into the
PLATFORMS global. It assumes `name` + <matching platform name> is a
valid repo name which it can use as the backing repo.
2. Bzlmod. All platform and backing repo information is passed in via the
arch_names, impl_repo_names, os_names, python_versions attributes.
""",
attrs = {
"arch_names": attr.string_dict(
doc = """
Arch (cpu) names. Only set in bzlmod. Keyed by index in `platforms`
""",
),
# Required whenever `os_names` is set: the implementation fails otherwise.
"base_name": attr.string(
doc = """
The name arg, but without bzlmod canonicalization applied. Only set in bzlmod.
""",
),
"impl_repo_names": attr.string_dict(
doc = """
The names of backing runtime repos. Only set in bzlmod. The names must be repos
in the same extension as creates the host repo. Keyed by index in `platforms`.
""",
),
# A non-empty value switches the implementation to the bzlmod code path.
"os_names": attr.string_dict(
doc = """
If set, overrides the platform metadata. Only set in bzlmod. Keyed by
index in `platforms`
""",
),
"platforms": attr.string_list(
mandatory = True,
doc = """
Platform names (workspace) or platform name-like keys (bzlmod)
NOTE: The order of this list matters. The first platform that is compatible
with the host will be selected; this can be customized by using the
`RULES_PYTHON_REPO_TOOLCHAIN_*` env vars.
The values passed vary depending on workspace vs bzlmod.
Workspace: the values are keys into the `PLATFORMS` dict and are the suffix
to append to `name` to point to the backing repo name.
Bzlmod: The values are arbitrary keys to create the platform map from the
other attributes (os_name, arch_names, et al).
""",
),
"python_version": attr.string(
doc = """
Full python version, Major.Minor.Micro.
Only set in workspace calls.
""",
),
"python_versions": attr.string_dict(
doc = """
If set, the Python version for the corresponding selected platform. Values in
Major.Minor.Micro format. Keyed by index in `platforms`.
""",
),
# NOTE(review): not read anywhere in this file; presumably consumed by
# repo_utils helpers -- confirm.
"_rule_name": attr.string(default = "host_compatible_python_repo"),
"_rules_python_workspace": attr.label(default = Label("//:WORKSPACE")),
},
)
def _multi_toolchain_aliases_impl(rctx):
    """Writes per-version `defs.bzl` packages plus a root `pip.bzl`.

    Args:
        rctx: {type}`repository_ctx` of the `multi_toolchain_aliases` rule.
    """
    rules_python = rctx.attr._rules_python_workspace.repo_name
    versions = rctx.attr.python_versions

    # One package per Python version, each containing only a defs.bzl.
    for python_version, repository_name in versions.items():
        defs_content = _MULTI_TOOLCHAIN_ALIASES_DEFS_TEMPLATE.format(
            repository_name = repository_name,
            name = rctx.attr.name,
            python_version = python_version,
            rules_python = rules_python,
        )
        rctx.file("{}/defs.bzl".format(python_version), content = defs_content)
        rctx.file("{}/BUILD.bazel".format(python_version), "")

    minor_mapping_text = render.indent(
        render.dict(rctx.attr.minor_mapping),
        indent = " " * 8,
    ).lstrip()
    rctx.file("pip.bzl", content = _MULTI_TOOLCHAIN_ALIASES_PIP_TEMPLATE.format(
        python_versions = versions.keys(),
        minor_mapping = minor_mapping_text,
        rules_python = rules_python,
    ))

    # An empty root BUILD file makes the repo root a package so that pip.bzl
    # can be loaded.
    rctx.file("BUILD.bazel", "")
# Repository rule implemented by `_multi_toolchain_aliases_impl`: for every key
# of `python_versions` it writes a `<version>/defs.bzl` package, plus a root
# `pip.bzl` exposing a pre-configured `multi_pip_parse`.
multi_toolchain_aliases = repository_rule(
_multi_toolchain_aliases_impl,
attrs = {
"minor_mapping": attr.string_dict(doc = "The mapping between `X.Y` and `X.Y.Z` python version values"),
# Keys are the Python versions. The values are only passed to the defs
# template as `repository_name`, which that template does not reference.
"python_versions": attr.string_dict(doc = "The Python versions."),
# Only used to obtain the repo name of rules_python (`.repo_name`).
"_rules_python_workspace": attr.label(default = Label("//:WORKSPACE")),
},
)
def sorted_host_platform_names(platform_names):
    """Orders platform names by their precedence for host toolchain selection.

    When several runtimes are compatible with the host, the first compatible
    one is normally taken (the `RULES_PYTHON_REPO_TOOLCHAIN_*` environment
    variables can override that), so the order of the names matters.
    Historically that order was implied by the order of keys in a dict:

    * Regular platforms
    * The "-freethreaded" suffix
    * The "-musl" suffix

    This function makes the ordering explicit instead of relying on a dict
    layout that autoformatters tend to rearrange.

    Args:
        platform_names: a list of platform names

    Returns:
        list[str] the same values, but in the desired order.
    """

    # Ascending sort: a lower key means higher precedence. The musl marker is
    # the most significant component, the freethreaded marker the second.
    return sorted(
        platform_names,
        key = lambda name: (
            1 if MUSL in name else 0,
            1 if FREETHREADED in name else 0,
        ),
    )
def sorted_host_platforms(platform_map):
    """Rebuilds the platform map with keys in host-selection precedence order.

    The ordering is the one produced by `sorted_host_platform_names`.

    Args:
        platform_map: a mapping of platforms and their metadata.

    Returns:
        dict; the same values, but with the keys inserted in the desired
        order so that iteration happens in the desired order.
    """
    ordered = {}
    for platform_name in sorted_host_platform_names(platform_map.keys()):
        ordered[platform_name] = platform_map[platform_name]
    return ordered
def _get_host_impl_repo_name(*, rctx, logger, python_version, os_name, cpu_name, platforms):
    """Gets the name of the runtime repo that backs the host interpreter.

    The platforms in `platforms` are matched against the host OS and CPU. If
    more than one matches, the `RULES_PYTHON_REPO_TOOLCHAIN_<version>_<OS>_<CPU>`
    environment variable may name the platform to use; otherwise the first
    match (in `platforms` order) wins.

    Args:
        rctx: {type}`repository_ctx`.
        logger: {type}`struct`.
        python_version: {type}`string` used as the Python version of every
            platform in workspace mode and to build the environment variable
            name.
        os_name: {type}`str` the host OS name.
        cpu_name: {type}`str` the host CPU name.
        platforms: {type}`list[str]` the list of loaded platforms.

    Returns:
        {type}`str` the name of the repo providing the host-compatible
        runtime. Fails (via `logger.fail`) if no platform is compatible or if
        the environment variable names a platform that is not a candidate.
    """
    if rctx.attr.os_names:
        # Bzlmod: platform metadata comes from the rule attributes, which are
        # keyed by the stringified index into `platforms`.
        platform_map = {}
        base_name = rctx.attr.base_name
        if not base_name:
            fail("The `base_name` attribute must be set under bzlmod")

        for i, platform_name in enumerate(platforms):
            key = str(i)
            impl_repo_name = rctx.attr.impl_repo_names[key]

            # Derive the backing repo's name from this repo's own name by
            # swapping the base name for the backing repo's name.
            impl_repo_name = rctx.name.replace(base_name, impl_repo_name)
            platform_map[platform_name] = struct(
                os_name = rctx.attr.os_names[key],
                arch = rctx.attr.arch_names[key],
                python_version = rctx.attr.python_versions[key],
                impl_repo_name = impl_repo_name,
            )
    else:
        # Workspace: platform metadata comes from the PLATFORMS global and
        # the backing repo is named `<base_name>_<platform_name>`.
        base_name = rctx.name.removesuffix("_host")
        platform_map = {}
        for platform_name, info in sorted_host_platforms(PLATFORMS).items():
            platform_map[platform_name] = struct(
                os_name = info.os_name,
                arch = info.arch,
                python_version = python_version,
                impl_repo_name = "{}_{}".format(base_name, platform_name),
            )

    # (platform_name, meta) pairs compatible with the host, in `platforms`
    # order.
    candidates = []
    for platform in platforms:
        meta = platform_map[platform]

        if meta.os_name == os_name and meta.arch == cpu_name:
            candidates.append((platform, meta))

    if len(candidates) == 1:
        platform_name, meta = candidates[0]
        return meta.impl_repo_name

    if candidates:
        candidate_names = [name for name, _ in candidates]
        env_var = "RULES_PYTHON_REPO_TOOLCHAIN_{}_{}_{}".format(
            python_version.replace(".", "_"),
            os_name.upper(),
            cpu_name.upper(),
        )
        preference = repo_utils.getenv(rctx, env_var)
        if preference == None:
            logger.info("Consider using '{}' to select from one of the platforms: {}".format(
                env_var,
                candidate_names,
            ))
        else:
            # The preference is a platform name, while `candidates` holds
            # (name, meta) pairs: compare against the name and keep the pairs
            # so that the unpacking below keeps working.
            preferred = [
                candidate
                for candidate in candidates
                if candidate[0] == preference
            ]
            if not preferred:
                return logger.fail("Please choose a preferred interpreter out of the following platforms: {}".format(candidate_names))
            candidates = preferred

    if candidates:
        platform_name, meta = candidates[0]
        suffix = meta.impl_repo_name
        if not suffix:
            suffix = platform_name
        return suffix

    return logger.fail("Could not find a compatible 'host' python for '{os_name}', '{cpu_name}' from the loaded platforms: {platforms}".format(
        os_name = os_name,
        cpu_name = cpu_name,
        platforms = platforms,
    ))