# blob: 0fd05c66253fba5562f4a5c5c2f3e3846093f8bf [file] [log] [blame]
# Copyright 2022 The Bazel Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Create a repository to hold the toolchains.
This follows guidance here:
https://docs.bazel.build/versions/main/skylark/deploying.html#registering-toolchains
The "complex computation" in our case is simply downloading large artifacts.
This guidance tells us how to avoid that: we put the toolchain targets in the
alias repository with only the toolchain attribute pointing into the
platform-specific repositories.
"""
load(
"//python:versions.bzl",
"FREETHREADED",
"MUSL",
"PLATFORMS",
"WINDOWS_NAME",
)
load(":repo_utils.bzl", "REPO_DEBUG_ENV_VAR", "repo_utils")
load(":text_util.bzl", "render")
# Text of a single `py_toolchain_suite(...)` call. Each placeholder is filled
# in by `toolchain_suite_content` with a value that has already been rendered
# as Starlark source text.
_SUITE_TEMPLATE = """
py_toolchain_suite(
flag_values = {flag_values},
target_settings = {target_settings},
prefix = {prefix},
python_version = {python_version},
set_python_version_constraint = {set_python_version_constraint},
target_compatible_with = {target_compatible_with},
user_repository_name = {user_repository_name},
)
""".lstrip()
# Header of the BUILD.bazel file written by `_toolchains_repo_impl`; the
# rendered toolchain suites are appended after it. `{rules_python}` is replaced
# with the repo name of rules_python.
_WORKSPACE_TOOLCHAINS_BUILD_TEMPLATE = """
# Generated by python/private/toolchains_repo.bzl
#
# These can be registered in the workspace file or passed to --extra_toolchains
# flag. By default all these toolchains are registered by the
# python_register_toolchains macro so you don't normally need to interact with
# these targets.
load("@@{rules_python}//python/private:py_toolchain_suite.bzl", "py_toolchain_suite")
""".lstrip()
# BUILD.bazel content written by `_toolchain_aliases_impl`.
# `{loaded_platforms}` receives one quoted platform name per line and
# `{py_repository}` receives the `user_repository_name` attribute.
_TOOLCHAIN_ALIASES_BUILD_TEMPLATE = """
# Generated by python/private/toolchains_repo.bzl
load("@rules_python//python/private:toolchain_aliases.bzl", "toolchain_aliases")
package(default_visibility = ["//visibility:public"])
exports_files(["defs.bzl"])
PLATFORMS = [
{loaded_platforms}
]
toolchain_aliases(
name = "{py_repository}",
platforms = PLATFORMS,
)
""".lstrip()
# defs.bzl content written by `_toolchain_aliases_impl`. The generated file
# wraps py_binary, py_test, py_console_script_binary and
# compile_pip_requirements so that each call sets `python_version` and is
# passed through `with_deprecation.symbol`.
#
# Placeholders: `{rules_python}`, `{python_version}` and `{name}`. Doubled
# braces (`{{`, `}}`) are escapes that survive `.format()` as literal braces in
# the generated file.
_TOOLCHAIN_ALIASES_DEFS_TEMPLATE = """
# Generated by python/private/toolchains_repo.bzl
load("@@{rules_python}//python:pip.bzl", _compile_pip_requirements = "compile_pip_requirements")
load("@@{rules_python}//python/private:deprecation.bzl", "with_deprecation")
load("@@{rules_python}//python/private:text_util.bzl", "render")
load("@@{rules_python}//python:py_binary.bzl", _py_binary = "py_binary")
load("@@{rules_python}//python:py_test.bzl", _py_test = "py_test")
load(
"@@{rules_python}//python/entry_points:py_console_script_binary.bzl",
_py_console_script_binary = "py_console_script_binary",
)
def _with_deprecation(kwargs, *, name):
kwargs["python_version"] = "{python_version}"
return with_deprecation.symbol(
kwargs,
symbol_name = name,
old_load = "@{name}//:defs.bzl",
new_load = "@rules_python//python:{{}}.bzl".format(name),
snippet = render.call(name, **{{k: repr(v) for k,v in kwargs.items()}})
)
def py_binary(**kwargs):
return _py_binary(**_with_deprecation(kwargs, name = "py_binary"))
def py_console_script_binary(**kwargs):
return _py_console_script_binary(**_with_deprecation(kwargs, name = "py_console_script_binary"))
def py_test(**kwargs):
return _py_test(**_with_deprecation(kwargs, name = "py_test"))
def compile_pip_requirements(**kwargs):
return _compile_pip_requirements(**_with_deprecation(kwargs, name = "compile_pip_requirements"))
""".lstrip()
# BUILD.bazel content written by `_host_toolchain_impl`; it only exports the
# `python` file with public visibility.
_HOST_TOOLCHAIN_BUILD_CONTENT = """
# Generated by python/private/toolchains_repo.bzl
exports_files(["python"], visibility = ["//visibility:public"])
""".lstrip()
# Python script that `_host_toolchain_impl` writes to disk and runs with the
# symlinked interpreter. It asserts that the resolved `sys.executable` equals
# the resolved `{python}` path. `{python}` is the interpreter file name;
# doubled braces are `.format()` escapes.
_HOST_PYTHON_TESTER_TEMPLATE = """
from pathlib import Path
import sys
python = Path(sys.executable)
want_python = str(Path("{python}").resolve())
got_python = str(Path(sys.executable).resolve())
assert want_python == got_python, \
"Expected to use a different interpreter:\\nwant: '{{}}'\\n got: '{{}}'".format(
want_python,
got_python,
)
""".lstrip()
# Per-version `<python_version>/defs.bzl` content written by
# `_multi_toolchain_aliases_impl`. It defines the same deprecated wrappers as
# the single-version defs template, except that the reported `old_load` path
# includes the `{python_version}` package.
#
# Placeholders: `{rules_python}`, `{python_version}` and `{name}`. Doubled
# braces are `.format()` escapes.
_MULTI_TOOLCHAIN_ALIASES_DEFS_TEMPLATE = """
# Generated by python/private/toolchains_repo.bzl
load("@@{rules_python}//python:pip.bzl", _compile_pip_requirements = "compile_pip_requirements")
load("@@{rules_python}//python/private:deprecation.bzl", "with_deprecation")
load("@@{rules_python}//python/private:text_util.bzl", "render")
load("@@{rules_python}//python:py_binary.bzl", _py_binary = "py_binary")
load("@@{rules_python}//python:py_test.bzl", _py_test = "py_test")
load(
"@@{rules_python}//python/entry_points:py_console_script_binary.bzl",
_py_console_script_binary = "py_console_script_binary",
)
def _with_deprecation(kwargs, *, name):
kwargs["python_version"] = "{python_version}"
return with_deprecation.symbol(
kwargs,
symbol_name = name,
old_load = "@{name}//{python_version}:defs.bzl",
new_load = "@rules_python//python:{{}}.bzl".format(name),
snippet = render.call(name, **{{k: repr(v) for k,v in kwargs.items()}})
)
def py_binary(**kwargs):
return _py_binary(**_with_deprecation(kwargs, name = "py_binary"))
def py_console_script_binary(**kwargs):
return _py_console_script_binary(**_with_deprecation(kwargs, name = "py_console_script_binary"))
def py_test(**kwargs):
return _py_test(**_with_deprecation(kwargs, name = "py_test"))
def compile_pip_requirements(**kwargs):
return _compile_pip_requirements(**_with_deprecation(kwargs, name = "compile_pip_requirements"))
""".lstrip()
# pip.bzl content written by `_multi_toolchain_aliases_impl`. The generated
# `multi_pip_parse` wrapper pre-fills `python_versions` and `minor_mapping`
# and forwards everything else to the rules_python `multi_pip_parse`.
#
# Placeholders: `{rules_python}`, `{python_versions}` and `{minor_mapping}`.
_MULTI_TOOLCHAIN_ALIASES_PIP_TEMPLATE = """
# Generated by python/private/toolchains_repo.bzl
load("@@{rules_python}//python:pip.bzl", "pip_parse", _multi_pip_parse = "multi_pip_parse")
def multi_pip_parse(name, requirements_lock, **kwargs):
return _multi_pip_parse(
name = name,
python_versions = {python_versions},
requirements_lock = requirements_lock,
minor_mapping = {minor_mapping},
**kwargs
)
""".lstrip()
def python_toolchain_build_file_content(
        prefix,
        python_version,
        set_python_version_constraint,
        user_repository_name,
        loaded_platforms):
    """Render the toolchain suite definitions for a BUILD file.

    One `py_toolchain_suite(...)` call is rendered per entry of
    `loaded_platforms`, and the calls are separated by a blank line.

    Args:
        prefix: Python toolchain name prefix; the platform name is appended
            to it for each suite.
        python_version: Python version for the toolchains.
        set_python_version_constraint: string, "True" if the toolchain should
            have the Python version constraint added as a requirement for
            matching the toolchain, "False" if not.
        user_repository_name: base name for the user repos; `_<platform>` is
            appended to it for each suite.
        loaded_platforms: mapping of platform name to platform metadata. Each
            value must provide `compatible_with` and `flag_values`, as the
            entries defined in `//python:versions.bzl` do.

    Returns:
        build_content: Text containing toolchain definitions.
    """
    suites = [
        toolchain_suite_content(
            flag_values = meta.flag_values,
            prefix = "{}{}".format(prefix, platform),
            python_version = python_version,
            set_python_version_constraint = set_python_version_constraint,
            target_compatible_with = meta.compatible_with,
            # No extra target settings are applied here.
            target_settings = [],
            user_repository_name = "{}_{}".format(user_repository_name, platform),
        )
        for platform, meta in loaded_platforms.items()
    ]
    return "\n\n".join(suites)
def toolchain_suite_content(
        *,
        flag_values,
        prefix,
        python_version,
        set_python_version_constraint,
        target_compatible_with,
        target_settings,
        user_repository_name):
    """Render a single `py_toolchain_suite(...)` call as BUILD file text.

    Each argument is converted to Starlark source text with the `render`
    helpers and substituted into `_SUITE_TEMPLATE`.

    Args:
        flag_values: mapping rendered as the `flag_values` dict; keys are
            converted with `str()` before being quoted.
        prefix: value rendered as the `prefix` string.
        python_version: value rendered as the `python_version` string.
        set_python_version_constraint: value rendered as a string.
        target_compatible_with: list rendered as `target_compatible_with`.
        target_settings: list rendered as `target_settings`.
        user_repository_name: value rendered as a string.

    Returns:
        The text of the `py_toolchain_suite` call.
    """

    def _label_key(key):
        # Stringify first so that labels are displayed correctly.
        return repr(str(key))

    rendered_flag_values = render.indent(
        render.dict(flag_values, key_repr = _label_key),
    ).lstrip()
    rendered_compatible_with = render.indent(
        render.list(target_compatible_with),
    ).lstrip()
    rendered_target_settings = render.list(target_settings, hanging_indent = " ")

    return _SUITE_TEMPLATE.format(
        flag_values = rendered_flag_values,
        target_settings = rendered_target_settings,
        prefix = render.str(prefix),
        python_version = render.str(python_version),
        set_python_version_constraint = render.str(set_python_version_constraint),
        target_compatible_with = rendered_compatible_with,
        user_repository_name = render.str(user_repository_name),
    )
def _toolchains_repo_impl(rctx):
    """Write the BUILD.bazel file that holds the toolchain suite definitions.

    Args:
        rctx: {type}`repository_ctx`.
    """
    requested = rctx.attr.platforms

    # Keep only the known platforms that were requested, in PLATFORMS order.
    selected_platforms = {
        name: meta
        for name, meta in PLATFORMS.items()
        if name in requested
    }

    header = _WORKSPACE_TOOLCHAINS_BUILD_TEMPLATE.format(
        rules_python = rctx.attr._rules_python_workspace.repo_name,
    )
    suites = python_toolchain_build_file_content(
        prefix = "",
        python_version = rctx.attr.python_version,
        # The attribute is a bool; the renderer takes it as a string.
        set_python_version_constraint = str(rctx.attr.set_python_version_constraint),
        user_repository_name = rctx.attr.user_repository_name,
        loaded_platforms = selected_platforms,
    )
    rctx.file("BUILD.bazel", header + suites)
# Repository rule whose only output is a BUILD.bazel file with one
# `py_toolchain_suite` per requested platform (see `_toolchains_repo_impl`).
toolchains_repo = repository_rule(
    _toolchains_repo_impl,
    doc = "Creates a repository with toolchain definitions for all known platforms " +
          "which can be registered or selected.",
    attrs = {
        "platforms": attr.string_list(doc = "List of platforms for which the toolchain definitions shall be created"),
        "python_version": attr.string(doc = "The Python version."),
        "set_python_version_constraint": attr.bool(doc = "if target_compatible_with for the toolchain should set the version constraint"),
        "user_repository_name": attr.string(doc = "what the user chose for the base name"),
        # Only used to obtain the repo name of rules_python for generated load() statements.
        "_rules_python_workspace": attr.label(default = Label("//:WORKSPACE")),
    },
)
def _toolchain_aliases_impl(rctx):
    """Write the BUILD.bazel and defs.bzl files of the aliases repository.

    Args:
        rctx: {type}`repository_ctx`.
    """

    # Base BUILD file for this repository: one quoted platform name per line.
    platform_lines = [" \"{}\",".format(p) for p in rctx.attr.platforms]
    rctx.file("BUILD.bazel", _TOOLCHAIN_ALIASES_BUILD_TEMPLATE.format(
        loaded_platforms = "\n".join(platform_lines),
        py_repository = rctx.attr.user_repository_name,
    ))

    # Starlark file with the wrappers rendered from the defs template, pinned
    # to this repository's Python version.
    defs_content = _TOOLCHAIN_ALIASES_DEFS_TEMPLATE.format(
        rules_python = rctx.attr._rules_python_workspace.repo_name,
        python_version = rctx.attr.python_version,
        name = rctx.attr.name,
    )
    rctx.file("defs.bzl", content = defs_content)
# Repository rule that writes a BUILD.bazel file calling the
# `toolchain_aliases` macro plus a `defs.bzl` file (see
# `_toolchain_aliases_impl`).
toolchain_aliases = repository_rule(
    _toolchain_aliases_impl,
    doc = """\
Creates a repository with a shorter name only referencing the python version,
it contains a BUILD.bazel file declaring aliases to the host platform's targets
and is a great fit for any usage related to setting up toolchains for build
actions.""",
    attrs = {
        "platforms": attr.string_list(
            doc = "List of platforms for which aliases shall be created",
        ),
        "python_version": attr.string(doc = "The Python version."),
        "user_repository_name": attr.string(
            mandatory = True,
            doc = "The base name for all created repositories, like 'python38'.",
        ),
        # Only used to obtain the repo name of rules_python for generated load() statements.
        "_rules_python_workspace": attr.label(default = Label("//:WORKSPACE")),
    },
    # Declares the debug environment variable as an input of this rule.
    environ = [REPO_DEBUG_ENV_VAR],
)
def _host_toolchain_impl(rctx):
    """Symlink the host-compatible interpreter repo into this repo and verify it.

    The steps are: write the BUILD file, pick the platform matching the host,
    symlink the contents of that platform's repository, then run a small
    script with the symlinked interpreter to check that it is the one in use.

    Args:
        rctx: {type}`repository_ctx`.
    """
    rctx.file("BUILD.bazel", _HOST_TOOLCHAIN_BUILD_CONTENT)

    os_name = repo_utils.get_platforms_os_name(rctx)
    logger = repo_utils.logger(rctx)
    cpu_name = repo_utils.get_platforms_cpu_name(rctx)
    host_platform = _get_host_platform(
        rctx = rctx,
        logger = logger,
        python_version = rctx.attr.python_version,
        os_name = os_name,
        cpu_name = cpu_name,
        platforms = rctx.attr.platforms,
    )

    # This repo is named "<base>_host"; the platform repos are "<base>_<platform>".
    base_name = rctx.attr.name[:-len("_host")]
    repo = "@@{py_repository}_{host_platform}".format(
        py_repository = base_name,
        host_platform = host_platform,
    )

    rctx.report_progress("Symlinking interpreter files to the target platform")
    host_repo_build_file = rctx.path(Label("{repo}//:BUILD.bazel".format(repo = repo)))

    # Special files created by the repo rule automatically; never symlinked.
    skipped_names = [
        "BUILD.bazel",
        "MODULE.bazel",
        "REPO.bazel",
        "WORKSPACE",
        "WORKSPACE.bazel",
        "WORKSPACE.bzlmod",
    ]

    # The interpreter might not work on platforms that don't have symlink
    # support if we just symlink the interpreter itself. rctx.symlink does a
    # copy in such cases so we can just attempt to symlink all of the
    # directories in the host interpreter repo, which should be faster than
    # re-downloading it.
    for entry in host_repo_build_file.dirname.readdir():
        if entry.basename not in skipped_names:
            # symlink works on all platforms that bazel supports, so it should
            # work on UNIX and Windows with and without symlink support. For
            # better performance users should enable the symlink startup
            # option, however that requires admin privileges.
            rctx.symlink(entry, entry.basename)

    python_binary = "python.exe" if os_name == WINDOWS_NAME else "python"

    # Ensure that we can run the interpreter and check that we are not
    # using the host interpreter.
    tester_path = rctx.path("python_tester.py")
    rctx.file(tester_path, _HOST_PYTHON_TESTER_TEMPLATE.format(
        repo = repo.strip("@"),
        python = python_binary,
    ))
    repo_utils.execute_checked(
        rctx,
        op = "CheckHostInterpreter",
        arguments = [
            rctx.path(python_binary),
            # Run the interpreter in isolated mode, this option implies -E, -P
            # and -s. This ensures that environment variables set in
            # userspace, such as PYTHONPATH, are ignored and cannot interfere
            # with this invocation.
            "-I",
            tester_path,
        ],
    )

    # The tester script is only needed for the check above.
    deleted = rctx.delete(tester_path)
    if not deleted:
        fail("Failed to delete the python tester")
# NOTE: The term "toolchain" is a misnomer for this rule. This doesn't define
# a repo with toolchains or toolchain implementations.
#
# The implementation symlinks the contents of the repository of the platform
# that matches the host and then runs the interpreter once as a sanity check.
host_toolchain = repository_rule(
    _host_toolchain_impl,
    doc = """\
Creates a repository with a shorter name meant to be used in the repository_ctx,
which needs to have `symlinks` for the interpreter. This is separate from the
toolchain_aliases repo because referencing the `python` interpreter target from
this repo causes an eager fetch of the toolchain for the host platform.
""",
    attrs = {
        # Read by `_get_host_platform` together with `os_names`; both are
        # indexed by the stringified position of the platform in `platforms`.
        "arch_names": attr.string_dict(
            doc = """
If set, overrides the platform metadata. Keyed by index in `platforms`
""",
        ),
        "os_names": attr.string_dict(
            doc = """
If set, overrides the platform metadata. Keyed by index in `platforms`
""",
        ),
        "platforms": attr.string_list(mandatory = True),
        "python_version": attr.string(mandatory = True),
        # NOTE(review): not read anywhere in this file; presumably consumed by
        # shared repo utilities - confirm.
        "_rule_name": attr.string(default = "host_toolchain"),
        "_rules_python_workspace": attr.label(default = Label("//:WORKSPACE")),
    },
)
def _multi_toolchain_aliases_impl(rctx):
    """Write per-version `defs.bzl` packages plus a top-level `pip.bzl`.

    Args:
        rctx: {type}`repository_ctx`.
    """
    rules_python = rctx.attr._rules_python_workspace.repo_name
    versions = rctx.attr.python_versions

    # One package per Python version, each with a defs.bzl and an empty BUILD file.
    for python_version, repository_name in versions.items():
        defs_content = _MULTI_TOOLCHAIN_ALIASES_DEFS_TEMPLATE.format(
            rules_python = rules_python,
            python_version = python_version,
            name = rctx.attr.name,
            repository_name = repository_name,
        )
        rctx.file("{}/defs.bzl".format(python_version), content = defs_content)
        rctx.file("{}/BUILD.bazel".format(python_version), "")

    rendered_minor_mapping = render.indent(
        render.dict(rctx.attr.minor_mapping),
        indent = " " * 8,
    ).lstrip()
    rctx.file("pip.bzl", content = _MULTI_TOOLCHAIN_ALIASES_PIP_TEMPLATE.format(
        rules_python = rules_python,
        python_versions = versions.keys(),
        minor_mapping = rendered_minor_mapping,
    ))

    # An empty root BUILD file makes the repository root a package.
    rctx.file("BUILD.bazel", "")
# Repository rule that writes one `<python_version>/defs.bzl` package per key
# of `python_versions`, plus a top-level `pip.bzl` (see
# `_multi_toolchain_aliases_impl`).
multi_toolchain_aliases = repository_rule(
    _multi_toolchain_aliases_impl,
    attrs = {
        "minor_mapping": attr.string_dict(doc = "The mapping between `X.Y` and `X.Y.Z` python version values"),
        # The keys are used as the Python versions; the values are passed to
        # the defs template, which has no placeholder for them.
        "python_versions": attr.string_dict(doc = "The Python versions."),
        # Only used to obtain the repo name of rules_python for generated load() statements.
        "_rules_python_workspace": attr.label(default = Label("//:WORKSPACE")),
    },
)
def sorted_host_platforms(platform_map):
    """Sort the keys in the platform map to give correct precedence.

    The order of keys in the platform mapping matters for the host toolchain
    selection. When multiple runtimes are compatible with the host, we take the
    first that is compatible (usually; there's also the
    `RULES_PYTHON_REPO_TOOLCHAIN_*` environment variables). The historical
    behavior carefully constructed the ordering of platform keys such that
    the ordering was:
    * Regular platforms
    * The "-freethreaded" suffix
    * The "-musl" suffix

    Here, we formalize that so it isn't subtly encoded in the ordering of keys
    in a dict that autoformatters like to clobber and whose only documentation
    is an innocuous looking formatter disable directive.

    Args:
        platform_map: a mapping of platforms and their metadata.

    Returns:
        dict; the same values, but with the keys inserted in the desired
        order so that iteration happens in the desired order.
    """

    def _precedence(platform_name):
        # Ascending sort: a lower rank means a higher precedence. The musl
        # marker is compared first, then the freethreaded marker.
        musl_rank = 1 if MUSL in platform_name else 0
        freethreaded_rank = 1 if FREETHREADED in platform_name else 0
        return (musl_rank, freethreaded_rank)

    ordered = {}
    for platform_name in sorted(platform_map.keys(), key = _precedence):
        ordered[platform_name] = platform_map[platform_name]
    return ordered
def _get_host_platform(*, rctx, logger, python_version, os_name, cpu_name, platforms):
    """Gets the host platform.

    A platform is a candidate when both its OS name and its architecture
    equal the host values. When several candidates remain, the
    `RULES_PYTHON_REPO_TOOLCHAIN_<version>_<OS>_<CPU>` environment variable
    may name the preferred one; otherwise the first candidate is used.

    Args:
        rctx: {type}`repository_ctx`.
        logger: {type}`struct`.
        python_version: {type}`string`.
        os_name: {type}`str` the host OS name.
        cpu_name: {type}`str` the host CPU name.
        platforms: {type}`list[str]` the list of loaded platforms.

    Returns:
        The host platform.
    """
    os_overrides = rctx.attr.os_names
    if os_overrides:
        # The override attributes are keyed by the stringified index of the
        # platform in `platforms`.
        metadata = {
            platform_name: struct(
                os_name = os_overrides[str(index)],
                arch = rctx.attr.arch_names[str(index)],
            )
            for index, platform_name in enumerate(platforms)
        }
    else:
        metadata = sorted_host_platforms(PLATFORMS)

    matches = [
        name
        for name in platforms
        if metadata[name].os_name == os_name and metadata[name].arch == cpu_name
    ]

    if not matches:
        return logger.fail("Could not find a compatible 'host' python for '{os_name}', '{cpu_name}' from the loaded platforms: {platforms}".format(
            os_name = os_name,
            cpu_name = cpu_name,
            platforms = platforms,
        ))

    if len(matches) == 1:
        return matches[0]

    # Several platforms match: let the user disambiguate via the environment.
    env_var = "RULES_PYTHON_REPO_TOOLCHAIN_{}_{}_{}".format(
        python_version.replace(".", "_"),
        os_name.upper(),
        cpu_name.upper(),
    )
    preference = repo_utils.getenv(rctx, env_var)
    if preference == None:
        logger.info("Consider using '{}' to select from one of the platforms: {}".format(
            env_var,
            matches,
        ))

        # No preference given: fall back to the first match.
        return matches[0]

    if preference not in matches:
        return logger.fail("Please choose a preferred interpreter out of the following platforms: {}".format(matches))

    return preference