refactor!(toolchain): remove uname dep in the repository_rule stage (#2406)

Before this PR we would shell out to `uname` on UNIX systems to get the `arch`
of the toolchain - on Windows we did not need to do it because there used to
be only a single Windows platform. With this change we can correctly support
the resolution of the python interpreter on various platforms, and I have also
added an env variable to customize the selection, so that users can use a
`musl` or a `freethreaded` interpreter if they wish.

As part of this change, I have restricted visibility of the config settings
used in the toolchain alias repo so that we are creating fewer targets. This
is a very good time to do this before `1.0.0`.

Fixes #2145
Work towards #2276
Work towards #2386
Work towards #1211 to unblock #2402
Work towards #1361

---------

Co-authored-by: Richard Levasseur <richardlev@gmail.com>
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5eaa3fa..16ea38b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -52,16 +52,39 @@
 
 {#v0-0-0-changed}
 ### Changed
+
+**Breaking**:
+* (toolchains) stop exposing config settings in python toolchain alias repos.
+  Please consider depending on the flags defined in
+  `//python/config_setting/...` and the `@platforms` package instead.
+* (toolchains) consumers who were depending on the `MACOS_NAME` and the `arch`
+  attribute in the `PLATFORMS` list, please update your code to respect the new
+  values. The values now correspond to the values available in the
+  `@platforms//` package constraint values.
+* (toolchains) `host_platform` and `interpreter` constants are no longer created
+  in the `toolchain` generated alias `.bzl` files. If you need to access the
+  host interpreter during the `repository_rule` evaluation, please use the
+  `@python_{version}_host//:python` targets created by
+  {bzl:obj}`python_register_toolchains` and
+  {bzl:obj}`python_register_multi_toolchains` macros or the {bzl:obj}`python`
+  bzlmod extension.
+
+Other changes:
 * (python_repository) Start honoring the `strip_prefix` field for `zstd` archives.
 
 {#v0-0-0-fixed}
 ### Fixed
-* Nothing fixed.
+* (toolchains) stop depending on `uname` to get the value of the host platform.
 
 {#v0-0-0-added}
 ### Added
 * (gazelle): Parser failures will now be logged to the terminal. Additional
   details can be logged by setting `GAZELLE_VERBOSE=1`.
+* (toolchains) allow users to select which variant of the supported host toolchain
+  they would like to use through
+  `RULES_PYTHON_REPO_TOOLCHAIN_{VERSION}_{OS}_{ARCH}` env variable setting. For
+  example, this allows one to use `freethreaded` python interpreter in the
+  `repository_rule` to build a wheel from `sdist`.
 
 {#v0-0-0-removed}
 ### Removed
diff --git a/WORKSPACE b/WORKSPACE
index b77918f..46ebbc8 100644
--- a/WORKSPACE
+++ b/WORKSPACE
@@ -86,7 +86,7 @@
 _py_gazelle_deps()
 
 # This interpreter is used for various rules_python dev-time tools
-load("@python//3.11.9:defs.bzl", "interpreter")
+interpreter = "@python_3_11_9_host//:python"
 
 #####################
 # Install twine for our own runfiles wheel publishing.
diff --git a/docs/environment-variables.md b/docs/environment-variables.md
index 2a00529..906281d 100644
--- a/docs/environment-variables.md
+++ b/docs/environment-variables.md
@@ -15,6 +15,16 @@
 * `TRACE`
 :::
 
+:::{envvar} RULES_PYTHON_REPO_TOOLCHAIN_VERSION_OS_ARCH
+
+Selects which python interpreter platform is used in repository rules for a
+particular interpreter `(version, os, arch)` triple.
+Replace the `VERSION_OS_ARCH` part with actual values when using, e.g.
+`3_13_0_LINUX_X86_64`. The version values must have `_` instead of `.` and the
+os, arch values are the upper-cased forms of the ones mentioned in the
+`//python:versions.bzl` file.
+:::
+
 :::{envvar} RULES_PYTHON_PIP_ISOLATED
 
 Determines if `--isolated` is used with pip.
diff --git a/examples/bzlmod/MODULE.bazel.lock b/examples/bzlmod/MODULE.bazel.lock
index 5a546c2..8bad32a 100644
--- a/examples/bzlmod/MODULE.bazel.lock
+++ b/examples/bzlmod/MODULE.bazel.lock
@@ -1562,7 +1562,7 @@
     },
     "@@rules_python~//python/extensions:pip.bzl%pip": {
       "general": {
-        "bzlTransitiveDigest": "MwmpiMn2qoAVC+3E9MF3E98fB8v1utYBfMa0frXyi7g=",
+        "bzlTransitiveDigest": "mCwiXbsZmReVgs884fZHYfxaZaL9mFG+prEnH/lpE9g=",
         "usagesDigest": "VmrNvB/4EhzsYieLDka9584M+pYKPpjNLl3Wcb5rx/c=",
         "recordedFileInputs": {
           "@@//requirements_lock_3_10.txt": "5e7083982a7e60f34998579a0ae83b520d46ab8f2552cc51337217f024e6def5",
@@ -7035,7 +7035,7 @@
     },
     "@@rules_python~//python/private/pypi:pip.bzl%pip_internal": {
       "general": {
-        "bzlTransitiveDigest": "Kx383BMHUpAHEjRiU5aWU4QTRQVg+Uu+Mgi7jVxuz0c=",
+        "bzlTransitiveDigest": "Xu1N6572iHVqGChH12PpMhprC21k3CpjRZVpm3FmE2c=",
         "usagesDigest": "/lZXl/ZgP+u5PE8WkeWTyYBsvX9XQWFn1antj5qrBzQ=",
         "recordedFileInputs": {
           "@@rules_python~//tools/publish/requirements_linux.txt": "8175b4c8df50ae2f22d1706961884beeb54e7da27bd2447018314a175981997d",
diff --git a/python/private/python_register_toolchains.bzl b/python/private/python_register_toolchains.bzl
index 98c8e5b..cd3e9cb 100644
--- a/python/private/python_register_toolchains.bzl
+++ b/python/private/python_register_toolchains.bzl
@@ -160,7 +160,11 @@
                 platform = platform,
             ))
 
-    host_toolchain(name = name + "_host")
+    host_toolchain(
+        name = name + "_host",
+        platforms = loaded_platforms,
+        python_version = python_version,
+    )
 
     toolchain_aliases(
         name = name,
diff --git a/python/private/repo_utils.bzl b/python/private/repo_utils.bzl
index e0bf69a..0e3f7b0 100644
--- a/python/private/repo_utils.bzl
+++ b/python/private/repo_utils.bzl
@@ -41,6 +41,10 @@
 
     Returns:
         A struct with attributes logging: trace, debug, info, warn, fail.
+        Please use `return logger.fail` when using the `fail` method, because
+        it makes `buildifier` happy and ensures that other implementations of
+        the logger injected into the function work as expected by terminating
+        on the given line.
     """
     if _is_repo_debug_enabled(mrctx):
         verbosity_level = "DEBUG"
@@ -140,7 +144,7 @@
     result = mrctx.execute(arguments, environment = environment, **kwargs)
 
     if fail_on_error and result.return_code != 0:
-        logger.fail((
+        return logger.fail((
             "repo.execute: {op}: end: failure:\n" +
             "  command: {cmd}\n" +
             "  return code: {return_code}\n" +
diff --git a/python/private/toolchain_aliases.bzl b/python/private/toolchain_aliases.bzl
new file mode 100644
index 0000000..31ac4a8
--- /dev/null
+++ b/python/private/toolchain_aliases.bzl
@@ -0,0 +1,74 @@
+# Copyright 2024 The Bazel Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Create toolchain alias targets."""
+
+load("@rules_python//python:versions.bzl", "PLATFORMS")
+
+def toolchain_aliases(*, name, platforms, visibility = None, native = native):
+    """Create toolchain aliases for the python toolchains.
+
+    Args:
+        name: {type}`str` The name of the current repository.
+        platforms: {type}`platforms` The list of platforms that are supported
+            for the current toolchain repository.
+        visibility: {type}`list[Target] | None` The visibility of the aliases.
+        native: The native struct used in the macro, useful for testing.
+    """
+    for platform in PLATFORMS.keys():
+        if platform not in platforms:
+            continue
+
+        native.config_setting(
+            name = platform,
+            flag_values = PLATFORMS[platform].flag_values,
+            constraint_values = PLATFORMS[platform].compatible_with,
+            visibility = ["//visibility:private"],
+        )
+
+    prefix = name
+    for name in [
+        "files",
+        "includes",
+        "libpython",
+        "py3_runtime",
+        "python_headers",
+        "python_runtimes",
+    ]:
+        native.alias(
+            name = name,
+            actual = select({
+                ":" + platform: "@{}_{}//:{}".format(prefix, platform, name)
+                for platform in platforms
+            }),
+            visibility = visibility,
+        )
+
+    native.alias(
+        name = "python3",
+        actual = select({
+            ":" + platform: "@{}_{}//:{}".format(prefix, platform, "python.exe" if "windows" in platform else "bin/python3")
+            for platform in platforms
+        }),
+        visibility = visibility,
+    )
+    native.alias(
+        name = "pip",
+        actual = select({
+            ":" + platform: "@{}_{}//:python_runtimes".format(prefix, platform)
+            for platform in platforms
+            if "windows" not in platform
+        }),
+        visibility = visibility,
+    )
diff --git a/python/private/toolchains_repo.bzl b/python/private/toolchains_repo.bzl
index d21fb53..7e9a0c7 100644
--- a/python/private/toolchains_repo.bzl
+++ b/python/private/toolchains_repo.bzl
@@ -25,8 +25,6 @@
 
 load(
     "//python:versions.bzl",
-    "LINUX_NAME",
-    "MACOS_NAME",
     "PLATFORMS",
     "WINDOWS_NAME",
 )
@@ -126,43 +124,26 @@
 )
 
 def _toolchain_aliases_impl(rctx):
-    logger = repo_utils.logger(rctx)
-    (os_name, arch) = _get_host_os_arch(rctx, logger)
-
-    host_platform = _get_host_platform(os_name, arch)
-
-    is_windows = (os_name == WINDOWS_NAME)
-    python3_binary_path = "python.exe" if is_windows else "bin/python3"
-
     # Base BUILD file for this repository.
     build_contents = """\
 # Generated by python/private/toolchains_repo.bzl
+load("@rules_python//python/private:toolchain_aliases.bzl", "toolchain_aliases")
+
 package(default_visibility = ["//visibility:public"])
-load("@rules_python//python:versions.bzl", "gen_python_config_settings")
-gen_python_config_settings()
+
 exports_files(["defs.bzl"])
 
 PLATFORMS = [
 {loaded_platforms}
 ]
-alias(name = "files",           actual = select({{":" + item: "@{py_repository}_" + item + "//:files" for item in PLATFORMS}}))
-alias(name = "includes",        actual = select({{":" + item: "@{py_repository}_" + item + "//:includes" for item in PLATFORMS}}))
-alias(name = "libpython",       actual = select({{":" + item: "@{py_repository}_" + item + "//:libpython" for item in PLATFORMS}}))
-alias(name = "py3_runtime",     actual = select({{":" + item: "@{py_repository}_" + item + "//:py3_runtime" for item in PLATFORMS}}))
-alias(name = "python_headers",  actual = select({{":" + item: "@{py_repository}_" + item + "//:python_headers" for item in PLATFORMS}}))
-alias(name = "python_runtimes", actual = select({{":" + item: "@{py_repository}_" + item + "//:python_runtimes" for item in PLATFORMS}}))
-alias(name = "python3",         actual = select({{":" + item: "@{py_repository}_" + item + "//:" + ("python.exe" if "windows" in item else "bin/python3") for item in PLATFORMS}}))
+toolchain_aliases(
+    name = "{py_repository}",
+    platforms = PLATFORMS,
+)
 """.format(
         py_repository = rctx.attr.user_repository_name,
         loaded_platforms = "\n".join(["    \"{}\",".format(p) for p in rctx.attr.platforms]),
     )
-    if not is_windows:
-        build_contents += """\
-alias(name = "pip",             actual = select({{":" + item: "@{py_repository}_" + item + "//:python_runtimes" for item in PLATFORMS if "windows" not in item}}))
-""".format(
-            py_repository = rctx.attr.user_repository_name,
-            host_platform = host_platform,
-        )
     rctx.file("BUILD.bazel", build_contents)
 
     # Expose a Starlark file so rules can know what host platform we used and where to find an interpreter
@@ -181,9 +162,6 @@
 )
 load("{rules_python}//python:pip.bzl", _compile_pip_requirements = "compile_pip_requirements")
 
-host_platform = "{host_platform}"
-interpreter = "@{py_repository}_{host_platform}//:{python3_binary_path}"
-
 def py_binary(name, **kwargs):
     return _py_binary(
         name = name,
@@ -214,10 +192,7 @@
     )
 
 """.format(
-        host_platform = host_platform,
-        py_repository = rctx.attr.user_repository_name,
         python_version = rctx.attr.python_version,
-        python3_binary_path = python3_binary_path,
         rules_python = get_repository_name(rctx.attr._rules_python_workspace),
     ))
 
@@ -243,15 +218,21 @@
 )
 
 def _host_toolchain_impl(rctx):
-    logger = repo_utils.logger(rctx)
     rctx.file("BUILD.bazel", """\
 # Generated by python/private/toolchains_repo.bzl
 
 exports_files(["python"], visibility = ["//visibility:public"])
 """)
 
-    (os_name, arch) = _get_host_os_arch(rctx, logger)
-    host_platform = _get_host_platform(os_name, arch)
+    os_name = repo_utils.get_platforms_os_name(rctx)
+    host_platform = _get_host_platform(
+        rctx = rctx,
+        logger = repo_utils.logger(rctx),
+        python_version = rctx.attr.python_version,
+        os_name = os_name,
+        cpu_name = repo_utils.get_platforms_cpu_name(rctx),
+        platforms = rctx.attr.platforms,
+    )
     repo = "@@{py_repository}_{host_platform}".format(
         py_repository = rctx.attr.name[:-len("_host")],
         host_platform = host_platform,
@@ -320,6 +301,8 @@
 this repo causes an eager fetch of the toolchain for the host platform.
     """,
     attrs = {
+        "platforms": attr.string_list(mandatory = True),
+        "python_version": attr.string(mandatory = True),
         "_rule_name": attr.string(default = "host_toolchain"),
         "_rules_python_workspace": attr.label(default = Label("//:WORKSPACE")),
     },
@@ -336,16 +319,12 @@
 load(
     "@{repository_name}//:defs.bzl",
     _compile_pip_requirements = "compile_pip_requirements",
-    _host_platform = "host_platform",
-    _interpreter = "interpreter",
     _py_binary = "py_binary",
     _py_console_script_binary = "py_console_script_binary",
     _py_test = "py_test",
 )
 
 compile_pip_requirements = _compile_pip_requirements
-host_platform = _host_platform
-interpreter = _interpreter
 py_binary = _py_binary
 py_console_script_binary = _py_console_script_binary
 py_test = _py_test
@@ -388,57 +367,51 @@
 def sanitize_platform_name(platform):
     return platform.replace("-", "_")
 
-def _get_host_platform(os_name, arch):
+def _get_host_platform(*, rctx, logger, python_version, os_name, cpu_name, platforms):
     """Gets the host platform.
 
     Args:
-        os_name: the host OS name.
-        arch: the host arch.
+        rctx: {type}`repository_ctx`.
+        logger: {type}`struct`.
+        python_version: {type}`string`.
+        os_name: {type}`str` the host OS name.
+        cpu_name: {type}`str` the host CPU name.
+        platforms: {type}`list[str]` the list of loaded platforms.
     Returns:
         The host platform.
     """
-    host_platform = None
-    for platform, meta in PLATFORMS.items():
-        if "freethreaded" in platform:
-            continue
+    candidates = []
+    for platform in platforms:
+        meta = PLATFORMS[platform]
 
-        if meta.os_name == os_name and meta.arch == arch:
-            host_platform = platform
-    if not host_platform:
-        fail("No platform declared for host OS {} on arch {}".format(os_name, arch))
-    return host_platform
+        if meta.os_name == os_name and meta.arch == cpu_name:
+            candidates.append(platform)
 
-def _get_host_os_arch(rctx, logger):
-    """Infer the host OS name and arch from a repository context.
+    if len(candidates) == 1:
+        return candidates[0]
 
-    Args:
-        rctx: Bazel's repository_ctx.
-        logger: Logger to use for operations.
+    if candidates:
+        env_var = "RULES_PYTHON_REPO_TOOLCHAIN_{}_{}_{}".format(
+            python_version.replace(".", "_"),
+            os_name.upper(),
+            cpu_name.upper(),
+        )
+        preference = repo_utils.getenv(rctx, env_var)
+        if preference == None:
+            logger.info("Consider using '{}' to select from one of the platforms: {}".format(
+                env_var,
+                candidates,
+            ))
+        elif preference not in candidates:
+            return logger.fail("Please choose a preferred interpreter out of the following platforms: {}".format(candidates))
+        else:
+            candidates = [preference]
 
-    Returns:
-        A tuple with the host OS name and arch.
-    """
-    os_name = rctx.os.name
+    if candidates:
+        return candidates[0]
 
-    # We assume the arch for Windows is always x86_64.
-    if "windows" in os_name.lower():
-        arch = "x86_64"
-
-        # Normalize the os_name. E.g. os_name could be "OS windows server 2019".
-        os_name = WINDOWS_NAME
-    else:
-        # This is not ideal, but bazel doesn't directly expose arch.
-        arch = repo_utils.execute_unchecked(
-            rctx,
-            op = "GetUname",
-            arguments = [repo_utils.which_checked(rctx, "uname"), "-m"],
-            logger = logger,
-        ).stdout.strip()
-
-        # Normalize the os_name.
-        if "mac" in os_name.lower():
-            os_name = MACOS_NAME
-        elif "linux" in os_name.lower():
-            os_name = LINUX_NAME
-
-    return (os_name, arch)
+    return logger.fail("Could not find a compatible 'host' python for '{os_name}', '{cpu_name}' from the loaded platforms: {platforms}".format(
+        os_name = os_name,
+        cpu_name = cpu_name,
+        platforms = platforms,
+    ))
diff --git a/python/versions.bzl b/python/versions.bzl
index 688c4e2..d229b9d 100644
--- a/python/versions.bzl
+++ b/python/versions.bzl
@@ -15,8 +15,8 @@
 """The Python versions we use for the toolchains.
 """
 
-# Values returned by https://bazel.build/rules/lib/repository_os.
-MACOS_NAME = "mac os"
+# Values present in the @platforms//os package
+MACOS_NAME = "osx"
 LINUX_NAME = "linux"
 WINDOWS_NAME = "windows"
 FREETHREADED = "freethreaded"
@@ -620,9 +620,8 @@
             ],
             flag_values = {},
             os_name = MACOS_NAME,
-            # Matches the value returned from:
-            # repository_ctx.execute(["uname", "-m"]).stdout.strip()
-            arch = "arm64",
+            # Matches the value in @platforms//cpu package
+            arch = "aarch64",
         ),
         "aarch64-unknown-linux-gnu": struct(
             compatible_with = [
@@ -633,9 +632,7 @@
                 libc: "glibc",
             },
             os_name = LINUX_NAME,
-            # Note: this string differs between OSX and Linux
-            # Matches the value returned from:
-            # repository_ctx.execute(["uname", "-m"]).stdout.strip()
+            # Matches the value in @platforms//cpu package
             arch = "aarch64",
         ),
         "armv7-unknown-linux-gnu": struct(
@@ -647,7 +644,8 @@
                 libc: "glibc",
             },
             os_name = LINUX_NAME,
-            arch = "armv7",
+            # Matches the value in @platforms//cpu package
+            arch = "arm",
         ),
         "i386-unknown-linux-gnu": struct(
             compatible_with = [
@@ -658,7 +656,8 @@
                 libc: "glibc",
             },
             os_name = LINUX_NAME,
-            arch = "i386",
+            # Matches the value in @platforms//cpu package
+            arch = "x86_32",
         ),
         "ppc64le-unknown-linux-gnu": struct(
             compatible_with = [
@@ -669,10 +668,8 @@
                 libc: "glibc",
             },
             os_name = LINUX_NAME,
-            # Note: this string differs between OSX and Linux
-            # Matches the value returned from:
-            # repository_ctx.execute(["uname", "-m"]).stdout.strip()
-            arch = "ppc64le",
+            # Matches the value in @platforms//cpu package
+            arch = "ppc",
         ),
         "riscv64-unknown-linux-gnu": struct(
             compatible_with = [
@@ -683,6 +680,7 @@
                 Label("//python/config_settings:py_linux_libc"): "glibc",
             },
             os_name = LINUX_NAME,
+            # Matches the value in @platforms//cpu package
             arch = "riscv64",
         ),
         "s390x-unknown-linux-gnu": struct(
@@ -694,9 +692,7 @@
                 Label("//python/config_settings:py_linux_libc"): "glibc",
             },
             os_name = LINUX_NAME,
-            # Note: this string differs between OSX and Linux
-            # Matches the value returned from:
-            # repository_ctx.execute(["uname", "-m"]).stdout.strip()
+            # Matches the value in @platforms//cpu package
             arch = "s390x",
         ),
         "x86_64-apple-darwin": struct(
@@ -706,6 +702,7 @@
             ],
             flag_values = {},
             os_name = MACOS_NAME,
+            # Matches the value in @platforms//cpu package
             arch = "x86_64",
         ),
         "x86_64-pc-windows-msvc": struct(
@@ -715,6 +712,7 @@
             ],
             flag_values = {},
             os_name = WINDOWS_NAME,
+            # Matches the value in @platforms//cpu package
             arch = "x86_64",
         ),
         "x86_64-unknown-linux-gnu": struct(
@@ -726,6 +724,7 @@
                 libc: "glibc",
             },
             os_name = LINUX_NAME,
+            # Matches the value in @platforms//cpu package
             arch = "x86_64",
         ),
     }
diff --git a/tests/integration/compile_pip_requirements_test_from_external_repo/WORKSPACE b/tests/integration/compile_pip_requirements_test_from_external_repo/WORKSPACE
index 48caeb4..7834000 100644
--- a/tests/integration/compile_pip_requirements_test_from_external_repo/WORKSPACE
+++ b/tests/integration/compile_pip_requirements_test_from_external_repo/WORKSPACE
@@ -12,7 +12,6 @@
     python_version = "3.9",
 )
 
-load("@python39//:defs.bzl", "interpreter")
 load("@rules_python//python:pip.bzl", "pip_parse")
 
 local_repository(
@@ -22,7 +21,7 @@
 
 pip_parse(
     name = "pypi",
-    python_interpreter_target = interpreter,
+    python_interpreter_target = "@python39_host//:python",
     requirements_lock = "@compile_pip_requirements//:requirements_lock.txt",
 )