refactor(whl_library): reimplement wheel platform parsing in starlark (#1636)

It seems that the CI did not catch the `macosx_10_9_universal2` wheels
edge case, which this new code is handling. I moved the implementation
to starlark because #1625 needs this.

No changelog is needed because the feature this is fixing/refactoring
an unreleased feature.

---------

Co-authored-by: Richard Levasseur <richardlev@gmail.com>
diff --git a/python/pip_install/BUILD.bazel b/python/pip_install/BUILD.bazel
index d6d2b3c..a22b9f3 100644
--- a/python/pip_install/BUILD.bazel
+++ b/python/pip_install/BUILD.bazel
@@ -31,10 +31,12 @@
         "//python/pip_install/private:srcs_bzl",
         "//python/private:bzlmod_enabled_bzl",
         "//python/private:normalize_name_bzl",
+        "//python/private:parse_whl_name_bzl",
         "//python/private:patch_whl_bzl",
         "//python/private:render_pkg_aliases_bzl",
         "//python/private:toolchains_repo_bzl",
         "//python/private:which_bzl",
+        "//python/private:whl_target_platforms_bzl",
         "@bazel_skylib//lib:sets",
     ],
 )
diff --git a/python/pip_install/pip_repository.bzl b/python/pip_install/pip_repository.bzl
index b897e8c..3e4878b 100644
--- a/python/pip_install/pip_repository.bzl
+++ b/python/pip_install/pip_repository.bzl
@@ -24,10 +24,12 @@
 load("//python/pip_install/private:srcs.bzl", "PIP_INSTALL_PY_SRCS")
 load("//python/private:bzlmod_enabled.bzl", "BZLMOD_ENABLED")
 load("//python/private:normalize_name.bzl", "normalize_name")
+load("//python/private:parse_whl_name.bzl", "parse_whl_name")
 load("//python/private:patch_whl.bzl", "patch_whl")
 load("//python/private:render_pkg_aliases.bzl", "render_pkg_aliases")
 load("//python/private:toolchains_repo.bzl", "get_host_os_arch")
 load("//python/private:which.bzl", "which_with_fail")
+load("//python/private:whl_target_platforms.bzl", "whl_target_platforms")
 
 CPPFLAGS = "CPPFLAGS"
 
@@ -743,11 +745,22 @@
             timeout = rctx.attr.timeout,
         )
 
+    target_platforms = rctx.attr.experimental_target_platforms
+    if target_platforms:
+        parsed_whl = parse_whl_name(whl_path.basename)
+        if parsed_whl.platform_tag != "any":
+            # NOTE @aignas 2023-12-04: if the wheel is a platform specific
+            # wheel, we only include deps for that target platform
+            target_platforms = [
+                "{}_{}".format(p.os, p.cpu)
+                for p in whl_target_platforms(parsed_whl.platform_tag)
+            ]
+
     result = rctx.execute(
         args + [
             "--whl-file",
             whl_path,
-        ] + ["--platform={}".format(p) for p in rctx.attr.experimental_target_platforms],
+        ] + ["--platform={}".format(p) for p in target_platforms],
         environment = environment,
         quiet = rctx.attr.quiet,
         timeout = rctx.attr.timeout,
diff --git a/python/pip_install/tools/wheel_installer/wheel.py b/python/pip_install/tools/wheel_installer/wheel.py
index 9c18dfd..efd916d 100644
--- a/python/pip_install/tools/wheel_installer/wheel.py
+++ b/python/pip_install/tools/wheel_installer/wheel.py
@@ -36,21 +36,6 @@
     darwin = osx
     win32 = windows
 
-    @staticmethod
-    def from_tag(tag: str) -> "OS":
-        if tag.startswith("linux"):
-            return OS.linux
-        elif tag.startswith("manylinux"):
-            return OS.linux
-        elif tag.startswith("musllinux"):
-            return OS.linux
-        elif tag.startswith("macos"):
-            return OS.osx
-        elif tag.startswith("win"):
-            return OS.windows
-        else:
-            raise ValueError(f"unknown tag: {tag}")
-
 
 class Arch(Enum):
     x86_64 = 1
@@ -65,17 +50,6 @@
     x86 = x86_32
     ppc64le = ppc
 
-    @staticmethod
-    def from_tag(tag: str) -> "Arch":
-        for s, value in Arch.__members__.items():
-            if s in tag:
-                return value
-
-        if tag == "win32":
-            return Arch.x86_32
-        else:
-            raise ValueError(f"unknown tag: {tag}")
-
 
 @dataclass(frozen=True)
 class Platform:
@@ -143,13 +117,6 @@
         return self.os.name.lower() + "_" + self.arch.name.lower()
 
     @classmethod
-    def from_tag(cls, tag: str) -> "Platform":
-        return cls(
-            os=OS.from_tag(tag),
-            arch=Arch.from_tag(tag),
-        )
-
-    @classmethod
     def from_string(cls, platform: Union[str, List[str]]) -> List["Platform"]:
         """Parse a string and return a list of platforms"""
         platform = [platform] if isinstance(platform, str) else list(platform)
@@ -462,17 +429,6 @@
         extras_requested: Set[str] = None,
         platforms: Optional[Set[Platform]] = None,
     ) -> FrozenDeps:
-        if platforms:
-            # NOTE @aignas 2023-12-04: if the wheel is a platform specific wheel, we only include deps for that platform
-            _, _, platform_tag = self._path.name.rpartition("-")
-            platform_tag = platform_tag[:-4]  # strip .whl
-            if platform_tag != "any":
-                platform = Platform.from_tag(platform_tag)
-                assert (
-                    platform in platforms
-                ), f"BUG: wheel platform '{platform}' must be one of '{platforms}'"
-                platforms = {platform}
-
         dependency_set = Deps(
             self.name,
             extras=extras_requested,
diff --git a/python/pip_install/tools/wheel_installer/wheel_test.py b/python/pip_install/tools/wheel_installer/wheel_test.py
index 57bfa94..721b710 100644
--- a/python/pip_install/tools/wheel_installer/wheel_test.py
+++ b/python/pip_install/tools/wheel_installer/wheel_test.py
@@ -199,21 +199,6 @@
 
 
 class PlatformTest(unittest.TestCase):
-    def test_platform_from_string(self):
-        tests = {
-            "win_amd64": "windows_x86_64",
-            "macosx_10_9_arm64": "osx_aarch64",
-            "manylinux1_i686.manylinux_2_17_i686": "linux_x86_32",
-            "musllinux_1_1_ppc64le": "linux_ppc",
-        }
-
-        for give, want in tests.items():
-            with self.subTest(give=give, want=want):
-                self.assertEqual(
-                    wheel.Platform.from_string(want)[0],
-                    wheel.Platform.from_tag(give),
-                )
-
     def test_can_get_host(self):
         host = wheel.Platform.host()
         self.assertIsNotNone(host)
diff --git a/python/private/BUILD.bazel b/python/private/BUILD.bazel
index 162d4ed..25937f0 100644
--- a/python/private/BUILD.bazel
+++ b/python/private/BUILD.bazel
@@ -239,6 +239,12 @@
 )
 
 bzl_library(
+    name = "whl_target_platforms_bzl",
+    srcs = ["whl_target_platforms.bzl"],
+    visibility = ["//:__subpackages__"],
+)
+
+bzl_library(
     name = "labels_bzl",
     srcs = ["labels.bzl"],
 )
diff --git a/python/private/whl_target_platforms.bzl b/python/private/whl_target_platforms.bzl
new file mode 100644
index 0000000..2c63efe
--- /dev/null
+++ b/python/private/whl_target_platforms.bzl
@@ -0,0 +1,80 @@
+# Copyright 2023 The Bazel Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+A starlark implementation of the wheel platform tag parsing to get the target platform.
+"""
+
+# The order of the dictionaries is to keep definitions with their aliases next to each
+# other
+_CPU_ALIASES = {
+    "x86_32": "x86_32",
+    "i386": "x86_32",
+    "i686": "x86_32",
+    "x86": "x86_32",
+    "x86_64": "x86_64",
+    "amd64": "x86_64",
+    "aarch64": "aarch64",
+    "arm64": "aarch64",
+    "ppc": "ppc",
+    "ppc64le": "ppc",
+    "s390x": "s390x",
+}  # buildifier: disable=unsorted-dict-items
+
+_OS_PREFIXES = {
+    "linux": "linux",
+    "manylinux": "linux",
+    "musllinux": "linux",
+    "macos": "osx",
+    "win": "windows",
+}  # buildifier: disable=unsorted-dict-items
+
+def whl_target_platforms(tag):
+    """Parse the wheel platform tag and return (os, cpu) tuples.
+
+    Args:
+        tag (str): The platform_tag part of the wheel name. See
+            ./parse_whl_name.bzl for more details.
+
+    Returns:
+        A list of structs, with attributes:
+        * os: str, one of the _OS_PREFIXES values
+        * cpu: str, one of the _CPU_PREFIXES values
+    """
+    cpus = _cpu_from_tag(tag)
+
+    for prefix, os in _OS_PREFIXES.items():
+        if tag.startswith(prefix):
+            return [
+                struct(os = os, cpu = cpu)
+                for cpu in cpus
+            ]
+
+    fail("unknown tag os: {}".format(tag))
+
+def _cpu_from_tag(tag):
+    candidate = [
+        cpu
+        for input, cpu in _CPU_ALIASES.items()
+        if tag.endswith(input)
+    ]
+    if candidate:
+        return candidate
+
+    if tag == "win32":
+        return ["x86_32"]
+    elif tag.endswith("universal2") and tag.startswith("macosx"):
+        return ["x86_64", "aarch64"]
+    else:
+        fail("Unrecognized tag: '{}': cannot determine CPU".format(tag))
diff --git a/tests/private/whl_target_platforms/BUILD.bazel b/tests/private/whl_target_platforms/BUILD.bazel
new file mode 100644
index 0000000..fec25af
--- /dev/null
+++ b/tests/private/whl_target_platforms/BUILD.bazel
@@ -0,0 +1,17 @@
+# Copyright 2023 The Bazel Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+load(":whl_target_platforms_tests.bzl", "whl_target_platforms_test_suite")
+
+whl_target_platforms_test_suite(name = "whl_target_platforms_tests")
diff --git a/tests/private/whl_target_platforms/whl_target_platforms_tests.bzl b/tests/private/whl_target_platforms/whl_target_platforms_tests.bzl
new file mode 100644
index 0000000..9ccff0e
--- /dev/null
+++ b/tests/private/whl_target_platforms/whl_target_platforms_tests.bzl
@@ -0,0 +1,54 @@
+# Copyright 2023 The Bazel Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+""
+
+load("@rules_testing//lib:test_suite.bzl", "test_suite")
+load("//python/private:whl_target_platforms.bzl", "whl_target_platforms")  # buildifier: disable=bzl-visibility
+
+_tests = []
+
+def _test_simple(env):
+    tests = {
+        "macosx_10_9_arm64": [
+            struct(os = "osx", cpu = "aarch64"),
+        ],
+        "macosx_10_9_universal2": [
+            struct(os = "osx", cpu = "x86_64"),
+            struct(os = "osx", cpu = "aarch64"),
+        ],
+        "manylinux1_i686.manylinux_2_17_i686": [
+            struct(os = "linux", cpu = "x86_32"),
+        ],
+        "musllinux_1_1_ppc64le": [
+            struct(os = "linux", cpu = "ppc"),
+        ],
+        "win_amd64": [
+            struct(os = "windows", cpu = "x86_64"),
+        ],
+    }
+
+    for give, want in tests.items():
+        got = whl_target_platforms(give)
+        env.expect.that_collection(got).contains_exactly(want)
+
+_tests.append(_test_simple)
+
+def whl_target_platforms_test_suite(name):
+    """Create the test suite.
+
+    Args:
+        name: the name of the test suite
+    """
+    test_suite(name = name, basic_tests = _tests)