fix(pip): allow for different extras for different target platforms  (#3385)

With this PR we first evaluate the markers in the requirements
files before going any further to aggregate them and process further.
This makes the separation of logic a little bit more clear.

I wanted to do this before I add more tests to after debugging
the failures observed when enabling pipstar.

Whilst cleaning up further I realized that I can fix the handling of
packages where some platforms may end up not needing extras whilst
others
do. This is achieved by reusing the same code that allows us to have
different versions per platform.

Work towards #2949
Fixes #3374
diff --git a/python/private/pypi/hub_builder.bzl b/python/private/pypi/hub_builder.bzl
index 58d35f2..7cf60ff 100644
--- a/python/private/pypi/hub_builder.bzl
+++ b/python/private/pypi/hub_builder.bzl
@@ -566,8 +566,13 @@
             for p in src.target_platforms
         ]
 
+    # TODO @aignas 2025-11-02: once we have pipstar enabled we can add extra
+    # targets to each hub for each extra combination and solve this more cleanly as opposed to
+    # duplicating whl_library repositories.
+    target_platforms = src.target_platforms if is_multiple_versions else []
+
     return struct(
-        repo_name = whl_repo_name(src.filename, src.sha256),
+        repo_name = whl_repo_name(src.filename, src.sha256, *target_platforms),
         args = args,
         config_setting = whl_config_setting(
             version = python_version,
diff --git a/python/private/pypi/parse_requirements.bzl b/python/private/pypi/parse_requirements.bzl
index acf3b0c..7d210ab 100644
--- a/python/private/pypi/parse_requirements.bzl
+++ b/python/private/pypi/parse_requirements.bzl
@@ -88,6 +88,7 @@
     evaluate_markers = evaluate_markers or (lambda _ctx, _requirements: {})
     options = {}
     requirements = {}
+    reqs_with_env_markers = {}
     for file, plats in requirements_by_platform.items():
         logger.trace(lambda: "Using {} for {}".format(file, plats))
         contents = ctx.read(file)
@@ -96,16 +97,41 @@
         # needed for the whl_library declarations later.
         parse_result = parse_requirements_txt(contents)
 
+        tokenized_options = []
+        for opt in parse_result.options:
+            for p in opt.split(" "):
+                tokenized_options.append(p)
+
+        pip_args = tokenized_options + extra_pip_args
+        for plat in plats:
+            requirements[plat] = parse_result.requirements
+            for entry in parse_result.requirements:
+                requirement_line = entry[1]
+
+                # output all of the requirement lines that have a marker
+                if ";" in requirement_line:
+                    reqs_with_env_markers.setdefault(requirement_line, []).append(plat)
+            options[plat] = pip_args
+
+    # This may call to Python, so execute it early (before calling to the
+    # internet below) and ensure that we call it only once.
+    resolved_marker_platforms = evaluate_markers(ctx, reqs_with_env_markers)
+    logger.trace(lambda: "Evaluated env markers from:\n{}\n\nTo:\n{}".format(
+        reqs_with_env_markers,
+        resolved_marker_platforms,
+    ))
+
+    requirements_by_platform = {}
+    for plat, parse_results in requirements.items():
         # Replicate a surprising behavior that WORKSPACE builds allowed:
         # Defining a repo with the same name multiple times, but only the last
         # definition is respected.
         # The requirement lines might have duplicate names because lines for extras
         # are returned as just the base package name. e.g., `foo[bar]` results
         # in an entry like `("foo", "foo[bar] == 1.0 ...")`.
-        # Lines with different markers are not condidered duplicates.
         requirements_dict = {}
         for entry in sorted(
-            parse_result.requirements,
+            parse_results,
             # Get the longest match and fallback to original WORKSPACE sorting,
             # which should get us the entry with most extras.
             #
@@ -114,33 +140,22 @@
             # should do this now.
             key = lambda x: (len(x[1].partition("==")[0]), x),
         ):
-            req = requirement(entry[1])
-            requirements_dict[(req.name, req.version, req.marker)] = entry
+            req_line = entry[1]
+            req = requirement(req_line)
 
-        tokenized_options = []
-        for opt in parse_result.options:
-            for p in opt.split(" "):
-                tokenized_options.append(p)
+            if req.marker and plat not in resolved_marker_platforms.get(req_line, []):
+                continue
 
-        pip_args = tokenized_options + extra_pip_args
-        for plat in plats:
-            requirements[plat] = requirements_dict.values()
-            options[plat] = pip_args
+            requirements_dict[req.name] = entry
 
-    requirements_by_platform = {}
-    reqs_with_env_markers = {}
-    for target_platform, reqs_ in requirements.items():
-        extra_pip_args = options[target_platform]
+        extra_pip_args = options[plat]
 
-        for distribution, requirement_line in reqs_:
+        for distribution, requirement_line in requirements_dict.values():
             for_whl = requirements_by_platform.setdefault(
                 normalize_name(distribution),
                 {},
             )
 
-            if ";" in requirement_line:
-                reqs_with_env_markers.setdefault(requirement_line, []).append(target_platform)
-
             for_req = for_whl.setdefault(
                 (requirement_line, ",".join(extra_pip_args)),
                 struct(
@@ -151,20 +166,7 @@
                     extra_pip_args = extra_pip_args,
                 ),
             )
-            for_req.target_platforms.append(target_platform)
-
-    # This may call to Python, so execute it early (before calling to the
-    # internet below) and ensure that we call it only once.
-    #
-    # NOTE @aignas 2024-07-13: in the future, if this is something that we want
-    # to do, we could use Python to parse the requirement lines and infer the
-    # URL of the files to download things from. This should be important for
-    # VCS package references.
-    env_marker_target_platforms = evaluate_markers(ctx, reqs_with_env_markers)
-    logger.trace(lambda: "Evaluated env markers from:\n{}\n\nTo:\n{}".format(
-        reqs_with_env_markers,
-        env_marker_target_platforms,
-    ))
+            for_req.target_platforms.append(plat)
 
     index_urls = {}
     if get_index_urls:
@@ -183,8 +185,7 @@
     for name, reqs in sorted(requirements_by_platform.items()):
         requirement_target_platforms = {}
         for r in reqs.values():
-            target_platforms = env_marker_target_platforms.get(r.requirement_line, r.target_platforms)
-            for p in target_platforms:
+            for p in r.target_platforms:
                 requirement_target_platforms[p] = None
 
         item = struct(
@@ -197,7 +198,6 @@
                 reqs = reqs,
                 index_urls = index_urls,
                 platforms = platforms,
-                env_marker_target_platforms = env_marker_target_platforms,
                 extract_url_srcs = extract_url_srcs,
                 logger = logger,
             ),
@@ -221,18 +221,13 @@
         index_urls,
         platforms,
         logger,
-        env_marker_target_platforms,
         extract_url_srcs):
     """A function to return sources for a particular package."""
     srcs = {}
     for r in sorted(reqs.values(), key = lambda r: r.requirement_line):
-        if ";" in r.requirement_line:
-            target_platforms = env_marker_target_platforms.get(r.requirement_line, [])
-        else:
-            target_platforms = r.target_platforms
         extra_pip_args = tuple(r.extra_pip_args)
 
-        for target_platform in target_platforms:
+        for target_platform in r.target_platforms:
             if platforms and target_platform not in platforms:
                 fail("The target platform '{}' could not be found in {}".format(
                     target_platform,
diff --git a/python/private/pypi/whl_repo_name.bzl b/python/private/pypi/whl_repo_name.bzl
index 2b3b541..29d774c 100644
--- a/python/private/pypi/whl_repo_name.bzl
+++ b/python/private/pypi/whl_repo_name.bzl
@@ -18,12 +18,14 @@
 load("//python/private:normalize_name.bzl", "normalize_name")
 load(":parse_whl_name.bzl", "parse_whl_name")
 
-def whl_repo_name(filename, sha256):
+def whl_repo_name(filename, sha256, *target_platforms):
     """Return a valid whl_library repo name given a distribution filename.
 
     Args:
         filename: {type}`str` the filename of the distribution.
         sha256: {type}`str` the sha256 of the distribution.
+        *target_platforms: {type}`list[str]` the extra suffixes to append.
+            Only used when we need to support different extras per version.
 
     Returns:
         a string that can be used in {obj}`whl_library`.
@@ -59,6 +61,8 @@
     elif version:
         parts.insert(1, version)
 
+    parts.extend([p.partition("_")[-1] for p in target_platforms])
+
     return "_".join(parts)
 
 def pypi_repo_name(whl_name, *target_platforms):
diff --git a/tests/pypi/hub_builder/hub_builder_tests.bzl b/tests/pypi/hub_builder/hub_builder_tests.bzl
index ee6200a..6d061f4 100644
--- a/tests/pypi/hub_builder/hub_builder_tests.bzl
+++ b/tests/pypi/hub_builder/hub_builder_tests.bzl
@@ -203,6 +203,142 @@
 
 _tests.append(_test_simple_multiple_requirements)
 
+def _test_simple_extras_vs_no_extras(env):
+    builder = hub_builder(env)
+    builder.pip_parse(
+        _mock_mctx(
+            read = lambda x: {
+                "darwin.txt": "simple[foo]==0.0.1 --hash=sha256:deadbeef",
+                "win.txt": "simple==0.0.1 --hash=sha256:deadbeef",
+            }[x],
+        ),
+        _parse(
+            hub_name = "pypi",
+            python_version = "3.15",
+            requirements_darwin = "darwin.txt",
+            requirements_windows = "win.txt",
+        ),
+    )
+    pypi = builder.build()
+
+    pypi.exposed_packages().contains_exactly(["simple"])
+    pypi.group_map().contains_exactly({})
+    pypi.whl_map().contains_exactly({
+        "simple": {
+            "pypi_315_simple_osx_aarch64": [
+                whl_config_setting(
+                    target_platforms = [
+                        "cp315_osx_aarch64",
+                    ],
+                    version = "3.15",
+                ),
+            ],
+            "pypi_315_simple_windows_aarch64": [
+                whl_config_setting(
+                    target_platforms = [
+                        "cp315_windows_aarch64",
+                    ],
+                    version = "3.15",
+                ),
+            ],
+        },
+    })
+    pypi.whl_libraries().contains_exactly({
+        "pypi_315_simple_osx_aarch64": {
+            "config_load": "@pypi//:config.bzl",
+            "dep_template": "@pypi//{name}:{target}",
+            "python_interpreter_target": "unit_test_interpreter_target",
+            "requirement": "simple[foo]==0.0.1 --hash=sha256:deadbeef",
+        },
+        "pypi_315_simple_windows_aarch64": {
+            "config_load": "@pypi//:config.bzl",
+            "dep_template": "@pypi//{name}:{target}",
+            "python_interpreter_target": "unit_test_interpreter_target",
+            "requirement": "simple==0.0.1 --hash=sha256:deadbeef",
+        },
+    })
+    pypi.extra_aliases().contains_exactly({})
+
+_tests.append(_test_simple_extras_vs_no_extras)
+
+def _test_simple_extras_vs_no_extras_simpleapi(env):
+    def mocksimpleapi_download(*_, **__):
+        return {
+            "simple": parse_simpleapi_html(
+                url = "https://example.com",
+                content = """\
+    <a href="/simple-0.0.1-py3-none-any.whl#sha256=deadbeef">simple-0.0.1-py3-none-any.whl</a><br/>
+""",
+            ),
+        }
+
+    builder = hub_builder(
+        env,
+        simpleapi_download_fn = mocksimpleapi_download,
+    )
+    builder.pip_parse(
+        _mock_mctx(
+            read = lambda x: {
+                "darwin.txt": "simple[foo]==0.0.1 --hash=sha256:deadbeef",
+                "win.txt": "simple==0.0.1 --hash=sha256:deadbeef",
+            }[x],
+        ),
+        _parse(
+            hub_name = "pypi",
+            python_version = "3.15",
+            requirements_darwin = "darwin.txt",
+            requirements_windows = "win.txt",
+            experimental_index_url = "example.com",
+        ),
+    )
+    pypi = builder.build()
+
+    pypi.exposed_packages().contains_exactly(["simple"])
+    pypi.group_map().contains_exactly({})
+    pypi.whl_map().contains_exactly({
+        "simple": {
+            "pypi_315_simple_py3_none_any_deadbeef_osx_aarch64": [
+                whl_config_setting(
+                    target_platforms = [
+                        "cp315_osx_aarch64",
+                    ],
+                    version = "3.15",
+                ),
+            ],
+            "pypi_315_simple_py3_none_any_deadbeef_windows_aarch64": [
+                whl_config_setting(
+                    target_platforms = [
+                        "cp315_windows_aarch64",
+                    ],
+                    version = "3.15",
+                ),
+            ],
+        },
+    })
+    pypi.whl_libraries().contains_exactly({
+        "pypi_315_simple_py3_none_any_deadbeef_osx_aarch64": {
+            "config_load": "@pypi//:config.bzl",
+            "dep_template": "@pypi//{name}:{target}",
+            "filename": "simple-0.0.1-py3-none-any.whl",
+            "python_interpreter_target": "unit_test_interpreter_target",
+            "requirement": "simple[foo]==0.0.1",
+            "sha256": "deadbeef",
+            "urls": ["https://example.com/simple-0.0.1-py3-none-any.whl"],
+        },
+        "pypi_315_simple_py3_none_any_deadbeef_windows_aarch64": {
+            "config_load": "@pypi//:config.bzl",
+            "dep_template": "@pypi//{name}:{target}",
+            "filename": "simple-0.0.1-py3-none-any.whl",
+            "python_interpreter_target": "unit_test_interpreter_target",
+            "requirement": "simple==0.0.1",
+            "sha256": "deadbeef",
+            "urls": ["https://example.com/simple-0.0.1-py3-none-any.whl"],
+        },
+    })
+    pypi.extra_aliases().contains_exactly({})
+
+_tests.append(_test_simple_extras_vs_no_extras_simpleapi)
+
 def _test_simple_multiple_python_versions(env):
     builder = hub_builder(
         env,
@@ -496,34 +632,34 @@
     pypi.group_map().contains_exactly({})
     pypi.whl_map().contains_exactly({
         "torch": {
-            "pypi_312_torch_cp312_cp312_linux_x86_64_8800deef": [
+            "pypi_312_torch_cp312_cp312_linux_x86_64_8800deef_linux_x86_64": [
                 whl_config_setting(
-                    target_platforms = ("cp312_linux_x86_64",),
+                    target_platforms = ["cp312_linux_x86_64"],
                     version = "3.12",
                 ),
             ],
-            "pypi_312_torch_cp312_cp312_manylinux_2_17_aarch64_36109432": [
+            "pypi_312_torch_cp312_cp312_manylinux_2_17_aarch64_36109432_linux_aarch64": [
                 whl_config_setting(
-                    target_platforms = ("cp312_linux_aarch64",),
+                    target_platforms = ["cp312_linux_aarch64"],
                     version = "3.12",
                 ),
             ],
-            "pypi_312_torch_cp312_cp312_win_amd64_3a570e5c": [
+            "pypi_312_torch_cp312_cp312_win_amd64_3a570e5c_windows_x86_64": [
                 whl_config_setting(
-                    target_platforms = ("cp312_windows_x86_64",),
+                    target_platforms = ["cp312_windows_x86_64"],
                     version = "3.12",
                 ),
             ],
-            "pypi_312_torch_cp312_none_macosx_11_0_arm64_72b484d5": [
+            "pypi_312_torch_cp312_none_macosx_11_0_arm64_72b484d5_osx_aarch64": [
                 whl_config_setting(
-                    target_platforms = ("cp312_osx_aarch64",),
+                    target_platforms = ["cp312_osx_aarch64"],
                     version = "3.12",
                 ),
             ],
         },
     })
     pypi.whl_libraries().contains_exactly({
-        "pypi_312_torch_cp312_cp312_linux_x86_64_8800deef": {
+        "pypi_312_torch_cp312_cp312_linux_x86_64_8800deef_linux_x86_64": {
             "config_load": "@pypi//:config.bzl",
             "dep_template": "@pypi//{name}:{target}",
             "filename": "torch-2.4.1+cpu-cp312-cp312-linux_x86_64.whl",
@@ -532,7 +668,7 @@
             "sha256": "8800deef0026011d502c0c256cc4b67d002347f63c3a38cd8e45f1f445c61364",
             "urls": ["https://torch.index/whl/cpu/torch-2.4.1%2Bcpu-cp312-cp312-linux_x86_64.whl"],
         },
-        "pypi_312_torch_cp312_cp312_manylinux_2_17_aarch64_36109432": {
+        "pypi_312_torch_cp312_cp312_manylinux_2_17_aarch64_36109432_linux_aarch64": {
             "config_load": "@pypi//:config.bzl",
             "dep_template": "@pypi//{name}:{target}",
             "filename": "torch-2.4.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl",
@@ -541,7 +677,7 @@
             "sha256": "36109432b10bd7163c9b30ce896f3c2cca1b86b9765f956a1594f0ff43091e2a",
             "urls": ["https://torch.index/whl/cpu/torch-2.4.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl"],
         },
-        "pypi_312_torch_cp312_cp312_win_amd64_3a570e5c": {
+        "pypi_312_torch_cp312_cp312_win_amd64_3a570e5c_windows_x86_64": {
             "config_load": "@pypi//:config.bzl",
             "dep_template": "@pypi//{name}:{target}",
             "filename": "torch-2.4.1+cpu-cp312-cp312-win_amd64.whl",
@@ -550,7 +686,7 @@
             "sha256": "3a570e5c553415cdbddfe679207327b3a3806b21c6adea14fba77684d1619e97",
             "urls": ["https://torch.index/whl/cpu/torch-2.4.1%2Bcpu-cp312-cp312-win_amd64.whl"],
         },
-        "pypi_312_torch_cp312_none_macosx_11_0_arm64_72b484d5": {
+        "pypi_312_torch_cp312_none_macosx_11_0_arm64_72b484d5_osx_aarch64": {
             "config_load": "@pypi//:config.bzl",
             "dep_template": "@pypi//{name}:{target}",
             "filename": "torch-2.4.1-cp312-none-macosx_11_0_arm64.whl",
diff --git a/tests/pypi/parse_requirements/parse_requirements_tests.bzl b/tests/pypi/parse_requirements/parse_requirements_tests.bzl
index bd0078b..63755d2 100644
--- a/tests/pypi/parse_requirements/parse_requirements_tests.bzl
+++ b/tests/pypi/parse_requirements/parse_requirements_tests.bzl
@@ -22,12 +22,6 @@
 
 def _mock_ctx():
     testdata = {
-        "requirements_different_package_version": """\
-foo==0.0.1+local \
-    --hash=sha256:deadbeef
-foo==0.0.1 \
-    --hash=sha256:deadb00f
-""",
         "requirements_direct": """\
 foo[extra] @ https://some-url/package.whl
 """,
@@ -40,6 +34,14 @@
 foo[extra]==0.0.1 \
     --hash=sha256:deadbeef
 """,
+        "requirements_foo": """\
+foo==0.0.1 \
+    --hash=sha256:deadb00f
+""",
+        "requirements_foo_local": """\
+foo==0.0.1+local \
+    --hash=sha256:deadbeef
+""",
         "requirements_git": """
 foo @ git+https://github.com/org/foo.git@deadbeef
 """,
@@ -75,7 +77,7 @@
     --hash=sha256:deadb11f
 """,
         "requirements_optional_hash": """
-foo==0.0.4 @ https://example.org/foo-0.0.4.whl
+bar==0.0.4 @ https://example.org/bar-0.0.4.whl
 foo==0.0.5 @ https://example.org/foo-0.0.5.whl --hash=sha256:deadbeef
 """,
         "requirements_osx": """\
@@ -441,7 +443,8 @@
 def _test_different_package_version(env):
     got = parse_requirements(
         requirements_by_platform = {
-            "requirements_different_package_version": ["linux_x86_64"],
+            "requirements_foo": ["linux_aarch64"],
+            "requirements_foo_local": ["linux_x86_64"],
         },
     )
     env.expect.that_collection(got).contains_exactly([
@@ -454,7 +457,7 @@
                     distribution = "foo",
                     extra_pip_args = [],
                     requirement_line = "foo==0.0.1 --hash=sha256:deadb00f",
-                    target_platforms = ["linux_x86_64"],
+                    target_platforms = ["linux_aarch64"],
                     url = "",
                     filename = "",
                     sha256 = "",
@@ -476,10 +479,11 @@
 
 _tests.append(_test_different_package_version)
 
-def _test_optional_hash(env):
+def _test_different_package_extras(env):
     got = parse_requirements(
         requirements_by_platform = {
-            "requirements_optional_hash": ["linux_x86_64"],
+            "requirements_foo": ["linux_aarch64"],
+            "requirements_lock": ["linux_x86_64"],
         },
     )
     env.expect.that_collection(got).contains_exactly([
@@ -491,16 +495,61 @@
                 struct(
                     distribution = "foo",
                     extra_pip_args = [],
-                    requirement_line = "foo==0.0.4",
-                    target_platforms = ["linux_x86_64"],
-                    url = "https://example.org/foo-0.0.4.whl",
-                    filename = "foo-0.0.4.whl",
+                    requirement_line = "foo==0.0.1 --hash=sha256:deadb00f",
+                    target_platforms = ["linux_aarch64"],
+                    url = "",
+                    filename = "",
                     sha256 = "",
                     yanked = False,
                 ),
                 struct(
                     distribution = "foo",
                     extra_pip_args = [],
+                    requirement_line = "foo[extra]==0.0.1 --hash=sha256:deadbeef",
+                    target_platforms = ["linux_x86_64"],
+                    url = "",
+                    filename = "",
+                    sha256 = "",
+                    yanked = False,
+                ),
+            ],
+        ),
+    ])
+
+_tests.append(_test_different_package_extras)
+
+def _test_optional_hash(env):
+    got = parse_requirements(
+        requirements_by_platform = {
+            "requirements_optional_hash": ["linux_x86_64"],
+        },
+    )
+    env.expect.that_collection(got).contains_exactly([
+        struct(
+            name = "bar",
+            is_exposed = True,
+            is_multiple_versions = False,
+            srcs = [
+                struct(
+                    distribution = "bar",
+                    extra_pip_args = [],
+                    requirement_line = "bar==0.0.4",
+                    target_platforms = ["linux_x86_64"],
+                    url = "https://example.org/bar-0.0.4.whl",
+                    filename = "bar-0.0.4.whl",
+                    sha256 = "",
+                    yanked = False,
+                ),
+            ],
+        ),
+        struct(
+            name = "foo",
+            is_exposed = True,
+            is_multiple_versions = False,
+            srcs = [
+                struct(
+                    distribution = "foo",
+                    extra_pip_args = [],
                     requirement_line = "foo==0.0.5",
                     target_platforms = ["linux_x86_64"],
                     url = "https://example.org/foo-0.0.5.whl",
@@ -686,7 +735,6 @@
                 ),
             },
         ),
-        debug = True,
     )
 
     env.expect.that_collection(got).contains_exactly([
@@ -760,13 +808,12 @@
                 ),
             },
         ),
-        debug = True,
     )
 
     env.expect.that_collection(got).contains_exactly([
         struct(
             is_exposed = True,
-            is_multiple_versions = True,
+            is_multiple_versions = False,
             name = "foo",
             srcs = [
                 struct(