fix(bzlmod): only expose common packages via the requirements constants (#1955)

With this change the default `gazelle` instructions still work and users
do not need to worry about which package is present on which platform.

Work towards #260
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1fe53e7..93707a1 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -36,6 +36,20 @@
   removed in a future release.
 
 ### Fixed
+* (bzlmod): When using `experimental_index_url` the `all_requirements`,
+  `all_whl_requirements` and `all_data_requirements` will now only include
+  common packages that are available on all target platforms. This is to ensure
+  that packages that are only present for some platforms are pulled only via
+  the `deps` of the materialized `py_library`. If you would like to include
+  platform specific packages, using a `select` statement with references to the
+  specific package will still work (e.g.
+  ```
+  my_attr = all_requirements + select(
+      {
+          "@platforms//os:linux": ["@pypi//foo_available_only_on_linux"],
+          "//conditions:default": [],
+      }
+  )`.
 * (bzlmod): Targets in `all_requirements` now use the same form as targets returned by the `requirement` macro.
 * (rules) Auto exec groups are enabled. This allows actions run by the rules,
   such as precompiling, to pick an execution platform separately from what
diff --git a/python/private/pypi/bzlmod.bzl b/python/private/pypi/bzlmod.bzl
index e98208a..6aafc71 100644
--- a/python/private/pypi/bzlmod.bzl
+++ b/python/private/pypi/bzlmod.bzl
@@ -97,7 +97,7 @@
             whl_mods = whl_mods,
         )
 
-def _create_whl_repos(module_ctx, pip_attr, whl_map, whl_overrides, group_map, simpleapi_cache):
+def _create_whl_repos(module_ctx, pip_attr, whl_map, whl_overrides, group_map, simpleapi_cache, exposed_packages):
     logger = repo_utils.logger(module_ctx)
     python_interpreter_target = pip_attr.python_interpreter_target
     is_hub_reproducible = True
@@ -245,7 +245,9 @@
         if get_index_urls:
             # TODO @aignas 2024-05-26: move to a separate function
             found_something = False
+            is_exposed = False
             for requirement in requirements:
+                is_exposed = is_exposed or requirement.is_exposed
                 for distribution in requirement.whls + [requirement.sdist]:
                     if not distribution:
                         # sdist may be None
@@ -290,6 +292,8 @@
                     )
 
             if found_something:
+                if is_exposed:
+                    exposed_packages.setdefault(hub_name, {})[whl_name] = None
                 continue
 
         requirement = select_requirement(
@@ -431,6 +435,7 @@
     # Where hub, whl, and pip are the repo names
     hub_whl_map = {}
     hub_group_map = {}
+    exposed_packages = {}
 
     simpleapi_cache = {}
     is_extension_reproducible = True
@@ -470,7 +475,7 @@
             else:
                 pip_hub_map[pip_attr.hub_name].python_versions.append(pip_attr.python_version)
 
-            is_hub_reproducible = _create_whl_repos(module_ctx, pip_attr, hub_whl_map, whl_overrides, hub_group_map, simpleapi_cache)
+            is_hub_reproducible = _create_whl_repos(module_ctx, pip_attr, hub_whl_map, whl_overrides, hub_group_map, simpleapi_cache, exposed_packages)
             is_extension_reproducible = is_extension_reproducible and is_hub_reproducible
 
     for hub_name, whl_map in hub_whl_map.items():
@@ -482,6 +487,7 @@
                 for key, value in whl_map.items()
             },
             default_version = _major_minor_version(DEFAULT_PYTHON_VERSION),
+            packages = sorted(exposed_packages.get(hub_name, {})),
             groups = hub_group_map.get(hub_name),
         )
 
diff --git a/python/private/pypi/hub_repository.bzl b/python/private/pypi/hub_repository.bzl
index 40a3ab1..f589dd4 100644
--- a/python/private/pypi/hub_repository.bzl
+++ b/python/private/pypi/hub_repository.bzl
@@ -29,7 +29,7 @@
 """
 
 def _impl(rctx):
-    bzl_packages = rctx.attr.whl_map.keys()
+    bzl_packages = rctx.attr.packages or rctx.attr.whl_map.keys()
     aliases = render_multiplatform_pkg_aliases(
         aliases = {
             key: [whl_alias(**v) for v in json.decode(values)]
@@ -77,6 +77,12 @@
         "groups": attr.string_list_dict(
             mandatory = False,
         ),
+        "packages": attr.string_list(
+            mandatory = False,
+            doc = """\
+The list of packages that will be exposed via all_*requirements macros. Defaults to whl_map keys.
+""",
+        ),
         "repo_name": attr.string(
             mandatory = True,
             doc = "The apparent name of the repo. This is needed because in bzlmod, the name attribute becomes the canonical name.",
diff --git a/python/private/pypi/parse_requirements.bzl b/python/private/pypi/parse_requirements.bzl
index f99329a..d52180c 100644
--- a/python/private/pypi/parse_requirements.bzl
+++ b/python/private/pypi/parse_requirements.bzl
@@ -181,6 +181,8 @@
          * srcs: The Simple API downloadable source list.
          * requirement_line: The original requirement line.
          * target_platforms: The list of target platforms that this package is for.
+         * is_exposed: A boolean if the package should be exposed via the hub
+           repository.
 
         The second element is extra_pip_args should be passed to `whl_library`.
     """
@@ -266,6 +268,8 @@
                 for p in DEFAULT_PLATFORMS
                 if p not in configured_platforms
             ]
+            for p in plats:
+                configured_platforms[p] = file
 
         contents = ctx.read(file)
 
@@ -344,6 +348,19 @@
 
     ret = {}
     for whl_name, reqs in requirements_by_platform.items():
+        requirement_target_platforms = {}
+        for r in reqs.values():
+            for p in r.target_platforms:
+                requirement_target_platforms[p] = None
+
+        is_exposed = len(requirement_target_platforms) == len(configured_platforms)
+        if not is_exposed and logger:
+            logger.debug(lambda: "Package {} will not be exposed because it is only present on a subset of platforms: {} out of {}".format(
+                whl_name,
+                sorted(requirement_target_platforms),
+                sorted(configured_platforms),
+            ))
+
         for r in sorted(reqs.values(), key = lambda r: r.requirement_line):
             whls, sdist = _add_dists(
                 r,
@@ -362,6 +379,7 @@
                     download = r.download,
                     whls = whls,
                     sdist = sdist,
+                    is_exposed = is_exposed,
                 ),
             )
 
diff --git a/tests/pypi/parse_requirements/parse_requirements_tests.bzl b/tests/pypi/parse_requirements/parse_requirements_tests.bzl
index 13ce8f4..5c33dd8 100644
--- a/tests/pypi/parse_requirements/parse_requirements_tests.bzl
+++ b/tests/pypi/parse_requirements/parse_requirements_tests.bzl
@@ -98,6 +98,7 @@
                 ],
                 whls = [],
                 sdist = None,
+                is_exposed = True,
             ),
         ],
     })
@@ -145,6 +146,7 @@
                 ],
                 whls = [],
                 sdist = None,
+                is_exposed = True,
             ),
         ],
     })
@@ -181,6 +183,7 @@
                 ],
                 whls = [],
                 sdist = None,
+                is_exposed = True,
             ),
         ],
     })
@@ -220,6 +223,7 @@
                 target_platforms = ["windows_x86_64"],
                 whls = [],
                 sdist = None,
+                is_exposed = False,
             ),
         ],
         "foo": [
@@ -244,6 +248,7 @@
                 ],
                 whls = [],
                 sdist = None,
+                is_exposed = True,
             ),
             struct(
                 distribution = "foo",
@@ -258,6 +263,7 @@
                 target_platforms = ["windows_x86_64"],
                 whls = [],
                 sdist = None,
+                is_exposed = True,
             ),
         ],
     })
@@ -317,6 +323,7 @@
                 target_platforms = ["linux_x86_64"],
                 whls = [],
                 sdist = None,
+                is_exposed = True,
             ),
         ],
     })
@@ -371,6 +378,7 @@
                 target_platforms = ["linux_aarch64", "linux_x86_64"],
                 whls = [],
                 sdist = None,
+                is_exposed = True,
             ),
             struct(
                 distribution = "foo",
@@ -385,6 +393,7 @@
                 target_platforms = ["linux_super_exotic"],
                 whls = [],
                 sdist = None,
+                is_exposed = True,
             ),
             struct(
                 distribution = "foo",
@@ -406,6 +415,7 @@
                 ],
                 whls = [],
                 sdist = None,
+                is_exposed = True,
             ),
         ],
     })