# Copyright 2024 The Bazel Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

| """ |
| A file that houses private functions used in the `bzlmod` extension with the same name. |
| """ |
| |
| load("@bazel_features//:features.bzl", "bazel_features") |
| load("//python/private:auth.bzl", "get_auth") |
| load("//python/private:envsubst.bzl", "envsubst") |
| load("//python/private:normalize_name.bzl", "normalize_name") |
| load(":parse_simpleapi_html.bzl", "parse_simpleapi_html") |
| |
def simpleapi_download(ctx, *, attr, cache, parallel_download = True):
    """Download Simple API HTML.

    Args:
        ctx: The module_ctx or repository_ctx.
        attr: Contains the parameters for the download. They are grouped into a
            struct for better clarity. It must have attributes:
             * index_url: str, the base URL of the index.
             * index_url_overrides: dict[str, str], per-package overrides of
               the index URL.
             * extra_index_urls: Extra index URLs that are consulted after the
               main index URL.
             * sources: list[str], the package names to fetch metadata for, as
               collected from the requirements files.
             * envsubst: list[str], the env var names for performing substitution
               in the index URL.
             * netrc: The netrc parameter for ctx.download, see http_file for docs.
             * auth_patterns: The auth_patterns parameter for ctx.download, see
               http_file for docs.
        cache: A dictionary used as a cache between calls during a single
            evaluation of the extension, so that requests to the Simple API can
            be reused. Using the canonical_id parameter of the module_ctx would
            instead store the Simple API responses in the Bazel repository
            cache, which is undesirable because additions to the PyPI index
            would not be reflected when re-evaluating the extension unless
            `bazel clean --expunge` is run.
        parallel_download: A boolean to enable usage of Bazel 7.1 non-blocking downloads.

    Returns:
        dict of normalized package name to the parsed HTML contents - a list of structs.
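
    Example (illustrative; the attribute values below are made up and the
    `attr` struct only needs the fields documented above):

        contents = simpleapi_download(
            module_ctx,
            attr = struct(
                index_url = "https://pypi.org/simple",
                index_url_overrides = {},
                extra_index_urls = [],
                sources = ["requests"],
                envsubst = [],
                netrc = None,
                auth_patterns = {},
            ),
            cache = {},
        )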
| """ |
    index_url_overrides = {
        normalize_name(p): i
        for p, i in (attr.index_url_overrides or {}).items()
    }
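    # Keys are normalized so that an override declared for e.g. `Foo.Bar` also
    # applies to the normalized package names used for the lookups below.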

    download_kwargs = {}
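    # The `block` parameter only exists in Bazel 7.1+; without it every
    # ctx.download call below blocks, so `parallel_download` has no effect.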
    if bazel_features.external_deps.download_has_block_param:
        download_kwargs["block"] = not parallel_download

    # NOTE @aignas 2024-03-31: we are not merging results from multiple indexes
    # to replicate how `pip` would handle this case.
    async_downloads = {}
    contents = {}
    index_urls = [attr.index_url] + attr.extra_index_urls
    for pkg in attr.sources:
        pkg_normalized = normalize_name(pkg)

        success = False
        for index_url in index_urls:
            result = _read_simpleapi(
                ctx = ctx,
                url = "{}/{}/".format(
                    index_url_overrides.get(pkg_normalized, index_url).rstrip("/"),
                    pkg,
                ),
                attr = attr,
                cache = cache,
                **download_kwargs
            )
            if hasattr(result, "wait"):
                # We will process it in a separate loop:
                async_downloads.setdefault(pkg_normalized, []).append(
                    struct(
                        pkg_normalized = pkg_normalized,
                        wait = result.wait,
                    ),
                )
                continue

            if result.success:
                contents[pkg_normalized] = result.output
                success = True
                break

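        # Fail fast only in blocking mode; when downloads were started in
        # parallel, success is checked per package in the second loop below.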
        if not async_downloads and not success:
            fail("Failed to download metadata from urls: {}".format(
                ", ".join(index_urls),
            ))

    if not async_downloads:
        return contents

    # If `block = False` was used above, a second loop collects the results of
    # the downloads that were started in parallel.
    for pkg, downloads in async_downloads.items():
        success = False
        for download in downloads:
            result = download.wait()

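            # Keep the result of the first (highest priority) index that
            # succeeded; later successes for the same package are ignored,
            # mirroring the blocking code path above.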
            if result.success and download.pkg_normalized not in contents:
                contents[download.pkg_normalized] = result.output
                success = True

        if not success:
            fail("Failed to download metadata from urls: {}".format(
                ", ".join(index_urls),
            ))

    return contents

def _read_simpleapi(ctx, url, attr, cache, **download_kwargs):
    """Read SimpleAPI.

    Args:
        ctx: The module_ctx or repository_ctx.
        url: str, the url parameter that can be passed to ctx.download.
        attr: The attribute that contains necessary info for downloading. The
            following attributes must be present:
             * envsubst: The env var names for performing substitutions in the URL.
             * netrc: The netrc parameter for ctx.download, see http_file for docs.
             * auth_patterns: The auth_patterns parameter for ctx.download, see
               http_file for docs.
        cache: A dict for storing the results.
        **download_kwargs: Any extra params to ctx.download.
            Note that output and auth will be passed for you.

    Returns:
        A struct similar to what `download` would return, except that
        `result.output` holds the parsed Simple API contents. When called with
        `block = False`, a struct with only a `wait` callable is returned
        instead, and calling it produces the same kind of result.
    """
    # NOTE @aignas 2024-03-31: some of the Simple API servers use relative URLs
    # for the whl location, and we cannot pass multiple URLs to a single
    # ctx.download call if we want to resolve those relative URLs correctly.
    # TODO: Add a test that env-substituted index URLs do not leak into the lock file.

    real_url = envsubst(
        url,
        attr.envsubst,
        ctx.getenv if hasattr(ctx, "getenv") else ctx.os.environ.get,
    )
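    # e.g. `https://${PIP_INDEX}/simple/pkg/` becomes `https://pypi.org/simple/pkg/`
    # when `PIP_INDEX` is listed in `attr.envsubst` and set in the environment
    # (illustrative variable name and value).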

    cache_key = real_url
    if cache_key in cache:
        return struct(success = True, output = cache[cache_key])

    output_str = envsubst(
        url,
        attr.envsubst,
        # Substitute the env var *names* rather than their values - the
        # resulting placeholder is unique and stable for the lifetime of this
        # function call, and the `~` separators ensure the placeholders do not
        # clash with anything else in the URL.
        {e: "~{}~".format(e) for e in attr.envsubst}.get,
    )
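    # e.g. `https://${PIP_INDEX}/simple/pkg/` becomes
    # `https://~PIP_INDEX~/simple/pkg/` here (illustrative variable name), so
    # values taken from the environment never end up in the derived file name.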

    # Transform the URL into a valid filename
    for char in [".", ":", "/", "\\", "-"]:
        output_str = output_str.replace(char, "_")

    output = ctx.path(output_str.strip("_").lower() + ".html")
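    # e.g. `https://pypi.org/simple/foo/` ends up in a file named
    # `https___pypi_org_simple_foo.html` (illustrative).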

    # NOTE: this may have block = True or block = False in the download_kwargs
    download = ctx.download(
        url = [real_url],
        output = output,
        auth = get_auth(ctx, [real_url], ctx_attr = attr),
        allow_fail = True,
        **download_kwargs
    )

    if download_kwargs.get("block") == False:
        # Simulate the same API as ctx.download has
        return struct(
            wait = lambda: _read_index_result(ctx, download.wait(), output, real_url, cache, cache_key),
        )

    return _read_index_result(ctx, download, output, real_url, cache, cache_key)

def _read_index_result(ctx, result, output, url, cache, cache_key):
    if not result.success:
        return struct(success = False)

    content = ctx.read(output)

    output = parse_simpleapi_html(url = url, content = content)
    if output:
        cache.setdefault(cache_key, output)
        return struct(success = True, output = output, cache_key = cache_key)
    else:
        return struct(success = False)