# python/private/pypi/parse_requirements.bzl - third_party/github/bazelbuild/rules_python - Git at Google

 # Copyright 2024 The Bazel Authors. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.

 """Requirements parsing for whl_library creation.

 Use cases that the code needs to cover:
 * A single requirements_lock file that is used for the host platform.
 * Per-OS requirements_lock files that are used for the host platform.
 * A target platform specific requirements_lock that is used with extra
   pip arguments with --platform, etc and download_only = True.

 In the last case only a single `requirements_lock` file is allowed, in all
 other cases we assume that there may be a desire to resolve the requirements
 file for the host platform to be backwards compatible with the legacy
 behavior.
 """

 load("//python/private:normalize_name.bzl", "normalize_name")
 load("//python/private:repo_utils.bzl", "repo_utils")
 load(":index_sources.bzl", "index_sources")
 load(":parse_requirements_txt.bzl", "parse_requirements_txt")
 load(":pep508_requirement.bzl", "requirement")
 load(":select_whl.bzl", "select_whl")

 def parse_requirements(
         ctx,
         *,
         requirements_by_platform = {},
         extra_pip_args = [],
         platforms = {},
         get_index_urls = None,
         evaluate_markers = None,
         extract_url_srcs = True,
         logger):
     """Get the requirements with platforms that the requirements apply to.

     Args:
         ctx: A context that has .read function that would read contents from a label.
         platforms: The target platform descriptions, keyed by the target
             platform name. When non-empty, every target platform that a
             requirement applies to has to be present in it.
         requirements_by_platform (label_keyed_string_dict): a way to have
             different package versions (or different packages) for different
             os, arch combinations. Maps a requirements file to the target
             platforms that the file is used for.
         extra_pip_args (string list): Extra pip arguments to perform extra validations and to
             be joined with args found in files.
         get_index_urls: Callable[[ctx, list[str]], dict], a callable to get all
             of the distribution URLs from a PyPI index. Accepts ctx and
             distribution names to query.
         evaluate_markers: A function to use to evaluate the requirements.
             Accepts a dict where keys are requirement lines to evaluate against
             the platforms stored as values in the input dict. Returns the same
             dict, but with values being platforms that are compatible with the
             requirements line.
         extract_url_srcs: A boolean to enable extracting URLs from requirement
             lines to enable using bazel downloader.
         logger: repo_utils.logger, a simple struct to log diagnostic messages.

     Returns:
         {type}`list[struct]` sorted by the normalized distribution name, with
         one struct per distribution that has the following attributes:
          * `name`: {type}`str` The normalized distribution name.
          * `is_exposed`: {type}`bool` `True` if the package should be exposed
            via the hub repository, i.e. it applies to as many target
            platforms as there are in `requirements_by_platform`.
          * `is_multiple_versions`: {type}`bool` `True` if there is more than
            one distinct requirement line and pip args combination for the
            package.
          * `srcs`: {type}`list[struct]` The sources of the package as
            returned by `_package_srcs`; each of them has the `distribution`,
            `requirement_line`, `target_platforms`, `extra_pip_args`,
            `filename`, `sha256`, `url` and `yanked` attributes. The
            `extra_pip_args` should be passed to {obj}`whl_library` when the
            bazel downloader is not used to download the archive.
     """
     evaluate_markers = evaluate_markers or (lambda _ctx, _requirements: {})

     # Both dicts are keyed by the target platform.
     options = {}
     requirements = {}
     for file, plats in requirements_by_platform.items():
         logger.trace(lambda: "Using {} for {}".format(file, plats))
         contents = ctx.read(file)

         # Parse the requirements file directly in starlark to get the information
         # needed for the whl_library declarations later.
         parse_result = parse_requirements_txt(contents)

         # Replicate a surprising behavior that WORKSPACE builds allowed:
         # Defining a repo with the same name multiple times, but only the last
         # definition is respected.
         # The requirement lines might have duplicate names because lines for extras
         # are returned as just the base package name. e.g., `foo[bar]` results
         # in an entry like `("foo", "foo[bar] == 1.0 ...")`.
         # Lines with different markers are not considered duplicates.
         requirements_dict = {}
         for entry in sorted(
             parse_result.requirements,
             # Get the longest match and fallback to original WORKSPACE sorting,
             # which should get us the entry with most extras.
             #
             # FIXME @aignas 2024-05-13: The correct behaviour might be to get an
             # entry with all aggregated extras, but it is unclear if we
             # should do this now.
             key = lambda x: (len(x[1].partition("==")[0]), x),
         ):
             req = requirement(entry[1])
             requirements_dict[(req.name, req.version, req.marker)] = entry

         # Consecutive spaces in an option line would otherwise yield empty
         # strings, which must not be forwarded as pip arguments.
         tokenized_options = [
             token
             for opt in parse_result.options
             for token in opt.split(" ")
             if token
         ]

         pip_args = tokenized_options + extra_pip_args
         for plat in plats:
             requirements[plat] = requirements_dict.values()
             options[plat] = pip_args

     # Regroup by the normalized distribution name. The inner dicts are keyed
     # by (requirement line, joined pip args) so that identical lines coming
     # from different platforms share a single struct.
     reqs_by_distribution = {}
     reqs_with_env_markers = {}
     for target_platform, reqs_ in requirements.items():
         platform_pip_args = options[target_platform]

         for distribution, requirement_line in reqs_:
             for_whl = reqs_by_distribution.setdefault(
                 normalize_name(distribution),
                 {},
             )

             if ";" in requirement_line:
                 reqs_with_env_markers.setdefault(requirement_line, []).append(target_platform)

             for_req = for_whl.setdefault(
                 (requirement_line, ",".join(platform_pip_args)),
                 struct(
                     distribution = distribution,
                     srcs = index_sources(requirement_line),
                     requirement_line = requirement_line,
                     target_platforms = [],
                     extra_pip_args = platform_pip_args,
                 ),
             )
             for_req.target_platforms.append(target_platform)

     # This may call to Python, so execute it early (before calling to the
     # internet below) and ensure that we call it only once.
     #
     # NOTE @aignas 2024-07-13: in the future, if this is something that we want
     # to do, we could use Python to parse the requirement lines and infer the
     # URL of the files to download things from. This should be important for
     # VCS package references.
     env_marker_target_platforms = evaluate_markers(ctx, reqs_with_env_markers)
     logger.trace(lambda: "Evaluated env markers from:\n{}\n\nTo:\n{}".format(
         reqs_with_env_markers,
         env_marker_target_platforms,
     ))

     index_urls = {}
     if get_index_urls:
         index_urls = get_index_urls(
             ctx,
             # Use list({}) as a way to have a set. Requirements with a direct
             # URL do not need to be looked up on the index.
             list({
                 req.distribution: None
                 for reqs in reqs_by_distribution.values()
                 for req in reqs.values()
                 if not req.srcs.url
             }),
         )

     ret = []
     for name, reqs in sorted(reqs_by_distribution.items()):
         # NOTE(review): a requirement line with a marker that is missing from
         # the `evaluate_markers` result falls back to its declared target
         # platforms here, whereas `_package_srcs` treats it as applying to
         # no platforms.
         requirement_target_platforms = {}
         for r in reqs.values():
             target_platforms = env_marker_target_platforms.get(r.requirement_line, r.target_platforms)
             for p in target_platforms:
                 requirement_target_platforms[p] = None

         item = struct(
             # Return normalized names
             name = normalize_name(name),
             is_exposed = len(requirement_target_platforms) == len(requirements),
             is_multiple_versions = len(reqs.values()) > 1,
             srcs = _package_srcs(
                 name = name,
                 reqs = reqs,
                 index_urls = index_urls,
                 platforms = platforms,
                 env_marker_target_platforms = env_marker_target_platforms,
                 extract_url_srcs = extract_url_srcs,
                 logger = logger,
             ),
         )
         ret.append(item)
         if not item.is_exposed:
             logger.trace(lambda: "Package '{}' will not be exposed because it is only present on a subset of platforms: {} out of {}".format(
                 name,
                 sorted(requirement_target_platforms),
                 sorted(requirements),
             ))

     logger.debug(lambda: "Will configure whl repos: {}".format([w.name for w in ret]))

     return ret

 def _package_srcs(
         *,
         name,
         reqs,
         index_urls,
         platforms,
         logger,
         env_marker_target_platforms,
         extract_url_srcs):
     """Collect the de-duplicated sources of a single package.

     Args:
         name: The distribution name; it is stored as `distribution` in the
             results and is used to look up the package in `index_urls`.
         reqs: A dict whose values are the requirement structs of the package
             (with `requirement_line`, `target_platforms`, `extra_pip_args`
             and `srcs` attributes).
         index_urls: A dict with the PyPI index data keyed by the
             distribution name.
         platforms: The target platform descriptions keyed by the target
             platform name. If it is non-empty, an unknown target platform
             is a failure.
         logger: A logger for printing diagnostic info.
         env_marker_target_platforms: A dict from a requirement line to the
             target platforms that are compatible with its env markers. A
             requirement line with a marker that is absent from the dict
             yields no sources.
         extract_url_srcs: When falsy, the found distribution is ignored and
             the original requirement line is kept.

     Returns:
         A list of structs, one per unique (filename, requirement line,
         pip args) combination, each listing the target platforms it is for.
     """

     # Used whenever there is nothing for the bazel downloader to fetch.
     no_dist = struct(
         url = "",
         filename = "",
         sha256 = "",
         yanked = False,
     )

     srcs_by_key = {}
     ordered_reqs = sorted(reqs.values(), key = lambda req: req.requirement_line)
     for req in ordered_reqs:
         has_marker = ";" in req.requirement_line
         if has_marker:
             applicable_platforms = env_marker_target_platforms.get(req.requirement_line, [])
         else:
             applicable_platforms = req.target_platforms
         pip_args_key = tuple(req.extra_pip_args)

         for plat in applicable_platforms:
             if platforms and plat not in platforms:
                 fail("The target platform '{}' could not be found in {}".format(
                     plat,
                     platforms.keys(),
                 ))

             found = _add_dists(
                 requirement = req,
                 target_platform = platforms.get(plat),
                 index_urls = index_urls.get(name),
                 logger = logger,
             )
             logger.debug(lambda: "The whl dist is: {}".format(found.filename if found else found))

             if extract_url_srcs and found:
                 dist, line = found, req.srcs.requirement
             else:
                 dist, line = no_dist, req.srcs.requirement_line

             src = srcs_by_key.setdefault(
                 (dist.filename, line, pip_args_key),
                 struct(
                     distribution = name,
                     extra_pip_args = req.extra_pip_args,
                     requirement_line = line,
                     target_platforms = [],
                     filename = dist.filename,
                     sha256 = dist.sha256,
                     url = dist.url,
                     yanked = dist.yanked,
                 ),
             )

             if plat not in src.target_platforms:
                 src.target_platforms.append(plat)

     return srcs_by_key.values()

 def select_requirement(requirements, *, platform):
     """Pick the requirement that applies to the given platform.

     Only used in WORKSPACE.

     Args:
         requirements (list[struct]): The list of requirements as returned by
             the `parse_requirements` function above.
         platform (str or None): The host platform. Usually an output of the
             `host_platform` function. If None, then this function will return
             the first requirement it finds.

     Returns:
         The first requirement that has a target platform ending with
         `platform` (or simply the first requirement if no `platform` is
         given), `None` if there is no such requirement.
     """
     for candidate in requirements:
         if not platform:
             # Nothing to match against, the first requirement wins.
             return candidate

         for target_platform in candidate.target_platforms:
             if target_platform.endswith(platform):
                 return candidate

     # Sometimes the package is not present for host platform if there
     # are whls specified only in particular requirements files, in that
     # case there is nothing to return. However, if the download_only flag
     # is set up, then the user can also specify the target platform of the
     # wheel packages they want to download, in that case there will be
     # always a requirement found above.
     return None

 def host_platform(ctx):
     """Describe the OS and CPU of the repository host as a single string.

     Only used in WORKSPACE.

     Args:
         ctx (struct): The `module_ctx` or `repository_ctx` attribute.

     Returns:
         The `{os}_{cpu}` string built from the platform names reported by
         `repo_utils`, which can be used later in the `pip` machinery.
     """
     os_name = repo_utils.get_platforms_os_name(ctx)
     cpu_name = repo_utils.get_platforms_cpu_name(ctx)
     return "{}_{}".format(os_name, cpu_name)

 def _add_dists(*, requirement, index_urls, target_platform, logger = None):
     """Find the distribution to download for a requirement.

     Requirements with a direct URL are resolved from the requirement line
     itself, everything else is looked up by sha256 in the PyPI index data.
     The arguments are not modified.

     Args:
         requirement: One of the requirement structs built in
             `parse_requirements`; its `srcs` attribute is read for the
             `url`, `filename`, `shas` and `version` values.
         index_urls: The result of simpleapi_download for the distribution,
             may be `None` if the index was not queried.
         target_platform: The target_platform information, may be `None`, in
             which case no whl matching is done.
         logger: A logger for printing diagnostic info, may be `None`.

     Returns:
         `None` if nothing can be downloaded by the bazel downloader (no
         filename in the direct URL, no index data or no matching
         distribution). Otherwise a struct with `url`, `filename`, `sha256`
         and `yanked` attributes for a direct URL, or the index entry of the
         whl selected for the `target_platform` with the sdist as the
         fallback.
     """

     if requirement.srcs.url:
         if not requirement.srcs.filename:
             if logger:
                 logger.debug(lambda: "Could not detect the filename from the URL, falling back to pip: {}".format(
                     requirement.srcs.url,
                 ))
             return None

         # Handle direct URLs in requirements, whls and sdists are treated
         # the same way.
         return struct(
             url = requirement.srcs.url,
             filename = requirement.srcs.filename,
             sha256 = requirement.srcs.shas[0] if requirement.srcs.shas else "",
             yanked = False,
         )

     if not index_urls:
         return None

     whls = []
     sdist = None

     # Filenames of the skipped yanked distributions keyed by the yank reason.
     yanked = {}

     # First try to find distributions by SHA256 if provided
     shas_to_use = requirement.srcs.shas
     if not shas_to_use:
         version = requirement.srcs.version
         shas_to_use = index_urls.sha256s_by_version.get(version, [])
         if logger:
             logger.warn(lambda: "requirement file has been generated without hashes, will use all hashes for the given version {} that could find on the index:\n    {}".format(version, shas_to_use))

     for sha256 in shas_to_use:
         # For now if the artifact is marked as yanked we just ignore it.
         #
         # See https://packaging.python.org/en/latest/specifications/simple-repository-api/#adding-yank-support-to-the-simple-api

         maybe_whl = index_urls.whls.get(sha256)
         if maybe_whl and not maybe_whl.yanked:
             whls.append(maybe_whl)
             continue

         maybe_sdist = index_urls.sdists.get(sha256)
         if maybe_sdist and not maybe_sdist.yanked:
             sdist = maybe_sdist
             continue

         # Anything that was found at this point was skipped because it is
         # yanked, record it so that it is reported as such instead of as
         # a missing distribution.
         skipped = [dist for dist in [maybe_whl, maybe_sdist] if dist]
         if skipped:
             for dist in skipped:
                 yanked.setdefault(dist.yanked, []).append(dist.filename)
             continue

         if logger:
             logger.warn(lambda: "Could not find a whl or an sdist with sha256={}".format(sha256))

     if yanked and logger:
         logger.warn(lambda: "\n".join([
             "the following distributions got yanked:",
         ] + [
             "reason: {}\n  {}".format(reason, "\n".join(sorted(dists)))
             for reason, dists in yanked.items()
         ]))

     if not target_platform:
         # The pipstar platforms are undefined here, so we cannot do any matching
         return sdist

     # Select a single wheel that can work on the target_platform
     return select_whl(
         whls = whls,
         python_version = target_platform.env["python_full_version"],
         implementation_name = target_platform.env["implementation_name"],
         whl_abi_tags = target_platform.whl_abi_tags,
         whl_platform_tags = target_platform.whl_platform_tags,
         logger = logger,
     ) or sdist
	# Copyright 2024 The Bazel Authors. All rights reserved.
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.

	"""Requirements parsing for whl_library creation.

	Use cases that the code needs to cover:
	* A single requirements_lock file that is used for the host platform.
	* Per-OS requirements_lock files that are used for the host platform.
	* A target platform specific requirements_lock that is used with extra
	pip arguments with --platform, etc and download_only = True.

	In the last case only a single `requirements_lock` file is allowed, in all
	other cases we assume that there may be a desire to resolve the requirements
	file for the host platform to be backwards compatible with the legacy
	behavior.
	"""

	load("//python/private:normalize_name.bzl", "normalize_name")
	load("//python/private:repo_utils.bzl", "repo_utils")
	load(":index_sources.bzl", "index_sources")
	load(":parse_requirements_txt.bzl", "parse_requirements_txt")
	load(":pep508_requirement.bzl", "requirement")
	load(":select_whl.bzl", "select_whl")

	def parse_requirements(
	        ctx,
	        *,
	        requirements_by_platform = {},
	        extra_pip_args = [],
	        platforms = {},
	        get_index_urls = None,
	        evaluate_markers = None,
	        extract_url_srcs = True,
	        logger):
	    """Get the requirements with platforms that the requirements apply to.

	    Args:
	        ctx: A context that has .read function that would read contents from a label.
	        platforms: The target platform descriptions, keyed by the target
	            platform name. When non-empty, every target platform that a
	            requirement applies to has to be present in it.
	        requirements_by_platform (label_keyed_string_dict): a way to have
	            different package versions (or different packages) for different
	            os, arch combinations. Maps a requirements file to the target
	            platforms that the file is used for.
	        extra_pip_args (string list): Extra pip arguments to perform extra validations and to
	            be joined with args found in files.
	        get_index_urls: Callable[[ctx, list[str]], dict], a callable to get all
	            of the distribution URLs from a PyPI index. Accepts ctx and
	            distribution names to query.
	        evaluate_markers: A function to use to evaluate the requirements.
	            Accepts a dict where keys are requirement lines to evaluate against
	            the platforms stored as values in the input dict. Returns the same
	            dict, but with values being platforms that are compatible with the
	            requirements line.
	        extract_url_srcs: A boolean to enable extracting URLs from requirement
	            lines to enable using bazel downloader.
	        logger: repo_utils.logger, a simple struct to log diagnostic messages.

	    Returns:
	        {type}`list[struct]` sorted by the normalized distribution name, with
	        one struct per distribution that has the following attributes:
	         * `name`: {type}`str` The normalized distribution name.
	         * `is_exposed`: {type}`bool` `True` if the package should be exposed
	           via the hub repository, i.e. it applies to as many target
	           platforms as there are in `requirements_by_platform`.
	         * `is_multiple_versions`: {type}`bool` `True` if there is more than
	           one distinct requirement line and pip args combination for the
	           package.
	         * `srcs`: {type}`list[struct]` The sources of the package as
	           returned by `_package_srcs`; each of them has the `distribution`,
	           `requirement_line`, `target_platforms`, `extra_pip_args`,
	           `filename`, `sha256`, `url` and `yanked` attributes. The
	           `extra_pip_args` should be passed to {obj}`whl_library` when the
	           bazel downloader is not used to download the archive.
	    """
	    evaluate_markers = evaluate_markers or (lambda _ctx, _requirements: {})

	    # Both dicts are keyed by the target platform.
	    options = {}
	    requirements = {}
	    for file, plats in requirements_by_platform.items():
	        logger.trace(lambda: "Using {} for {}".format(file, plats))
	        contents = ctx.read(file)

	        # Parse the requirements file directly in starlark to get the information
	        # needed for the whl_library declarations later.
	        parse_result = parse_requirements_txt(contents)

	        # Replicate a surprising behavior that WORKSPACE builds allowed:
	        # Defining a repo with the same name multiple times, but only the last
	        # definition is respected.
	        # The requirement lines might have duplicate names because lines for extras
	        # are returned as just the base package name. e.g., `foo[bar]` results
	        # in an entry like `("foo", "foo[bar] == 1.0 ...")`.
	        # Lines with different markers are not considered duplicates.
	        requirements_dict = {}
	        for entry in sorted(
	            parse_result.requirements,
	            # Get the longest match and fallback to original WORKSPACE sorting,
	            # which should get us the entry with most extras.
	            #
	            # FIXME @aignas 2024-05-13: The correct behaviour might be to get an
	            # entry with all aggregated extras, but it is unclear if we
	            # should do this now.
	            key = lambda x: (len(x[1].partition("==")[0]), x),
	        ):
	            req = requirement(entry[1])
	            requirements_dict[(req.name, req.version, req.marker)] = entry

	        # Consecutive spaces in an option line would otherwise yield empty
	        # strings, which must not be forwarded as pip arguments.
	        tokenized_options = [
	            token
	            for opt in parse_result.options
	            for token in opt.split(" ")
	            if token
	        ]

	        pip_args = tokenized_options + extra_pip_args
	        for plat in plats:
	            requirements[plat] = requirements_dict.values()
	            options[plat] = pip_args

	    # Regroup by the normalized distribution name. The inner dicts are keyed
	    # by (requirement line, joined pip args) so that identical lines coming
	    # from different platforms share a single struct.
	    reqs_by_distribution = {}
	    reqs_with_env_markers = {}
	    for target_platform, reqs_ in requirements.items():
	        platform_pip_args = options[target_platform]

	        for distribution, requirement_line in reqs_:
	            for_whl = reqs_by_distribution.setdefault(
	                normalize_name(distribution),
	                {},
	            )

	            if ";" in requirement_line:
	                reqs_with_env_markers.setdefault(requirement_line, []).append(target_platform)

	            for_req = for_whl.setdefault(
	                (requirement_line, ",".join(platform_pip_args)),
	                struct(
	                    distribution = distribution,
	                    srcs = index_sources(requirement_line),
	                    requirement_line = requirement_line,
	                    target_platforms = [],
	                    extra_pip_args = platform_pip_args,
	                ),
	            )
	            for_req.target_platforms.append(target_platform)

	    # This may call to Python, so execute it early (before calling to the
	    # internet below) and ensure that we call it only once.
	    #
	    # NOTE @aignas 2024-07-13: in the future, if this is something that we want
	    # to do, we could use Python to parse the requirement lines and infer the
	    # URL of the files to download things from. This should be important for
	    # VCS package references.
	    env_marker_target_platforms = evaluate_markers(ctx, reqs_with_env_markers)
	    logger.trace(lambda: "Evaluated env markers from:\n{}\n\nTo:\n{}".format(
	        reqs_with_env_markers,
	        env_marker_target_platforms,
	    ))

	    index_urls = {}
	    if get_index_urls:
	        index_urls = get_index_urls(
	            ctx,
	            # Use list({}) as a way to have a set. Requirements with a direct
	            # URL do not need to be looked up on the index.
	            list({
	                req.distribution: None
	                for reqs in reqs_by_distribution.values()
	                for req in reqs.values()
	                if not req.srcs.url
	            }),
	        )

	    ret = []
	    for name, reqs in sorted(reqs_by_distribution.items()):
	        # NOTE(review): a requirement line with a marker that is missing from
	        # the `evaluate_markers` result falls back to its declared target
	        # platforms here, whereas `_package_srcs` treats it as applying to
	        # no platforms.
	        requirement_target_platforms = {}
	        for r in reqs.values():
	            target_platforms = env_marker_target_platforms.get(r.requirement_line, r.target_platforms)
	            for p in target_platforms:
	                requirement_target_platforms[p] = None

	        item = struct(
	            # Return normalized names
	            name = normalize_name(name),
	            is_exposed = len(requirement_target_platforms) == len(requirements),
	            is_multiple_versions = len(reqs.values()) > 1,
	            srcs = _package_srcs(
	                name = name,
	                reqs = reqs,
	                index_urls = index_urls,
	                platforms = platforms,
	                env_marker_target_platforms = env_marker_target_platforms,
	                extract_url_srcs = extract_url_srcs,
	                logger = logger,
	            ),
	        )
	        ret.append(item)
	        if not item.is_exposed:
	            logger.trace(lambda: "Package '{}' will not be exposed because it is only present on a subset of platforms: {} out of {}".format(
	                name,
	                sorted(requirement_target_platforms),
	                sorted(requirements),
	            ))

	    logger.debug(lambda: "Will configure whl repos: {}".format([w.name for w in ret]))

	    return ret

	def _package_srcs(
	        *,
	        name,
	        reqs,
	        index_urls,
	        platforms,
	        logger,
	        env_marker_target_platforms,
	        extract_url_srcs):
	    """Collect the de-duplicated sources of a single package.

	    Args:
	        name: The distribution name; it is stored as `distribution` in the
	            results and is used to look up the package in `index_urls`.
	        reqs: A dict whose values are the requirement structs of the package
	            (with `requirement_line`, `target_platforms`, `extra_pip_args`
	            and `srcs` attributes).
	        index_urls: A dict with the PyPI index data keyed by the
	            distribution name.
	        platforms: The target platform descriptions keyed by the target
	            platform name. If it is non-empty, an unknown target platform
	            is a failure.
	        logger: A logger for printing diagnostic info.
	        env_marker_target_platforms: A dict from a requirement line to the
	            target platforms that are compatible with its env markers. A
	            requirement line with a marker that is absent from the dict
	            yields no sources.
	        extract_url_srcs: When falsy, the found distribution is ignored and
	            the original requirement line is kept.

	    Returns:
	        A list of structs, one per unique (filename, requirement line,
	        pip args) combination, each listing the target platforms it is for.
	    """

	    # Used whenever there is nothing for the bazel downloader to fetch.
	    no_dist = struct(
	        url = "",
	        filename = "",
	        sha256 = "",
	        yanked = False,
	    )

	    srcs_by_key = {}
	    ordered_reqs = sorted(reqs.values(), key = lambda req: req.requirement_line)
	    for req in ordered_reqs:
	        has_marker = ";" in req.requirement_line
	        if has_marker:
	            applicable_platforms = env_marker_target_platforms.get(req.requirement_line, [])
	        else:
	            applicable_platforms = req.target_platforms
	        pip_args_key = tuple(req.extra_pip_args)

	        for plat in applicable_platforms:
	            if platforms and plat not in platforms:
	                fail("The target platform '{}' could not be found in {}".format(
	                    plat,
	                    platforms.keys(),
	                ))

	            found = _add_dists(
	                requirement = req,
	                target_platform = platforms.get(plat),
	                index_urls = index_urls.get(name),
	                logger = logger,
	            )
	            logger.debug(lambda: "The whl dist is: {}".format(found.filename if found else found))

	            if extract_url_srcs and found:
	                dist, line = found, req.srcs.requirement
	            else:
	                dist, line = no_dist, req.srcs.requirement_line

	            src = srcs_by_key.setdefault(
	                (dist.filename, line, pip_args_key),
	                struct(
	                    distribution = name,
	                    extra_pip_args = req.extra_pip_args,
	                    requirement_line = line,
	                    target_platforms = [],
	                    filename = dist.filename,
	                    sha256 = dist.sha256,
	                    url = dist.url,
	                    yanked = dist.yanked,
	                ),
	            )

	            if plat not in src.target_platforms:
	                src.target_platforms.append(plat)

	    return srcs_by_key.values()

	def select_requirement(requirements, *, platform):
	    """Pick the requirement that applies to the given platform.

	    Only used in WORKSPACE.

	    Args:
	        requirements (list[struct]): The list of requirements as returned by
	            the `parse_requirements` function above.
	        platform (str or None): The host platform. Usually an output of the
	            `host_platform` function. If None, then this function will return
	            the first requirement it finds.

	    Returns:
	        The first requirement that has a target platform ending with
	        `platform` (or simply the first requirement if no `platform` is
	        given), `None` if there is no such requirement.
	    """
	    for candidate in requirements:
	        if not platform:
	            # Nothing to match against, the first requirement wins.
	            return candidate

	        for target_platform in candidate.target_platforms:
	            if target_platform.endswith(platform):
	                return candidate

	    # Sometimes the package is not present for host platform if there
	    # are whls specified only in particular requirements files, in that
	    # case there is nothing to return. However, if the download_only flag
	    # is set up, then the user can also specify the target platform of the
	    # wheel packages they want to download, in that case there will be
	    # always a requirement found above.
	    return None

	def host_platform(ctx):
	    """Describe the OS and CPU of the repository host as a single string.

	    Only used in WORKSPACE.

	    Args:
	        ctx (struct): The `module_ctx` or `repository_ctx` attribute.

	    Returns:
	        The `{os}_{cpu}` string built from the platform names reported by
	        `repo_utils`, which can be used later in the `pip` machinery.
	    """
	    os_name = repo_utils.get_platforms_os_name(ctx)
	    cpu_name = repo_utils.get_platforms_cpu_name(ctx)
	    return "{}_{}".format(os_name, cpu_name)

	def _add_dists(*, requirement, index_urls, target_platform, logger = None):
	    """Find the distribution to download for a requirement.

	    Requirements with a direct URL are resolved from the requirement line
	    itself, everything else is looked up by sha256 in the PyPI index data.
	    The arguments are not modified.

	    Args:
	        requirement: One of the requirement structs built in
	            `parse_requirements`; its `srcs` attribute is read for the
	            `url`, `filename`, `shas` and `version` values.
	        index_urls: The result of simpleapi_download for the distribution,
	            may be `None` if the index was not queried.
	        target_platform: The target_platform information, may be `None`, in
	            which case no whl matching is done.
	        logger: A logger for printing diagnostic info, may be `None`.

	    Returns:
	        `None` if nothing can be downloaded by the bazel downloader (no
	        filename in the direct URL, no index data or no matching
	        distribution). Otherwise a struct with `url`, `filename`, `sha256`
	        and `yanked` attributes for a direct URL, or the index entry of the
	        whl selected for the `target_platform` with the sdist as the
	        fallback.
	    """

	    if requirement.srcs.url:
	        if not requirement.srcs.filename:
	            if logger:
	                logger.debug(lambda: "Could not detect the filename from the URL, falling back to pip: {}".format(
	                    requirement.srcs.url,
	                ))
	            return None

	        # Handle direct URLs in requirements, whls and sdists are treated
	        # the same way.
	        return struct(
	            url = requirement.srcs.url,
	            filename = requirement.srcs.filename,
	            sha256 = requirement.srcs.shas[0] if requirement.srcs.shas else "",
	            yanked = False,
	        )

	    if not index_urls:
	        return None

	    whls = []
	    sdist = None

	    # Filenames of the skipped yanked distributions keyed by the yank reason.
	    yanked = {}

	    # First try to find distributions by SHA256 if provided
	    shas_to_use = requirement.srcs.shas
	    if not shas_to_use:
	        version = requirement.srcs.version
	        shas_to_use = index_urls.sha256s_by_version.get(version, [])
	        if logger:
	            logger.warn(lambda: "requirement file has been generated without hashes, will use all hashes for the given version {} that could find on the index:\n {}".format(version, shas_to_use))

	    for sha256 in shas_to_use:
	        # For now if the artifact is marked as yanked we just ignore it.
	        #
	        # See https://packaging.python.org/en/latest/specifications/simple-repository-api/#adding-yank-support-to-the-simple-api

	        maybe_whl = index_urls.whls.get(sha256)
	        if maybe_whl and not maybe_whl.yanked:
	            whls.append(maybe_whl)
	            continue

	        maybe_sdist = index_urls.sdists.get(sha256)
	        if maybe_sdist and not maybe_sdist.yanked:
	            sdist = maybe_sdist
	            continue

	        # Anything that was found at this point was skipped because it is
	        # yanked, record it so that it is reported as such instead of as
	        # a missing distribution.
	        skipped = [dist for dist in [maybe_whl, maybe_sdist] if dist]
	        if skipped:
	            for dist in skipped:
	                yanked.setdefault(dist.yanked, []).append(dist.filename)
	            continue

	        if logger:
	            logger.warn(lambda: "Could not find a whl or an sdist with sha256={}".format(sha256))

	    if yanked and logger:
	        logger.warn(lambda: "\n".join([
	            "the following distributions got yanked:",
	        ] + [
	            "reason: {}\n {}".format(reason, "\n".join(sorted(dists)))
	            for reason, dists in yanked.items()
	        ]))

	    if not target_platform:
	        # The pipstar platforms are undefined here, so we cannot do any matching
	        return sdist

	    # Select a single wheel that can work on the target_platform
	    return select_whl(
	        whls = whls,
	        python_version = target_platform.env["python_full_version"],
	        implementation_name = target_platform.env["implementation_name"],
	        whl_abi_tags = target_platform.whl_abi_tags,
	        whl_platform_tags = target_platform.whl_platform_tags,
	        logger = logger,
	    ) or sdist