blob: e01ebd3506eadf1da1140d52e4b55b7a944a4722 [file]
"""Definitions for the modules_mapping.json generation.
The modules_mapping.json file is a mapping from Python modules to the wheel
names that provide those modules. It is used for determining which wheel
distribution should be used in the `deps` attribute of `py_*` targets.
This mapping is necessary when reading Python import statements and determining
if they are provided by third-party dependencies, most importantly when the
module name doesn't match the wheel distribution name.
Currently, this module only works with requirements.txt files locked using
pip-tools (https://github.com/jazzband/pip-tools) with hashes. This is necessary
in order to keep downloaded wheels in the Bazel cache. Also, the
modules_mapping rule does not consider extras as specified by PEP 508.
"""
# _modules_mapping_impl is the root entry for the modules_mapping rule
# implementation.
#
# It installs the pinned `build` and `setuptools` distributions into a private
# `__pythonpath` directory, resolves the wheels for the parsed requirements,
# runs the generator script over them and stores its stdout as
# modules_mapping.json, next to a BUILD file exporting it and a `print` helper
# target.
#
# Args:
#   rctx: the repository context this rule is evaluated with.
def _modules_mapping_impl(rctx):
    requirements_data = rctx.read(rctx.attr.requirements)
    python_interpreter = _get_python_interpreter(rctx)
    pythonpath = "{}/__pythonpath".format(rctx.path(""))

    # Install the tooling used to build wheels from source distributions into
    # the private target directory.
    res = rctx.execute(
        [
            python_interpreter,
            "-m",
            "pip",
            "--verbose",
            "--isolated",
            "install",
            "--target={}".format(pythonpath),
            "--upgrade",
            "--no-build-isolation",
            "--no-cache-dir",
            "--disable-pip-version-check",
            "--index-url={}".format(rctx.attr.pip_index_url),
            "build=={}".format(rctx.attr.build_wheel_version),
            "setuptools=={}".format(rctx.attr.setuptools_wheel_version),
        ],
        quiet = rctx.attr.quiet,
        timeout = rctx.attr.install_build_timeout,
    )
    if res.return_code != 0:
        fail(res.stderr)

    parsed_requirements = _parse_requirements_txt(requirements_data)
    wheels = _get_wheels(rctx, python_interpreter, pythonpath, parsed_requirements)

    # The generator receives one JSON-encoded wheel description per argument
    # and its stdout becomes the mapping file.
    res = rctx.execute(
        [
            python_interpreter,
            rctx.path(rctx.attr._generator),
        ] + wheels,
        quiet = rctx.attr.quiet,
        timeout = rctx.attr.generate_timeout,
    )
    if res.return_code != 0:
        fail(res.stderr)
    rctx.file("modules_mapping.json", content = res.stdout)

    # "$1" is quoted so that a path containing whitespace reaches `cat` as a
    # single argument instead of being word-split by the shell.
    rctx.file("print.sh", content = "#!/usr/bin/env bash\ncat \"$1\"", executable = True)
    rctx.file("BUILD", """\
exports_files(["modules_mapping.json"])
sh_binary(
name = "print",
srcs = ["print.sh"],
data = [":modules_mapping.json"],
args = ["$(rootpath :modules_mapping.json)"],
)
""")
# _get_python_interpreter picks the Python executable to invoke: the path of
# the `python_interpreter` attribute when one was provided, otherwise the plain
# "python" command name.
#
# Args:
#   rctx: the repository context holding the rule attributes.
#
# Returns:
#   The path of the user-provided interpreter, or the string "python".
def _get_python_interpreter(rctx):
    custom_interpreter = rctx.attr.python_interpreter
    if custom_interpreter != None:
        return rctx.path(custom_interpreter)
    return "python"
# _parse_requirements_txt parses the requirements.txt data into structs with the
# information needed to download them using Bazel.
#
# Lines ending in a backslash are joined with the following line(s) before being
# handed to _parse_requirement. Empty lines and comment lines are ignored.
#
# Args:
#   data: the full content of the requirements.txt file as a string.
#
# Returns:
#   A list with one struct (as produced by _parse_requirement) per logical
#   requirement line.
def _parse_requirements_txt(data):
    result = []
    current_requirement = ""
    continue_previous_line = False
    for raw_line in data.split("\n"):
        # Strip before classifying the line so that indented comments and
        # whitespace-only lines (including a stray "\r" from CRLF files) are
        # skipped too instead of being parsed as requirements.
        line = raw_line.strip()
        if not line or line.startswith("#"):
            continue

        stripped_backslash = line.endswith("\\")
        if stripped_backslash:
            line = line[:-1]

        # If this line is a continuation of the previous one, append the current
        # line to the current requirement being processed, otherwise, start a
        # new requirement.
        if continue_previous_line:
            current_requirement += line
        else:
            current_requirement = line

        # Control whether the next line in the requirements.txt should be a
        # continuation of the current requirement being processed or not.
        continue_previous_line = stripped_backslash
        if not continue_previous_line:
            result.append(_parse_requirement(current_requirement))

    # The data ended while a continuation was still pending: keep what was
    # accumulated rather than silently dropping the last requirement.
    if continue_previous_line and current_requirement:
        result.append(_parse_requirement(current_requirement))
    return result
# _parse_requirement parses a single requirement line.
#
# Args:
#   requirement_line: one logical requirement line with line continuations
#     already joined, e.g. `name[extra]==1.2.3 --hash=sha256:...`.
#
# Returns:
#   A struct that always has `name`; `version` is present when the line pins a
#   version with "=="; `hashes` (a list of the `--hash=` values) is present when
#   anything follows the version on the line.
def _parse_requirement(requirement_line):
    # Split on the first "==" only. Environment markers may contain "==" as
    # well (e.g. `; sys_platform == "win32"`) and splitting on every occurrence
    # would cut the line short and lose the hashes that follow the marker.
    name_part, separator, remainder = requirement_line.partition("==")

    # Removing the extras (https://www.python.org/dev/peps/pep-0508/#extras)
    # from the requirement name is fine since it's expected that the
    # requirements.txt was compiled with pip-tools, which includes the extras as
    # direct dependencies.
    requirement = {"name": _remove_extras_from_name(name_part.strip())}
    if not separator:
        return struct(**requirement)

    tokens = remainder.split(" ")

    # A marker may follow the version without whitespace (`1.0;marker`); the
    # version itself never contains ";".
    requirement["version"] = tokens[0].split(";")[0]
    if len(tokens) == 1:
        return struct(**requirement)

    hashes = []
    for token in tokens[1:]:
        token = token.strip()

        # Skip empty arguments.
        if not token:
            continue

        # Halt processing if it hits a comment.
        if token.startswith("#"):
            break
        if token.startswith("--hash="):
            hashes.append(token[len("--hash="):])
    requirement["hashes"] = hashes
    return struct(**requirement)

# _remove_extras_from_name removes the [extras] from a requirement.
# https://www.python.org/dev/peps/pep-0508/#extras
#
# Args:
#   name: the requirement name, optionally followed by `[extra,...]`.
#
# Returns:
#   The name up to (not including) the first "[", or the name unchanged when it
#   has no extras.
def _remove_extras_from_name(name):
    bracket_index = name.find("[")
    if bracket_index == -1:
        return name
    return name[:bracket_index]
# _get_wheels returns the wheel distributions for the given requirements. It
# uses a few different strategies depending on whether compiled wheel
# distributions exist on the remote index or not. The order in which it
# operates:
#
# 1. Try to use the platform-independent compiled wheel (*-none-any.whl).
# 2. Try to use the first match of the linux-dependent compiled wheel from the
#    sorted releases list. This is valid as it's deterministic and the Python
#    extension for Gazelle doesn't support other platform-specific wheels
#    (one must use manual means to accomplish platform-specific dependency
#    resolution).
# 3. Use the published source for the wheel.
#
# Args:
#   rctx: the repository context, used for downloads and command execution.
#   python_interpreter: the interpreter used to run the builder script.
#   pythonpath: value exported as PYTHONPATH to the builder script.
#   requirements: the structs produced by _parse_requirements_txt.
#
# Returns:
#   A list of JSON-encoded objects, one per requirement, each carrying the
#   wheel "name", "version" and "path" (a file name, or a glob pattern for
#   wheels built from source).
def _get_wheels(rctx, python_interpreter, pythonpath, requirements):
    # URL fragments identifying an acceptable wheel, in order of preference.
    wheel_tags = [
        "-none-any.whl",
        # TODO(f0rmiga): handle PEP 600.
        # https://www.python.org/dev/peps/pep-0600/
        # The Linux tag as defined in PEP 599.
        "manylinux2014_x86_64",
        # The Linux tag as defined in PEP 571.
        "manylinux2010_x86_64",
        # The Linux tag as defined in PEP 513.
        "manylinux1_x86_64",
        # The MacOS tag.
        "macosx_10_9_x86_64",
    ]

    # Source archive extensions, in order of preference.
    source_extensions = [".tar.gz", ".zip"]

    wheels = []
    to_build = []
    for requirement in requirements:
        if not hasattr(requirement, "hashes"):
            if hasattr(requirement, "name") and requirement.name.startswith("#"):
                # This is a comment in the requirements file.
                continue

            # Unpinned lines carry no `version` attribute; only include it in
            # the message when it exists so the intended failure is reported.
            label = requirement.name
            if hasattr(requirement, "version"):
                label = "{}-{}".format(requirement.name, requirement.version)
            fail("missing requirement hash for {}: use pip-tools to produce a locked file".format(label))

        wheel = {}
        wheel["name"] = requirement.name
        requirement_info_url = "{index_base}/{name}/{version}/json".format(
            index_base = rctx.attr.index_base,
            name = requirement.name,
            version = requirement.version,
        )
        requirement_info_path = "{}_info.json".format(requirement.name)

        # TODO(f0rmiga): if the logs are too spammy, use rctx.execute with
        # Python to perform the downloads since it's impossible to get the
        # checksums of these JSON files and there's no option to mute Bazel
        # here.
        rctx.download(requirement_info_url, output = requirement_info_path)
        requirement_info = json.decode(rctx.read(requirement_info_path))
        available = requirement_info["releases"]

        # Accept the pinned version as-is, or without a trailing ".0" when only
        # that spelling is listed in the fetched releases.
        if requirement.version in available:
            wheel["version"] = requirement.version
        elif requirement.version.endswith(".0") and requirement.version[:-len(".0")] in available:
            wheel["version"] = requirement.version[:-len(".0")]
        else:
            fail("missing requirement version \"{}\" for wheel \"{}\" in fetched releases: available {}".format(
                requirement.version,
                requirement.name,
                [version for version in available],
            ))

        # Sorting by URL makes the "first match" below deterministic.
        releases = sorted(available[wheel["version"]], key = _sort_release_by_url)

        wheel_url, sha256 = None, None
        for tag in wheel_tags:
            (wheel_url, sha256) = _search_url(releases, tag)
            if wheel_url:
                break

        if wheel_url:
            wheel_path = wheel_url.split("/")[-1]
            rctx.download(wheel_url, output = wheel_path, sha256 = sha256)
            wheel["path"] = wheel_path
        else:
            # No usable wheel was published: fall back to the source archive.
            src_url, extension = None, None
            for candidate in source_extensions:
                (src_url, sha256) = _search_url(releases, candidate)
                if src_url:
                    extension = candidate
                    break
            if not src_url:
                fail("requirement URL for {}-{} not found".format(requirement.name, wheel["version"]))
            rctx.download_and_extract(src_url, sha256 = sha256)

            # The archive file name minus its extension is used as the
            # directory handed to the builder script.
            requirement_path = src_url.split("/")[-1]
            requirement_path = requirement_path[:-len(extension)]

            # The resulting filename for the .whl file is not feasible to
            # predict as it has too many variations, so we defer it to the
            # Python globing to find the right file name since only one .whl
            # file should be generated by the compilation.
            wheel_path = "{}/**/*.whl".format(requirement_path)
            wheel["path"] = wheel_path
            to_build.append(requirement_path)
        wheels.append(json.encode(wheel))

    if len(to_build) > 0:
        res = rctx.execute(
            [python_interpreter, rctx.path(rctx.attr._builder)] + to_build,
            quiet = rctx.attr.quiet,
            environment = {
                # To avoid use local "pip.conf"
                "HOME": str(rctx.path("").realpath),
                # Make uses of pip to use the requested index
                "PIP_INDEX_URL": rctx.attr.pip_index_url,
                "PYTHONPATH": pythonpath,
            },
        )
        if res.return_code != 0:
            fail(res.stderr)
    return wheels
# _sort_release_by_url is the key function passed to sorted() when ordering the
# files of a release: each entry is ordered by its "url" value.
#
# Args:
#   release: a dict describing one release file; must have a "url" key.
#
# Returns:
#   The value stored under "url".
def _sort_release_by_url(release):
    sort_key = release["url"]
    return sort_key
# _search_url scans the releases in order and picks the first one whose url
# contains the provided extension (matched anywhere in the url).
#
# Args:
#   releases: a list of dicts, each with a "url" and a "digests" dict holding a
#     "sha256" entry.
#   extension: the text to look for inside each url.
#
# Returns:
#   A (url, sha256) tuple for the first match, or (None, None) when no release
#   matches.
def _search_url(releases, extension):
    for candidate in releases:
        candidate_url = candidate["url"]
        if extension in candidate_url:
            return (candidate_url, candidate["digests"]["sha256"])
    return (None, None)
# Attributes accepted by the modules_mapping repository rule. The entries
# prefixed with an underscore are the helper scripts run by the implementation.
_MODULES_MAPPING_ATTRS = {
    "build_wheel_version": attr.string(
        default = "0.5.1",
        doc = "The build wheel version.",
    ),
    "generate_timeout": attr.int(
        default = 30,
        doc = "The timeout for the generator.py command.",
    ),
    "index_base": attr.string(
        default = "https://pypi.org/pypi",
        doc = "The base URL used for querying releases data as JSON.",
    ),
    "install_build_timeout": attr.int(
        default = 30,
        doc = "The timeout for the `pip install build` command.",
    ),
    "pip_index_url": attr.string(
        default = "https://pypi.python.org/simple",
        doc = "The index URL used for any pip install actions",
    ),
    "python_interpreter": attr.label(
        allow_single_file = True,
        doc = "If set, uses the custom-built Python interpreter, otherwise, uses the system one.",
    ),
    "quiet": attr.bool(
        default = True,
        doc = "Toggle this attribute to get verbose output from this rule.",
    ),
    "requirements": attr.label(
        allow_single_file = True,
        doc = "The requirements.txt file with hashes locked using pip-tools.",
        mandatory = True,
    ),
    "setuptools_wheel_version": attr.string(
        default = "v57.5.0",
        doc = "The setuptools wheel version.",
    ),
    "_builder": attr.label(
        allow_single_file = True,
        default = "//gazelle/modules_mapping:builder.py",
    ),
    "_generator": attr.label(
        allow_single_file = True,
        default = "//gazelle/modules_mapping:generator.py",
    ),
}

# modules_mapping is the repository rule backed by _modules_mapping_impl; it
# produces the modules_mapping.json file from a locked requirements.txt.
modules_mapping = repository_rule(
    implementation = _modules_mapping_impl,
    attrs = _MODULES_MAPPING_ATTRS,
    doc = "Creates a modules_mapping.json file for mapping module names to wheel distribution names.",
)