Incrementally download wheels at workspace time. (#432)
* Create support for lazily fetched repos.
Refactor pip_repository rule to invoke different scripts based on the
value of the incremental attribute to the rule.
Create a new macro in repositories.bzl which will instantiate all the child repos representing
individual python packages.
Refactor code which is repeated between the parse_requirements_to_bzl
scripts and the extract_wheels script.
diff --git a/.bazelrc b/.bazelrc
index 1afaedb..ddba1f3 100644
--- a/.bazelrc
+++ b/.bazelrc
@@ -3,7 +3,7 @@
# This lets us glob() up all the files inside the examples to make them inputs to tests
# (Note, we cannot use `common --deleted_packages` because the bazel version command doesn't support it)
# To update these lines, run tools/bazel_integration_test/update_deleted_packages.sh
-build --deleted_packages=examples/legacy_pip_import/boto,examples/legacy_pip_import/extras,examples/legacy_pip_import/helloworld,examples/pip_install
-query --deleted_packages=examples/legacy_pip_import/boto,examples/legacy_pip_import/extras,examples/legacy_pip_import/helloworld,examples/pip_install
+build --deleted_packages=examples/legacy_pip_import/boto,examples/legacy_pip_import/extras,examples/legacy_pip_import/helloworld,examples/pip_install,examples/pip_parse
+query --deleted_packages=examples/legacy_pip_import/boto,examples/legacy_pip_import/extras,examples/legacy_pip_import/helloworld,examples/pip_install,examples/pip_parse
test --test_output=errors
diff --git a/.gitignore b/.gitignore
index dcfa539..cc8decd 100644
--- a/.gitignore
+++ b/.gitignore
@@ -37,3 +37,7 @@
bazel-genfiles
bazel-out
bazel-testlogs
+
+# vim swap files
+*.swp
+*.swo
diff --git a/README.md b/README.md
index 591c403..85df2c9 100644
--- a/README.md
+++ b/README.md
@@ -105,7 +105,7 @@
### Importing `pip` dependencies
-To add pip dependencies to your `WORKSPACE` is you load
+To add pip dependencies to your `WORKSPACE` load
the `pip_install` function, and call it to create the
individual wheel repos.
@@ -136,6 +136,40 @@
updating your system `python` interpreter), you can completely flush out your
repo cache with `bazel clean --expunge`.
+### Fetch `pip` dependencies lazily (experimental)
+
+One pain point with `pip_install` is the need to download all dependencies resolved by
+your requirements.txt before the bazel analysis phase can start. For large python monorepos
+this can take a long time, especially on slow connections.
+
+`pip_parse` provides a solution to this problem. If you can provide a lock
+file of all your python dependencies, `pip_parse` will translate each requirement into its own external repository.
+Bazel will only fetch/build wheels for the requirements in the subgraph of your build target.
+
+There are API differences between `pip_parse` and `pip_install`:
+1. `pip_parse` requires a fully resolved lock file of your python dependencies. You can generate this using
+ `pip-compile`, or a virtualenv and `pip freeze`. `pip_parse` uses a label argument called `requirements_lock` instead of `requirements`
+ to make this distinction clear.
+2. `pip_parse` translates your requirements into a starlark macro called `install_deps`. You must call this macro in your WORKSPACE to
+ declare your dependencies.
+
+
+```python
+load("@rules_python//python:pip.bzl", "pip_parse")
+
+# Create a central repo that knows about the dependencies needed from
+# requirements_lock.txt.
+pip_parse(
+ name = "my_deps",
+ requirements_lock = "//path/to:requirements_lock.txt",
+)
+
+# Load the starlark macro which will define your dependencies.
+load("@my_deps//:requirements.bzl", "install_deps")
+# Call it to define repos for your requirements.
+install_deps()
+```
+
### Importing `pip` dependencies with `pip_import` (legacy)
The deprecated `pip_import` can still be used if needed.
diff --git a/examples/BUILD b/examples/BUILD
index 092ad40..5b798d5 100644
--- a/examples/BUILD
+++ b/examples/BUILD
@@ -26,3 +26,8 @@
name = "pip_install_example",
timeout = "long",
)
+
+bazel_integration_test(
+ name = "pip_parse_example",
+ timeout = "long",
+)
diff --git a/examples/pip_parse/BUILD b/examples/pip_parse/BUILD
new file mode 100644
index 0000000..ca56af9
--- /dev/null
+++ b/examples/pip_parse/BUILD
@@ -0,0 +1,42 @@
+load("@pip_parsed_deps//:requirements.bzl", "requirement")
+load("@rules_python//python:defs.bzl", "py_binary", "py_test")
+
+# Toolchain setup, this is optional.
+# Demonstrate that we can use the same python interpreter for the toolchain and executing pip in pip install (see WORKSPACE).
+#
+#load("@rules_python//python:defs.bzl", "py_runtime_pair")
+#
+#py_runtime(
+# name = "python3_runtime",
+# files = ["@python_interpreter//:files"],
+# interpreter = "@python_interpreter//:python_bin",
+# python_version = "PY3",
+# visibility = ["//visibility:public"],
+#)
+#
+#py_runtime_pair(
+# name = "my_py_runtime_pair",
+# py2_runtime = None,
+# py3_runtime = ":python3_runtime",
+#)
+#
+#toolchain(
+# name = "my_py_toolchain",
+# toolchain = ":my_py_runtime_pair",
+# toolchain_type = "@bazel_tools//tools/python:toolchain_type",
+#)
+# End of toolchain setup.
+
+py_binary(
+ name = "main",
+ srcs = ["main.py"],
+ deps = [
+ requirement("requests"),
+ ],
+)
+
+py_test(
+ name = "test",
+ srcs = ["test.py"],
+ deps = [":main"],
+)
diff --git a/examples/pip_parse/WORKSPACE b/examples/pip_parse/WORKSPACE
new file mode 100644
index 0000000..418e762
--- /dev/null
+++ b/examples/pip_parse/WORKSPACE
@@ -0,0 +1,39 @@
+workspace(name = "example_repo")
+
+load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
+
+http_archive(
+ name = "rules_python",
+ url = "https://github.com/bazelbuild/rules_python/releases/download/0.1.0/rules_python-0.1.0.tar.gz",
+ sha256 = "b6d46438523a3ec0f3cead544190ee13223a52f6a6765a29eae7b7cc24cc83a0",
+)
+
+load("@rules_python//python:pip.bzl", "pip_parse")
+
+pip_parse(
+ # (Optional) You can provide extra parameters to pip.
+ # Here, make pip output verbose (this is usable with `quiet = False`).
+ # extra_pip_args = ["-v"],
+
+ # (Optional) You can exclude custom elements in the data section of the generated BUILD files for pip packages.
+ # Exclude directories with spaces in their names in this example (avoids build errors if there are such directories).
+ #pip_data_exclude = ["**/* */**"],
+
+ # (Optional) You can provide a python_interpreter (path) or a python_interpreter_target (a Bazel target, that
+ # acts as an executable). The latter can be anything that could be used as Python interpreter. E.g.:
+ # 1. Python interpreter that you compile in the build file (as above in @python_interpreter).
+ # 2. Pre-compiled python interpreter included with http_archive
+ # 3. Wrapper script, like in the autodetecting python toolchain.
+ #python_interpreter_target = "@python_interpreter//:python_bin",
+
+ # (Optional) You can set quiet to False if you want to see pip output.
+ #quiet = False,
+
+ # Uses the default repository name "pip_parsed_deps"
+ requirements_lock = "//:requirements_lock.txt",
+)
+
+load("@pip_parsed_deps//:requirements.bzl", "install_deps")
+
+# Initialize repositories for all packages in requirements_lock.txt.
+install_deps()
diff --git a/examples/pip_parse/main.py b/examples/pip_parse/main.py
new file mode 100644
index 0000000..79e1c12
--- /dev/null
+++ b/examples/pip_parse/main.py
@@ -0,0 +1,5 @@
+import requests
+
+
+def version():
+    """Return the version string reported by the imported `requests` package."""
+    return requests.__version__
diff --git a/examples/pip_parse/requirements.txt b/examples/pip_parse/requirements.txt
new file mode 100644
index 0000000..989b995
--- /dev/null
+++ b/examples/pip_parse/requirements.txt
@@ -0,0 +1 @@
+requests==2.24.0
diff --git a/examples/pip_parse/requirements_lock.txt b/examples/pip_parse/requirements_lock.txt
new file mode 100644
index 0000000..b0d5b9e
--- /dev/null
+++ b/examples/pip_parse/requirements_lock.txt
@@ -0,0 +1,16 @@
+#
+# This file is autogenerated by pip-compile
+# To update, run:
+#
+# pip-compile --output-file=requirements_lock.txt requirements.txt
+#
+certifi==2020.12.5
+ # via requests
+chardet==3.0.4
+ # via requests
+idna==2.10
+ # via requests
+requests==2.24.0
+ # via -r requirements.txt
+urllib3==1.25.11
+ # via requests
diff --git a/examples/pip_parse/test.py b/examples/pip_parse/test.py
new file mode 100644
index 0000000..3cfb9bb
--- /dev/null
+++ b/examples/pip_parse/test.py
@@ -0,0 +1,11 @@
+import unittest
+import main
+
+
+class ExampleTest(unittest.TestCase):
+    """Checks the `requests` version that `main` reports."""
+
+    def test_main(self):
+        # 2.24.0 is the version pinned in this example's requirements files.
+        self.assertEqual("2.24.0", main.version())
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/python/pip.bzl b/python/pip.bzl
index 44e4167..32a8901 100644
--- a/python/pip.bzl
+++ b/python/pip.bzl
@@ -56,6 +56,17 @@
**kwargs
)
+def pip_parse(requirements_lock, name = "pip_parsed_deps", **kwargs):
+    """Declares a pip_repository in incremental mode.
+
+    Args:
+        requirements_lock: Value forwarded to the `requirements_lock` attribute of pip_repository.
+        name: Name of the declared repository. Defaults to "pip_parsed_deps".
+        **kwargs: Additional keyword arguments forwarded unchanged to pip_repository.
+    """
+
+    # Just in case our dependencies weren't already fetched
+    pip_install_dependencies()
+
+    pip_repository(
+        name = name,
+        requirements_lock = requirements_lock,
+        incremental = True,
+        **kwargs
+    )
+
def pip_repositories():
# buildifier: disable=print
print("DEPRECATED: the pip_repositories rule has been replaced with pip_install, please see rules_python 0.1 release notes")
diff --git a/python/pip_install/BUILD b/python/pip_install/BUILD
index c24e6c7..bd53f5b 100644
--- a/python/pip_install/BUILD
+++ b/python/pip_install/BUILD
@@ -3,6 +3,7 @@
srcs = glob(["*.bzl"]) + [
"BUILD",
"//python/pip_install/extract_wheels:distribution",
+ "//python/pip_install/parse_requirements_to_bzl:distribution",
],
visibility = ["//:__pkg__"],
)
diff --git a/python/pip_install/extract_wheels/__init__.py b/python/pip_install/extract_wheels/__init__.py
index fe8b8ef..96913cd 100644
--- a/python/pip_install/extract_wheels/__init__.py
+++ b/python/pip_install/extract_wheels/__init__.py
@@ -12,7 +12,7 @@
import sys
import json
-from python.pip_install.extract_wheels.lib import bazel, requirements
+from python.pip_install.extract_wheels.lib import bazel, requirements, arguments
def configure_reproducible_wheels() -> None:
@@ -58,25 +58,7 @@
required=True,
help="Path to requirements.txt from where to install dependencies",
)
- parser.add_argument(
- "--repo",
- action="store",
- required=True,
- help="The external repo name to install dependencies. In the format '@{REPO_NAME}'",
- )
- parser.add_argument(
- "--extra_pip_args", action="store", help="Extra arguments to pass down to pip.",
- )
- parser.add_argument(
- "--pip_data_exclude",
- action="store",
- help="Additional data exclusion parameters to add to the pip packages BUILD file.",
- )
- parser.add_argument(
- "--enable_implicit_namespace_pkgs",
- action="store_true",
- help="Disables conversion of implicit namespace packages into pkg-util style packages.",
- )
+ arguments.parse_common_args(parser)
args = parser.parse_args()
pip_args = [sys.executable, "-m", "pip", "--isolated", "wheel", "-r", args.requirements]
@@ -93,10 +75,12 @@
else:
pip_data_exclude = []
+ repo_label = "@%s" % args.repo
+
targets = [
'"%s%s"'
% (
- args.repo,
+ repo_label,
bazel.extract_wheel(
whl, extras, pip_data_exclude, args.enable_implicit_namespace_pkgs
),
@@ -106,5 +90,5 @@
with open("requirements.bzl", "w") as requirement_file:
requirement_file.write(
- bazel.generate_requirements_file_contents(args.repo, targets)
+ bazel.generate_requirements_file_contents(repo_label, targets)
)
diff --git a/python/pip_install/extract_wheels/lib/BUILD b/python/pip_install/extract_wheels/lib/BUILD
index 2a26985..c23d8f3 100644
--- a/python/pip_install/extract_wheels/lib/BUILD
+++ b/python/pip_install/extract_wheels/lib/BUILD
@@ -9,8 +9,12 @@
"purelib.py",
"requirements.py",
"wheel.py",
+ "arguments.py",
],
- visibility = ["//python/pip_install/extract_wheels:__subpackages__"],
+ visibility = [
+ "//python/pip_install/extract_wheels:__subpackages__",
+ "//python/pip_install/parse_requirements_to_bzl:__subpackages__",
+ ],
deps = [
requirement("pkginfo"),
requirement("setuptools"),
@@ -42,6 +46,19 @@
)
py_test(
+ name = "arguments_test",
+ size = "small",
+ srcs = [
+ "arguments_test.py",
+ ],
+ tags = ["unit"],
+ deps = [
+ ":lib",
+ "//python/pip_install/parse_requirements_to_bzl:lib",
+ ],
+)
+
+py_test(
name = "whl_filegroup_test",
size = "small",
srcs = [
diff --git a/python/pip_install/extract_wheels/lib/arguments.py b/python/pip_install/extract_wheels/lib/arguments.py
new file mode 100644
index 0000000..ee9a649
--- /dev/null
+++ b/python/pip_install/extract_wheels/lib/arguments.py
@@ -0,0 +1,24 @@
+from argparse import ArgumentParser
+
+
+def parse_common_args(parser: ArgumentParser) -> ArgumentParser:
+    """Register the command line flags shared by the pip_install helper scripts.
+
+    Despite its name this function does not parse anything; it only adds
+    arguments to `parser`.
+
+    Args:
+        parser: The parser to extend. It is modified in place.
+
+    Returns:
+        The same parser instance, to allow chaining.
+    """
+    parser.add_argument(
+        "--repo",
+        action="store",
+        required=True,
+        # Callers pass the bare repository name and add the '@' themselves.
+        help="The external repo name to install dependencies into, without the leading '@'.",
+    )
+    parser.add_argument(
+        "--extra_pip_args", action="store", help="Extra arguments to pass down to pip.",
+    )
+    parser.add_argument(
+        "--pip_data_exclude",
+        action="store",
+        help="Additional data exclusion parameters to add to the pip packages BUILD file.",
+    )
+    parser.add_argument(
+        "--enable_implicit_namespace_pkgs",
+        action="store_true",
+        help="Disables conversion of implicit namespace packages into pkg-util style packages.",
+    )
+    return parser
diff --git a/python/pip_install/extract_wheels/lib/arguments_test.py b/python/pip_install/extract_wheels/lib/arguments_test.py
new file mode 100644
index 0000000..0d6a6af
--- /dev/null
+++ b/python/pip_install/extract_wheels/lib/arguments_test.py
@@ -0,0 +1,27 @@
+import argparse
+import json
+import unittest
+
+from python.pip_install.extract_wheels.lib import arguments
+from python.pip_install.parse_requirements_to_bzl import deserialize_structured_args
+
+
+class ArgumentsTestCase(unittest.TestCase):
+    """Exercises the shared flags together with deserialize_structured_args."""
+
+    def test_arguments(self) -> None:
+        parser = argparse.ArgumentParser()
+        parser = arguments.parse_common_args(parser)
+        repo_name = "foo"
+        index_url = "--index_url=pypi.org/simple"
+        # --extra_pip_args is passed as a JSON object that wraps the value under "args".
+        args_dict = vars(parser.parse_args(
+            args=["--repo", repo_name, "--extra_pip_args={index_url}".format(index_url=json.dumps({"args": index_url}))]))
+        args_dict = deserialize_structured_args(args_dict)
+        self.assertIn("repo", args_dict)
+        self.assertIn("extra_pip_args", args_dict)
+        # Flags that were not supplied keep their argparse defaults.
+        self.assertEqual(args_dict["pip_data_exclude"], None)
+        self.assertEqual(args_dict["enable_implicit_namespace_pkgs"], False)
+        self.assertEqual(args_dict["repo"], repo_name)
+        # After deserialisation the JSON wrapper is gone and only the payload remains.
+        self.assertEqual(args_dict["extra_pip_args"], index_url)
+
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/python/pip_install/extract_wheels/lib/bazel.py b/python/pip_install/extract_wheels/lib/bazel.py
index 0affa20..95e8553 100644
--- a/python/pip_install/extract_wheels/lib/bazel.py
+++ b/python/pip_install/extract_wheels/lib/bazel.py
@@ -2,13 +2,15 @@
import os
import textwrap
import json
-from typing import Iterable, List, Dict, Set
+from typing import Iterable, List, Dict, Set, Optional
import shutil
from python.pip_install.extract_wheels.lib import namespace_pkgs, wheel, purelib
WHEEL_FILE_LABEL = "whl"
+PY_LIBRARY_LABEL = "pkg"
+
def generate_build_file_contents(
name: str, dependencies: List[str], whl_file_deps: List[str], pip_data_exclude: List[str],
@@ -91,6 +93,9 @@
def whl_requirement(name):
return requirement(name) + ":whl"
+
+ def install_deps():
+     # Stub emitted for non-incremental repos; the incremental entry point is the pip_parse macro.
+     fail("install_deps() only works if you are creating an incremental repo. Did you mean to use pip_parse()?")
""".format(
repo=repo_name,
requirement_labels=requirement_labels,
@@ -99,7 +104,17 @@
)
-def sanitise_name(name: str) -> str:
+DEFAULT_PACKAGE_PREFIX = "pypi__"
+
+
+def whl_library_repo_prefix(parent_repo: str) -> str:
+    """Return `parent_repo` joined to DEFAULT_PACKAGE_PREFIX with an underscore.
+
+    For example, "pip_parsed_deps" yields "pip_parsed_deps_pypi__".
+    """
+    return "{parent}_{default_package_prefix}".format(
+        parent=parent_repo,
+        default_package_prefix=DEFAULT_PACKAGE_PREFIX
+    )
+
+
+def sanitise_name(name: str, prefix: str = DEFAULT_PACKAGE_PREFIX) -> str:
"""Sanitises the name to be compatible with Bazel labels.
There are certain requirements around Bazel labels that we need to consider. From the Bazel docs:
@@ -116,7 +131,7 @@
See: https://github.com/bazelbuild/bazel/issues/2636
"""
- return "pypi__" + name.replace("-", "_").replace(".", "_").lower()
+ return prefix + name.replace("-", "_").replace(".", "_").lower()
def setup_namespace_pkg_compatibility(wheel_dir: str) -> None:
@@ -135,18 +150,40 @@
"""
namespace_pkg_dirs = namespace_pkgs.implicit_namespace_packages(
- wheel_dir, ignored_dirnames=["%s/bin" % wheel_dir,],
+ wheel_dir, ignored_dirnames=["%s/bin" % wheel_dir],
)
for ns_pkg_dir in namespace_pkg_dirs:
namespace_pkgs.add_pkgutil_style_namespace_pkg_init(ns_pkg_dir)
+def sanitised_library_label(whl_name: str) -> str:
+    """Return the double-quoted label '"//<sanitised name>"' for `whl_name`."""
+    return '"//%s"' % sanitise_name(whl_name)
+
+
+def sanitised_file_label(whl_name: str) -> str:
+    """Return the double-quoted label '"//<sanitised name>:<WHEEL_FILE_LABEL>"' for `whl_name`."""
+    return '"//%s:%s"' % (sanitise_name(whl_name), WHEEL_FILE_LABEL)
+
+
+def _whl_name_to_repo_root(whl_name: str, repo_prefix: str) -> str:
+    """Return '@<name>//', where <name> is `whl_name` sanitised with `repo_prefix` as its prefix."""
+    return "@{}//".format(sanitise_name(whl_name, prefix=repo_prefix))
+
+
+def sanitised_repo_library_label(whl_name: str, repo_prefix: str) -> str:
+    """Return the double-quoted label of the PY_LIBRARY_LABEL target at the root of the wheel's own repo."""
+    return '"{}:{}"'.format(_whl_name_to_repo_root(whl_name, repo_prefix), PY_LIBRARY_LABEL)
+
+
+def sanitised_repo_file_label(whl_name: str, repo_prefix: str) -> str:
+    """Return the double-quoted label of the WHEEL_FILE_LABEL target at the root of the wheel's own repo."""
+    return '"{}:{}"'.format(_whl_name_to_repo_root(whl_name, repo_prefix), WHEEL_FILE_LABEL)
+
+
def extract_wheel(
wheel_file: str,
extras: Dict[str, Set[str]],
pip_data_exclude: List[str],
enable_implicit_namespace_pkgs: bool,
+ incremental: bool = False,
+ incremental_repo_prefix: Optional[str] = None,
) -> str:
"""Extracts wheel into given directory and creates py_library and filegroup targets.
@@ -155,17 +192,24 @@
extras: a list of extras to add as dependencies for the installed wheel
pip_data_exclude: list of file patterns to exclude from the generated data section of the py_library
enable_implicit_namespace_pkgs: if true, disables conversion of implicit namespace packages and will unzip as-is
+ incremental: If true, extract the wheel in a format suitable for an external repository. This
+ affects the names of libraries and their dependencies, which point to other external repositories.
+ incremental_repo_prefix: If incremental is true, use this prefix when creating labels from wheel
+ names instead of the default.
Returns:
The Bazel label for the extracted wheel, in the form '//path/to/wheel'.
"""
whl = wheel.Wheel(wheel_file)
- directory = sanitise_name(whl.name)
+ if incremental:
+ directory = "."
+ else:
+ directory = sanitise_name(whl.name)
- os.mkdir(directory)
- # copy the original wheel
- shutil.copy(whl.path, directory)
+ os.mkdir(directory)
+ # copy the original wheel
+ shutil.copy(whl.path, directory)
whl.unzip(directory)
# Note: Order of operations matters here
@@ -177,16 +221,30 @@
extras_requested = extras[whl.name] if whl.name in extras else set()
whl_deps = sorted(whl.dependencies(extras_requested))
- sanitised_dependencies = [
- '"//%s"' % sanitise_name(d) for d in whl_deps
- ]
- sanitised_wheel_file_dependencies = [
- '"//%s:%s"' % (sanitise_name(d), WHEEL_FILE_LABEL) for d in whl_deps
- ]
+ if incremental:
+ # check for mypy Optional validity
+ if incremental_repo_prefix is None:
+ raise TypeError("incremental_repo_prefix arguement cannot be None if incremental == True")
+ sanitised_dependencies = [
+ sanitised_repo_library_label(d, repo_prefix=incremental_repo_prefix) for d in whl_deps
+ ]
+ sanitised_wheel_file_dependencies = [
+ sanitised_repo_file_label(d, repo_prefix=incremental_repo_prefix) for d in whl_deps
+ ]
+ else:
+ sanitised_dependencies = [
+ sanitised_library_label(d) for d in whl_deps
+ ]
+ sanitised_wheel_file_dependencies = [
+ sanitised_file_label(d) for d in whl_deps
+ ]
with open(os.path.join(directory, "BUILD.bazel"), "w") as build_file:
contents = generate_build_file_contents(
- sanitise_name(whl.name), sanitised_dependencies, sanitised_wheel_file_dependencies, pip_data_exclude
+ PY_LIBRARY_LABEL if incremental else sanitise_name(whl.name),
+ sanitised_dependencies,
+ sanitised_wheel_file_dependencies,
+ pip_data_exclude
)
build_file.write(contents)
diff --git a/python/pip_install/extract_wheels/lib/purelib.py b/python/pip_install/extract_wheels/lib/purelib.py
index 99f6299..4e9eb3f 100644
--- a/python/pip_install/extract_wheels/lib/purelib.py
+++ b/python/pip_install/extract_wheels/lib/purelib.py
@@ -27,7 +27,7 @@
return
dot_data_dir = wheel.get_dot_data_directory(wheel_dir)
- # 'Root-Is-Purelib: false' is no guarantee a .date directory exists with
+ # 'Root-Is-Purelib: false' is no guarantee a .data directory exists with
# package code in it. eg. the 'markupsafe' package.
if not dot_data_dir:
return
diff --git a/python/pip_install/parse_requirements_to_bzl/BUILD b/python/pip_install/parse_requirements_to_bzl/BUILD
new file mode 100644
index 0000000..61bde47
--- /dev/null
+++ b/python/pip_install/parse_requirements_to_bzl/BUILD
@@ -0,0 +1,40 @@
+load("@rules_python//python:defs.bzl", "py_binary", "py_library", "py_test")
+load("//python/pip_install:repositories.bzl", "requirement")
+
+py_binary(
+ name = "parse_requirements_to_bzl",
+ srcs = [
+ "__init__.py",
+ "__main__.py",
+ ],
+ main = "__main__.py",
+ deps = ["//python/pip_install/extract_wheels/lib"],
+)
+
+py_library(
+ name = "lib",
+ srcs = ["__init__.py"],
+ deps = [requirement("pip")],
+ visibility = ["//python/pip_install/extract_wheels:__subpackages__"],
+)
+
+py_test(
+ name = "parse_requirements_to_bzl_test",
+ size = "small",
+ srcs = [
+ "parse_requirements_to_bzl_test.py",
+ ],
+ tags = ["unit"],
+ deps = [
+ ":lib",
+ "//python/pip_install/extract_wheels/lib"
+ ],
+)
+
+filegroup(
+ name = "distribution",
+ srcs = glob(["*"], exclude = ["*_test.py"]) + [
+ "//python/pip_install/parse_requirements_to_bzl/extract_single_wheel:distribution",
+ ],
+ visibility = ["//python/pip_install:__subpackages__"],
+)
diff --git a/python/pip_install/parse_requirements_to_bzl/__init__.py b/python/pip_install/parse_requirements_to_bzl/__init__.py
new file mode 100644
index 0000000..e38f9b0
--- /dev/null
+++ b/python/pip_install/parse_requirements_to_bzl/__init__.py
@@ -0,0 +1,132 @@
+import argparse
+import json
+import textwrap
+import sys
+from typing import List, Tuple
+
+from python.pip_install.extract_wheels.lib import bazel, arguments
+from pip._internal.req import parse_requirements, constructors
+from pip._internal.req.req_install import InstallRequirement
+from pip._internal.network.session import PipSession
+
+
+def parse_install_requirements(requirements_lock: str) -> List[InstallRequirement]:
+    """Parse the file at `requirements_lock` with pip and return one InstallRequirement per parsed entry."""
+    return [
+        constructors.install_req_from_parsed_requirement(pr)
+        for pr in parse_requirements(requirements_lock, session=PipSession())
+    ]
+
+
+def repo_names_and_requirements(install_reqs: List[InstallRequirement], repo_prefix: str) -> List[Tuple[str, str]]:
+    """Pair each requirement's repository name with its requirement string.
+
+    Args:
+        install_reqs: Requirements as returned by pip's parser.
+        repo_prefix: Prefix used when sanitising each requirement name into a repo name.
+
+    Returns:
+        A list of (repository name, requirement string) tuples, in input order.
+    """
+    return [
+        (
+            bazel.sanitise_name(ir.name, prefix=repo_prefix),
+            str(ir.req)
+        )
+        for ir in install_reqs
+    ]
+
+def deserialize_structured_args(args):
+    """Deserialize structured arguments passed from the starlark rules.
+
+    Each structured argument arrives as a JSON object that wraps the real value
+    under a single key; the wrapper is replaced by that value.
+
+    Args:
+        args: dict of parsed command line arguments. It is updated in place.
+
+    Returns:
+        The same dict, with every supplied structured argument unwrapped.
+    """
+    # JSON key under which each structured argument carries its payload.
+    # extract_single_wheel reads pip_data_exclude from "exclude", so the same key is used here.
+    # NOTE(review): confirm both keys against the serialisation in pip_repository.bzl.
+    structured_args = {"extra_pip_args": "args", "pip_data_exclude": "exclude"}
+    for arg_name, payload_key in structured_args.items():
+        if args.get(arg_name) is not None:
+            args[arg_name] = json.loads(args[arg_name])[payload_key]
+    return args
+
+
+def generate_parsed_requirements_contents(all_args: argparse.Namespace) -> str:
+ """
+ Parse each requirement from the requirements_lock file, and prepare arguments for each
+ repository rule, which will represent the individual requirements.
+
+ Generates a requirements.bzl file containing a macro (install_deps()) which instantiates
+ a repository rule for each requirement in the lock file.
+ """
+
+ args = dict(vars(all_args))
+ args = deserialize_structured_args(args)
+ args.setdefault("python_interpreter", sys.executable)
+ # Pop this off because it won't be used as a config argument to the whl_library rule.
+ requirements_lock = args.pop("requirements_lock")
+ repo_prefix = bazel.whl_library_repo_prefix(args["repo"])
+
+ install_reqs = parse_install_requirements(requirements_lock)
+ repo_names_and_reqs = repo_names_and_requirements(install_reqs, repo_prefix)
+ all_requirements = ", ".join(
+ [bazel.sanitised_repo_library_label(ir.name, repo_prefix=repo_prefix) for ir in install_reqs]
+ )
+ all_whl_requirements = ", ".join(
+ [bazel.sanitised_repo_file_label(ir.name, repo_prefix=repo_prefix) for ir in install_reqs]
+ )
+ return textwrap.dedent("""\
+ load("@rules_python//python/pip_install:pip_repository.bzl", "whl_library")
+
+ all_requirements = [{all_requirements}]
+
+ all_whl_requirements = [{all_whl_requirements}]
+
+ _packages = {repo_names_and_reqs}
+ _config = {args}
+
+ def _clean_name(name):
+ return name.replace("-", "_").replace(".", "_").lower()
+
+ def requirement(name):
+ return "@{repo_prefix}" + _clean_name(name) + "//:pkg"
+
+ def whl_requirement(name):
+ return "@{repo_prefix}" + _clean_name(name) + "//:whl"
+
+ def install_deps():
+ for name, requirement in _packages:
+ whl_library(
+ name = name,
+ requirement = requirement,
+ **_config,
+ )
+ """.format(
+ all_requirements=all_requirements,
+ all_whl_requirements=all_whl_requirements,
+ repo_names_and_reqs=repo_names_and_reqs,
+ args=args,
+ repo_prefix=repo_prefix,
+ )
+ )
+
+
+def _parse_bool(value: str) -> bool:
+    """Convert a textual boolean such as "True" or "False" into a real bool.
+
+    argparse's `type=bool` calls bool() on the raw string, so every non-empty
+    value, including "False", would come out as True.
+
+    Raises:
+        argparse.ArgumentTypeError: if `value` is not a recognised boolean spelling.
+    """
+    normalised = value.strip().lower()
+    if normalised in ("true", "1"):
+        return True
+    if normalised in ("false", "0"):
+        return False
+    raise argparse.ArgumentTypeError("expected a boolean value, got %r" % value)
+
+
+def main() -> None:
+    """Parse the command line and write requirements.bzl into the current directory."""
+    parser = argparse.ArgumentParser(
+        description="Create rules to incrementally fetch needed \
+dependencies from a fully resolved requirements lock file."
+    )
+    parser.add_argument(
+        "--requirements_lock",
+        action="store",
+        required=True,
+        help="Path to fully resolved requirements.txt to use as the source of repos.",
+    )
+    parser.add_argument(
+        "--quiet",
+        # Not `bool`: see _parse_bool for why the string has to be interpreted explicitly.
+        type=_parse_bool,
+        action="store",
+        required=True,
+        help="Whether to print stdout / stderr from child repos.",
+    )
+    parser.add_argument(
+        "--timeout",
+        type=int,
+        action="store",
+        required=True,
+        help="timeout to use for pip operation.",
+    )
+    arguments.parse_common_args(parser)
+    args = parser.parse_args()
+
+    with open("requirements.bzl", "w") as requirement_file:
+        requirement_file.write(
+            generate_parsed_requirements_contents(args)
+        )
diff --git a/python/pip_install/parse_requirements_to_bzl/__main__.py b/python/pip_install/parse_requirements_to_bzl/__main__.py
new file mode 100644
index 0000000..8919961
--- /dev/null
+++ b/python/pip_install/parse_requirements_to_bzl/__main__.py
@@ -0,0 +1,5 @@
+"""Main entry point."""
+from python.pip_install.parse_requirements_to_bzl import main
+
+if __name__ == "__main__":
+ main()
diff --git a/python/pip_install/parse_requirements_to_bzl/extract_single_wheel/BUILD b/python/pip_install/parse_requirements_to_bzl/extract_single_wheel/BUILD
new file mode 100644
index 0000000..17bdfe7
--- /dev/null
+++ b/python/pip_install/parse_requirements_to_bzl/extract_single_wheel/BUILD
@@ -0,0 +1,8 @@
+filegroup(
+ name = "distribution",
+ srcs = glob(
+ ["*"],
+ exclude = ["*_test.py"],
+ ),
+ visibility = ["//python/pip_install:__subpackages__"],
+)
diff --git a/python/pip_install/parse_requirements_to_bzl/extract_single_wheel/__init__.py b/python/pip_install/parse_requirements_to_bzl/extract_single_wheel/__init__.py
new file mode 100644
index 0000000..d2b9413
--- /dev/null
+++ b/python/pip_install/parse_requirements_to_bzl/extract_single_wheel/__init__.py
@@ -0,0 +1,51 @@
+import argparse
+import sys
+import glob
+import subprocess
+import json
+
+from python.pip_install.extract_wheels.lib import bazel, requirements, arguments
+from python.pip_install.extract_wheels import configure_reproducible_wheels
+
+
+def main() -> None:
+    """Build or fetch the wheel for one requirement and extract it into the current directory.
+
+    Runs `pip wheel --no-deps` for the value of --requirement, then hands the
+    first `*.whl` file found in the working directory to bazel.extract_wheel in
+    incremental mode.
+    """
+    parser = argparse.ArgumentParser(
+        description="Build and/or fetch a single wheel based on the requirement passed in"
+    )
+    parser.add_argument(
+        "--requirement",
+        action="store",
+        required=True,
+        help="A single PEP508 requirement specifier string.",
+    )
+    arguments.parse_common_args(parser)
+    args = parser.parse_args()
+
+    configure_reproducible_wheels()
+
+    pip_args = [sys.executable, "-m", "pip", "--isolated", "wheel", "--no-deps"]
+    if args.extra_pip_args:
+        # The flag value is a JSON object holding the extra arguments under "args".
+        pip_args += json.loads(args.extra_pip_args)["args"]
+
+    pip_args.append(args.requirement)
+
+    # Assumes any errors are logged by pip so do nothing. This command will fail if pip fails
+    subprocess.run(pip_args, check=True)
+
+    name, extras_for_pkg = requirements._parse_requirement_for_extra(args.requirement)
+    # Only record extras when both a name and at least one extra were parsed.
+    extras = {name: extras_for_pkg} if extras_for_pkg and name else dict()
+
+    if args.pip_data_exclude:
+        # The flag value is a JSON object holding the patterns under "exclude".
+        pip_data_exclude = json.loads(args.pip_data_exclude)["exclude"]
+    else:
+        pip_data_exclude = []
+
+    # Takes the first wheel matched in the working directory; raises StopIteration if none exists.
+    whl = next(iter(glob.glob("*.whl")))
+    bazel.extract_wheel(
+        whl,
+        extras,
+        pip_data_exclude,
+        args.enable_implicit_namespace_pkgs,
+        incremental=True,
+        incremental_repo_prefix=bazel.whl_library_repo_prefix(args.repo)
+    )
diff --git a/python/pip_install/parse_requirements_to_bzl/extract_single_wheel/__main__.py b/python/pip_install/parse_requirements_to_bzl/extract_single_wheel/__main__.py
new file mode 100644
index 0000000..d45f90b
--- /dev/null
+++ b/python/pip_install/parse_requirements_to_bzl/extract_single_wheel/__main__.py
@@ -0,0 +1,4 @@
+from python.pip_install.parse_requirements_to_bzl.extract_single_wheel import main
+
+if __name__ == "__main__":
+ main()
diff --git a/python/pip_install/parse_requirements_to_bzl/parse_requirements_to_bzl_test.py b/python/pip_install/parse_requirements_to_bzl/parse_requirements_to_bzl_test.py
new file mode 100644
index 0000000..4b474d4
--- /dev/null
+++ b/python/pip_install/parse_requirements_to_bzl/parse_requirements_to_bzl_test.py
@@ -0,0 +1,39 @@
+import unittest
+import argparse
+import json
+from tempfile import NamedTemporaryFile
+
+from python.pip_install.parse_requirements_to_bzl import generate_parsed_requirements_contents
+from python.pip_install.extract_wheels.lib.bazel import (
+ sanitised_repo_library_label,
+ whl_library_repo_prefix,
+ sanitised_repo_file_label
+)
+
+
+class TestParseRequirementsToBzl(unittest.TestCase):
+    """Checks the requirements.bzl text generated from a one-line lock file."""
+
+    def test_generated_requirements_bzl(self) -> None:
+        with NamedTemporaryFile() as requirements_lock:
+            requirement_string = "foo==0.0.0"
+            requirements_lock.write(bytes(requirement_string, encoding="utf-8"))
+            # Flush so the generator can read the content back through the file name.
+            requirements_lock.flush()
+            args = argparse.Namespace()
+            args.requirements_lock = requirements_lock.name
+            args.repo = "pip_parsed_deps"
+            extra_pip_args = ["--index-url=pypi.org/simple"]
+            args.extra_pip_args = json.dumps({"args": extra_pip_args})
+            contents = generate_parsed_requirements_contents(args)
+            library_target = "@pip_parsed_deps_pypi__foo//:pkg"
+            whl_target = "@pip_parsed_deps_pypi__foo//:whl"
+            all_requirements = 'all_requirements = ["{library_target}"]'.format(library_target=library_target)
+            all_whl_requirements = 'all_whl_requirements = ["{whl_target}"]'.format(whl_target=whl_target)
+            self.assertIn(all_requirements, contents, contents)
+            self.assertIn(all_whl_requirements, contents, contents)
+            # The raw requirement string must appear in the generated file.
+            self.assertIn(requirement_string, contents, contents)
+            # The deserialised pip arguments are embedded as a Python-style list literal.
+            self.assertIn("'extra_pip_args': {}".format(repr(extra_pip_args)), contents, contents)
+
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/python/pip_install/pip_repository.bzl b/python/pip_install/pip_repository.bzl
index b2cb0a8..c07c9cd 100644
--- a/python/pip_install/pip_repository.bzl
+++ b/python/pip_install/pip_repository.bzl
@@ -2,17 +2,16 @@
load("//python/pip_install:repositories.bzl", "all_requirements")
-def _pip_repository_impl(rctx):
- python_interpreter = rctx.attr.python_interpreter
- if rctx.attr.python_interpreter_target != None:
- target = rctx.attr.python_interpreter_target
- python_interpreter = rctx.path(target)
- else:
- if "/" not in python_interpreter:
- python_interpreter = rctx.which(python_interpreter)
- if not python_interpreter:
- fail("python interpreter not found")
+def _construct_pypath(rctx):
+ """Helper function to construct a PYTHONPATH.
+ Contains entries for code in this repo as well as packages downloaded from //python/pip_install:repositories.bzl.
+ This allows us to run python code inside repository rule implementations.
+
+ Args:
+ rctx: Handle to the repository_context.
+ Returns: String of the PYTHONPATH.
+ """
rctx.file("BUILD", "")
# Get the root directory of these rules
@@ -24,17 +23,16 @@
]
separator = ":" if not "windows" in rctx.os.name.lower() else ";"
pypath = separator.join([str(p) for p in [rules_root] + thirdparty_roots])
+ return pypath
- args = [
- python_interpreter,
- "-m",
- "python.pip_install.extract_wheels",
- "--requirements",
- rctx.path(rctx.attr.requirements),
- "--repo",
- "@%s" % rctx.attr.name,
- ]
+def _parse_optional_attrs(rctx, args):
+ """Helper function to parse common attributes of pip_repository and whl_library repository rules.
+ Args:
+ rctx: Handle to the rule repository context.
+ args: A list of parsed args for the rule.
+ Returns: Augmented args list.
+ """
if rctx.attr.extra_pip_args:
args += [
"--extra_pip_args",
@@ -50,6 +48,49 @@
if rctx.attr.enable_implicit_namespace_pkgs:
args.append("--enable_implicit_namespace_pkgs")
+ return args
+
+def _pip_repository_impl(rctx):
+ python_interpreter = rctx.attr.python_interpreter
+ if rctx.attr.python_interpreter_target != None:
+ target = rctx.attr.python_interpreter_target
+ python_interpreter = rctx.path(target)
+ else:
+ if "/" not in python_interpreter:
+ python_interpreter = rctx.which(python_interpreter)
+ if not python_interpreter:
+ fail("python interpreter not found")
+
+ if rctx.attr.incremental and not rctx.attr.requirements_lock:
+ fail("Incremental mode requires a requirements_lock attribute be specified.")
+
+ pypath = _construct_pypath(rctx)
+
+ if rctx.attr.incremental:
+ args = [
+ python_interpreter,
+ "-m",
+ "python.pip_install.parse_requirements_to_bzl",
+ "--requirements_lock",
+ rctx.path(rctx.attr.requirements_lock),
+ # pass quiet and timeout args through to child repos.
+ "--quiet",
+ str(rctx.attr.quiet),
+ "--timeout",
+ str(rctx.attr.timeout),
+ ]
+ else:
+ args = [
+ python_interpreter,
+ "-m",
+ "python.pip_install.extract_wheels",
+ "--requirements",
+ rctx.path(rctx.attr.requirements),
+ ]
+
+ args += ["--repo", rctx.attr.name]
+ args = _parse_optional_attrs(rctx, args)
+
result = rctx.execute(
args,
environment = {
@@ -59,52 +100,73 @@
timeout = rctx.attr.timeout,
quiet = rctx.attr.quiet,
)
+
if result.return_code:
- fail("rules_python_external failed: %s (%s)" % (result.stdout, result.stderr))
+ fail("rules_python failed: %s (%s)" % (result.stdout, result.stderr))
return
-pip_repository = repository_rule(
- attrs = {
- "enable_implicit_namespace_pkgs": attr.bool(
- default = False,
- doc = """
+common_attrs = {
+ "enable_implicit_namespace_pkgs": attr.bool(
+ default = False,
+ doc = """
If true, disables conversion of native namespace packages into pkg-util style namespace packages. When set all py_binary
and py_test targets must specify either `legacy_create_init=False` or the global Bazel option
`--incompatible_default_to_explicit_init_py` to prevent `__init__.py` being automatically generated in every directory.
This option is required to support some packages which cannot handle the conversion to pkg-util style.
""",
- ),
- "extra_pip_args": attr.string_list(
- doc = "Extra arguments to pass on to pip. Must not contain spaces.",
- ),
- "pip_data_exclude": attr.string_list(
- doc = "Additional data exclusion parameters to add to the pip packages BUILD file.",
- ),
- "python_interpreter": attr.string(default = "python3"),
- "python_interpreter_target": attr.label(allow_single_file = True, doc = """
+ ),
+ "extra_pip_args": attr.string_list(
+ doc = "Extra arguments to pass on to pip. Must not contain spaces.",
+ ),
+ "pip_data_exclude": attr.string_list(
+ doc = "Additional data exclusion parameters to add to the pip packages BUILD file.",
+ ),
+ "python_interpreter": attr.string(default = "python3"),
+ "python_interpreter_target": attr.label(
+ allow_single_file = True,
+ doc = """
If you are using a custom python interpreter built by another repository rule,
use this attribute to specify its BUILD target. This allows pip_repository to invoke
pip using the same interpreter as your toolchain. If set, takes precedence over
python_interpreter.
-"""),
- "quiet": attr.bool(
- default = True,
- doc = "If True, suppress printing stdout and stderr output to the terminal.",
- ),
- "requirements": attr.label(
- allow_single_file = True,
- mandatory = True,
- doc = "A 'requirements.txt' pip requirements file.",
- ),
- # 600 is documented as default here: https://docs.bazel.build/versions/master/skylark/lib/repository_ctx.html#execute
- "timeout": attr.int(
- default = 600,
- doc = "Timeout (in seconds) on the rule's execution duration.",
- ),
- },
- implementation = _pip_repository_impl,
+""",
+ ),
+ "quiet": attr.bool(
+ default = True,
+ doc = "If True, suppress printing stdout and stderr output to the terminal.",
+ ),
+ # 600 is documented as default here: https://docs.bazel.build/versions/master/skylark/lib/repository_ctx.html#execute
+ "timeout": attr.int(
+ default = 600,
+ doc = "Timeout (in seconds) on the rule's execution duration.",
+ ),
+}
+
+pip_repository_attrs = {
+ "incremental": attr.bool(
+ default = False,
+ doc = "Create the repository in incremental mode.",
+ ),
+ "requirements": attr.label(
+ allow_single_file = True,
+ doc = "A 'requirements.txt' pip requirements file.",
+ ),
+ "requirements_lock": attr.label(
+ allow_single_file = True,
+ doc = """
+A fully resolved 'requirements.txt' pip requirement file containing the transitive set of your dependencies. If this file is passed instead
+of 'requirements', no resolution will take place and pip_repository will create individual repositories for each of your dependencies so that
+wheels are fetched/built only for the targets specified by 'build/run/test'.
+""",
+ ),
+}
+
+pip_repository_attrs.update(**common_attrs)
+
+pip_repository = repository_rule(
+ attrs = pip_repository_attrs,
doc = """A rule for importing `requirements.txt` dependencies into Bazel.
This rule imports a `requirements.txt` file and generates a new
@@ -144,4 +206,55 @@
)
```
""",
+ implementation = _pip_repository_impl,
+)
+
+def _impl_whl_library(rctx):
+ # pointer to parent repo so these rules rerun if the definitions in requirements.bzl change.
+ _parent_repo_label = Label("@{parent}//:requirements.bzl".format(parent=rctx.attr.repo))
+ pypath = _construct_pypath(rctx)
+ args = [
+ rctx.attr.python_interpreter,
+ "-m",
+ "python.pip_install.parse_requirements_to_bzl.extract_single_wheel",
+ "--requirement",
+ rctx.attr.requirement,
+ "--repo",
+ rctx.attr.repo,
+ ]
+ args = _parse_optional_attrs(rctx, args)
+ result = rctx.execute(
+ args,
+ environment = {
+ # Manually construct the PYTHONPATH since we cannot use the toolchain here
+ "PYTHONPATH": pypath,
+ },
+ quiet = rctx.attr.quiet,
+ timeout = rctx.attr.timeout,
+ )
+
+ if result.return_code:
+ fail("whl_library %s failed: %s (%s)" % (rctx.attr.name, result.stdout, result.stderr))
+
+ return
+
+whl_library_attrs = {
+ "repo": attr.string(
+ mandatory = True,
+ doc = "Pointer to parent repo name. Used to make these rules rerun if the parent repo changes.",
+ ),
+ "requirement": attr.string(
+ mandatory = True,
+ doc = "Python requirement string describing the package to make available",
+ ),
+}
+
+whl_library_attrs.update(**common_attrs)
+
+whl_library = repository_rule(
+ attrs = whl_library_attrs,
+ doc = """
+Downloads and extracts a single wheel into a bazel repo based on the requirement string passed in.
+Instantiated from pip_repository and inherits config options from there.""",
+ implementation = _impl_whl_library,
)