refactor(whl_library): split wheel downloading and extraction into separate executions (#1487)

Before the PR the downloading/building of the wheel and the extraction
would be done as a single step, which meant that for patching of the
wheel to happen, we would need to do it within the python script. In
order to have more flexibility in the approach, this PR splits the
process to two separate invocations of the wheel_installer, which
incidentally also helps in a case where the downloading of the wheel
file can happen separately via http_file.

Related issues #1076, #1357
diff --git a/python/pip_install/pip_repository.bzl b/python/pip_install/pip_repository.bzl
index 5f829a9..207c47a 100644
--- a/python/pip_install/pip_repository.bzl
+++ b/python/pip_install/pip_repository.bzl
@@ -526,10 +526,25 @@
 
     args = _parse_optional_attrs(rctx, args)
 
+    # Manually construct the PYTHONPATH since we cannot use the toolchain here
+    environment = _create_repository_execution_environment(rctx, python_interpreter)
+
     result = rctx.execute(
         args,
-        # Manually construct the PYTHONPATH since we cannot use the toolchain here
-        environment = _create_repository_execution_environment(rctx, python_interpreter),
+        environment = environment,
+        quiet = rctx.attr.quiet,
+        timeout = rctx.attr.timeout,
+    )
+    if result.return_code:
+        fail("whl_library %s failed: %s (%s) error code: '%s'" % (rctx.attr.name, result.stdout, result.stderr, result.return_code))
+
+    whl_path = rctx.path(json.decode(rctx.read("whl_file.json"))["whl_file"])
+    if not rctx.delete("whl_file.json"):
+        fail("failed to delete the whl_file.json file")
+
+    result = rctx.execute(
+        args + ["--whl-file", whl_path],
+        environment = environment,
         quiet = rctx.attr.quiet,
         timeout = rctx.attr.timeout,
     )
@@ -562,6 +577,7 @@
 
     build_file_contents = generate_whl_library_build_bazel(
         repo_prefix = rctx.attr.repo_prefix,
+        whl_name = whl_path.basename,
         dependencies = metadata["deps"],
         data_exclude = rctx.attr.pip_data_exclude,
         tags = [
diff --git a/python/pip_install/private/generate_whl_library_build_bazel.bzl b/python/pip_install/private/generate_whl_library_build_bazel.bzl
index 229a917..f0f5242 100644
--- a/python/pip_install/private/generate_whl_library_build_bazel.bzl
+++ b/python/pip_install/private/generate_whl_library_build_bazel.bzl
@@ -60,7 +60,7 @@
 
 filegroup(
     name = "{whl_file_label}",
-    srcs = glob(["*.whl"], allow_empty = True),
+    srcs = ["{whl_name}"],
     data = {whl_file_deps},
 )
 
@@ -86,7 +86,9 @@
 """
 
 def generate_whl_library_build_bazel(
+        *,
         repo_prefix,
+        whl_name,
         dependencies,
         data_exclude,
         tags,
@@ -96,6 +98,7 @@
 
     Args:
         repo_prefix: the repo prefix that should be used for dependency lists.
+        whl_name: the whl_name that this is generated for.
         dependencies: a list of PyPI packages that are dependencies to the py_library.
         data_exclude: more patterns to exclude from the data attribute of generated py_library rules.
         tags: list of tags to apply to generated py_library rules.
@@ -166,6 +169,7 @@
                 name = _PY_LIBRARY_LABEL,
                 dependencies = repr(lib_dependencies),
                 data_exclude = repr(_data_exclude),
+                whl_name = whl_name,
                 whl_file_label = _WHEEL_FILE_LABEL,
                 whl_file_deps = repr(whl_file_deps),
                 tags = repr(tags),
diff --git a/python/pip_install/tools/wheel_installer/arguments.py b/python/pip_install/tools/wheel_installer/arguments.py
index aac3c01..25fd30f 100644
--- a/python/pip_install/tools/wheel_installer/arguments.py
+++ b/python/pip_install/tools/wheel_installer/arguments.py
@@ -14,6 +14,7 @@
 
 import argparse
 import json
+import pathlib
 from typing import Any
 
 
@@ -59,6 +60,11 @@
         help="Use 'pip download' instead of 'pip wheel'. Disables building wheels from source, but allows use of "
         "--platform, --python-version, --implementation, and --abi in --extra_pip_args.",
     )
+    parser.add_argument(
+        "--whl-file",
+        type=pathlib.Path,
+        help="Extract a whl file to be used within Bazel.",
+    )
     return parser
 
 
diff --git a/python/pip_install/tools/wheel_installer/wheel_installer.py b/python/pip_install/tools/wheel_installer/wheel_installer.py
index c6c2961..f5ed8c3 100644
--- a/python/pip_install/tools/wheel_installer/wheel_installer.py
+++ b/python/pip_install/tools/wheel_installer/wheel_installer.py
@@ -155,6 +155,18 @@
 
     _configure_reproducible_wheels()
 
+    if args.whl_file:
+        whl = Path(args.whl_file)
+
+        name, extras_for_pkg = _parse_requirement_for_extra(args.requirement)
+        extras = {name: extras_for_pkg} if extras_for_pkg and name else dict()
+        _extract_wheel(
+            wheel_file=whl,
+            extras=extras,
+            enable_implicit_namespace_pkgs=args.enable_implicit_namespace_pkgs,
+        )
+        return
+
     pip_args = (
         [sys.executable, "-m", "pip"]
         + (["--isolated"] if args.isolated else [])
@@ -185,15 +197,10 @@
             if e.errno != errno.ENOENT:
                 raise
 
-    name, extras_for_pkg = _parse_requirement_for_extra(args.requirement)
-    extras = {name: extras_for_pkg} if extras_for_pkg and name else dict()
+    whl = Path(next(iter(glob.glob("*.whl"))))
 
-    whl = next(iter(glob.glob("*.whl")))
-    _extract_wheel(
-        wheel_file=whl,
-        extras=extras,
-        enable_implicit_namespace_pkgs=args.enable_implicit_namespace_pkgs,
-    )
+    with open("whl_file.json", "w") as f:
+        json.dump({"whl_file": f"{whl.resolve()}"}, f)
 
 
 if __name__ == "__main__":
diff --git a/tests/pip_install/whl_library/generate_build_bazel_tests.bzl b/tests/pip_install/whl_library/generate_build_bazel_tests.bzl
index 365233d..b6af0c7 100644
--- a/tests/pip_install/whl_library/generate_build_bazel_tests.bzl
+++ b/tests/pip_install/whl_library/generate_build_bazel_tests.bzl
@@ -38,7 +38,7 @@
 
 filegroup(
     name = "whl",
-    srcs = glob(["*.whl"], allow_empty = True),
+    srcs = ["foo.whl"],
     data = ["@pypi_bar_baz//:whl", "@pypi_foo//:whl"],
 )
 
@@ -64,6 +64,7 @@
 """
     actual = generate_whl_library_build_bazel(
         repo_prefix = "pypi_",
+        whl_name = "foo.whl",
         dependencies = ["foo", "bar-baz"],
         data_exclude = [],
         tags = ["tag1", "tag2"],
@@ -93,7 +94,7 @@
 
 filegroup(
     name = "whl",
-    srcs = glob(["*.whl"], allow_empty = True),
+    srcs = ["foo.whl"],
     data = ["@pypi_bar_baz//:whl", "@pypi_foo//:whl"],
 )
 
@@ -135,6 +136,7 @@
 """
     actual = generate_whl_library_build_bazel(
         repo_prefix = "pypi_",
+        whl_name = "foo.whl",
         dependencies = ["foo", "bar-baz"],
         data_exclude = [],
         tags = ["tag1", "tag2"],
@@ -171,7 +173,7 @@
 
 filegroup(
     name = "whl",
-    srcs = glob(["*.whl"], allow_empty = True),
+    srcs = ["foo.whl"],
     data = ["@pypi_bar_baz//:whl", "@pypi_foo//:whl"],
 )
 
@@ -206,6 +208,7 @@
 """
     actual = generate_whl_library_build_bazel(
         repo_prefix = "pypi_",
+        whl_name = "foo.whl",
         dependencies = ["foo", "bar-baz"],
         data_exclude = [],
         tags = ["tag1", "tag2"],