refactor(pypi): extract the wheel without python (#3430)
With this change we start extracting the wheel without Python, so a
Python interpreter becomes a requirement only when patching (in that
case we extract the wheel without Python, patch it and then re-compress
it, which makes for a very inefficient process).
This should result in much faster execution because we can start
extracting wheels even before we fetch the entire Python toolchain,
and in a wheel-only setup we don't need to fetch it until we are
actually building or executing tests. What is more, Bazel is faster
at extracting everything.
Work towards #2948
diff --git a/python/private/internal_config_repo.bzl b/python/private/internal_config_repo.bzl
index b208037..91f786c 100644
--- a/python/private/internal_config_repo.bzl
+++ b/python/private/internal_config_repo.bzl
@@ -32,6 +32,7 @@
enable_pystar = True,
enable_pipstar = {enable_pipstar},
enable_deprecation_warnings = {enable_deprecation_warnings},
+ bazel_8_or_later = {bazel_8_or_later},
bazel_9_or_later = {bazel_9_or_later},
BuiltinPyInfo = getattr(getattr(native, "legacy_globals", None), "PyInfo", {builtin_py_info_symbol}),
BuiltinPyRuntimeInfo = getattr(getattr(native, "legacy_globals", None), "PyRuntimeInfo", {builtin_py_runtime_info_symbol}),
@@ -107,6 +108,7 @@
builtin_py_info_symbol = builtin_py_info_symbol,
builtin_py_runtime_info_symbol = builtin_py_runtime_info_symbol,
builtin_py_cc_link_params_provider = builtin_py_cc_link_params_provider,
+ bazel_8_or_later = str(bazel_major_version >= 8),
bazel_9_or_later = str(bazel_major_version >= 9),
))
diff --git a/python/private/pypi/hub_builder.bzl b/python/private/pypi/hub_builder.bzl
index 69359bc..1378e2f 100644
--- a/python/private/pypi/hub_builder.bzl
+++ b/python/private/pypi/hub_builder.bzl
@@ -440,6 +440,9 @@
pip_attr = pip_attr,
enable_pipstar = enable_pipstar,
)
+
+ interpreter = _detect_interpreter(self, pip_attr)
+
for whl in requirements_by_platform:
whl_library_args = common_args | _whl_library_args(
self,
@@ -456,6 +459,7 @@
auth_patterns = self._config.auth_patterns or pip_attr.auth_patterns,
python_version = _major_minor_version(pip_attr.python_version),
is_multiple_versions = whl.is_multiple_versions,
+ interpreter = interpreter,
enable_pipstar = enable_pipstar,
)
_add_whl_library(
@@ -467,8 +471,6 @@
)
def _common_args(self, module_ctx, *, pip_attr, enable_pipstar):
- interpreter = _detect_interpreter(self, pip_attr)
-
# Construct args separately so that the lock file can be smaller and does not include unused
# attrs.
whl_library_args = dict(
@@ -483,8 +485,6 @@
environment = pip_attr.environment,
envsubst = pip_attr.envsubst,
pip_data_exclude = pip_attr.pip_data_exclude,
- python_interpreter = interpreter.path,
- python_interpreter_target = interpreter.target,
)
if not enable_pipstar:
maybe_args["experimental_target_platforms"] = pip_attr.experimental_target_platforms
@@ -536,6 +536,7 @@
auth_patterns,
python_version,
use_downloader,
+ interpreter,
enable_pipstar = False):
args = dict(whl_library_args)
args["requirement"] = src.requirement_line
@@ -548,6 +549,12 @@
# need to pass the extra args there, so only pop this for whls
args["extra_pip_args"] = src.extra_pip_args
+ if "whl_patches" in args or not (enable_pipstar and is_whl):
+ if interpreter.path:
+ args["python_interpreter"] = interpreter.path
+ if interpreter.target:
+ args["python_interpreter_target"] = interpreter.target
+
if not src.url or (not is_whl and download_only):
if download_only and use_downloader:
# If the user did not allow using sdists and we are using the downloader
diff --git a/python/private/pypi/patch_whl.bzl b/python/private/pypi/patch_whl.bzl
index 7af9c4d..e315989 100644
--- a/python/private/pypi/patch_whl.bzl
+++ b/python/private/pypi/patch_whl.bzl
@@ -87,6 +87,8 @@
# symlink to a zip file to use bazel's extract so that we can use bazel's
# repository_ctx patch implementation. The whl file may be in a different
# external repository.
+ #
+ # TODO @aignas 2025-11-24: remove this symlinking workaround when we drop support for bazel 7
whl_file_zip = whl_input.basename + ".zip"
rctx.symlink(whl_input, whl_file_zip)
rctx.extract(whl_file_zip)
diff --git a/python/private/pypi/whl_library.bzl b/python/private/pypi/whl_library.bzl
index 5db7bc4..6b515a5 100644
--- a/python/private/pypi/whl_library.bzl
+++ b/python/private/pypi/whl_library.bzl
@@ -377,21 +377,17 @@
#
# Remove non-pipstar and config_load check when we release rules_python 2.
if enable_pipstar:
- pypi_repo_utils.execute_checked(
- rctx,
- op = "whl_library.ExtractWheel({}, {})".format(rctx.attr.name, whl_path),
- python = python_interpreter,
- arguments = args + [
- "--whl-file",
- whl_path,
- "--enable-pipstar",
- ],
- srcs = rctx.attr._python_srcs,
- environment = environment,
- quiet = rctx.attr.quiet,
- timeout = rctx.attr.timeout,
- logger = logger,
+ if rp_config.bazel_8_or_later:
+ extract_path = whl_path
+ else:
+ extract_path = rctx.path(whl_path.basename + ".zip")
+ rctx.symlink(whl_path, extract_path)
+ rctx.extract(
+ archive = extract_path,
+ output = "site-packages",
)
+ if not rp_config.bazel_8_or_later:
+ rctx.delete(extract_path)
metadata = whl_metadata(
install_dir = whl_path.dirname.get_child("site-packages"),
diff --git a/tests/pypi/hub_builder/hub_builder_tests.bzl b/tests/pypi/hub_builder/hub_builder_tests.bzl
index a0ab919..414ad12 100644
--- a/tests/pypi/hub_builder/hub_builder_tests.bzl
+++ b/tests/pypi/hub_builder/hub_builder_tests.bzl
@@ -44,6 +44,7 @@
debug = False,
config = None,
minor_mapping = {},
+ whl_overrides = {},
evaluate_markers_fn = None,
simpleapi_download_fn = None,
available_interpreters = {}):
@@ -76,7 +77,7 @@
netrc = None,
auth_patterns = None,
),
- whl_overrides = {},
+ whl_overrides = whl_overrides,
minor_mapping = minor_mapping or {"3.15": "3.15.19"},
available_interpreters = available_interpreters or {
"python_3_15_host": "unit_test_interpreter_target",
@@ -320,7 +321,6 @@
"config_load": "@pypi//:config.bzl",
"dep_template": "@pypi//{name}:{target}",
"filename": "simple-0.0.1-py3-none-any.whl",
- "python_interpreter_target": "unit_test_interpreter_target",
"requirement": "simple[foo]==0.0.1",
"sha256": "deadbeef",
"urls": ["https://example.com/simple-0.0.1-py3-none-any.whl"],
@@ -329,7 +329,6 @@
"config_load": "@pypi//:config.bzl",
"dep_template": "@pypi//{name}:{target}",
"filename": "simple-0.0.1-py3-none-any.whl",
- "python_interpreter_target": "unit_test_interpreter_target",
"requirement": "simple==0.0.1",
"sha256": "deadbeef",
"urls": ["https://example.com/simple-0.0.1-py3-none-any.whl"],
@@ -656,7 +655,6 @@
"config_load": "@pypi//:config.bzl",
"dep_template": "@pypi//{name}:{target}",
"filename": "torch-2.4.1+cpu-cp312-cp312-linux_x86_64.whl",
- "python_interpreter_target": "unit_test_interpreter_target",
"requirement": "torch==2.4.1+cpu",
"sha256": "8800deef0026011d502c0c256cc4b67d002347f63c3a38cd8e45f1f445c61364",
"urls": ["https://torch.index/whl/cpu/torch-2.4.1%2Bcpu-cp312-cp312-linux_x86_64.whl"],
@@ -665,7 +663,6 @@
"config_load": "@pypi//:config.bzl",
"dep_template": "@pypi//{name}:{target}",
"filename": "torch-2.4.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl",
- "python_interpreter_target": "unit_test_interpreter_target",
"requirement": "torch==2.4.1",
"sha256": "36109432b10bd7163c9b30ce896f3c2cca1b86b9765f956a1594f0ff43091e2a",
"urls": ["https://torch.index/whl/cpu/torch-2.4.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl"],
@@ -674,7 +671,6 @@
"config_load": "@pypi//:config.bzl",
"dep_template": "@pypi//{name}:{target}",
"filename": "torch-2.4.1+cpu-cp312-cp312-win_amd64.whl",
- "python_interpreter_target": "unit_test_interpreter_target",
"requirement": "torch==2.4.1+cpu",
"sha256": "3a570e5c553415cdbddfe679207327b3a3806b21c6adea14fba77684d1619e97",
"urls": ["https://torch.index/whl/cpu/torch-2.4.1%2Bcpu-cp312-cp312-win_amd64.whl"],
@@ -683,7 +679,6 @@
"config_load": "@pypi//:config.bzl",
"dep_template": "@pypi//{name}:{target}",
"filename": "torch-2.4.1-cp312-none-macosx_11_0_arm64.whl",
- "python_interpreter_target": "unit_test_interpreter_target",
"requirement": "torch==2.4.1",
"sha256": "72b484d5b6cec1a735bf3fa5a1c4883d01748698c5e9cfdbeb4ffab7c7987e0d",
"urls": ["https://torch.index/whl/cpu/torch-2.4.1-cp312-none-macosx_11_0_arm64.whl"],
@@ -845,6 +840,11 @@
builder = hub_builder(
env,
simpleapi_download_fn = mocksimpleapi_download,
+ whl_overrides = {
+ "direct_without_sha": {
+ "my_patch": 1,
+ },
+ },
)
builder.pip_parse(
_mock_mctx(
@@ -1003,6 +1003,10 @@
"requirement": "direct_without_sha==0.0.1",
"sha256": "",
"urls": ["example-direct.org/direct_without_sha-0.0.1-py3-none-any.whl"],
+ # NOTE @aignas 2025-11-24: any patching still requires the python interpreter from the
+ # hermetic toolchain or the system. This is so that we can rezip it back to a wheel and
+ # verify the metadata so that it is installable by any installer out there.
+ "whl_patches": {"my_patch": "1"},
},
"pypi_315_git_dep": {
"config_load": "@pypi//:config.bzl",
@@ -1022,7 +1026,6 @@
"config_load": "@pypi//:config.bzl",
"dep_template": "@pypi//{name}:{target}",
"filename": "plat-pkg-0.0.4-py3-none-linux_x86_64.whl",
- "python_interpreter_target": "unit_test_interpreter_target",
"requirement": "plat_pkg==0.0.4",
"sha256": "deadb44f",
"urls": ["example2.org/index/plat_pkg/"],
@@ -1031,7 +1034,6 @@
"config_load": "@pypi//:config.bzl",
"dep_template": "@pypi//{name}:{target}",
"filename": "simple-0.0.1-py3-none-any.whl",
- "python_interpreter_target": "unit_test_interpreter_target",
"requirement": "simple==0.0.1",
"sha256": "deadb00f",
"urls": ["example2.org"],
@@ -1040,7 +1042,6 @@
"config_load": "@pypi//:config.bzl",
"dep_template": "@pypi//{name}:{target}",
"filename": "some_pkg-0.0.1-py3-none-any.whl",
- "python_interpreter_target": "unit_test_interpreter_target",
"requirement": "some_pkg==0.0.1",
"sha256": "deadbaaf",
"urls": ["example-direct.org/some_pkg-0.0.1-py3-none-any.whl"],
@@ -1049,7 +1050,6 @@
"config_load": "@pypi//:config.bzl",
"dep_template": "@pypi//{name}:{target}",
"filename": "some-other-pkg-0.0.1-py3-none-any.whl",
- "python_interpreter_target": "unit_test_interpreter_target",
"requirement": "some_other_pkg==0.0.1",
"sha256": "deadb33f",
"urls": ["example2.org/index/some_other_pkg/"],