blob: 6b2e0507acefb0b3e6989f65998ea4361be13e08 [file] [log] [blame]
"""A simple whl extractor."""
load("@rules_python_internal//:rules_python_config.bzl", rp_config = "config")
load("//python/private:repo_utils.bzl", "repo_utils")
load(":whl_metadata.bzl", "find_whl_metadata")
def whl_extract(rctx, *, whl_path, logger):
    """Unpack a wheel into the repository using only Starlark.

    The archive is extracted into a `site-packages` directory next to the
    wheel, an `INSTALLER` marker file is written into the `.dist-info`
    directory and, if the wheel has a `<prefix>.data` directory, its contents
    are moved to their install locations before the directory is deleted.

    Args:
        rctx: the repository ctx.
        whl_path: the path of the whl to extract.
        logger: the logger used to report the renames at debug level.
    """
    repo_root = whl_path.dirname
    site_packages = repo_root.get_child("site-packages")
    repo_utils.extract(
        rctx,
        archive = whl_path,
        output = site_packages,
        supports_whl_extraction = rp_config.supports_whl_extraction,
    )

    # The metadata file lives directly inside the <prefix>.dist-info directory.
    dist_info_dir = find_whl_metadata(
        install_dir = site_packages,
        logger = logger,
    ).dirname
    rctx.file(
        dist_info_dir.get_child("INSTALLER"),
        "https://github.com/bazel-contrib/rules_python#pipstar",
    )

    # The <prefix>.data directory is a sibling of <prefix>.dist-info.
    name_prefix = dist_info_dir.basename[:-len(".dist-info")]
    data_dir = dist_info_dir.dirname.get_child(name_prefix + ".data")
    if not data_dir.exists:
        return

    # Maps each `.data` sub-directory to its location under the repository root.
    #
    # https://docs.python.org/3/library/sysconfig.html#posix-prefix
    # The values are taken from the legacy whl installer config.
    #
    # `platlib` and `purelib` both end up in `site-packages`, which already
    # holds the extracted wheel, so directories may overlap and the trees are
    # merged instead of renamed wholesale. Some packages explicitly place their
    # files under `platlib` or `purelib` rather than at the top level (e.g. the
    # libclang wheel).
    install_locations = {
        "data": "data",
        "headers": "include",
        "platlib": "site-packages",
        "purelib": "site-packages",
        "scripts": "bin",
    }
    for scheme, target_name in install_locations.items():
        scheme_dir = data_dir.get_child(scheme)
        if not scheme_dir.exists:
            # The wheel does not ship anything for this scheme.
            continue

        target_dir = repo_root.get_child(target_name)
        for (move_from, move_to) in merge_trees(scheme_dir, target_dir):
            logger.debug(lambda: "Renaming: {} -> {}".format(move_from, move_to))
            rctx.rename(move_from, move_to)

        # TODO @aignas 2025-12-16: when moving scripts to `bin`, rewrite the #!python
        # shebang to be something else, for inspiration look at the hermetic
        # toolchain wrappers

    # Ensure that there is no data dir left
    rctx.delete(data_dir)
def merge_trees(src, dest):
    """Plan the renames needed to merge the `src` tree into `dest`.

    Nothing is moved here; the function only computes which paths have to be
    renamed. The trees are walked one directory level at a time:

    * a source path whose destination does not exist yet is scheduled to be
      renamed as a whole,
    * if both the source and the existing destination are directories, the
      walk descends into the children of the source,
    * anything else (the destination exists and either side is not a
      directory) is recorded as a collision.

    Fails if there are collisions, or if the walk is still not finished after
    10000 levels of directory depth.

    Args:
        src: {type}`path` a src path to rename.
        dest: {type}`path` a dest path to rename to.

    Returns:
        A list of tuples for src and destination paths.
    """
    ret = []
    remaining = [(src, dest)]
    collisions = []

    # Starlark has neither recursion nor `while` loops, hence the bounded
    # `for` loop in which every iteration handles one directory level.
    for _ in range(10000):
        if collisions or not remaining:
            break

        next_level = []
        for (s, d) in remaining:
            if not d.exists:
                ret.append((s, d))
                continue

            if not s.is_dir or not d.is_dir:
                collisions.append(s)
                continue

            for file_or_dir in s.readdir():
                next_level.append((file_or_dir, d.get_child(file_or_dir.basename)))

        remaining = next_level

    # Collisions have to be reported first: the loop stops as soon as one is
    # found, which may leave unprocessed entries in `remaining` that would
    # otherwise be misreported as exceeding the maximum depth.
    if collisions:
        fail("Detected collisions between {} and {}: {}".format(src, dest, collisions))

    if remaining:
        fail("Exceeded maximum directory depth of 10000 during tree merge.")

    return ret