| """A simple whl extractor.""" |
| |
| load("@rules_python_internal//:rules_python_config.bzl", rp_config = "config") |
| load("//python/private:repo_utils.bzl", "repo_utils") |
| load(":whl_metadata.bzl", "find_whl_metadata") |
| |
def whl_extract(rctx, *, whl_path, logger):
    """Unpack a wheel into the repository using Starlark only.

    The archive is extracted into a `site-packages` directory located next to
    the wheel and an `INSTALLER` file is written into the wheel's `.dist-info`
    directory. If the wheel ships a `<prefix>.data` directory, its known
    sub-directories are merged into their install locations under the
    directory containing the wheel, after which the `.data` directory is
    deleted.

    Args:
        rctx: the repository ctx.
        whl_path: the whl path to extract.
        logger: The logger to use
    """
    repo_root_dir = whl_path.dirname
    site_packages_dir = repo_root_dir.get_child("site-packages")
    repo_utils.extract(
        rctx,
        archive = whl_path,
        output = site_packages_dir,
        supports_whl_extraction = rp_config.supports_whl_extraction,
    )

    # The parent of the metadata file is the <prefix>.dist-info directory.
    dist_info_dir = find_whl_metadata(
        install_dir = site_packages_dir,
        logger = logger,
    ).dirname
    rctx.file(
        dist_info_dir.get_child("INSTALLER"),
        "https://github.com/bazel-contrib/rules_python#pipstar",
    )

    # The <prefix>.data directory is a sibling of <prefix>.dist-info.
    wheel_prefix = dist_info_dir.basename[:-len(".dist-info")]
    data_dir = dist_info_dir.dirname.get_child(wheel_prefix + ".data")
    if not data_dir.exists:
        # Nothing needs relocating for wheels without a data directory.
        return

    # Maps each `.data` sub-directory to its destination below the repo root.
    install_layout = {
        # https://docs.python.org/3/library/sysconfig.html#posix-prefix
        # We are taking this from the legacy whl installer config
        "data": "data",
        "headers": "include",
        # `platlib` and `purelib` share a destination, so their directories
        # may collide and have to be merged rather than moved wholesale. The
        # merge has to be reasonably efficient because some packages (e.g. the
        # libclang wheel) put their contents explicitly under `platlib` or
        # `purelib` instead of the top level.
        "platlib": "site-packages",
        "purelib": "site-packages",
        "scripts": "bin",
    }
    for data_subdir, install_subdir in install_layout.items():
        subtree = data_dir.get_child(data_subdir)
        if subtree.exists:
            target_dir = repo_root_dir.get_child(install_subdir)
            for (from_path, to_path) in merge_trees(subtree, target_dir):
                logger.debug(lambda: "Renaming: {} -> {}".format(from_path, to_path))
                rctx.rename(from_path, to_path)

        # TODO @aignas 2025-12-16: when moving scripts to `bin`, rewrite the #!python
        # shebang to be something else, for inspiration look at the hermetic
        # toolchain wrappers

    # Ensure that there is no data dir left
    rctx.delete(data_dir)
| |
def merge_trees(src, dest):
    """Plan the renames needed to merge the `src` tree into `dest`.

    Nothing is moved here: both trees are walked one directory level per pass
    and the required renames are returned for the caller to perform. A source
    path whose destination does not exist yet is scheduled as a single rename.
    When source and destination both exist and are both directories, the
    children of the source are examined on the next pass.

    Fails if a source path and an existing destination collide (at least one
    of the two is not a directory) or if the walk needs more than 10000
    passes.

    Args:
        src: {type}`path` a src path to rename.
        dest: {type}`path` a dest path to rename to.

    Returns:
        A list of tuples for src and destination paths.
    """
    ret = []
    remaining = [(src, dest)]
    collisions = []

    # Starlark has neither recursion nor `while` loops, so the breadth-first
    # walk is expressed as a bounded `for` loop with one pass per tree level.
    for _ in range(10000):
        if collisions or not remaining:
            break

        next_level = []
        for (s, d) in remaining:
            if not d.exists:
                # Destination is free: the path can be moved as a whole.
                ret.append((s, d))
                continue

            if not s.is_dir or not d.is_dir:
                # Destination exists and the two cannot be merged as directories.
                collisions.append(s)
                continue

            for file_or_dir in s.readdir():
                next_level.append((file_or_dir, d.get_child(file_or_dir.basename)))

        remaining = next_level

    # Collisions must be reported first: the loop stops as soon as one is
    # found, which may leave entries queued in `remaining` that would
    # otherwise be misreported as an exceeded directory depth.
    if collisions:
        fail("Detected collisions between {} and {}: {}".format(src, dest, collisions))

    if remaining:
        fail("Exceeded maximum directory depth of 10000 during tree merge.")

    return ret