| """Code for constructing venvs.""" |
| |
| load("@bazel_skylib//lib:paths.bzl", "paths") |
| load( |
| ":common.bzl", |
| "is_file", |
| "relative_path", |
| "runfiles_root_path", |
| ) |
| load( |
| ":py_info.bzl", |
| "PyInfo", |
| "VenvSymlinkEntry", |
| "VenvSymlinkKind", |
| ) |
| |
def create_venv_app_files(ctx, deps, venv_dir_map):
    """Creates the tree of app-specific files for a venv for a binary.

    App specific files are the files that come from dependencies.

    Args:
        ctx: {type}`ctx` current ctx.
        deps: {type}`list[Target]` the targets whose venv information
            to put into the returned venv files. Targets that don't provide
            `PyInfo` are ignored.
        venv_dir_map: mapping of VenvSymlinkKind constants to the
            venv path. This tells the directory name of
            platform/configuration-dependent directories. The values are
            paths within the current ctx's venv (e.g. `_foo.venv/bin`).

    Returns:
        {type}`struct` with the following attributes:
        * {type}`list[File]` `venv_files` additional files created for
          the venv.
        * {type}`dict[str, File]` `runfiles_symlinks` map intended for
          the `runfiles.symlinks` argument. A map of main-repo
          relative paths to File.
    """

    # All VenvSymlinkEntry objects contributed by the dependencies.
    entries = depset(
        transitive = [
            dep[PyInfo].venv_symlinks
            for dep in deps
            if PyInfo in dep
        ],
    ).to_list()

    # Maps kind -> venv-relative path -> what the path should point to
    # (a File, or a runfiles-root path string).
    link_map = build_link_map(ctx, entries)
    venv_files = []
    runfiles_symlinks = {}

    for kind, kind_map in link_map.items():
        base = venv_dir_map[kind]
        for venv_path, link_to in kind_map.items():
            bin_venv_path = paths.join(base, venv_path)
            if is_file(link_to):
                # Use paths.join instead of string formatting: for a target
                # in the root package `ctx.label.package` is empty, and
                # "{}/{}" would produce a key with a leading slash.
                symlink_from = paths.join(ctx.label.package, bin_venv_path)
                runfiles_symlinks[symlink_from] = link_to
            else:
                venv_link = ctx.actions.declare_symlink(bin_venv_path)
                venv_link_rf_path = runfiles_root_path(ctx, venv_link.short_path)
                rel_path = relative_path(
                    # dirname is necessary because a relative symlink is relative to
                    # the directory the symlink resides within.
                    from_ = paths.dirname(venv_link_rf_path),
                    to = link_to,
                )
                ctx.actions.symlink(output = venv_link, target_path = rel_path)
                venv_files.append(venv_link)

    return struct(
        venv_files = venv_files,
        runfiles_symlinks = runfiles_symlinks,
    )
| |
# Visible for testing
def build_link_map(ctx, entries, return_conflicts = False):
    """Compute the mapping of venv paths to their backing objects.

    When several versions of the same package are present, only entries of
    the version seen first are kept. Entries without a package name are
    always kept.

    Args:
        ctx: {type}`ctx` current ctx.
        entries: {type}`list[VenvSymlinkEntry]` the entries that describe the
            venv-relative paths to create and what they link to.
        return_conflicts: {type}`bool`. Only present for testing. If True,
            also return a list of the groups that had overlapping paths and had
            to be resolved and merged.

    Returns:
        {type}`dict[str, dict[str, str|File]]` Mappings of venv paths to their
        backing files. The first key is a `VenvSymlinkKind` value.
        The inner dict keys are venv paths relative to the kind's directory. The
        inner dict values are strings or Files to link to.

        If `return_conflicts` is True, a tuple of that mapping and the list
        of conflicting groups is returned instead.
    """

    selected_versions = {}  # dict[str package, str version]
    kind_to_entries = {}  # dict[str kind, list[VenvSymlinkEntry]]

    # Bucket entries by kind, dropping entries that belong to a version of a
    # package other than the one selected (the first one encountered).
    for entry in entries:
        bucket = kind_to_entries.setdefault(entry.kind, [])
        if entry.package:
            chosen_version = selected_versions.setdefault(entry.package, entry.version)
            if entry.version != chosen_version:
                # Not the selected version; ignore it.
                continue
        bucket.append(entry)

    # Final paths to keep, grouped by kind.
    link_map = {}  # dict[str kind, dict[str path, str|File]]
    conflicts = [] if return_conflicts else None
    for kind, kind_entries in kind_to_entries.items():
        kind_links = {}  # dict[str kind-relative path, str|File link_to]

        for group in _group_venv_path_entries(kind_entries):
            if len(group) != 1:
                # Several entries overlap under a common prefix: merge them
                # on a per-file basis.
                if return_conflicts:
                    conflicts.append(group)
                _merge_venv_path_group(ctx, group, kind_links)
                continue

            # A lone entry in its group can be linked as-is, preferring the
            # File over the path when the entry has one.
            sole = group[0]
            kind_links[sole.venv_path] = sole.link_to_file or sole.link_to_path

        link_map[kind] = kind_links

    if return_conflicts:
        return link_map, conflicts
    return link_map
| |
def _group_venv_path_entries(entries):
    """Group entries by VenvSymlinkEntry.venv_path overlap.

    Entries are bucketed by the top-most venv path that contains them: an
    entry whose venv path lies underneath the first entry of the current
    group joins that group; otherwise it starts a new group.

    Args:
        entries: {type}`list[VenvSymlinkEntry]` the entries to group.

    Returns:
        {type}`list[list[VenvSymlinkEntry]]` The inner list is the entries under
        a common venv path. The inner list is ordered from shortest to longest
        path.
    """

    # Order top-down so a directory always precedes what is beneath it.
    # Comparing path components (rather than raw strings) makes
    # `foo foo-bar foo/bar` sort as `foo foo/bar foo-bar`.
    ordered = sorted(entries, key = lambda e: tuple(e.venv_path.split("/")))

    groups = []

    # Slash-terminated venv path of the first entry of the newest group.
    # The trailing slash prevents `foo` from matching `foo-bar`.
    group_root = None
    for entry in ordered:
        anchored = entry.venv_path + "/"
        if group_root != None and anchored.startswith(group_root):
            groups[-1].append(entry)
        else:
            group_root = anchored
            groups.append([entry])

    return groups
| |
def _merge_venv_path_group(ctx, group, keep_map):
    """Merges a group of overlapping prefixes.

    Every file of every entry is mapped to its own venv path, so the
    overlapping entries end up linked file-by-file instead of by directory.

    Args:
        ctx: {type}`ctx` current ctx.
        group: {type}`list[VenvSymlinkEntry]` a group of entries with overlapping
            `venv_path` prefixes, ordered from shortest to longest path.
        keep_map: {type}`dict[str, str|File]` files kept after merging are
            populated into this map.
    """

    # TODO: Compute the minimum number of entries to create. This can't avoid
    # flattening the files depset, but can lower the number of materialized
    # files significantly. Usually overlaps are limited to a small number
    # of directories. Note that, when doing so, shared libraries need to
    # be symlinked directly, not the directory containing them, due to
    # dynamic linker symlink resolution semantics on Linux.
    for entry in group:
        prefix = entry.venv_path

        # Anchor with a slash so that `.../foo` does not falsely match
        # files under a sibling such as `.../foo-bar/`.
        anchored_link_to = entry.link_to_path + "/"
        for file in entry.files.to_list():
            rf_root_path = runfiles_root_path(ctx, file.short_path)
            if rf_root_path == entry.link_to_path:
                # The entry links to this very file (e.g. a directly linked
                # shared library), so the file lives at the entry's venv
                # path itself; there is no sub-path to append.
                venv_path = prefix
            elif rf_root_path.startswith(anchored_link_to):
                # Compute the file-specific venv path. i.e. the relative
                # path of the file under entry.link_to_path, joined with
                # entry.venv_path
                venv_path = "{}/{}".format(
                    prefix,
                    rf_root_path.removeprefix(anchored_link_to),
                )
            else:
                # This generally shouldn't occur in practice, but just
                # in case, skip them, for lack of a better option.
                continue

            # For lack of a better option, first added wins. We happen to
            # go in top-down prefix order, so the highest level namespace
            # package typically wins.
            if venv_path not in keep_map:
                keep_map[venv_path] = file
| |
def get_venv_symlinks(ctx, files, package, version_str, site_packages_root):
    """Compute the VenvSymlinkEntry objects for a library.

    The number of symlinks is kept small by linking the highest-level
    directory possible. Shared libraries are the exception: each one is
    linked as an individual file, and none of its ancestor directories are
    linked as directories.

    Args:
        ctx: {type}`ctx` the current ctx.
        files: {type}`list[File]` the underlying files that are under
            `site_packages_root` and intended to be part of the venv
            contents. Files outside of `site_packages_root` are ignored.
        package: {type}`str` the Python distribution name.
        version_str: {type}`str` the distribution's version.
        site_packages_root: {type}`str` prefix under which files are
            considered to be part of the installed files. Must neither
            start nor end with a slash.

    Returns:
        {type}`list[VenvSymlinkEntry]` the entries that describe how
        to map the files into a venv.
    """
    if site_packages_root.endswith("/"):
        fail("The `site_packages_root` value cannot end in " +
             "slash, got {}".format(site_packages_root))
    if site_packages_root.startswith("/"):
        fail("The `site_packages_root` cannot start with " +
             "slash, got {}".format(site_packages_root))

    # Slash-terminated so that prefix matching only succeeds on a whole
    # path component.
    sp_prefix = site_packages_root + "/"

    # Directories (venv paths) that must not be symlinked as a directory.
    # Dict acting as set.
    unlinkable_dirs = {}

    # dict[str venv_path, VenvSymlinkEntry]
    # The venv path is relative to site_packages_root.
    result = {}

    # list[tuple[File, str venv_path]] of the non-shared-library files.
    pending = []

    # Phase 1: link shared libraries directly and record their ancestors.
    #
    # If the directory of a shared library is symlinked, then the dynamic
    # linker computes the wrong search path. So the library itself must be
    # linked, which in turn means none of its parent directories can be
    # linked as a directory.
    for src in sorted(files, key = lambda f: f.short_path):
        rf_root_path = runfiles_root_path(ctx, src.short_path)

        # Drop the leading repo-name component.
        repo_rel_path = rf_root_path.partition("/")[2]
        if not repo_rel_path.startswith(sp_prefix):
            # Not under the site-packages root; not part of the venv.
            continue
        venv_path = repo_rel_path.removeprefix(sp_prefix)

        if not _is_linker_loaded_library(paths.basename(venv_path)):
            pending.append((src, venv_path))
            continue

        result[venv_path] = VenvSymlinkEntry(
            kind = VenvSymlinkKind.LIB,
            link_to_path = rf_root_path,
            link_to_file = src,
            package = package,
            version = version_str,
            files = depset([src]),
            venv_path = venv_path,
        )

        # Walk up the ancestors. Starlark has no while loop, so iterate
        # a bounded number of times that is enough to reach the top.
        ancestor = paths.dirname(venv_path)
        for _ in range(len(venv_path) + 1):
            if not ancestor or ancestor in unlinkable_dirs:
                # Reached the top, or this chain was already recorded.
                break
            unlinkable_dirs[ancestor] = True
            ancestor = paths.dirname(ancestor)

    # Phase 2: assign every remaining file to the highest-level directory
    # that can be linked, or to itself when no directory qualifies.

    # dict[str venv_path, list[File]]
    groups = {}

    for src, file_venv_path in pending:
        # Files at the root, or in a directory that cannot be linked, are
        # linked directly.
        link_at = file_venv_path
        parent = paths.dirname(file_venv_path)
        if parent and parent not in unlinkable_dirs:
            # The containing directory can be linked; climb as far up as
            # the ancestors remain linkable.
            link_at = parent
            ancestor = paths.dirname(parent)
            for _ in range(len(parent) + 1):
                if not ancestor or ancestor in unlinkable_dirs:
                    break
                link_at = ancestor
                ancestor = paths.dirname(ancestor)

        groups.setdefault(link_at, []).append(src)

    # Phase 3: one VenvSymlinkEntry per group.
    for group_venv_path, group_files in groups.items():
        repo = _get_label_runfiles_repo(ctx, group_files[0].owner)
        result[group_venv_path] = VenvSymlinkEntry(
            kind = VenvSymlinkKind.LIB,
            link_to_path = "{}/{}{}".format(repo, sp_prefix, group_venv_path),
            link_to_file = None,
            package = package,
            version = version_str,
            venv_path = group_venv_path,
            files = depset(group_files),
        )

    return result.values()
| |
def _is_linker_loaded_library(filename):
    """Tells if a filename is one that `dlopen()` or the runtime linker handles.

    The intent is to match regular C libraries while not matching Python
    C extension modules. The decision is made purely from the name:

    * C libraries: `lib*.so` (linux), `lib*.so.*` (linux), `lib*.dylib` (mac),
      `*.dll` (windows)
    * Python extensions: `.so` (linux, mac), `.pyd` (windows)

    Args:
        filename: {type}`str` the basename of the file.

    Returns:
        {type}`bool` True if the name looks like a linker-loaded library.
    """
    if filename.endswith(".dll"):
        return True
    if not filename.startswith("lib"):
        # Without the `lib` prefix, `.so` files are assumed to be extension
        # modules.
        return False
    return filename.endswith((".so", ".dylib")) or ".so." in filename
| |
def _get_label_runfiles_repo(ctx, label):
    """Gets the repo name to use in a runfiles path for a label.

    Args:
        ctx: {type}`ctx` the current ctx.
        label: {type}`Label` the label to get the repo name of.

    Returns:
        {type}`str` `label.repo_name` if it is non-empty, otherwise
        `ctx.workspace_name`.
    """

    # For files, empty repo means the main repo
    return label.repo_name or ctx.workspace_name