fix: venv site packages with pkgutil packages (#3268)
Currently, an error occurs if one packages files are intended to go into
a
sub-directory of another package's directory. This can happen when
pkgutil-style
namespace packages are used, which results in multiple distributions
wanting
to install the same files (pkgutil `__init__.py` files) into the same
top-level
directories. This eventually results in a Bazel error because Bazel
detects that
the one output is the prefix of another.
To fix, detect when distributions overlap in their paths and merge their
files
manually. Internally, entries are sorted from shorted venv path to
longest,
however, that's just an implementation detail.
Along the way, give agents better advice for bzl_library targets.
Fixes https://github.com/bazel-contrib/rules_python/issues/3204
---------
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
diff --git a/AGENTS.md b/AGENTS.md
index e21b15b..3d5219c 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -21,10 +21,16 @@
### bzl_library targets for bzl source files
-* `.bzl` files should have `bzl_library` defined for them.
+* A `bzl_library` target should be defined for every `.bzl` file outside
+ of the `tests/` directory.
* They should have a single `srcs` file and be named after the file with `_bzl`
appended.
-* Their deps should be based on the `load()` statements in the source file.
+* Their deps should be based on the `load()` statements in the source file
+ and refer to the `bzl_library` target containing the loaded file.
+ * For files in rules_python: replace `.bzl` with `_bzl`.
+ e.g. given `load("//foo:bar.bzl", ...)`, the target is `//foo:bar_bzl`.
+ * For files outside rules_python: remove the `.bzl` suffix. e.g. given
+ `load("@foo//foo:bar.bzl", ...)`, the target is `@foo//foo:bar`.
* `bzl_library()` targets should be kept in alphabetical order by name.
Example:
diff --git a/CHANGELOG.md b/CHANGELOG.md
index abd89e5..382096f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -86,6 +86,9 @@
{attr}`pip.defaults.whl_platform_tags` attribute to configure that. If
`musllinux_*_x86_64` is specified, we will chose the lowest available
wheel version. Fixes [#3250](https://github.com/bazel-contrib/rules_python/issues/3250).
+* (venvs) {obj}`--vens_site_packages=yes` no longer errors when packages with
+ overlapping files or directories are used together.
+ ([#3204](https://github.com/bazel-contrib/rules_python/issues/3204)).
{#v0-0-0-added}
### Added
diff --git a/python/private/BUILD.bazel b/python/private/BUILD.bazel
index 916c14f..5e2043c 100644
--- a/python/private/BUILD.bazel
+++ b/python/private/BUILD.bazel
@@ -419,6 +419,7 @@
":rules_cc_srcs_bzl",
":toolchain_types_bzl",
":transition_labels_bzl",
+ ":venv_runfiles_bzl",
"@bazel_skylib//lib:dicts",
"@bazel_skylib//lib:paths",
"@bazel_skylib//lib:structs",
@@ -468,6 +469,7 @@
":py_internal_bzl",
":rule_builders_bzl",
":toolchain_types_bzl",
+ ":venv_runfiles_bzl",
":version_bzl",
"@bazel_skylib//lib:dicts",
"@bazel_skylib//rules:common_settings",
@@ -727,6 +729,16 @@
],
)
+bzl_library(
+ name = "venv_runfiles_bzl",
+ srcs = ["venv_runfiles.bzl"],
+ deps = [
+ ":common_bzl",
+ ":py_info.bzl",
+ "@bazel_skylib//lib:paths",
+ ],
+)
+
# Needed to define bzl_library targets for docgen. (We don't define the
# bzl_library target here because it'd give our users a transitive dependency
# on Skylib.)
diff --git a/python/private/common.bzl b/python/private/common.bzl
index 9fc3668..33b175c 100644
--- a/python/private/common.bzl
+++ b/python/private/common.bzl
@@ -495,6 +495,9 @@
def is_bool(v):
return type(v) == _BOOL_TYPE
+def is_file(v):
+ return type(v) == "File"
+
def target_platform_has_any_constraint(ctx, constraints):
"""Check if target platform has any of a list of constraints.
@@ -511,6 +514,37 @@
return True
return False
+def relative_path(from_, to):
+ """Compute a relative path from one path to another.
+
+ Args:
+ from_: {type}`str` the starting directory. Note that it should be
+ a directory because relative-symlinks are relative to the
+ directory the symlink resides in.
+ to: {type}`str` the path that `from_` wants to point to
+
+ Returns:
+ {type}`str` a relative path
+ """
+ from_parts = from_.split("/")
+ to_parts = to.split("/")
+
+ # Strip common leading parts from both paths
+ n = min(len(from_parts), len(to_parts))
+ for _ in range(n):
+ if from_parts[0] == to_parts[0]:
+ from_parts.pop(0)
+ to_parts.pop(0)
+ else:
+ break
+
+ # Impossible to compute a relative path without knowing what ".." is
+ if from_parts and from_parts[0] == "..":
+ fail("cannot compute relative path from '%s' to '%s'", from_, to)
+
+ parts = ([".."] * len(from_parts)) + to_parts
+ return paths.join(*parts)
+
def runfiles_root_path(ctx, short_path):
"""Compute a runfiles-root relative path from `File.short_path`
diff --git a/python/private/py_executable.bzl b/python/private/py_executable.bzl
index 59800da..bef5934 100644
--- a/python/private/py_executable.bzl
+++ b/python/private/py_executable.bzl
@@ -48,6 +48,7 @@
"filter_to_py_srcs",
"get_imports",
"is_bool",
+ "relative_path",
"runfiles_root_path",
"target_platform_has_any_constraint",
)
@@ -63,6 +64,7 @@
load(":rule_builders.bzl", "ruleb")
load(":toolchain_types.bzl", "EXEC_TOOLS_TOOLCHAIN_TYPE", "TARGET_TOOLCHAIN_TYPE", TOOLCHAIN_TYPE = "TARGET_TOOLCHAIN_TYPE")
load(":transition_labels.bzl", "TRANSITION_LABELS")
+load(":venv_runfiles.bzl", "create_venv_app_files")
_py_builtins = py_internal
_EXTERNAL_PATH_PREFIX = "external"
@@ -499,37 +501,6 @@
)
return output
-def relative_path(from_, to):
- """Compute a relative path from one path to another.
-
- Args:
- from_: {type}`str` the starting directory. Note that it should be
- a directory because relative-symlinks are relative to the
- directory the symlink resides in.
- to: {type}`str` the path that `from_` wants to point to
-
- Returns:
- {type}`str` a relative path
- """
- from_parts = from_.split("/")
- to_parts = to.split("/")
-
- # Strip common leading parts from both paths
- n = min(len(from_parts), len(to_parts))
- for _ in range(n):
- if from_parts[0] == to_parts[0]:
- from_parts.pop(0)
- to_parts.pop(0)
- else:
- break
-
- # Impossible to compute a relative path without knowing what ".." is
- if from_parts and from_parts[0] == "..":
- fail("cannot compute relative path from '%s' to '%s'", from_, to)
-
- parts = ([".."] * len(from_parts)) + to_parts
- return paths.join(*parts)
-
# Create a venv the executable can use.
# For venv details and the venv startup process, see:
# * https://docs.python.org/3/library/venv.html
@@ -636,9 +607,9 @@
VenvSymlinkKind.BIN: bin_dir,
VenvSymlinkKind.LIB: site_packages,
}
- venv_symlinks = _create_venv_symlinks(ctx, venv_dir_map)
+ venv_app_files = create_venv_app_files(ctx, ctx.attr.deps, venv_dir_map)
- files_without_interpreter = [pth, site_init] + venv_symlinks
+ files_without_interpreter = [pth, site_init] + venv_app_files
if pyvenv_cfg:
files_without_interpreter.append(pyvenv_cfg)
@@ -663,94 +634,6 @@
),
)
-def _create_venv_symlinks(ctx, venv_dir_map):
- """Creates symlinks within the venv.
-
- Args:
- ctx: current rule ctx
- venv_dir_map: mapping of VenvSymlinkKind constants to the
- venv path.
-
- Returns:
- {type}`list[File]` list of the File symlink objects created.
- """
-
- # maps venv-relative path to the runfiles path it should point to
- entries = depset(
- transitive = [
- dep[PyInfo].venv_symlinks
- for dep in ctx.attr.deps
- if PyInfo in dep
- ],
- ).to_list()
-
- link_map = _build_link_map(entries)
- venv_files = []
- for kind, kind_map in link_map.items():
- base = venv_dir_map[kind]
- for venv_path, link_to in kind_map.items():
- venv_link = ctx.actions.declare_symlink(paths.join(base, venv_path))
- venv_link_rf_path = runfiles_root_path(ctx, venv_link.short_path)
- rel_path = relative_path(
- # dirname is necessary because a relative symlink is relative to
- # the directory the symlink resides within.
- from_ = paths.dirname(venv_link_rf_path),
- to = link_to,
- )
- ctx.actions.symlink(output = venv_link, target_path = rel_path)
- venv_files.append(venv_link)
-
- return venv_files
-
-def _build_link_map(entries):
- # dict[str package, dict[str kind, dict[str rel_path, str link_to_path]]]
- pkg_link_map = {}
-
- # dict[str package, str version]
- version_by_pkg = {}
-
- for entry in entries:
- link_map = pkg_link_map.setdefault(entry.package, {})
- kind_map = link_map.setdefault(entry.kind, {})
-
- if version_by_pkg.setdefault(entry.package, entry.version) != entry.version:
- # We ignore duplicates by design.
- continue
- elif entry.venv_path in kind_map:
- # We ignore duplicates by design.
- continue
- else:
- kind_map[entry.venv_path] = entry.link_to_path
-
- # An empty link_to value means to not create the site package symlink. Because of the
- # ordering, this allows binaries to remove entries by having an earlier dependency produce
- # empty link_to values.
- for link_map in pkg_link_map.values():
- for kind, kind_map in link_map.items():
- for dir_path, link_to in kind_map.items():
- if not link_to:
- kind_map.pop(dir_path)
-
- # dict[str kind, dict[str rel_path, str link_to_path]]
- keep_link_map = {}
-
- # Remove entries that would be a child path of a created symlink.
- # Earlier entries have precedence to match how exact matches are handled.
- for link_map in pkg_link_map.values():
- for kind, kind_map in link_map.items():
- keep_kind_map = keep_link_map.setdefault(kind, {})
- for _ in range(len(kind_map)):
- if not kind_map:
- break
- dirname, value = kind_map.popitem()
- keep_kind_map[dirname] = value
- prefix = dirname + "/" # Add slash to prevent /X matching /XY
- for maybe_suffix in kind_map.keys():
- maybe_suffix += "/" # Add slash to prevent /X matching /XY
- if maybe_suffix.startswith(prefix) or prefix.startswith(maybe_suffix):
- kind_map.pop(maybe_suffix)
- return keep_link_map
-
def _map_each_identity(v):
return v
diff --git a/python/private/py_info.bzl b/python/private/py_info.bzl
index 31df5cf..f96dec5 100644
--- a/python/private/py_info.bzl
+++ b/python/private/py_info.bzl
@@ -56,6 +56,14 @@
An entry in `PyInfo.venv_symlinks`
""",
fields = {
+ "files": """
+:type: depset[File]
+
+Files under `link_to_path`.
+
+This is only used when multiple targets have overlapping `venv_path` paths. e.g.
+if one adds files to `venv_path=a/` and another adds files to `venv_path=a/b/`.
+""",
"kind": """
:type: str
diff --git a/python/private/py_library.bzl b/python/private/py_library.bzl
index fc8e583..b2a9fdd 100644
--- a/python/private/py_library.bzl
+++ b/python/private/py_library.bzl
@@ -28,7 +28,6 @@
load(":builders.bzl", "builders")
load(
":common.bzl",
- "PYTHON_FILE_EXTENSIONS",
"collect_cc_info",
"collect_imports",
"collect_runfiles",
@@ -38,14 +37,13 @@
"create_py_info",
"filter_to_py_srcs",
"get_imports",
- "runfiles_root_path",
)
load(":common_labels.bzl", "labels")
load(":flags.bzl", "AddSrcsToRunfilesFlag", "PrecompileFlag", "VenvsSitePackages")
load(":normalize_name.bzl", "normalize_name")
load(":precompile.bzl", "maybe_precompile")
load(":py_cc_link_params_info.bzl", "PyCcLinkParamsInfo")
-load(":py_info.bzl", "PyInfo", "VenvSymlinkEntry", "VenvSymlinkKind")
+load(":py_info.bzl", "PyInfo")
load(":reexports.bzl", "BuiltinPyInfo")
load(":rule_builders.bzl", "ruleb")
load(
@@ -53,6 +51,7 @@
"EXEC_TOOLS_TOOLCHAIN_TYPE",
TOOLCHAIN_TYPE = "TARGET_TOOLCHAIN_TYPE",
)
+load(":venv_runfiles.bzl", "get_venv_symlinks")
load(":version.bzl", "version")
LIBRARY_ATTRS = dicts.add(
@@ -235,118 +234,28 @@
venv_symlinks = []
if VenvsSitePackages.is_enabled(ctx):
package, version_str = _get_package_and_version(ctx)
- venv_symlinks = _get_venv_symlinks(ctx, package, version_str)
+
+ # NOTE: Already a list, but buildifier thinks its a depset and
+ # adds to_list() calls later.
+ imports = list(ctx.attr.imports)
+ if len(imports) == 0:
+ fail("When venvs_site_packages is enabled, exactly one `imports` " +
+ "value must be specified, got 0")
+ elif len(imports) > 1:
+ fail("When venvs_site_packages is enabled, exactly one `imports` " +
+ "value must be specified, got {}".format(imports))
+
+ venv_symlinks = get_venv_symlinks(
+ ctx,
+ ctx.files.srcs + ctx.files.data + ctx.files.pyi_srcs,
+ package,
+ version_str,
+ site_packages_root = imports[0],
+ )
else:
imports = collect_imports(ctx, semantics)
return imports, venv_symlinks
-def _get_venv_symlinks(ctx, package, version_str):
- imports = ctx.attr.imports
- if len(imports) == 0:
- fail("When venvs_site_packages is enabled, exactly one `imports` " +
- "value must be specified, got 0")
- elif len(imports) > 1:
- fail("When venvs_site_packages is enabled, exactly one `imports` " +
- "value must be specified, got {}".format(imports))
- else:
- site_packages_root = imports[0]
-
- if site_packages_root.endswith("/"):
- fail("The site packages root value from `imports` cannot end in " +
- "slash, got {}".format(site_packages_root))
- if site_packages_root.startswith("/"):
- fail("The site packages root value from `imports` cannot start with " +
- "slash, got {}".format(site_packages_root))
-
- # Append slash to prevent incorrectly prefix-string matches
- site_packages_root += "/"
-
- # We have to build a list of (runfiles path, site-packages path) pairs of the files to
- # create in the consuming binary's venv site-packages directory. To minimize the number of
- # files to create, we just return the paths to the directories containing the code of
- # interest.
- #
- # However, namespace packages complicate matters: multiple distributions install in the
- # same directory in site-packages. This works out because they don't overlap in their
- # files. Typically, they install to different directories within the namespace package
- # directory. We also need to ensure that we can handle a case where the main package (e.g.
- # airflow) has directories only containing data files and then namespace packages coming
- # along and being next to it.
- #
- # Lastly we have to assume python modules just being `.py` files (e.g. typing-extensions)
- # is just a single Python file.
-
- dir_symlinks = {} # dirname -> runfile path
- venv_symlinks = []
- for src in ctx.files.srcs + ctx.files.data + ctx.files.pyi_srcs:
- path = _repo_relative_short_path(src.short_path)
- if not path.startswith(site_packages_root):
- continue
- path = path.removeprefix(site_packages_root)
- dir_name, _, filename = path.rpartition("/")
-
- if dir_name in dir_symlinks:
- # we already have this dir, this allows us to short-circuit since most of the
- # ctx.files.data might share the same directories as ctx.files.srcs
- continue
-
- runfiles_dir_name, _, _ = runfiles_root_path(ctx, src.short_path).partition("/")
- if dir_name:
- # This can be either:
- # * a directory with libs (e.g. numpy.libs, created by auditwheel)
- # * a directory with `__init__.py` file that potentially also needs to be
- # symlinked.
- # * `.dist-info` directory
- #
- # This could be also regular files, that just need to be symlinked, so we will
- # add the directory here.
- dir_symlinks[dir_name] = runfiles_dir_name
- elif src.extension in PYTHON_FILE_EXTENSIONS:
- # This would be files that do not have directories and we just need to add
- # direct symlinks to them as is, we only allow Python files in here
- entry = VenvSymlinkEntry(
- kind = VenvSymlinkKind.LIB,
- link_to_path = paths.join(runfiles_dir_name, site_packages_root, filename),
- package = package,
- version = version_str,
- venv_path = filename,
- )
- venv_symlinks.append(entry)
-
- # Sort so that we encounter `foo` before `foo/bar`. This ensures we
- # see the top-most explicit package first.
- dirnames = sorted(dir_symlinks.keys())
- first_level_explicit_packages = []
- for d in dirnames:
- is_sub_package = False
- for existing in first_level_explicit_packages:
- # Suffix with / to prevent foo matching foobar
- if d.startswith(existing + "/"):
- is_sub_package = True
- break
- if not is_sub_package:
- first_level_explicit_packages.append(d)
-
- for dirname in first_level_explicit_packages:
- prefix = dir_symlinks[dirname]
- entry = VenvSymlinkEntry(
- kind = VenvSymlinkKind.LIB,
- link_to_path = paths.join(prefix, site_packages_root, dirname),
- package = package,
- version = version_str,
- venv_path = dirname,
- )
- venv_symlinks.append(entry)
-
- return venv_symlinks
-
-def _repo_relative_short_path(short_path):
- # Convert `../+pypi+foo/some/file.py` to `some/file.py`
- if short_path.startswith("../"):
- return short_path[3:].partition("/")[2]
- else:
- return short_path
-
_MaybeBuiltinPyInfo = [BuiltinPyInfo] if BuiltinPyInfo != None else []
# NOTE: Exported publicaly
diff --git a/python/private/venv_runfiles.bzl b/python/private/venv_runfiles.bzl
new file mode 100644
index 0000000..291920b
--- /dev/null
+++ b/python/private/venv_runfiles.bzl
@@ -0,0 +1,318 @@
+"""Code for constructing venvs."""
+
+load("@bazel_skylib//lib:paths.bzl", "paths")
+load(
+ ":common.bzl",
+ "PYTHON_FILE_EXTENSIONS",
+ "is_file",
+ "relative_path",
+ "runfiles_root_path",
+)
+load(
+ ":py_info.bzl",
+ "PyInfo",
+ "VenvSymlinkEntry",
+ "VenvSymlinkKind",
+)
+
+def create_venv_app_files(ctx, deps, venv_dir_map):
+ """Creates the tree of app-specific files for a venv for a binary.
+
+ App specific files are the files that come from dependencies.
+
+ Args:
+ ctx: {type}`ctx` current ctx.
+ deps: {type}`list[Target]` the targets whose venv information
+ to put into the returned venv files.
+ venv_dir_map: mapping of VenvSymlinkKind constants to the
+ venv path. This tells the directory name of
+ platform/configuration-dependent directories. The values are
+ paths within the current ctx's venv (e.g. `_foo.venv/bin`).
+
+ Returns:
+ {type}`list[File]` of the files that were created.
+ """
+
+ # maps venv-relative path to the runfiles path it should point to
+ entries = depset(
+ transitive = [
+ dep[PyInfo].venv_symlinks
+ for dep in deps
+ if PyInfo in dep
+ ],
+ ).to_list()
+
+ link_map = build_link_map(ctx, entries)
+ venv_files = []
+ for kind, kind_map in link_map.items():
+ base = venv_dir_map[kind]
+ for venv_path, link_to in kind_map.items():
+ bin_venv_path = paths.join(base, venv_path)
+ if is_file(link_to):
+ if link_to.is_directory:
+ venv_link = ctx.actions.declare_directory(bin_venv_path)
+ else:
+ venv_link = ctx.actions.declare_file(bin_venv_path)
+ ctx.actions.symlink(output = venv_link, target_file = link_to)
+ else:
+ venv_link = ctx.actions.declare_symlink(bin_venv_path)
+ venv_link_rf_path = runfiles_root_path(ctx, venv_link.short_path)
+ rel_path = relative_path(
+ # dirname is necessary because a relative symlink is relative to
+ # the directory the symlink resides within.
+ from_ = paths.dirname(venv_link_rf_path),
+ to = link_to,
+ )
+ ctx.actions.symlink(output = venv_link, target_path = rel_path)
+ venv_files.append(venv_link)
+
+ return venv_files
+
+# Visible for testing
+def build_link_map(ctx, entries):
+ """Compute the mapping of venv paths to their backing objects.
+
+
+ Args:
+ ctx: {type}`ctx` current ctx.
+ entries: {type}`list[VenvSymlinkEntry]` the entries that describe the
+ venv-relative
+
+ Returns:
+ {type}`dict[str, dict[str, str|File]]` Mappings of venv paths to their
+ backing files. The first key is a `VenvSymlinkKind` value.
+ The inner dict keys are venv paths relative to the kind's diretory. The
+ inner dict values are strings or Files to link to.
+ """
+
+ version_by_pkg = {} # dict[str pkg, str version]
+ entries_by_kind = {} # dict[str kind, list[entry]]
+
+ # Group by path kind and reduce to a single package's version of entries
+ for entry in entries:
+ entries_by_kind.setdefault(entry.kind, [])
+ if not entry.package:
+ entries_by_kind[entry.kind].append(entry)
+ continue
+ if entry.package not in version_by_pkg:
+ version_by_pkg[entry.package] = entry.version
+ entries_by_kind[entry.kind].append(entry)
+ continue
+ if entry.version == version_by_pkg[entry.package]:
+ entries_by_kind[entry.kind].append(entry)
+ continue
+
+ # else: ignore it; not the selected version
+
+ # final paths to keep, grouped by kind
+ keep_link_map = {} # dict[str kind, dict[path, str|File]]
+ for kind, entries in entries_by_kind.items():
+ # dict[str kind-relative path, str|File link_to]
+ keep_kind_link_map = {}
+
+ groups = _group_venv_path_entries(entries)
+
+ for group in groups:
+ # If there's just one group, we can symlink to the directory
+ if len(group) == 1:
+ entry = group[0]
+ keep_kind_link_map[entry.venv_path] = entry.link_to_path
+ else:
+ # Merge a group of overlapping prefixes
+ _merge_venv_path_group(ctx, group, keep_kind_link_map)
+
+ keep_link_map[kind] = keep_kind_link_map
+
+ return keep_link_map
+
+def _group_venv_path_entries(entries):
+ """Group entries by VenvSymlinkEntry.venv_path overlap.
+
+ This does an initial grouping by the top-level venv path an entry wants.
+ Entries that are underneath another entry are put into the same group.
+
+ Returns:
+ {type}`list[list[VenvSymlinkEntry]]` The inner list is the entries under
+ a common venv path. The inner list is ordered from shortest to longest
+ path.
+ """
+
+ # Sort so order is top-down, ensuring grouping by short common prefix
+ entries = sorted(entries, key = lambda e: e.venv_path)
+
+ groups = []
+ current_group = None
+ current_group_prefix = None
+ for entry in entries:
+ prefix = entry.venv_path
+ anchored_prefix = prefix + "/"
+ if (current_group_prefix == None or
+ not anchored_prefix.startswith(current_group_prefix)):
+ current_group_prefix = anchored_prefix
+ current_group = [entry]
+ groups.append(current_group)
+ else:
+ current_group.append(entry)
+
+ return groups
+
+def _merge_venv_path_group(ctx, group, keep_map):
+ """Merges a group of overlapping prefixes.
+
+ Args:
+ ctx: {type}`ctx` current ctx.
+ group: {type}`list[VenvSymlinkEntry]` a group of entries with overlapping
+ `venv_path` prefixes, ordered from shortest to longest path.
+ keep_map: {type}`dict[str, str|File]` files kept after merging are
+ populated into this map.
+ """
+
+ # TODO: Compute the minimum number of entries to create. This can't avoid
+ # flattening the files depset, but can lower the number of materialized
+ # files significantly. Usually overlaps are limited to a small number
+ # of directories.
+ for entry in group:
+ prefix = entry.venv_path
+ for file in entry.files.to_list():
+ # Compute the file-specific venv path. i.e. the relative
+ # path of the file under entry.venv_path, joined with
+ # entry.venv_path
+ rf_root_path = runfiles_root_path(ctx, file.short_path)
+ if not rf_root_path.startswith(entry.link_to_path):
+ # This generally shouldn't occur in practice, but just
+ # in case, skip them, for lack of a better option.
+ continue
+ venv_path = "{}/{}".format(
+ prefix,
+ rf_root_path.removeprefix(entry.link_to_path + "/"),
+ )
+
+ # For lack of a better option, first added wins. We happen to
+ # go in top-down prefix order, so the highest level namespace
+ # package typically wins.
+ if venv_path not in keep_map:
+ keep_map[venv_path] = file
+
+def get_venv_symlinks(ctx, files, package, version_str, site_packages_root):
+ """Compute the VenvSymlinkEntry objects for a library.
+
+ Args:
+ ctx: {type}`ctx` the current ctx.
+ files: {type}`list[File]` the underlying files that are under
+ `site_packages_root` and intended to be part of the venv
+ contents.
+ package: {type}`str` the Python distribution name.
+ version_str: {type}`str` the distribution's version.
+ site_packages_root: {type}`str` prefix under which files are
+ considered to be part of the installed files.
+
+ Returns:
+ {type}`list[VenvSymlinkEntry]` the entries that describe how
+ to map the files into a venv.
+ """
+ if site_packages_root.endswith("/"):
+ fail("The `site_packages_root` value cannot end in " +
+ "slash, got {}".format(site_packages_root))
+ if site_packages_root.startswith("/"):
+ fail("The `site_packages_root` cannot start with " +
+ "slash, got {}".format(site_packages_root))
+
+ # Append slash to prevent incorrect prefix-string matches
+ site_packages_root += "/"
+
+ # We have to build a list of (runfiles path, site-packages path) pairs of the files to
+ # create in the consuming binary's venv site-packages directory. To minimize the number of
+ # files to create, we just return the paths to the directories containing the code of
+ # interest.
+ #
+ # However, namespace packages complicate matters: multiple distributions install in the
+ # same directory in site-packages. This works out because they don't overlap in their
+ # files. Typically, they install to different directories within the namespace package
+ # directory. We also need to ensure that we can handle a case where the main package (e.g.
+ # airflow) has directories only containing data files and then namespace packages coming
+ # along and being next to it.
+ #
+ # Lastly we have to assume python modules just being `.py` files (e.g. typing-extensions)
+ # is just a single Python file.
+
+ dir_symlinks = {} # dirname -> runfile path
+ venv_symlinks = []
+
+ # Sort so order is top-down
+ all_files = sorted(files, key = lambda f: f.short_path)
+
+ for src in all_files:
+ path = _repo_relative_short_path(src.short_path)
+ if not path.startswith(site_packages_root):
+ continue
+ path = path.removeprefix(site_packages_root)
+ dir_name, _, filename = path.rpartition("/")
+
+ if dir_name in dir_symlinks:
+ # we already have this dir, this allows us to short-circuit since most of the
+ # ctx.files.data might share the same directories as ctx.files.srcs
+ continue
+
+ runfiles_dir_name, _, _ = runfiles_root_path(ctx, src.short_path).partition("/")
+ if dir_name:
+ # This can be either:
+ # * a directory with libs (e.g. numpy.libs, created by auditwheel)
+ # * a directory with `__init__.py` file that potentially also needs to be
+ # symlinked.
+ # * `.dist-info` directory
+ #
+ # This could be also regular files, that just need to be symlinked, so we will
+ # add the directory here.
+ dir_symlinks[dir_name] = runfiles_dir_name
+ elif src.extension in PYTHON_FILE_EXTENSIONS:
+ # This would be files that do not have directories and we just need to add
+ # direct symlinks to them as is, we only allow Python files in here
+ entry = VenvSymlinkEntry(
+ kind = VenvSymlinkKind.LIB,
+ link_to_path = paths.join(runfiles_dir_name, site_packages_root, filename),
+ package = package,
+ version = version_str,
+ venv_path = filename,
+ files = depset([src]),
+ )
+ venv_symlinks.append(entry)
+
+ # Sort so that we encounter `foo` before `foo/bar`. This ensures we
+ # see the top-most explicit package first.
+ dirnames = sorted(dir_symlinks.keys())
+ first_level_explicit_packages = []
+ for d in dirnames:
+ is_sub_package = False
+ for existing in first_level_explicit_packages:
+ # Suffix with / to prevent foo matching foobar
+ if d.startswith(existing + "/"):
+ is_sub_package = True
+ break
+ if not is_sub_package:
+ first_level_explicit_packages.append(d)
+
+ for dirname in first_level_explicit_packages:
+ prefix = dir_symlinks[dirname]
+ link_to_path = paths.join(prefix, site_packages_root, dirname)
+ entry = VenvSymlinkEntry(
+ kind = VenvSymlinkKind.LIB,
+ link_to_path = link_to_path,
+ package = package,
+ version = version_str,
+ venv_path = dirname,
+ files = depset([
+ f
+ for f in all_files
+ if runfiles_root_path(ctx, f.short_path).startswith(link_to_path + "/")
+ ]),
+ )
+ venv_symlinks.append(entry)
+
+ return venv_symlinks
+
+def _repo_relative_short_path(short_path):
+ # Convert `../+pypi+foo/some/file.py` to `some/file.py`
+ if short_path.startswith("../"):
+ return short_path[3:].partition("/")[2]
+ else:
+ return short_path
diff --git a/tests/bootstrap_impls/venv_relative_path_tests.bzl b/tests/bootstrap_impls/venv_relative_path_tests.bzl
index ad4870f..72b5012 100644
--- a/tests/bootstrap_impls/venv_relative_path_tests.bzl
+++ b/tests/bootstrap_impls/venv_relative_path_tests.bzl
@@ -15,7 +15,7 @@
"Unit tests for relative_path computation"
load("@rules_testing//lib:test_suite.bzl", "test_suite")
-load("//python/private:py_executable.bzl", "relative_path") # buildifier: disable=bzl-visibility
+load("//python/private:common.bzl", "relative_path") # buildifier: disable=bzl-visibility
_tests = []
diff --git a/tests/venv_site_packages_libs/BUILD.bazel b/tests/venv_site_packages_libs/BUILD.bazel
index e64299e..92d5dec 100644
--- a/tests/venv_site_packages_libs/BUILD.bazel
+++ b/tests/venv_site_packages_libs/BUILD.bazel
@@ -26,6 +26,8 @@
":closer_lib",
"//tests/venv_site_packages_libs/nspkg_alpha",
"//tests/venv_site_packages_libs/nspkg_beta",
+ "//tests/venv_site_packages_libs/pkgutil_top",
+ "//tests/venv_site_packages_libs/pkgutil_top_sub",
"@other//nspkg_delta",
"@other//nspkg_gamma",
"@other//nspkg_single",
diff --git a/tests/venv_site_packages_libs/app_files_building/BUILD.bazel b/tests/venv_site_packages_libs/app_files_building/BUILD.bazel
new file mode 100644
index 0000000..60afd34
--- /dev/null
+++ b/tests/venv_site_packages_libs/app_files_building/BUILD.bazel
@@ -0,0 +1,5 @@
+load(":app_files_building_tests.bzl", "app_files_building_test_suite")
+
+app_files_building_test_suite(
+ name = "app_files_building_tests",
+)
diff --git a/tests/venv_site_packages_libs/app_files_building/app_files_building_tests.bzl b/tests/venv_site_packages_libs/app_files_building/app_files_building_tests.bzl
new file mode 100644
index 0000000..0a0265e
--- /dev/null
+++ b/tests/venv_site_packages_libs/app_files_building/app_files_building_tests.bzl
@@ -0,0 +1,247 @@
+""
+
+load("@bazel_skylib//lib:paths.bzl", "paths")
+load("@rules_testing//lib:analysis_test.bzl", "analysis_test")
+load("@rules_testing//lib:test_suite.bzl", "test_suite")
+load("//python/private:py_info.bzl", "VenvSymlinkEntry", "VenvSymlinkKind") # buildifier: disable=bzl-visibility
+load("//python/private:venv_runfiles.bzl", "build_link_map") # buildifier: disable=bzl-visibility
+
+_tests = []
+
+def _ctx(workspace_name = "_main"):
+ return struct(
+ workspace_name = workspace_name,
+ )
+
+def _file(short_path):
+ return struct(
+ short_path = short_path,
+ )
+
+def _entry(venv_path, link_to_path, files = [], **kwargs):
+ kwargs.setdefault("kind", VenvSymlinkKind.LIB)
+ kwargs.setdefault("package", None)
+ kwargs.setdefault("version", None)
+
+ def short_pathify(path):
+ path = paths.join(link_to_path, path)
+
+ # In tests, `../` is used to step out of the link_to_path scope.
+ path = paths.normalize(path)
+
+ # Treat paths starting with "+" as external references. This matches
+ # how bzlmod names things.
+ if link_to_path.startswith("+"):
+ # File.short_path to external repos have `../` prefixed
+ path = paths.join("../", path)
+ else:
+ # File.short_path in main repo is main-repo relative
+ _, _, path = path.partition("/")
+ return path
+
+ return VenvSymlinkEntry(
+ venv_path = venv_path,
+ link_to_path = link_to_path,
+ files = depset([
+ _file(short_pathify(f))
+ for f in files
+ ]),
+ **kwargs
+ )
+
+def _test_conflict_merging(name):
+ analysis_test(
+ name = name,
+ impl = _test_conflict_merging_impl,
+ target = "//python:none",
+ )
+
+_tests.append(_test_conflict_merging)
+
+def _test_conflict_merging_impl(env, _):
+ entries = [
+ _entry("a", "+pypi_a/site-packages/a", ["a.txt"]),
+ _entry("a/b", "+pypi_a_b/site-packages/a/b", ["b.txt"]),
+ _entry("x", "_main/src/x", ["x.txt"]),
+ _entry("x/p", "_main/src-dev/x/p", ["p.txt"]),
+ _entry("duplicate", "+dupe_a/site-packages/duplicate", ["d.py"]),
+ # This entry also provides a/x.py, but since the "a" entry is shorter
+ # and comes first, its version of x.py should win.
+ _entry("duplicate", "+dupe_b/site-packages/duplicate", ["d.py"]),
+ ]
+
+ actual = build_link_map(_ctx(), entries)
+ expected_libs = {
+ "a/a.txt": _file("../+pypi_a/site-packages/a/a.txt"),
+ "a/b/b.txt": _file("../+pypi_a_b/site-packages/a/b/b.txt"),
+ "duplicate/d.py": _file("../+dupe_a/site-packages/duplicate/d.py"),
+ "x/p/p.txt": _file("src-dev/x/p/p.txt"),
+ "x/x.txt": _file("src/x/x.txt"),
+ }
+ env.expect.that_dict(actual[VenvSymlinkKind.LIB]).contains_exactly(expected_libs)
+ env.expect.that_dict(actual).keys().contains_exactly([VenvSymlinkKind.LIB])
+
+def _test_package_version_filtering(name):
+ analysis_test(
+ name = name,
+ impl = _test_package_version_filtering_impl,
+ target = "//python:none",
+ )
+
+_tests.append(_test_package_version_filtering)
+
+def _test_package_version_filtering_impl(env, _):
+ entries = [
+ _entry("foo", "+pypi_v1/site-packages/foo", ["foo.txt"], package = "foo", version = "1.0"),
+ _entry("foo", "+pypi_v2/site-packages/foo", ["bar.txt"], package = "foo", version = "2.0"),
+ ]
+
+ actual = build_link_map(_ctx(), entries)
+
+ expected_libs = {
+ "foo": "+pypi_v1/site-packages/foo",
+ }
+ env.expect.that_dict(actual[VenvSymlinkKind.LIB]).contains_exactly(expected_libs)
+
+def _test_malformed_entry(name):
+ analysis_test(
+ name = name,
+ impl = _test_malformed_entry_impl,
+ target = "//python:none",
+ )
+
+_tests.append(_test_malformed_entry)
+
+def _test_malformed_entry_impl(env, _):
+ entries = [
+ _entry(
+ "a",
+ "+pypi_a/site-packages/a",
+ # This file is outside the link_to_path, so it should be ignored.
+ ["../outside.txt"],
+ ),
+ # A second, conflicting, entry is added to force merging of the known
+ # files. Without this, there's no conflict, so files is never
+ # considered.
+ _entry(
+ "a",
+ "+pypi_b/site-packages/a",
+ ["../outside.txt"],
+ ),
+ ]
+
+ actual = build_link_map(_ctx(), entries)
+ env.expect.that_dict(actual).contains_exactly({
+ VenvSymlinkKind.LIB: {},
+ })
+
+def _test_complex_namespace_packages(name):
+ analysis_test(
+ name = name,
+ impl = _test_complex_namespace_packages_impl,
+ target = "//python:none",
+ )
+
+_tests.append(_test_complex_namespace_packages)
+
+def _test_complex_namespace_packages_impl(env, _):
+ entries = [
+ _entry("a/b", "+pypi_a_b/site-packages/a/b", ["b.txt"]),
+ _entry("a/c", "+pypi_a_c/site-packages/a/c", ["c.txt"]),
+ _entry("x/y/z", "+pypi_x_y_z/site-packages/x/y/z", ["z.txt"]),
+ _entry("foo", "+pypi_foo/site-packages/foo", ["foo.txt"]),
+ _entry("foobar", "+pypi_foobar/site-packages/foobar", ["foobar.txt"]),
+ ]
+
+ actual = build_link_map(_ctx(), entries)
+ expected_libs = {
+ "a/b": "+pypi_a_b/site-packages/a/b",
+ "a/c": "+pypi_a_c/site-packages/a/c",
+ "foo": "+pypi_foo/site-packages/foo",
+ "foobar": "+pypi_foobar/site-packages/foobar",
+ "x/y/z": "+pypi_x_y_z/site-packages/x/y/z",
+ }
+ env.expect.that_dict(actual[VenvSymlinkKind.LIB]).contains_exactly(expected_libs)
+
+def _test_empty_and_trivial_inputs(name):
+ analysis_test(
+ name = name,
+ impl = _test_empty_and_trivial_inputs_impl,
+ target = "//python:none",
+ )
+
+_tests.append(_test_empty_and_trivial_inputs)
+
+def _test_empty_and_trivial_inputs_impl(env, _):
+ # Test with empty list of entries
+ actual = build_link_map(_ctx(), [])
+ env.expect.that_dict(actual).contains_exactly({})
+
+ # Test with an entry with no files
+ entries = [_entry("a", "+pypi_a/site-packages/a", [])]
+ actual = build_link_map(_ctx(), entries)
+ env.expect.that_dict(actual).contains_exactly({
+ VenvSymlinkKind.LIB: {"a": "+pypi_a/site-packages/a"},
+ })
+
+def _test_multiple_venv_symlink_kinds(name):
+ analysis_test(
+ name = name,
+ impl = _test_multiple_venv_symlink_kinds_impl,
+ target = "//python:none",
+ )
+
+_tests.append(_test_multiple_venv_symlink_kinds)
+
+def _test_multiple_venv_symlink_kinds_impl(env, _):
+ entries = [
+ _entry(
+ "libfile",
+ "+pypi_lib/site-packages/libfile",
+ ["lib.txt"],
+ kind =
+ VenvSymlinkKind.LIB,
+ ),
+ _entry(
+ "binfile",
+ "+pypi_bin/bin/binfile",
+ ["bin.txt"],
+ kind = VenvSymlinkKind.BIN,
+ ),
+ _entry(
+ "includefile",
+ "+pypi_include/include/includefile",
+ ["include.h"],
+ kind =
+ VenvSymlinkKind.INCLUDE,
+ ),
+ ]
+
+ actual = build_link_map(_ctx(), entries)
+
+ expected_libs = {
+ "libfile": "+pypi_lib/site-packages/libfile",
+ }
+ env.expect.that_dict(actual[VenvSymlinkKind.LIB]).contains_exactly(expected_libs)
+
+ expected_bins = {
+ "binfile": "+pypi_bin/bin/binfile",
+ }
+ env.expect.that_dict(actual[VenvSymlinkKind.BIN]).contains_exactly(expected_bins)
+
+ expected_includes = {
+ "includefile": "+pypi_include/include/includefile",
+ }
+ env.expect.that_dict(actual[VenvSymlinkKind.INCLUDE]).contains_exactly(expected_includes)
+
+ env.expect.that_dict(actual).keys().contains_exactly([
+ VenvSymlinkKind.LIB,
+ VenvSymlinkKind.BIN,
+ VenvSymlinkKind.INCLUDE,
+ ])
+
+def app_files_building_test_suite(name):
+ test_suite(
+ name = name,
+ tests = _tests,
+ )
diff --git a/tests/venv_site_packages_libs/bin.py b/tests/venv_site_packages_libs/bin.py
index 7e5838d..772925f 100644
--- a/tests/venv_site_packages_libs/bin.py
+++ b/tests/venv_site_packages_libs/bin.py
@@ -14,14 +14,26 @@
def assert_imported_from_venv(self, module_name):
module = importlib.import_module(module_name)
self.assertEqual(module.__name__, module_name)
+ self.assertIsNotNone(
+ module.__file__,
+ f"Expected module {module_name!r} to have"
+ + f"__file__ set, but got None. {module=}",
+ )
self.assertTrue(
module.__file__.startswith(self.venv),
f"\n{module_name} was imported, but not from the venv.\n"
+ f"venv : {self.venv}\n"
+ f"actual: {module.__file__}",
)
+ return module
def test_imported_from_venv(self):
+ m = self.assert_imported_from_venv("pkgutil_top")
+ self.assertEqual(m.WHOAMI, "pkgutil_top")
+
+ m = self.assert_imported_from_venv("pkgutil_top.sub")
+ self.assertEqual(m.WHOAMI, "pkgutil_top.sub")
+
self.assert_imported_from_venv("nspkg.subnspkg.alpha")
self.assert_imported_from_venv("nspkg.subnspkg.beta")
self.assert_imported_from_venv("nspkg.subnspkg.gamma")
diff --git a/tests/venv_site_packages_libs/pkgutil_top/BUILD.bazel b/tests/venv_site_packages_libs/pkgutil_top/BUILD.bazel
new file mode 100644
index 0000000..c805b1a
--- /dev/null
+++ b/tests/venv_site_packages_libs/pkgutil_top/BUILD.bazel
@@ -0,0 +1,10 @@
+load("//python:py_library.bzl", "py_library")
+
+package(default_visibility = ["//visibility:public"])
+
+py_library(
+ name = "pkgutil_top",
+ srcs = glob(["site-packages/**/*.py"]),
+ experimental_venvs_site_packages = "//python/config_settings:venvs_site_packages",
+ imports = [package_name() + "/site-packages"],
+)
diff --git a/tests/venv_site_packages_libs/pkgutil_top/site-packages/pkgutil_top/__init__.py b/tests/venv_site_packages_libs/pkgutil_top/site-packages/pkgutil_top/__init__.py
new file mode 100644
index 0000000..e1809a3
--- /dev/null
+++ b/tests/venv_site_packages_libs/pkgutil_top/site-packages/pkgutil_top/__init__.py
@@ -0,0 +1,2 @@
+WHOAMI = "pkgutil_top"
+__path__ = __import__("pkgutil").extend_path(__path__, __name__)
diff --git a/tests/venv_site_packages_libs/pkgutil_top/site-packages/pkgutil_top/top.py b/tests/venv_site_packages_libs/pkgutil_top/site-packages/pkgutil_top/top.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/tests/venv_site_packages_libs/pkgutil_top/site-packages/pkgutil_top/top.py
diff --git a/tests/venv_site_packages_libs/pkgutil_top_sub/BUILD.bazel b/tests/venv_site_packages_libs/pkgutil_top_sub/BUILD.bazel
new file mode 100644
index 0000000..9d77162
--- /dev/null
+++ b/tests/venv_site_packages_libs/pkgutil_top_sub/BUILD.bazel
@@ -0,0 +1,10 @@
+load("//python:py_library.bzl", "py_library")
+
+package(default_visibility = ["//visibility:public"])
+
+py_library(
+ name = "pkgutil_top_sub",
+ srcs = glob(["site-packages/**/*.py"]),
+ experimental_venvs_site_packages = "//python/config_settings:venvs_site_packages",
+ imports = [package_name() + "/site-packages"],
+)
diff --git a/tests/venv_site_packages_libs/pkgutil_top_sub/site-packages/pkgutil_top/sub/__init__.py b/tests/venv_site_packages_libs/pkgutil_top_sub/site-packages/pkgutil_top/sub/__init__.py
new file mode 100644
index 0000000..e7fb234
--- /dev/null
+++ b/tests/venv_site_packages_libs/pkgutil_top_sub/site-packages/pkgutil_top/sub/__init__.py
@@ -0,0 +1 @@
+WHOAMI = "pkgutil_top.sub"
diff --git a/tests/venv_site_packages_libs/pkgutil_top_sub/site-packages/pkgutil_top/sub/suba.py b/tests/venv_site_packages_libs/pkgutil_top_sub/site-packages/pkgutil_top/sub/suba.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/tests/venv_site_packages_libs/pkgutil_top_sub/site-packages/pkgutil_top/sub/suba.py