fix: correctly merge conflicting paths when files (instead of dirs) are being linked (#3458)
When a file (instead of directory) was duplicated, it was incorrectly be
treated as
a directory. The conflicting path was turned into a directory and all
the files
were made child paths of it. In practice, this usually worked out OK
still because
the conflicting file was an `__init__.py` file for a pkgutil-style
namespace package,
which effectively converted it to an implicit namespace package.
I think this was introduced by
https://github.com/bazel-contrib/rules_python/pull/3448,
but am not 100% sure.
To fix, first check for exact path equality when merging conflicting
paths. If the
candidate file has the same link_to_path as the grouping, then just link
to that file
and skip any remaining files in the group. The rest of the group's files
are skipped
because none of them can contribute links anymore:
* If they have the same path, then they are ignored (first set wins)
* If they are a sub-path, then it violates the preconditions of the
group (all files
in the group should be exact matches or sub-paths of a common prefix)
* If they aren't under the common prefix, then it violates the
preconditions of the
group and are skipped.
---------
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
diff --git a/python/private/venv_runfiles.bzl b/python/private/venv_runfiles.bzl
index 43fcab6..0c79ea8 100644
--- a/python/private/venv_runfiles.bzl
+++ b/python/private/venv_runfiles.bzl
@@ -163,8 +163,10 @@
current_group = None
current_group_prefix = None
for entry in entries:
- prefix = entry.venv_path
- anchored_prefix = prefix + "/"
+ # NOTE: When a file is being directly linked, the anchored prefix can look
+ # odd, e.g. "foo/__init__.py/". This is OK; it's just used to prevent
+ # incorrect prefix substring matching.
+ anchored_prefix = entry.venv_path + "/"
if (current_group_prefix == None or
not anchored_prefix.startswith(current_group_prefix)):
current_group_prefix = anchored_prefix
@@ -193,26 +195,41 @@
# be symlinked directly, not the directory containing them, due to
# dynamic linker symlink resolution semantics on Linux.
for entry in group:
- prefix = entry.venv_path
+ root_venv_path = entry.venv_path
+ anchored_link_to_path = entry.link_to_path + "/"
for file in entry.files.to_list():
- # Compute the file-specific venv path. i.e. the relative
- # path of the file under entry.venv_path, joined with
- # entry.venv_path
rf_root_path = runfiles_root_path(ctx, file.short_path)
- if not rf_root_path.startswith(entry.link_to_path):
- # This generally shouldn't occur in practice, but just
- # in case, skip them, for lack of a better option.
- continue
- venv_path = "{}/{}".format(
- prefix,
- rf_root_path.removeprefix(entry.link_to_path + "/"),
- )
- # For lack of a better option, first added wins. We happen to
- # go in top-down prefix order, so the highest level namespace
- # package typically wins.
- if venv_path not in keep_map:
- keep_map[venv_path] = file
+ # It's a file (or directory) being directly linked and
+ # must be directly linked.
+ if rf_root_path == entry.link_to_path:
+ # For lack of a better option, first added wins.
+ if entry.venv_path not in keep_map:
+ keep_map[entry.venv_path] = file
+
+ # Skip anything remaining: anything left is either
+ # the same path (first set wins), a suffix (violates
+ # preconditions and can't link anyways), or not under
+ # the prefix (violates preconditions).
+ break
+ else:
+ # Compute the file-specific venv path. i.e. the relative
+ # path of the file under entry.venv_path, joined with
+ # entry.venv_path
+ head, match, rel_venv_path = rf_root_path.partition(anchored_link_to_path)
+ if not match or head:
+ # If link_to_path didn't match, then obviously skip.
+ # If head is non-empty, it means link_to_path wasn't
+ # found at the start
+ # This shouldn't occur in practice, but guard against it
+ # just in case
+ continue
+
+ venv_path = paths.join(root_venv_path, rel_venv_path)
+
+ # For lack of a better option, first added wins.
+ if venv_path not in keep_map:
+ keep_map[venv_path] = file
def get_venv_symlinks(ctx, files, package, version_str, site_packages_root):
"""Compute the VenvSymlinkEntry objects for a library.
diff --git a/tests/venv_site_packages_libs/app_files_building/app_files_building_tests.bzl b/tests/venv_site_packages_libs/app_files_building/app_files_building_tests.bzl
index fc0b5d0..486293b 100644
--- a/tests/venv_site_packages_libs/app_files_building/app_files_building_tests.bzl
+++ b/tests/venv_site_packages_libs/app_files_building/app_files_building_tests.bzl
@@ -1,6 +1,5 @@
""
-load("@bazel_skylib//lib:paths.bzl", "paths")
load("@rules_testing//lib:analysis_test.bzl", "analysis_test")
load("@rules_testing//lib:test_suite.bzl", "test_suite")
load("//python/private:py_info.bzl", "VenvSymlinkEntry", "VenvSymlinkKind") # buildifier: disable=bzl-visibility
@@ -58,34 +57,16 @@
))
return sorted(result, key = lambda e: (e.link_to_path, e.venv_path))
-def _entry(venv_path, link_to_path, files = [], **kwargs):
+def _entry(venv_path, link_to_path, files, **kwargs):
kwargs.setdefault("kind", VenvSymlinkKind.LIB)
kwargs.setdefault("package", None)
kwargs.setdefault("version", None)
-
- def short_pathify(path):
- path = paths.join(link_to_path, path)
-
- # In tests, `../` is used to step out of the link_to_path scope.
- path = paths.normalize(path)
-
- # Treat paths starting with "+" as external references. This matches
- # how bzlmod names things.
- if link_to_path.startswith("+"):
- # File.short_path to external repos have `../` prefixed
- path = paths.join("../", path)
- else:
- # File.short_path in main repo is main-repo relative
- _, _, path = path.partition("/")
- return path
+ kwargs.setdefault("link_to_file", None)
return VenvSymlinkEntry(
venv_path = venv_path,
link_to_path = link_to_path,
- files = depset([
- _file(short_pathify(f))
- for f in files
- ]),
+ files = depset(files),
**kwargs
)
@@ -100,15 +81,34 @@
def _test_conflict_merging_impl(env, _):
entries = [
- _entry("a", "+pypi_a/site-packages/a", ["a.txt"]),
- _entry("a-1.0.dist-info", "+pypi_a/site-packages/a-1.0.dist-info", ["METADATA"]),
- _entry("a/b", "+pypi_a_b/site-packages/a/b", ["b.txt"]),
- _entry("x", "_main/src/x", ["x.txt"]),
- _entry("x/p", "_main/src-dev/x/p", ["p.txt"]),
- _entry("duplicate", "+dupe_a/site-packages/duplicate", ["d.py"]),
- # This entry also provides a/x.py, but since the "a" entry is shorter
- # and comes first, its version of x.py should win.
- _entry("duplicate", "+dupe_b/site-packages/duplicate", ["d.py"]),
+ _entry("a", "+pypi_a/site-packages/a", [
+ _file("../+pypi_a/site-packages/a/a.txt"),
+ ]),
+ _entry("a-1.0.dist-info", "+pypi_a/site-packages/a-1.0.dist-info", [
+ _file("../+pypi_a/site-packages/a-1.0.dist-info/METADATA"),
+ ]),
+ _entry("a/b", "+pypi_a_b/site-packages/a/b", [
+ _file("../+pypi_a_b/site-packages/a/b/b.txt"),
+ ]),
+ _entry("x", "_main/src/x", [
+ _file("src/x/x.txt"),
+ ]),
+ _entry("x/p", "_main/src-dev/x/p", [
+ _file("src-dev/x/p/p.txt"),
+ ]),
+ _entry("duplicate", "+dupe_a/site-packages/duplicate", [
+ _file("../+dupe_a/site-packages/duplicate/d.py"),
+ ]),
+ _entry("duplicate", "+dupe_b/site-packages/duplicate", [
+ _file("../+dupe_b/site-packages/duplicate/d.py"),
+ ]),
+ # Case: two distributions provide the same file (instead of directory)
+ _entry("ff/fmod.py", "+ff_a/site-packages/ff/fmod.py", [
+ _file("../+ff_a/site-packages/ff/fmod.py"),
+ ]),
+ _entry("ff/fmod.py", "+ff_b/site-packages/ff/fmod.py", [
+ _file("../+ff_b/site-packages/ff/fmod.py"),
+ ]),
]
actual, conflicts = build_link_map(_ctx(), entries, return_conflicts = True)
@@ -117,6 +117,7 @@
"a/a.txt": _file("../+pypi_a/site-packages/a/a.txt"),
"a/b/b.txt": _file("../+pypi_a_b/site-packages/a/b/b.txt"),
"duplicate/d.py": _file("../+dupe_a/site-packages/duplicate/d.py"),
+ "ff/fmod.py": _file("../+ff_a/site-packages/ff/fmod.py"),
"x/p/p.txt": _file("src-dev/x/p/p.txt"),
"x/x.txt": _file("src/x/x.txt"),
}
@@ -274,8 +275,12 @@
def _test_package_version_filtering_impl(env, _):
entries = [
- _entry("foo", "+pypi_v1/site-packages/foo", ["foo.txt"], package = "foo", version = "1.0"),
- _entry("foo", "+pypi_v2/site-packages/foo", ["bar.txt"], package = "foo", version = "2.0"),
+ _entry("foo", "+pypi_v1/site-packages/foo", [
+ _file("../+pypi_v1/site-packages/foo/foo.txt"),
+ ], package = "foo", version = "1.0"),
+ _entry("foo", "+pypi_v2/site-packages/foo", [
+ _file("../+pypi_v2/site-packages/foo/bar.txt"),
+ ], package = "foo", version = "2.0"),
]
actual = build_link_map(_ctx(), entries)
@@ -300,7 +305,7 @@
"a",
"+pypi_a/site-packages/a",
# This file is outside the link_to_path, so it should be ignored.
- ["../outside.txt"],
+ [_file("../+pypi_a/site-packages/outside.txt")],
),
# A second, conflicting, entry is added to force merging of the known
# files. Without this, there's no conflict, so files is never
@@ -308,7 +313,7 @@
_entry(
"a",
"+pypi_b/site-packages/a",
- ["../outside.txt"],
+ [_file("../+pypi_b/site-packages/outside.txt")],
),
]
@@ -328,11 +333,21 @@
def _test_complex_namespace_packages_impl(env, _):
entries = [
- _entry("a/b", "+pypi_a_b/site-packages/a/b", ["b.txt"]),
- _entry("a/c", "+pypi_a_c/site-packages/a/c", ["c.txt"]),
- _entry("x/y/z", "+pypi_x_y_z/site-packages/x/y/z", ["z.txt"]),
- _entry("foo", "+pypi_foo/site-packages/foo", ["foo.txt"]),
- _entry("foobar", "+pypi_foobar/site-packages/foobar", ["foobar.txt"]),
+ _entry("a/b", "+pypi_a_b/site-packages/a/b", [
+ _file("../+pypi_a_b/site-packages/a/b/b.txt"),
+ ]),
+ _entry("a/c", "+pypi_a_c/site-packages/a/c", [
+ _file("../+pypi_a_c/site-packages/a/cc.txt"),
+ ]),
+ _entry("x/y/z", "+pypi_x_y_z/site-packages/x/y/z", [
+ _file("../+pypi_x_y_z/site-packages/x/y/z/z.txt"),
+ ]),
+ _entry("foo", "+pypi_foo/site-packages/foo", [
+ _file("../+pypi_foo/site-packages/foo/foo.txt"),
+ ]),
+ _entry("foobar", "+pypi_foobar/site-packages/foobar", [
+ _file("../+pypi_foobar/site-packages/foobar/foobar.txt"),
+ ]),
]
actual = build_link_map(_ctx(), entries)
@@ -380,19 +395,19 @@
_entry(
"libfile",
"+pypi_lib/site-packages/libfile",
- ["lib.txt"],
+ [_file("../+pypi_lib/site-packages/libfile/lib.txt")],
kind = VenvSymlinkKind.LIB,
),
_entry(
"binfile",
"+pypi_bin/bin/binfile",
- ["bin.txt"],
+ [_file("../+pypi_bin/bin/binfile/bin.txt")],
kind = VenvSymlinkKind.BIN,
),
_entry(
"includefile",
"+pypi_include/include/includefile",
- ["include.h"],
+ [_file("../+pypi_include/include/includefile/include.h")],
kind =
VenvSymlinkKind.INCLUDE,
),