fix: insert user imports before runtime site-packages (#2073)
Previously, all the user import paths were put at the end of sys.path.
This was done so that
user import paths didn't hide stdlib modules. However, a side-effect is
that user import
paths came after the runtime's site-packages directory. This prevented
user imports from
overriding non-stdlib modules included in a runtime (e.g. pip).
To fix, we look for the runtime site-packages directory, then insert the
user import paths
before it. A test is used to ensure that the ordering is `[stdlib, user,
runtime site-packages]`
Also fixes a bug introduced by #2076: safe path was being disabled by
default
Fixes https://github.com/bazelbuild/rules_python/issues/2064
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9ccff79..8a0792d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -35,6 +35,9 @@
Windows. See [#1840](https://github.com/bazelbuild/rules_python/issues/1840).
* (rules) Fixes Mac + `--build_python_zip` + {obj}`--bootstrap_impl=script`
([#2030](https://github.com/bazelbuild/rules_python/issues/2030)).
+* (rules) User dependencies come before runtime site-packages when using
+ {obj}`--bootstrap_impl=script`.
+ ([#2064](https://github.com/bazelbuild/rules_python/issues/2064)).
* (pip) Fixed pypi parse_simpleapi_html function for feeds with package metadata
containing ">" sign
diff --git a/python/private/stage1_bootstrap_template.sh b/python/private/stage1_bootstrap_template.sh
index 895793b..959e7ba 100644
--- a/python/private/stage1_bootstrap_template.sh
+++ b/python/private/stage1_bootstrap_template.sh
@@ -106,11 +106,13 @@
# See: https://docs.python.org/3.11/using/cmdline.html#envvar-PYTHONSAFEPATH
# NOTE: Only works for 3.11+
# We inherit the value from the outer environment in case the user wants to
-# opt-out of using PYTHONSAFEPATH.
-# Because empty means false and non-empty means true, we have to distinguish
-# between "defined and empty" and "not defined at all".
+# opt-out of using PYTHONSAFEPATH. To opt-out, they have to set
+# `PYTHONSAFEPATH=` (empty string). This is because Python treats the empty
+# value as false, and any non-empty value as true.
+# ${FOO+WORD} expands to empty if $FOO is undefined, and WORD otherwise.
if [[ -z "${PYTHONSAFEPATH+x}" ]]; then
- interpreter_env+=("PYTHONSAFEPATH=${PYTHONSAFEPATH+1}")
+ # ${FOO-WORD} expands to WORD if $FOO is undefined, and $FOO otherwise
+ interpreter_env+=("PYTHONSAFEPATH=${PYTHONSAFEPATH-1}")
fi
if [[ "$IS_ZIPFILE" == "1" ]]; then
diff --git a/python/private/stage2_bootstrap_template.py b/python/private/stage2_bootstrap_template.py
index 69c0dec..29f59d2 100644
--- a/python/private/stage2_bootstrap_template.py
+++ b/python/private/stage2_bootstrap_template.py
@@ -498,9 +498,29 @@
cov_tool = None
sys.stdout.flush()
+
+ # Add the user imports after the stdlib, but before the runtime's
+ # site-packages directory. This gives the stdlib precedence, while allowing
+ # users to override non-stdlib packages that may have been bundled with
+ # the runtime (usually pip).
+ # NOTE: There isn't a good way to identify the stdlib paths, so we just
+ # expect site-packages comes after it, per
+ # https://docs.python.org/3/library/sys_path_init.html#sys-path-init
+ for i, path in enumerate(sys.path):
+ # dist-packages is a debian convention, see
+ # https://wiki.debian.org/Python#Deviations_from_upstream
+ if os.path.basename(path) in ("site-packages", "dist-packages"):
+ sys.path[i:i] = python_path_entries
+ break
+ else:
+ # Otherwise, no site-packages directory was found, which is odd but ok.
+ sys.path.extend(python_path_entries)
+
# NOTE: The sys.path must be modified before coverage is imported/activated
+ # NOTE: Perform this after the user imports are appended. This avoids a
+ # user import accidentally triggering the site-packages logic above.
sys.path[0:0] = prepend_path_entries
- sys.path.extend(python_path_entries)
+
with _maybe_collect_coverage(enable=cov_tool is not None):
# The first arg is this bootstrap, so drop that for the re-invocation.
_run_py(main_filename, args=sys.argv[1:])
diff --git a/tests/base_rules/BUILD.bazel b/tests/base_rules/BUILD.bazel
index e04d314..cd57715 100644
--- a/tests/base_rules/BUILD.bazel
+++ b/tests/base_rules/BUILD.bazel
@@ -13,7 +13,7 @@
# limitations under the License.
load("//python/private:util.bzl", "IS_BAZEL_7_OR_HIGHER") # buildifier: disable=bzl-visibility
-load("//tests/support:sh_py_run_test.bzl", "sh_py_run_test")
+load("//tests/support:sh_py_run_test.bzl", "py_reconfig_test", "sh_py_run_test")
_SUPPORTS_BOOTSTRAP_SCRIPT = select({
"@platforms//os:windows": ["@platforms//:incompatible"],
@@ -52,6 +52,25 @@
target_compatible_with = _SUPPORTS_BOOTSTRAP_SCRIPT,
)
+py_reconfig_test(
+ name = "sys_path_order_bootstrap_script_test",
+ srcs = ["sys_path_order_test.py"],
+ bootstrap_impl = "script",
+ env = {"BOOTSTRAP": "script"},
+ imports = ["./site-packages"],
+ main = "sys_path_order_test.py",
+ target_compatible_with = _SUPPORTS_BOOTSTRAP_SCRIPT,
+)
+
+py_reconfig_test(
+ name = "sys_path_order_bootstrap_system_python_test",
+ srcs = ["sys_path_order_test.py"],
+ bootstrap_impl = "system_python",
+ env = {"BOOTSTRAP": "system_python"},
+ imports = ["./site-packages"],
+ main = "sys_path_order_test.py",
+)
+
sh_py_run_test(
name = "inherit_pythonsafepath_env_test",
bootstrap_impl = "script",
diff --git a/tests/base_rules/inherit_pythonsafepath_env_test.sh b/tests/base_rules/inherit_pythonsafepath_env_test.sh
index bf85d26..bc6e2d5 100755
--- a/tests/base_rules/inherit_pythonsafepath_env_test.sh
+++ b/tests/base_rules/inherit_pythonsafepath_env_test.sh
@@ -35,17 +35,35 @@
local expected_pattern=$1
local actual=$2
if ! (echo "$actual" | grep "$expected_pattern" ) >/dev/null; then
- echo "expected output to match: $expected_pattern"
- echo "but got:\n$actual"
+ echo "expected to match: $expected_pattern"
+ echo "===== actual START ====="
+ echo "$actual"
+ echo "===== actual END ====="
+ echo
+ touch EXPECTATION_FAILED
return 1
fi
}
+echo "Check inherited and disabled"
+# Verify setting it to empty string disables safe path
actual=$(PYTHONSAFEPATH= $bin 2>&1)
expect_match "sys.flags.safe_path: False" "$actual"
expect_match "PYTHONSAFEPATH: EMPTY" "$actual"
+echo "Check inherited and propagated"
+# Verify setting it to any string enables safe path and that
+# value is propagated
actual=$(PYTHONSAFEPATH=OUTER $bin 2>&1)
expect_match "sys.flags.safe_path: True" "$actual"
expect_match "PYTHONSAFEPATH: OUTER" "$actual"
+
+echo "Check enabled by default"
+# Verifying doing nothing leaves safepath enabled by default
+actual=$($bin 2>&1)
+expect_match "sys.flags.safe_path: True" "$actual"
+expect_match "PYTHONSAFEPATH: 1" "$actual"
+
+# Exit if any of the expects failed
+[[ ! -e EXPECTATION_FAILED ]]
diff --git a/tests/base_rules/sys_path_order_test.py b/tests/base_rules/sys_path_order_test.py
new file mode 100644
index 0000000..2e33464
--- /dev/null
+++ b/tests/base_rules/sys_path_order_test.py
@@ -0,0 +1,88 @@
+# Copyright 2024 The Bazel Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os.path
+import re
+import sys
+import unittest
+
+
+class SysPathOrderTest(unittest.TestCase):
+ def test_sys_path_order(self):
+ last_stdlib = None
+ first_user = None
+ first_runtime_site = None
+
+ # Classify paths into the three different types we care about: stdlib,
+ # user dependency, or the runtime's site-package's directory.
+ #
+ # Because they often share common prefixes with one another, and vary
+ # subtly between platforms, we do this in two passes: first categorize,
+ # then pick out the indexes. This is just so debugging is easier and
+ # error messages are more informative.
+ categorized_paths = []
+ for i, value in enumerate(sys.path):
+ # The runtime's root repo may be added to sys.path, but it
+ # counts as a user directory, not stdlib directory.
+ if value == sys.prefix:
+ category = "user"
+ elif value.startswith(sys.prefix):
+ # The runtime's site-package directory might be called
+ # dist-packages when using Debian's system python.
+ if os.path.basename(value).endswith("-packages"):
+ category = "runtime-site"
+ else:
+ category = "stdlib"
+ else:
+ category = "user"
+
+ categorized_paths.append((category, value))
+
+ for i, (category, _) in enumerate(categorized_paths):
+ if category == "stdlib":
+ last_stdlib = i
+ elif category == "runtime-site":
+ if first_runtime_site is None:
+ first_runtime_site = i
+ elif category == "user":
+ if first_user is None:
+ first_user = i
+
+ sys_path_str = "\n".join(
+ f"{i}: ({category}) {value}"
+ for i, (category, value) in enumerate(categorized_paths)
+ )
+ if None in (last_stdlib, first_user, first_runtime_site):
+ self.fail(
+ "Failed to find position for one of:\n"
+ + f"{last_stdlib=} {first_user=} {first_runtime_site=}\n"
+ + f"for sys.path:\n{sys_path_str}"
+ )
+
+ if os.environ["BOOTSTRAP"] == "script":
+ self.assertTrue(
+ last_stdlib < first_user < first_runtime_site,
+ f"Expected {last_stdlib=} < {first_user=} < {first_runtime_site=}\n"
+ + f"for sys.path:\n{sys_path_str}",
+ )
+ else:
+ self.assertTrue(
+ first_user < last_stdlib < first_runtime_site,
+ f"Expected {first_user=} < {last_stdlib=} < {first_runtime_site=}\n"
+ + f"for sys.path:\n{sys_path_str}",
+ )
+
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/tests/support/sh_py_run_test.bzl b/tests/support/sh_py_run_test.bzl
index 4b3d22d..35be484 100644
--- a/tests/support/sh_py_run_test.bzl
+++ b/tests/support/sh_py_run_test.bzl
@@ -18,6 +18,7 @@
"""
load("//python:py_binary.bzl", "py_binary")
+load("//python:py_test.bzl", "py_test")
def _perform_transition_impl(input_settings, attr):
settings = dict(input_settings)
@@ -37,7 +38,7 @@
],
)
-def _transition_impl(ctx):
+def _py_reconfig_impl(ctx):
default_info = ctx.attr.target[DefaultInfo]
exe_ext = default_info.files_to_run.executable.extension
if exe_ext:
@@ -66,27 +67,58 @@
DefaultInfo(
executable = executable,
files = depset(default_outputs),
- runfiles = default_info.default_runfiles,
+ # On windows, the other default outputs must also be included
+ # in runfiles so the exe launcher can find the backing file.
+ runfiles = ctx.runfiles(default_outputs).merge(
+ default_info.default_runfiles,
+ ),
),
testing.TestEnvironment(
environment = ctx.attr.env,
),
]
-transition_binary = rule(
- implementation = _transition_impl,
- attrs = {
- "bootstrap_impl": attr.string(),
- "build_python_zip": attr.string(default = "auto"),
- "env": attr.string_dict(),
- "target": attr.label(executable = True, cfg = "target"),
- "_allowlist_function_transition": attr.label(
- default = "@bazel_tools//tools/allowlists/function_transition_allowlist",
- ),
- },
- cfg = _perform_transition,
- executable = True,
-)
+def _make_reconfig_rule(**kwargs):
+ return rule(
+ implementation = _py_reconfig_impl,
+ attrs = {
+ "bootstrap_impl": attr.string(),
+ "build_python_zip": attr.string(default = "auto"),
+ "env": attr.string_dict(),
+ "target": attr.label(executable = True, cfg = "target"),
+ "_allowlist_function_transition": attr.label(
+ default = "@bazel_tools//tools/allowlists/function_transition_allowlist",
+ ),
+ },
+ cfg = _perform_transition,
+ **kwargs
+ )
+
+_py_reconfig_binary = _make_reconfig_rule(executable = True)
+
+_py_reconfig_test = _make_reconfig_rule(test = True)
+
+def py_reconfig_test(*, name, **kwargs):
+ """Create a py_test with customized build settings for testing.
+
+ Args:
+ name: str, name of teset target.
+ **kwargs: kwargs to pass along to _py_reconfig_test and py_test.
+ """
+ reconfig_kwargs = {}
+ reconfig_kwargs["bootstrap_impl"] = kwargs.pop("bootstrap_impl")
+ reconfig_kwargs["env"] = kwargs.get("env")
+ inner_name = "_{}_inner" + name
+ _py_reconfig_test(
+ name = name,
+ target = inner_name,
+ **reconfig_kwargs
+ )
+ py_test(
+ name = inner_name,
+ tags = ["manual"],
+ **kwargs
+ )
def sh_py_run_test(*, name, sh_src, py_src, **kwargs):
bin_name = "_{}_bin".format(name)
@@ -102,7 +134,7 @@
},
)
- transition_binary(
+ _py_reconfig_binary(
name = bin_name,
tags = ["manual"],
target = "_{}_plain_bin".format(name),