fix: insert user imports before runtime site-packages (#2073)

Previously, all the user import paths were put at the end of sys.path.
This was done so that
user import paths didn't hide stdlib modules. However, a side-effect is
that user import
paths came after the runtime's site-packages directory. This prevented
user imports from
overriding non-stdlib modules included in a runtime (e.g. pip).

To fix, we look for the runtime site-packages directory, then insert the
user import paths
before it. A test is used to ensure that the ordering is `[stdlib, user,
runtime site-packages]`

Also fixes a bug introduced by #2076: safe path was being disabled by
default

Fixes https://github.com/bazelbuild/rules_python/issues/2064
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9ccff79..8a0792d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -35,6 +35,9 @@
   Windows. See [#1840](https://github.com/bazelbuild/rules_python/issues/1840).
 * (rules) Fixes Mac + `--build_python_zip` + {obj}`--bootstrap_impl=script`
   ([#2030](https://github.com/bazelbuild/rules_python/issues/2030)).
+* (rules) User dependencies come before runtime site-packages when using
+  {obj}`--bootstrap_impl=script`.
+  ([#2064](https://github.com/bazelbuild/rules_python/issues/2064)).
 * (pip) Fixed pypi parse_simpleapi_html function for feeds with package metadata
   containing ">" sign
 
diff --git a/python/private/stage1_bootstrap_template.sh b/python/private/stage1_bootstrap_template.sh
index 895793b..959e7ba 100644
--- a/python/private/stage1_bootstrap_template.sh
+++ b/python/private/stage1_bootstrap_template.sh
@@ -106,11 +106,13 @@
 # See: https://docs.python.org/3.11/using/cmdline.html#envvar-PYTHONSAFEPATH
 # NOTE: Only works for 3.11+
 # We inherit the value from the outer environment in case the user wants to
-# opt-out of using PYTHONSAFEPATH.
-# Because empty means false and non-empty means true, we have to distinguish
-# between "defined and empty" and "not defined at all".
+# opt-out of using PYTHONSAFEPATH. To opt-out, they have to set
+# `PYTHONSAFEPATH=` (empty string). This is because Python treats the empty
+# value as false, and any non-empty value as true.
+# ${FOO+WORD} expands to empty if $FOO is undefined, and WORD otherwise.
 if [[ -z "${PYTHONSAFEPATH+x}" ]]; then
-  interpreter_env+=("PYTHONSAFEPATH=${PYTHONSAFEPATH+1}")
+  # ${FOO-WORD} expands to WORD if $FOO is undefined, and $FOO otherwise
+  interpreter_env+=("PYTHONSAFEPATH=${PYTHONSAFEPATH-1}")
 fi
 
 if [[ "$IS_ZIPFILE" == "1" ]]; then
diff --git a/python/private/stage2_bootstrap_template.py b/python/private/stage2_bootstrap_template.py
index 69c0dec..29f59d2 100644
--- a/python/private/stage2_bootstrap_template.py
+++ b/python/private/stage2_bootstrap_template.py
@@ -498,9 +498,29 @@
         cov_tool = None
 
     sys.stdout.flush()
+
+    # Add the user imports after the stdlib, but before the runtime's
+    # site-packages directory. This gives the stdlib precedence, while allowing
+    # users to override non-stdlib packages that may have been bundled with
+    # the runtime (usually pip).
+    # NOTE: There isn't a good way to identify the stdlib paths, so we just
+    # expect site-packages comes after it, per
+    # https://docs.python.org/3/library/sys_path_init.html#sys-path-init
+    for i, path in enumerate(sys.path):
+        # dist-packages is a debian convention, see
+        # https://wiki.debian.org/Python#Deviations_from_upstream
+        if os.path.basename(path) in ("site-packages", "dist-packages"):
+            sys.path[i:i] = python_path_entries
+            break
+    else:
+        # Otherwise, no site-packages directory was found, which is odd but ok.
+        sys.path.extend(python_path_entries)
+
     # NOTE: The sys.path must be modified before coverage is imported/activated
+    # NOTE: Perform this after the user imports are appended. This avoids a
+    # user import accidentally triggering the site-packages logic above.
     sys.path[0:0] = prepend_path_entries
-    sys.path.extend(python_path_entries)
+
     with _maybe_collect_coverage(enable=cov_tool is not None):
         # The first arg is this bootstrap, so drop that for the re-invocation.
         _run_py(main_filename, args=sys.argv[1:])
diff --git a/tests/base_rules/BUILD.bazel b/tests/base_rules/BUILD.bazel
index e04d314..cd57715 100644
--- a/tests/base_rules/BUILD.bazel
+++ b/tests/base_rules/BUILD.bazel
@@ -13,7 +13,7 @@
 # limitations under the License.
 
 load("//python/private:util.bzl", "IS_BAZEL_7_OR_HIGHER")  # buildifier: disable=bzl-visibility
-load("//tests/support:sh_py_run_test.bzl", "sh_py_run_test")
+load("//tests/support:sh_py_run_test.bzl", "py_reconfig_test", "sh_py_run_test")
 
 _SUPPORTS_BOOTSTRAP_SCRIPT = select({
     "@platforms//os:windows": ["@platforms//:incompatible"],
@@ -52,6 +52,25 @@
     target_compatible_with = _SUPPORTS_BOOTSTRAP_SCRIPT,
 )
 
+py_reconfig_test(
+    name = "sys_path_order_bootstrap_script_test",
+    srcs = ["sys_path_order_test.py"],
+    bootstrap_impl = "script",
+    env = {"BOOTSTRAP": "script"},
+    imports = ["./site-packages"],
+    main = "sys_path_order_test.py",
+    target_compatible_with = _SUPPORTS_BOOTSTRAP_SCRIPT,
+)
+
+py_reconfig_test(
+    name = "sys_path_order_bootstrap_system_python_test",
+    srcs = ["sys_path_order_test.py"],
+    bootstrap_impl = "system_python",
+    env = {"BOOTSTRAP": "system_python"},
+    imports = ["./site-packages"],
+    main = "sys_path_order_test.py",
+)
+
 sh_py_run_test(
     name = "inherit_pythonsafepath_env_test",
     bootstrap_impl = "script",
diff --git a/tests/base_rules/inherit_pythonsafepath_env_test.sh b/tests/base_rules/inherit_pythonsafepath_env_test.sh
index bf85d26..bc6e2d5 100755
--- a/tests/base_rules/inherit_pythonsafepath_env_test.sh
+++ b/tests/base_rules/inherit_pythonsafepath_env_test.sh
@@ -35,17 +35,35 @@
   local expected_pattern=$1
   local actual=$2
   if ! (echo "$actual" | grep "$expected_pattern" ) >/dev/null; then
-    echo "expected output to match: $expected_pattern"
-    echo "but got:\n$actual"
+    echo "expected to match: $expected_pattern"
+    echo "===== actual START ====="
+    echo "$actual"
+    echo "===== actual END ====="
+    echo
+    touch EXPECTATION_FAILED
     return 1
   fi
 }
 
 
+echo "Check inherited and disabled"
+# Verify setting it to empty string disables safe path
 actual=$(PYTHONSAFEPATH= $bin 2>&1)
 expect_match "sys.flags.safe_path: False" "$actual"
 expect_match "PYTHONSAFEPATH: EMPTY" "$actual"
 
+echo "Check inherited and propagated"
+# Verify setting it to any string enables safe path and that
+# value is propagated
 actual=$(PYTHONSAFEPATH=OUTER $bin 2>&1)
 expect_match "sys.flags.safe_path: True" "$actual"
 expect_match "PYTHONSAFEPATH: OUTER" "$actual"
+
+echo "Check enabled by default"
+# Verifying doing nothing leaves safepath enabled by default
+actual=$($bin 2>&1)
+expect_match "sys.flags.safe_path: True" "$actual"
+expect_match "PYTHONSAFEPATH: 1" "$actual"
+
+# Exit if any of the expects failed
+[[ ! -e EXPECTATION_FAILED ]]
diff --git a/tests/base_rules/sys_path_order_test.py b/tests/base_rules/sys_path_order_test.py
new file mode 100644
index 0000000..2e33464
--- /dev/null
+++ b/tests/base_rules/sys_path_order_test.py
@@ -0,0 +1,88 @@
+# Copyright 2024 The Bazel Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os.path
+import re
+import sys
+import unittest
+
+
+class SysPathOrderTest(unittest.TestCase):
+    def test_sys_path_order(self):
+        last_stdlib = None
+        first_user = None
+        first_runtime_site = None
+
+        # Classify paths into the three different types we care about: stdlib,
+        # user dependency, or the runtime's site-package's directory.
+        #
+        # Because they often share common prefixes with one another, and vary
+        # subtly between platforms, we do this in two passes: first categorize,
+        # then pick out the indexes. This is just so debugging is easier and
+        # error messages are more informative.
+        categorized_paths = []
+        for i, value in enumerate(sys.path):
+            # The runtime's root repo may be added to sys.path, but it
+            # counts as a user directory, not stdlib directory.
+            if value == sys.prefix:
+                category = "user"
+            elif value.startswith(sys.prefix):
+                # The runtime's site-package directory might be called
+                # dist-packages when using Debian's system python.
+                if os.path.basename(value).endswith("-packages"):
+                    category = "runtime-site"
+                else:
+                    category = "stdlib"
+            else:
+                category = "user"
+
+            categorized_paths.append((category, value))
+
+        for i, (category, _) in enumerate(categorized_paths):
+            if category == "stdlib":
+                last_stdlib = i
+            elif category == "runtime-site":
+                if first_runtime_site is None:
+                    first_runtime_site = i
+            elif category == "user":
+                if first_user is None:
+                    first_user = i
+
+        sys_path_str = "\n".join(
+            f"{i}: ({category}) {value}"
+            for i, (category, value) in enumerate(categorized_paths)
+        )
+        if None in (last_stdlib, first_user, first_runtime_site):
+            self.fail(
+                "Failed to find position for one of:\n"
+                + f"{last_stdlib=} {first_user=} {first_runtime_site=}\n"
+                + f"for sys.path:\n{sys_path_str}"
+            )
+
+        if os.environ["BOOTSTRAP"] == "script":
+            self.assertTrue(
+                last_stdlib < first_user < first_runtime_site,
+                f"Expected {last_stdlib=} < {first_user=} < {first_runtime_site=}\n"
+                + f"for sys.path:\n{sys_path_str}",
+            )
+        else:
+            self.assertTrue(
+                first_user < last_stdlib < first_runtime_site,
+                f"Expected {first_user=} < {last_stdlib=} < {first_runtime_site=}\n"
+                + f"for sys.path:\n{sys_path_str}",
+            )
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/support/sh_py_run_test.bzl b/tests/support/sh_py_run_test.bzl
index 4b3d22d..35be484 100644
--- a/tests/support/sh_py_run_test.bzl
+++ b/tests/support/sh_py_run_test.bzl
@@ -18,6 +18,7 @@
 """
 
 load("//python:py_binary.bzl", "py_binary")
+load("//python:py_test.bzl", "py_test")
 
 def _perform_transition_impl(input_settings, attr):
     settings = dict(input_settings)
@@ -37,7 +38,7 @@
     ],
 )
 
-def _transition_impl(ctx):
+def _py_reconfig_impl(ctx):
     default_info = ctx.attr.target[DefaultInfo]
     exe_ext = default_info.files_to_run.executable.extension
     if exe_ext:
@@ -66,27 +67,58 @@
         DefaultInfo(
             executable = executable,
             files = depset(default_outputs),
-            runfiles = default_info.default_runfiles,
+            # On windows, the other default outputs must also be included
+            # in runfiles so the exe launcher can find the backing file.
+            runfiles = ctx.runfiles(default_outputs).merge(
+                default_info.default_runfiles,
+            ),
         ),
         testing.TestEnvironment(
             environment = ctx.attr.env,
         ),
     ]
 
-transition_binary = rule(
-    implementation = _transition_impl,
-    attrs = {
-        "bootstrap_impl": attr.string(),
-        "build_python_zip": attr.string(default = "auto"),
-        "env": attr.string_dict(),
-        "target": attr.label(executable = True, cfg = "target"),
-        "_allowlist_function_transition": attr.label(
-            default = "@bazel_tools//tools/allowlists/function_transition_allowlist",
-        ),
-    },
-    cfg = _perform_transition,
-    executable = True,
-)
+def _make_reconfig_rule(**kwargs):
+    return rule(
+        implementation = _py_reconfig_impl,
+        attrs = {
+            "bootstrap_impl": attr.string(),
+            "build_python_zip": attr.string(default = "auto"),
+            "env": attr.string_dict(),
+            "target": attr.label(executable = True, cfg = "target"),
+            "_allowlist_function_transition": attr.label(
+                default = "@bazel_tools//tools/allowlists/function_transition_allowlist",
+            ),
+        },
+        cfg = _perform_transition,
+        **kwargs
+    )
+
+_py_reconfig_binary = _make_reconfig_rule(executable = True)
+
+_py_reconfig_test = _make_reconfig_rule(test = True)
+
+def py_reconfig_test(*, name, **kwargs):
+    """Create a py_test with customized build settings for testing.
+
+    Args:
+        name: str, name of teset target.
+        **kwargs: kwargs to pass along to _py_reconfig_test and py_test.
+    """
+    reconfig_kwargs = {}
+    reconfig_kwargs["bootstrap_impl"] = kwargs.pop("bootstrap_impl")
+    reconfig_kwargs["env"] = kwargs.get("env")
+    inner_name = "_{}_inner" + name
+    _py_reconfig_test(
+        name = name,
+        target = inner_name,
+        **reconfig_kwargs
+    )
+    py_test(
+        name = inner_name,
+        tags = ["manual"],
+        **kwargs
+    )
 
 def sh_py_run_test(*, name, sh_src, py_src, **kwargs):
     bin_name = "_{}_bin".format(name)
@@ -102,7 +134,7 @@
         },
     )
 
-    transition_binary(
+    _py_reconfig_binary(
         name = bin_name,
         tags = ["manual"],
         target = "_{}_plain_bin".format(name),