Revert internals destruction and add test for internals recreation (#5972)

* Bump internals version

* Prevent internals destruction before all pybind11 types are destroyed

* Use Py_XINCREF and Py_XDECREF

* Hold GIL before decref

* Use weakrefs

* Remove unused code

* Move code location

* Move code location

* Move code location

* Try add tests

* Fix PYTHONPATH

* Fix PYTHONPATH

* Skip tests for subprocess

* Revert to leak internals

* Revert to leak internals

* Revert "Revert to leak internals"

This reverts commit c5ec1cf8862b43c125bdaf582445f3cbb1347017.
This reverts commit 72c2e0aa9b481debd94557b629e86b954543d64b.

* Revert internals version bump

* Reapply to leak internals

This reverts commit 8f25a254e893817c2ed70d97cde7f62778a14de2.

* Add re-entrancy detection for internals creation

Prevent re-creation of internals after destruction during interpreter
shutdown. If pybind11 code runs after internals have been destroyed,
fail early with a clear error message instead of silently creating
new empty internals that would cause type lookup failures.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

* Fix C++11/C++14 support

* Add lock under multiple interpreters

* Try fix tests

* Try fix tests

* Try fix tests

* Update comments and assertion messages

* Update comments and assertion messages

* Update comments

* Update lock scope

* Use original pointer type for Windows

* Change hard error to warning

* Update lock scope

* Update lock scope to resolve deadlock

* Remove scope release of GIL

* Update comments

* Lock pp on reset

* Mark content created after assignment

* Update comments

* Simplify implementation

* Update lock scope when delete unique_ptr

---------

Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
diff --git a/docs/advanced/classes.rst b/docs/advanced/classes.rst
index faaba38..bfbaea6 100644
--- a/docs/advanced/classes.rst
+++ b/docs/advanced/classes.rst
@@ -1381,11 +1381,22 @@
 
 .. code-block:: cpp
 
-   struct OwnsPythonObjects {
-       py::object value = py::none();
+   struct ContainerOwnsPythonObjects {
+       std::vector<py::object> list;
+
+       void append(const py::object &obj) { list.emplace_back(obj); }
+       py::object at(py::ssize_t index) const {
+           if (index >= size() || index < 0) {
+               throw py::index_error("Index out of range");
+           }
+           return list.at(py::size_t(index));
+       }
+       py::ssize_t size() const { return py::ssize_t_cast(list.size()); }
+       void clear() { list.clear(); }
    };
-   py::class_<OwnsPythonObjects> cls(
-       m, "OwnsPythonObjects", py::custom_type_setup([](PyHeapTypeObject *heap_type) {
+
+   py::class_<ContainerOwnsPythonObjects> cls(
+       m, "ContainerOwnsPythonObjects", py::custom_type_setup([](PyHeapTypeObject *heap_type) {
            auto *type = &heap_type->ht_type;
            type->tp_flags |= Py_TPFLAGS_HAVE_GC;
            type->tp_traverse = [](PyObject *self_base, visitproc visit, void *arg) {
@@ -1394,20 +1405,28 @@
                Py_VISIT(Py_TYPE(self_base));
    #endif
                if (py::detail::is_holder_constructed(self_base)) {
-                   auto &self = py::cast<OwnsPythonObjects&>(py::handle(self_base));
-                   Py_VISIT(self.value.ptr());
+                   auto &self = py::cast<ContainerOwnsPythonObjects &>(py::handle(self_base));
+                   for (auto &item : self.list) {
+                       Py_VISIT(item.ptr());
+                   }
                }
                return 0;
            };
            type->tp_clear = [](PyObject *self_base) {
                if (py::detail::is_holder_constructed(self_base)) {
-                   auto &self = py::cast<OwnsPythonObjects&>(py::handle(self_base));
-                   self.value = py::none();
+                   auto &self = py::cast<ContainerOwnsPythonObjects &>(py::handle(self_base));
+                   for (auto &item : self.list) {
+                       Py_CLEAR(item.ptr());
+                   }
+                   self.list.clear();
                }
                return 0;
            };
        }));
    cls.def(py::init<>());
-   cls.def_readwrite("value", &OwnsPythonObjects::value);
+   cls.def("append", &ContainerOwnsPythonObjects::append);
+   cls.def("at", &ContainerOwnsPythonObjects::at);
+   cls.def("size", &ContainerOwnsPythonObjects::size);
+   cls.def("clear", &ContainerOwnsPythonObjects::clear);
 
 .. versionadded:: 2.8
diff --git a/include/pybind11/detail/internals.h b/include/pybind11/detail/internals.h
index 9b3e69f..7cfa5da 100644
--- a/include/pybind11/detail/internals.h
+++ b/include/pybind11/detail/internals.h
@@ -337,19 +337,7 @@
     internals(internals &&other) = delete;
     internals &operator=(const internals &other) = delete;
     internals &operator=(internals &&other) = delete;
-    ~internals() {
-        // Normally this destructor runs during interpreter finalization and it may DECREF things.
-        // In odd finalization scenarios it might end up running after the interpreter has
-        // completely shut down, In that case, we should not decref these objects because pymalloc
-        // is gone.  This also applies across sub-interpreters, we should only DECREF when the
-        // original owning interpreter is active.
-        auto *cur_istate = get_interpreter_state_unchecked();
-        if (cur_istate && cur_istate == istate) {
-            Py_CLEAR(instance_base);
-            Py_CLEAR(default_metaclass);
-            Py_CLEAR(static_property_type);
-        }
-    }
+    ~internals() = default;
 };
 
 // the internals struct (above) is shared between all the modules. local_internals are only
@@ -359,8 +347,6 @@
 // impact any other modules, because the only things accessing the local internals is the
 // module that contains them.
 struct local_internals {
-    local_internals() : istate(get_interpreter_state_unchecked()) {}
-
     // It should be safe to use fast_type_map here because this entire
     // data structure is scoped to our single module, and thus a single
     // DSO and single instance of type_info for any particular type.
@@ -368,19 +354,6 @@
 
     std::forward_list<ExceptionTranslator> registered_exception_translators;
     PyTypeObject *function_record_py_type = nullptr;
-    PyInterpreterState *istate = nullptr;
-
-    ~local_internals() {
-        // Normally this destructor runs during interpreter finalization and it may DECREF things.
-        // In odd finalization scenarios it might end up running after the interpreter has
-        // completely shut down, In that case, we should not decref these objects because pymalloc
-        // is gone.  This also applies across sub-interpreters, we should only DECREF when the
-        // original owning interpreter is active.
-        auto *cur_istate = get_interpreter_state_unchecked();
-        if (cur_istate && cur_istate == istate) {
-            Py_CLEAR(function_record_py_type);
-        }
-    }
 };
 
 enum class holder_enum_t : uint8_t {
@@ -576,6 +549,10 @@
 }
 #endif
 
+// Sentinel value for the `dtor` parameter of `atomic_get_or_create_in_state_dict`.
+// Indicates no destructor was explicitly provided (distinct from nullptr, which means "leak").
+#define PYBIND11_DTOR_USE_DELETE (reinterpret_cast<void (*)(PyObject *)>(1))
+
 // Get or create per-storage capsule in the current interpreter's state dict.
 //   - The storage is interpreter-dependent: different interpreters will have different storage.
 //     This is important when using multiple-interpreters, to avoid sharing unshareable objects
@@ -592,9 +569,14 @@
 //
 // Returns: pair of (pointer to storage, bool indicating if newly created).
 //          The bool follows std::map::insert convention: true = created, false = existed.
+// `dtor`: optional destructor called when the interpreter shuts down.
+//   - If not provided: the storage will be deleted using `delete`.
+//   - If nullptr: the storage will be leaked (useful for singletons that outlive the interpreter).
+//   - If a function: that function will be called with the capsule object.
 template <typename Payload>
 std::pair<Payload *, bool> atomic_get_or_create_in_state_dict(const char *key,
-                                                              void (*dtor)(PyObject *) = nullptr) {
+                                                              void (*dtor)(PyObject *)
+                                                              = PYBIND11_DTOR_USE_DELETE) {
     error_scope err_scope; // preserve any existing Python error states
 
     auto state_dict = reinterpret_borrow<dict>(get_python_state_dict());
@@ -640,7 +622,7 @@
         // - Otherwise, our `new_capsule` is now in the dict, and it owns the storage and the state
         //   dict will incref it.  We need to set the caller's destructor on it, which will be
         //   called when the interpreter shuts down.
-        if (created && dtor) {
+        if (created && dtor != PYBIND11_DTOR_USE_DELETE) {
             if (PyCapsule_SetDestructor(capsule_obj, dtor) < 0) {
                 throw error_already_set();
             }
@@ -657,6 +639,8 @@
     return std::pair<Payload *, bool>(static_cast<Payload *>(raw_ptr), created);
 }
 
+#undef PYBIND11_DTOR_USE_DELETE
+
 template <typename InternalsType>
 class internals_pp_manager {
 public:
@@ -713,42 +697,75 @@
             // this could be called without an active interpreter, just use what was cached
             if (!tstate || tstate->interp == last_istate_tls()) {
                 auto tpp = internals_p_tls();
-
-                delete tpp;
+                {
+                    std::lock_guard<std::mutex> lock(pp_set_mutex_);
+                    pps_have_created_content_.erase(tpp); // untrack deleted pp
+                }
+                delete tpp; // may call back into Python
             }
             unref();
             return;
         }
 #endif
-        delete internals_singleton_pp_;
+        {
+            std::lock_guard<std::mutex> lock(pp_set_mutex_);
+            pps_have_created_content_.erase(internals_singleton_pp_); // untrack deleted pp
+        }
+        delete internals_singleton_pp_; // may call back into Python
         unref();
     }
 
+    void create_pp_content_once(std::unique_ptr<InternalsType> *const pp) {
+        // Assume the GIL is held here. May call back into Python. We cannot hold the lock with our
+        // mutex here. So there may be multiple threads creating the content at the same time. Only
+        // one will install its content to pp below. Others will be freed when going out of scope.
+        auto tmp = std::unique_ptr<InternalsType>(new InternalsType());
+
+        {
+            // Lock scope must not include Python calls, which may require the GIL and cause
+            // deadlocks.
+            std::lock_guard<std::mutex> lock(pp_set_mutex_);
+
+            if (*pp) {
+                // Already created in another thread.
+                return;
+            }
+
+            // At this point, pp->get() is nullptr.
+            // The content is either not yet created, or was previously destroyed via pp->reset().
+
+            // Detect re-creation of internals after destruction during interpreter shutdown.
+            // If pybind11 code (e.g., tp_traverse/tp_clear calling py::cast) runs after internals
+            // have been destroyed, a new empty internals would be created, causing type lookup
+            // failures. See also get_or_create_pp_in_state_dict() comments.
+            if (pps_have_created_content_.find(pp) != pps_have_created_content_.end()) {
+                pybind11_fail(
+                    "pybind11::detail::internals_pp_manager::create_pp_content_once() "
+                    "FAILED: reentrant call detected while fetching pybind11 internals!");
+            }
+
+            // Each interpreter can only create its internals once.
+            pps_have_created_content_.insert(pp);
+            // Install the created content.
+            pp->swap(tmp);
+        }
+    }
+
 private:
     internals_pp_manager(char const *id, on_fetch_function *on_fetch)
         : holder_id_(id), on_fetch_(on_fetch) {}
 
-    static void internals_shutdown(PyObject *capsule) {
-        auto *pp = static_cast<std::unique_ptr<InternalsType> *>(
-            PyCapsule_GetPointer(capsule, nullptr));
-        if (pp) {
-            pp->reset();
-        }
-        // We reset the unique_ptr's contents but cannot delete the unique_ptr itself here.
-        // The pp_manager in this module (and possibly other modules sharing internals) holds
-        // a raw pointer to this unique_ptr, and that pointer would dangle if we deleted it now.
-        //
-        // For pybind11-owned interpreters (via embed.h or subinterpreter.h), destroy() is
-        // called after Py_Finalize/Py_EndInterpreter completes, which safely deletes the
-        // unique_ptr. For interpreters not owned by pybind11 (e.g., a pybind11 extension
-        // loaded into an external interpreter), destroy() is never called and the unique_ptr
-        // shell (8 bytes, not its contents) is leaked.
-        // (See PR #5958 for ideas to eliminate this leak.)
-    }
-
     std::unique_ptr<InternalsType> *get_or_create_pp_in_state_dict() {
+        // The `unique_ptr<InternalsType>` is intentionally leaked on interpreter shutdown.
+        // Once an instance is created, it will never be deleted until the process exits (compare
+        // to interpreter shutdown in multiple-interpreter scenarios).
+        // We cannot guarantee the destruction order of capsules in the interpreter state dict on
+        // interpreter shutdown, so deleting internals too early could cause undefined behavior
+        // when other pybind11 objects access `get_internals()` during finalization (which would
+        // recreate empty internals). See also create_pp_content_once() above.
+        // See https://github.com/pybind/pybind11/pull/5958#discussion_r2717645230.
         auto result = atomic_get_or_create_in_state_dict<std::unique_ptr<InternalsType>>(
-            holder_id_, &internals_shutdown);
+            holder_id_, /*dtor=*/nullptr /* leak the capsule content */);
         auto *pp = result.first;
         bool created = result.second;
         // Only call on_fetch_ when fetching existing internals, not when creating new ones.
@@ -774,7 +791,12 @@
     on_fetch_function *on_fetch_ = nullptr;
     // Pointer-to-pointer to the singleton internals for the first seen interpreter (may not be the
     // main interpreter)
-    std::unique_ptr<InternalsType> *internals_singleton_pp_;
+    std::unique_ptr<InternalsType> *internals_singleton_pp_ = nullptr;
+
+    // Track pointer-to-pointers whose internals have been created, to detect re-entrancy.
+    // Use instance member over static due to singleton pattern of this class.
+    std::unordered_set<std::unique_ptr<InternalsType> *> pps_have_created_content_;
+    std::mutex pp_set_mutex_;
 };
 
 // If We loaded the internals through `state_dict`, our `error_already_set`
@@ -815,7 +837,8 @@
         // Slow path, something needs fetched from the state dict or created
         gil_scoped_acquire_simple gil;
         error_scope err_scope;
-        internals_ptr.reset(new internals());
+
+        ppmgr.create_pp_content_once(&internals_ptr);
 
         if (!internals_ptr->instance_base) {
             // This calls get_internals, so cannot be called from within the internals constructor
@@ -826,6 +849,31 @@
     return *internals_ptr;
 }
 
+/// Return the PyObject* for the internals capsule (borrowed reference).
+/// Returns nullptr if the capsule doesn't exist yet.
+inline PyObject *get_internals_capsule() {
+    auto state_dict = reinterpret_borrow<dict>(get_python_state_dict());
+    return dict_getitemstring(state_dict.ptr(), PYBIND11_INTERNALS_ID);
+}
+
+/// Return the key used for local_internals in the state dict.
+/// This function ensures a consistent key is used across all call sites within the same
+/// compilation unit. The key includes the address of a static variable to make it unique per
+/// module (DSO), matching the behavior of get_local_internals_pp_manager().
+inline const std::string &get_local_internals_key() {
+    static const std::string key
+        = PYBIND11_MODULE_LOCAL_ID + std::to_string(reinterpret_cast<uintptr_t>(&key));
+    return key;
+}
+
+/// Return the PyObject* for the local_internals capsule (borrowed reference).
+/// Returns nullptr if the capsule doesn't exist yet.
+inline PyObject *get_local_internals_capsule() {
+    const auto &key = get_local_internals_key();
+    auto state_dict = reinterpret_borrow<dict>(get_python_state_dict());
+    return dict_getitemstring(state_dict.ptr(), key.c_str());
+}
+
 inline void ensure_internals() {
     pybind11::detail::get_internals_pp_manager().unref();
 #ifdef PYBIND11_HAS_SUBINTERPRETER_SUPPORT
@@ -837,12 +885,10 @@
 }
 
 inline internals_pp_manager<local_internals> &get_local_internals_pp_manager() {
-    // Use the address of this static itself as part of the key, so that the value is uniquely tied
+    // Use the address of a static variable as part of the key, so that the value is uniquely tied
     // to where the module is loaded in memory
-    static const std::string this_module_idstr
-        = PYBIND11_MODULE_LOCAL_ID
-          + std::to_string(reinterpret_cast<uintptr_t>(&this_module_idstr));
-    return internals_pp_manager<local_internals>::get_instance(this_module_idstr.c_str(), nullptr);
+    return internals_pp_manager<local_internals>::get_instance(get_local_internals_key().c_str(),
+                                                               nullptr);
 }
 
 /// Works like `get_internals`, but for things which are locally registered.
@@ -850,7 +896,10 @@
     auto &ppmgr = get_local_internals_pp_manager();
     auto &internals_ptr = *ppmgr.get_pp();
     if (!internals_ptr) {
-        internals_ptr.reset(new local_internals());
+        gil_scoped_acquire_simple gil;
+        error_scope err_scope;
+
+        ppmgr.create_pp_content_once(&internals_ptr);
     }
     return *internals_ptr;
 }
diff --git a/tests/env.py b/tests/env.py
index ee932ad..8c06178 100644
--- a/tests/env.py
+++ b/tests/env.py
@@ -29,3 +29,31 @@
     or GRAALPY
     or (CPYTHON and PY_GIL_DISABLED and (3, 13) <= sys.version_info < (3, 14))
 )
+
+
+def check_script_success_in_subprocess(code: str, *, rerun: int = 8) -> None:
+    """Runs the given code in a subprocess."""
+    import os
+    import subprocess
+    import sys
+    import textwrap
+
+    code = textwrap.dedent(code).strip()
+    try:
+        for _ in range(rerun):  # run flakily failing test multiple times
+            subprocess.check_output(
+                [sys.executable, "-c", code],
+                cwd=os.getcwd(),
+                stderr=subprocess.STDOUT,
+                text=True,
+            )
+    except subprocess.CalledProcessError as ex:
+        raise RuntimeError(
+            f"Subprocess failed with exit code {ex.returncode}.\n\n"
+            f"Code:\n"
+            f"```python\n"
+            f"{code}\n"
+            f"```\n\n"
+            f"Output:\n"
+            f"{ex.output}"
+        ) from None
diff --git a/tests/test_custom_type_setup.cpp b/tests/test_custom_type_setup.cpp
index 35d30ab..15516b2 100644
--- a/tests/test_custom_type_setup.cpp
+++ b/tests/test_custom_type_setup.cpp
@@ -7,22 +7,67 @@
     BSD-style license that can be found in the LICENSE file.
 */
 
+#include <pybind11/detail/internals.h>
 #include <pybind11/pybind11.h>
 
 #include "pybind11_tests.h"
 
+#include <vector>
+
 namespace py = pybind11;
 
 namespace {
+struct ContainerOwnsPythonObjects {
+    std::vector<py::object> list;
 
-struct OwnsPythonObjects {
-    py::object value = py::none();
+    void append(const py::object &obj) { list.emplace_back(obj); }
+    py::object at(py::ssize_t index) const {
+        if (index >= size() || index < 0) {
+            throw py::index_error("Index out of range");
+        }
+        return list.at(py::size_t(index));
+    }
+    py::ssize_t size() const { return py::ssize_t_cast(list.size()); }
+    void clear() { list.clear(); }
 };
+
+void add_gc_checkers_with_weakrefs(const py::object &obj) {
+    py::handle global_capsule = py::detail::get_internals_capsule();
+    if (!global_capsule) {
+        throw std::runtime_error("No global internals capsule found");
+    }
+    (void) py::weakref(obj, py::cpp_function([global_capsule](py::handle weakref) -> void {
+                           py::handle current_global_capsule = py::detail::get_internals_capsule();
+                           if (!current_global_capsule.is(global_capsule)) {
+                               throw std::runtime_error(
+                                   "Global internals capsule was destroyed prematurely");
+                           }
+                           weakref.dec_ref();
+                       }))
+        .release();
+
+    py::handle local_capsule = py::detail::get_local_internals_capsule();
+    if (!local_capsule) {
+        throw std::runtime_error("No local internals capsule found");
+    }
+    (void) py::weakref(
+        obj, py::cpp_function([local_capsule](py::handle weakref) -> void {
+            py::handle current_local_capsule = py::detail::get_local_internals_capsule();
+            if (!current_local_capsule.is(local_capsule)) {
+                throw std::runtime_error("Local internals capsule was destroyed prematurely");
+            }
+            weakref.dec_ref();
+        }))
+        .release();
+}
 } // namespace
 
 TEST_SUBMODULE(custom_type_setup, m) {
-    py::class_<OwnsPythonObjects> cls(
-        m, "OwnsPythonObjects", py::custom_type_setup([](PyHeapTypeObject *heap_type) {
+    py::class_<ContainerOwnsPythonObjects> cls(
+        m,
+        "ContainerOwnsPythonObjects",
+        // Please review/update docs/advanced/classes.rst after making changes here.
+        py::custom_type_setup([](PyHeapTypeObject *heap_type) {
             auto *type = &heap_type->ht_type;
             type->tp_flags |= Py_TPFLAGS_HAVE_GC;
             type->tp_traverse = [](PyObject *self_base, visitproc visit, void *arg) {
@@ -31,19 +76,29 @@
                 Py_VISIT(Py_TYPE(self_base));
 #endif
                 if (py::detail::is_holder_constructed(self_base)) {
-                    auto &self = py::cast<OwnsPythonObjects &>(py::handle(self_base));
-                    Py_VISIT(self.value.ptr());
+                    auto &self = py::cast<ContainerOwnsPythonObjects &>(py::handle(self_base));
+                    for (auto &item : self.list) {
+                        Py_VISIT(item.ptr());
+                    }
                 }
                 return 0;
             };
             type->tp_clear = [](PyObject *self_base) {
                 if (py::detail::is_holder_constructed(self_base)) {
-                    auto &self = py::cast<OwnsPythonObjects &>(py::handle(self_base));
-                    self.value = py::none();
+                    auto &self = py::cast<ContainerOwnsPythonObjects &>(py::handle(self_base));
+                    for (auto &item : self.list) {
+                        Py_CLEAR(item.ptr());
+                    }
+                    self.list.clear();
                 }
                 return 0;
             };
         }));
     cls.def(py::init<>());
-    cls.def_readwrite("value", &OwnsPythonObjects::value);
+    cls.def("append", &ContainerOwnsPythonObjects::append);
+    cls.def("at", &ContainerOwnsPythonObjects::at);
+    cls.def("size", &ContainerOwnsPythonObjects::size);
+    cls.def("clear", &ContainerOwnsPythonObjects::clear);
+
+    m.def("add_gc_checkers_with_weakrefs", &add_gc_checkers_with_weakrefs);
 }
diff --git a/tests/test_custom_type_setup.py b/tests/test_custom_type_setup.py
index bb2865c..4c6b951 100644
--- a/tests/test_custom_type_setup.py
+++ b/tests/test_custom_type_setup.py
@@ -1,11 +1,14 @@
 from __future__ import annotations
 
 import gc
+import os
+import sys
 import weakref
 
 import pytest
 
-import env  # noqa: F401
+import env
+import pybind11_tests
 from pybind11_tests import custom_type_setup as m
 
 
@@ -36,15 +39,46 @@
 # PyPy does not seem to reliably garbage collect.
 @pytest.mark.skipif("env.PYPY or env.GRAALPY")
 def test_self_cycle(gc_tester):
-    obj = m.OwnsPythonObjects()
-    obj.value = obj
+    obj = m.ContainerOwnsPythonObjects()
+    obj.append(obj)
     gc_tester(obj)
 
 
 # PyPy does not seem to reliably garbage collect.
 @pytest.mark.skipif("env.PYPY or env.GRAALPY")
 def test_indirect_cycle(gc_tester):
-    obj = m.OwnsPythonObjects()
-    obj_list = [obj]
-    obj.value = obj_list
+    obj = m.ContainerOwnsPythonObjects()
+    obj.append([obj])
     gc_tester(obj)
+
+
+@pytest.mark.skipif(
+    env.IOS or sys.platform.startswith("emscripten"),
+    reason="Requires subprocess support",
+)
+@pytest.mark.skipif("env.PYPY or env.GRAALPY")
+def test_py_cast_useable_on_shutdown():
+    """Test that py::cast works during interpreter shutdown.
+
+    See PR #5972 and https://github.com/pybind/pybind11/pull/5958#discussion_r2717645230.
+    """
+    env.check_script_success_in_subprocess(
+        f"""
+        import sys
+
+        sys.path.insert(0, {os.path.dirname(env.__file__)!r})
+        sys.path.insert(0, {os.path.dirname(pybind11_tests.__file__)!r})
+
+        from pybind11_tests import custom_type_setup as m
+
+        # Create a self-referential cycle that will be collected during shutdown.
+        # The tp_traverse and tp_clear callbacks call py::cast, which requires
+        # internals to still be valid.
+        obj = m.ContainerOwnsPythonObjects()
+        obj.append(obj)
+
+        # Add weakref callbacks that verify the capsule is still alive when the
+        # pybind11 object is garbage collected during shutdown.
+        m.add_gc_checkers_with_weakrefs(obj)
+        """
+    )
diff --git a/tests/test_multiple_interpreters.py b/tests/test_multiple_interpreters.py
index 44877e7..56d303a 100644
--- a/tests/test_multiple_interpreters.py
+++ b/tests/test_multiple_interpreters.py
@@ -3,7 +3,6 @@
 import contextlib
 import os
 import pickle
-import subprocess
 import sys
 import textwrap
 
@@ -219,6 +218,7 @@
     def test():
         import sys
 
+        sys.path.insert(0, {os.path.dirname(env.__file__)!r})
         sys.path.insert(0, {os.path.dirname(pybind11_tests.__file__)!r})
 
         import collections
@@ -269,36 +269,13 @@
         interp.exec(code)
 
 
-def check_script_success_in_subprocess(code: str, *, rerun: int = 8) -> None:
-    """Runs the given code in a subprocess."""
-    code = textwrap.dedent(code).strip()
-    try:
-        for _ in range(rerun):  # run flakily failing test multiple times
-            subprocess.check_output(
-                [sys.executable, "-c", code],
-                cwd=os.getcwd(),
-                stderr=subprocess.STDOUT,
-                text=True,
-            )
-    except subprocess.CalledProcessError as ex:
-        raise RuntimeError(
-            f"Subprocess failed with exit code {ex.returncode}.\n\n"
-            f"Code:\n"
-            f"```python\n"
-            f"{code}\n"
-            f"```\n\n"
-            f"Output:\n"
-            f"{ex.output}"
-        ) from None
-
-
 @pytest.mark.skipif(
     sys.platform.startswith("emscripten"), reason="Requires loadable modules"
 )
 @pytest.mark.skipif(not CONCURRENT_INTERPRETERS_SUPPORT, reason="Requires 3.14.0b3+")
 def test_import_in_subinterpreter_after_main():
     """Tests that importing a module in a subinterpreter after the main interpreter works correctly"""
-    check_script_success_in_subprocess(
+    env.check_script_success_in_subprocess(
         PREAMBLE_CODE
         + textwrap.dedent(
             """
@@ -319,7 +296,7 @@
         )
     )
 
-    check_script_success_in_subprocess(
+    env.check_script_success_in_subprocess(
         PREAMBLE_CODE
         + textwrap.dedent(
             """
@@ -354,7 +331,7 @@
 @pytest.mark.skipif(not CONCURRENT_INTERPRETERS_SUPPORT, reason="Requires 3.14.0b3+")
 def test_import_in_subinterpreter_before_main():
     """Tests that importing a module in a subinterpreter before the main interpreter works correctly"""
-    check_script_success_in_subprocess(
+    env.check_script_success_in_subprocess(
         PREAMBLE_CODE
         + textwrap.dedent(
             """
@@ -375,7 +352,7 @@
         )
     )
 
-    check_script_success_in_subprocess(
+    env.check_script_success_in_subprocess(
         PREAMBLE_CODE
         + textwrap.dedent(
             """
@@ -401,7 +378,7 @@
         )
     )
 
-    check_script_success_in_subprocess(
+    env.check_script_success_in_subprocess(
         PREAMBLE_CODE
         + textwrap.dedent(
             """
@@ -434,7 +411,7 @@
 @pytest.mark.skipif(not CONCURRENT_INTERPRETERS_SUPPORT, reason="Requires 3.14.0b3+")
 def test_import_in_subinterpreter_concurrently():
     """Tests that importing a module in multiple subinterpreters concurrently works correctly"""
-    check_script_success_in_subprocess(
+    env.check_script_success_in_subprocess(
         PREAMBLE_CODE
         + textwrap.dedent(
             """