kernel: handle thread self-aborts on idle thread

Fixes races where threads on another CPU join the exiting
thread: the exiting thread could still be running when the
joiners wake up on a different CPU.
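
For illustration, the kind of joiner code affected looks like the
sketch below. The names are hypothetical and not part of this patch;
the point is that once k_thread_join() returns, the caller may
immediately reuse the thread object and stack, so the exiting thread
must be completely off-CPU by then.

  #include <zephyr.h>

  static struct k_thread worker_thread;
  K_THREAD_STACK_DEFINE(worker_stack, 1024);

  static void worker_entry(void *p1, void *p2, void *p3)
  {
          ARG_UNUSED(p1);
          ARG_UNUSED(p2);
          ARG_UNUSED(p3);
          /* do some work, then return, which aborts the thread */
  }

  void respawn_worker(void)
  {
          k_thread_create(&worker_thread, worker_stack,
                          K_THREAD_STACK_SIZEOF(worker_stack),
                          worker_entry, NULL, NULL, NULL,
                          K_PRIO_PREEMPT(5), 0, K_NO_WAIT);

          /* Blocks until the worker exits... */
          (void)k_thread_join(&worker_thread, K_FOREVER);

          /* ...at which point &worker_thread and worker_stack are only
           * safe to reuse if the old thread is no longer running on
           * any CPU.
           */
          k_thread_create(&worker_thread, worker_stack,
                          K_THREAD_STACK_SIZEOF(worker_stack),
                          worker_entry, NULL, NULL, NULL,
                          K_PRIO_PREEMPT(5), 0, K_NO_WAIT);
  }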

Fixes problems where the thread object is still being
used by the kernel when the fn_abort() function is called,
preventing the thread object from being recycled or
freed back to a slab pool.
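
As a sketch of the recycling pattern this enables (all names here are
hypothetical; fn_abort is the struct k_thread field read by the hunks
below, and the assumption is that whoever owns the thread installs the
hook before starting it):

  #include <zephyr.h>

  static struct k_thread dyn_thread;     /* dynamically recycled slot */
  static atomic_t dyn_in_use;

  static void dyn_abort_hook(void)
  {
          /* The kernel must be completely finished with dyn_thread
           * before this hook runs; only then may the slot be marked
           * free (or, in other designs, the object returned to a slab
           * pool).
           */
          atomic_clear(&dyn_in_use);
  }

  /* Hypothetical setup step: the slot owner installs the hook on the
   * thread object before it is started.
   */
  void dyn_slot_setup(void)
  {
          dyn_thread.fn_abort = dyn_abort_hook;
  }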

Fixes a race where a thread is aborted from one CPU while
it self-aborts on another CPU, which was previously worked
around with a busy-wait.

Precedent for doing this comes from FreeRTOS, which also
performs final thread cleanup in the idle thread.
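
As a sketch of the idle-thread side of this scheme (the loop shape is
illustrative only, not the code added by this patch; pending_abort and
z_thread_single_abort() are the kernel-internal names used by the
hunks below, and locking details are omitted):

  /* Kernel-internal context assumed (as in kernel/sched.c); sketch of
   * the idea only.
   */
  void idle_finalize_pending_abort(void)
  {
          struct k_thread *aborted = _current_cpu->pending_abort;

          if (aborted != NULL) {
                  _current_cpu->pending_abort = NULL;

                  /* The self-aborted thread is no longer running on
                   * this CPU once the idle thread is, so its teardown
                   * can be finished here.
                   */
                  z_thread_single_abort(aborted);
          }
  }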

Some logic in z_thread_single_abort() has been rearranged
so that once sched_spinlock is released, the thread object
pointer is never dereferenced by the kernel again; join
waiters or fn_abort() logic may free it immediately.
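
Reduced to its essentials, the ordering constraint is the one sketched
below (a hypothetical helper, not the exact code in the hunks that
follow): snapshot anything needed from the thread object while
sched_spinlock is held, and never dereference the pointer after the
unlock.

  /* Kernel-internal sketch (as in kernel/sched.c); "finish_abort" is a
   * hypothetical name.
   */
  void finish_abort(struct k_thread *thread)
  {
          void (*abort_hook)(void);
          k_spinlock_key_t key = k_spin_lock(&sched_spinlock);

          abort_hook = thread->fn_abort;   /* last read of *thread */
          thread->base.thread_state |= _THREAD_DEAD;
          k_spin_unlock(&sched_spinlock, key);

          /* Once the lock is dropped, a joiner or the recycling logic
           * may free or reuse the object, so *thread must not be
           * touched again.
           */
          if (abort_hook != NULL) {
                  abort_hook();
          }
  }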

An assertion has been added to z_thread_single_abort() to
ensure it is never called with thread == _current outside
of an ISR.

Logic has been added to ensure z_thread_single_abort()'s
cleanup steps don't run more than once.

Fixes: #26486
Related to: #23063 #23062

Signed-off-by: Andrew Boie <andrew.p.boie@intel.com>
diff --git a/kernel/sched.c b/kernel/sched.c
index 0d1c12a..b9c7e3b 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -182,7 +182,16 @@
 
 static ALWAYS_INLINE struct k_thread *next_up(void)
 {
-	struct k_thread *thread = _priq_run_best(&_kernel.ready_q.runq);
+	struct k_thread *thread;
+
+	/* If a thread self-aborted we need the idle thread to clean it up
+	 * before any other thread can run on this CPU
+	 */
+	if (_current_cpu->pending_abort != NULL) {
+		return _current_cpu->idle_thread;
+	}
+
+	thread = _priq_run_best(&_kernel.ready_q.runq);
 
 #if (CONFIG_NUM_METAIRQ_PRIORITIES > 0) && (CONFIG_NUM_COOP_PRIORITIES > 0)
 	/* MetaIRQs must always attempt to return back to a
@@ -366,7 +375,7 @@
 	    !is_preempt(_current)) {
 		/* Record new preemption */
 		_current_cpu->metairq_preempted = _current;
-	} else if (!is_metairq(thread)) {
+	} else if (!is_metairq(thread) && !z_is_idle_thread_object(thread)) {
 		/* Returning from existing preemption */
 		_current_cpu->metairq_preempted = NULL;
 	}
@@ -497,12 +506,25 @@
 
 void z_thread_single_abort(struct k_thread *thread)
 {
+	void (*fn_abort)(void) = NULL;
+
 	__ASSERT(!(thread->base.user_options & K_ESSENTIAL),
 		 "essential thread aborted");
+	__ASSERT(thread != _current || arch_is_in_isr(),
+		 "self-abort detected");
 
-	if (thread->fn_abort != NULL) {
-		thread->fn_abort();
+	/* Prevent any of the further logic in this function from running more
+	 * than once
+	 */
+	k_spinlock_key_t key = k_spin_lock(&sched_spinlock);
+	if ((thread->base.thread_state &
+	     (_THREAD_ABORTING | _THREAD_DEAD)) != 0) {
+		LOG_DBG("Thread %p already dead or on the way out", thread);
+		k_spin_unlock(&sched_spinlock, key);
+		return;
 	}
+	thread->base.thread_state |= _THREAD_ABORTING;
+	k_spin_unlock(&sched_spinlock, key);
 
 	(void)z_abort_thread_timeout(thread);
 
@@ -511,6 +533,7 @@
 	}
 
 	LOCKED(&sched_spinlock) {
+		LOG_DBG("Cleanup aborting thread %p", thread);
 		struct k_thread *waiter;
 
 		if (z_is_thread_ready(thread)) {
@@ -529,37 +552,6 @@
 			}
 		}
 
-		uint32_t mask = _THREAD_DEAD;
-
-		/* If the abort is happening in interrupt context,
-		 * that means that execution will never return to the
-		 * thread's stack and that the abort is known to be
-		 * complete.  Otherwise the thread still runs a bit
-		 * until it can swap, requiring a delay.
-		 */
-		if (IS_ENABLED(CONFIG_SMP) && arch_is_in_isr()) {
-			mask |= _THREAD_ABORTED_IN_ISR;
-		}
-
-		thread->base.thread_state |= mask;
-
-#ifdef CONFIG_USERSPACE
-		/* Clear initialized state so that this thread object may be
-		 * re-used and triggers errors if API calls are made on it from
-		 * user threads
-		 *
-		 * For a whole host of reasons this is not ideal and should be
-		 * iterated on.
-		 */
-		z_object_uninit(thread->stack_obj);
-		z_object_uninit(thread);
-
-		/* Revoke permissions on thread's ID so that it may be
-		 * recycled
-		 */
-		z_thread_perms_all_clear(thread);
-#endif
-
 		/* Wake everybody up who was trying to join with this thread.
 		 * A reschedule is invoked later by k_thread_abort().
 		 */
@@ -572,8 +564,49 @@
 			arch_thread_return_value_set(waiter, 0);
 			ready_thread(waiter);
 		}
+
+		if (z_is_idle_thread_object(_current)) {
+			update_cache(1);
+		}
+
+		thread->base.thread_state |= _THREAD_DEAD;
+
+		/* Read this here from the thread struct now instead of
+		 * after we unlock
+		 */
+		fn_abort = thread->fn_abort;
+
+		/* Keep inside the spinlock as these may use the contents
+		 * of the thread object. As soon as we release this spinlock,
+		 * the thread object could be destroyed at any time.
+		 */
 		sys_trace_thread_abort(thread);
 		z_thread_monitor_exit(thread);
+
+#ifdef CONFIG_USERSPACE
+		/* Revoke permissions on thread's ID so that it may be
+		 * recycled
+		 */
+		z_thread_perms_all_clear(thread);
+
+		/* Clear initialized state so that this thread object may be
+		 * re-used and triggers errors if API calls are made on it from
+		 * user threads
+		 */
+		z_object_uninit(thread->stack_obj);
+		z_object_uninit(thread);
+#endif
+		/* Kernel should never look at the thread object again past
+		 * this point unless another thread API is called. If the
+		 * object doesn't get corrupted, we'll catch other
+		 * k_thread_abort()s on this object, although this is
+		 * somewhat undefined behavior. It must be safe to call
+		 * k_thread_create() or free the object at this point.
+		 */
+	}
+
+	if (fn_abort != NULL) {
+		fn_abort();
 	}
 }
 
@@ -1343,9 +1376,6 @@
 	 * it locally.  Not all architectures support that, alas.  If
 	 * we don't have it, we need to wait for some other interrupt.
 	 */
-	key = k_spin_lock(&sched_spinlock);
-	thread->base.thread_state |= _THREAD_ABORTING;
-	k_spin_unlock(&sched_spinlock, key);
 #ifdef CONFIG_SCHED_IPI_SUPPORTED
 	arch_sched_ipi();
 #endif
@@ -1369,16 +1399,6 @@
 			k_busy_wait(100);
 		}
 	}
-
-	/* If the thread self-aborted (e.g. its own exit raced with
-	 * this external abort) then even though it is flagged DEAD,
-	 * it's still running until its next swap and thus the thread
-	 * object is still in use.  We have to resort to a fallback
-	 * delay in that circumstance.
-	 */
-	if ((thread->base.thread_state & _THREAD_ABORTED_IN_ISR) == 0U) {
-		k_busy_wait(THREAD_ABORT_DELAY_US);
-	}
 }
 #endif