kernel: Add new k_thread_abort()/k_thread_join()

Add a newer, much smaller and simpler implementation of abort and
join.  No need to involve the idle thread.  No need for a special code
path for self-abort.  Joining a thread and waiting for an aborting one
to terminate elsewhere share an implementation.  All work in both
calls happens under a single locked path with no unexpected
synchronization points.

This fixes a bug with the current implementation where the action of
z_thread_single_abort() was nonatomic, releasing the lock internally at
a point where the thread to be aborted could self-abort and confuse
the state such that it failed to abort at all.

Note that the arm32 and native_posix architectures, which have their
own thread abort implementations, now see a much simplified
"z_thread_abort()" internal API.

Signed-off-by: Andy Ross <andrew.j.ross@intel.com>
diff --git a/kernel/include/kernel_internal.h b/kernel/include/kernel_internal.h
index 7ca6c9f..8ced654 100644
--- a/kernel/include/kernel_internal.h
+++ b/kernel/include/kernel_internal.h
@@ -151,7 +151,7 @@
 /* Memory domain setup hook, called from z_setup_new_thread() */
 void z_mem_domain_init_thread(struct k_thread *thread);
 
-/* Memory domain teardown hook, called from z_thread_single_abort() */
+/* Memory domain teardown hook, called from z_thread_abort() */
 void z_mem_domain_exit_thread(struct k_thread *thread);
 
 /* This spinlock:
diff --git a/kernel/include/ksched.h b/kernel/include/ksched.h
index 23e48a5..8045429 100644
--- a/kernel/include/ksched.h
+++ b/kernel/include/ksched.h
@@ -64,10 +64,9 @@
 void z_sched_ipi(void);
 void z_sched_start(struct k_thread *thread);
 void z_ready_thread(struct k_thread *thread);
-void z_thread_single_abort(struct k_thread *thread);
-FUNC_NORETURN void z_self_abort(void);
 void z_requeue_current(struct k_thread *curr);
 struct k_thread *z_swap_next_thread(void);
+void z_thread_abort(struct k_thread *thread);
 
 static inline void z_pend_curr_unlocked(_wait_q_t *wait_q, k_timeout_t timeout)
 {
diff --git a/kernel/sched.c b/kernel/sched.c
index 2fe36a1..7e30486 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -52,6 +52,7 @@
 struct k_spinlock sched_spinlock;
 
 static void update_cache(int);
+static void end_thread(struct k_thread *thread);
 
 static inline int is_preempt(struct k_thread *thread)
 {
@@ -216,6 +217,11 @@
 }
 #endif
 
+static inline bool is_aborting(struct k_thread *thread)
+{ /* True while _THREAD_ABORTING is set in the thread's state bits. */
+	return (thread->base.thread_state & _THREAD_ABORTING) != 0;
+}
+
 static ALWAYS_INLINE struct k_thread *next_up(void)
 {
 	struct k_thread *thread;
@@ -259,6 +265,10 @@
 	 * "ready", it means "is _current already added back to the
 	 * queue such that we don't want to re-add it".
 	 */
+	if (is_aborting(_current)) {
+		end_thread(_current);
+	}
+
 	int queued = z_is_thread_queued(_current);
 	int active = !z_is_thread_prevented_from_running(_current);
 
@@ -1427,6 +1437,123 @@
 
 #endif /* CONFIG_SCHED_CPU_MASK */
 
+static inline void unpend_all(_wait_q_t *wait_q)
+{ /* Loop until wait_q has no head: each waiter is readied with return value 0. */
+	struct k_thread *thread;
+
+	while ((thread = z_waitq_head(wait_q)) != NULL) {
+		unpend_thread_no_timeout(thread);
+		(void)z_abort_thread_timeout(thread); /* result intentionally ignored */
+		arch_thread_return_value_set(thread, 0);
+		ready_thread(thread);
+	}
+}
+
+static void end_thread(struct k_thread *thread)
+{
+	/* Caller holds the lock, and the thread is known not to be running
+	 * anywhere.  A thread already marked DEAD is skipped entirely.
+	 */
+	if ((thread->base.thread_state & _THREAD_DEAD) == 0) {
+		thread->base.thread_state |= _THREAD_DEAD;
+		thread->base.thread_state &= ~_THREAD_ABORTING; /* ends is_aborting() spins */
+		if (z_is_thread_queued(thread)) {
+			dequeue_thread(&_kernel.ready_q.runq, thread);
+		}
+		if (thread->base.pended_on != NULL) {
+			unpend_thread_no_timeout(thread);
+		}
+		(void)z_abort_thread_timeout(thread);
+		unpend_all(&thread->join_queue); /* ready every joiner */
+		update_cache(1);
+
+		sys_trace_thread_abort(thread);
+		z_thread_monitor_exit(thread);
+
+#ifdef CONFIG_USERSPACE
+		z_mem_domain_exit_thread(thread);
+		z_thread_perms_all_clear(thread);
+		z_object_uninit(thread->stack_obj);
+		z_object_uninit(thread);
+#endif /* CONFIG_USERSPACE */
+	}
+}
+
+void z_thread_abort(struct k_thread *thread)
+{ /* Abort thread; returns at once if it is already marked DEAD. */
+	k_spinlock_key_t key = k_spin_lock(&sched_spinlock);
+
+	if (thread->base.thread_state & _THREAD_DEAD) {
+		k_spin_unlock(&sched_spinlock, key);
+		return;
+	}
+
+#ifdef CONFIG_SMP
+	if (is_aborting(thread) && thread == _current && arch_is_in_isr()) {
+		/* Another CPU is spinning for us, don't deadlock */
+		end_thread(thread);
+	}
+
+	bool active = thread_active_elsewhere(thread);
+
+	if (active) {
+		/* It's running somewhere else: flag it ABORTING and poke */
+		thread->base.thread_state |= _THREAD_ABORTING;
+		arch_sched_ipi();
+	}
+
+	if (is_aborting(thread) && thread != _current) {
+		if (arch_is_in_isr()) {
+			/* ISRs can only spin waiting on another CPU */
+			k_spin_unlock(&sched_spinlock, key);
+			while (is_aborting(thread)) { /* busy-wait with the lock dropped */
+			}
+		} else if (active) {
+			/* Threads can join: pend on the target's join_queue */
+			add_to_waitq_locked(_current, &thread->join_queue);
+			z_swap(&sched_spinlock, key);
+		}
+		return; /* lock released above; NOTE(review): confirm !active never gets here */
+	}
+#endif /* CONFIG_SMP */
+	end_thread(thread);
+	if (thread == _current && !arch_is_in_isr()) {
+		z_swap(&sched_spinlock, key);
+		__ASSERT(false, "aborted _current back from dead");
+	}
+	k_spin_unlock(&sched_spinlock, key);
+}
+
+#if !defined(CONFIG_ARCH_HAS_THREAD_ABORT)
+void z_impl_k_thread_abort(struct k_thread *thread)
+{ /* Built only without an arch-specific abort: defers to z_thread_abort(). */
+	z_thread_abort(thread);
+}
+#endif /* !CONFIG_ARCH_HAS_THREAD_ABORT */
+
+int z_impl_k_thread_join(struct k_thread *thread, k_timeout_t timeout)
+{ /* Returns 0, -EBUSY or -EDEADLK without waiting, else the z_swap() result. */
+	k_spinlock_key_t key = k_spin_lock(&sched_spinlock);
+	int ret = 0;
+
+	if (thread->base.thread_state & _THREAD_DEAD) {
+		ret = 0; /* target is already marked DEAD */
+	} else if (K_TIMEOUT_EQ(timeout, K_NO_WAIT)) {
+		ret = -EBUSY; /* not DEAD and the caller will not wait */
+	} else if (thread == _current ||
+		   thread->base.pended_on == &_current->join_queue) {
+		ret = -EDEADLK; /* self-join, or target is pended on our join_queue */
+	} else { /* pend on the target's join_queue, bounded by timeout */
+		__ASSERT(!arch_is_in_isr(), "cannot join in ISR");
+		add_to_waitq_locked(_current, &thread->join_queue);
+		add_thread_timeout(_current, timeout);
+		return z_swap(&sched_spinlock, key);
+	}
+
+	k_spin_unlock(&sched_spinlock, key);
+	return ret;
+}
+
 #ifdef CONFIG_USERSPACE
 /* Special case: don't oops if the thread is uninitialized.  This is because
  * the initialization bit does double-duty for thread objects; if false, means
diff --git a/kernel/thread.c b/kernel/thread.c
index 8939f0b..989f17f 100644
--- a/kernel/thread.c
+++ b/kernel/thread.c
@@ -552,6 +552,7 @@
 	/* Any given thread has access to itself */
 	k_object_access_grant(new_thread, new_thread);
 #endif
+	z_waitq_init(&new_thread->join_queue);
 
 	/* Initialize various struct k_thread members */
 	z_init_thread_base(&new_thread->base, prio, _THREAD_PRESTART, options);