kernel: Add CONFIG_SCHED_IPI_CASCADE

When this new Kconfig option is enabled, threads that are preempted by
a CPU-mask restricted thread on an SMP system may generate additional
(cascading) IPIs when they are switched out. These IPIs prompt other
CPUs to re-evaluate their scheduling so that the system settles upon a
valid set of high priority threads.
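
For illustration only, a minimal stand-alone sketch of the cascade
decision this patch adds to kernel/sched.c. The helper name and the
parameter types are invented for the sketch; in the patch the check is
performed inline on the threads' base.cpu_mask fields, where a mask of
-1 means "may run on any CPU" and BIT(cpu_id) means "pinned to exactly
the current CPU":

  #include <stdbool.h>

  #define BIT(n) (1U << (n))

  /* Raise a follow-up (cascading) IPI only when the preempting thread
   * is CPU-restricted and the preempted thread is allowed to run on
   * some CPU other than the one it is being switched out on.
   */
  static bool needs_cascade_ipi(int new_mask, int old_mask,
                                unsigned int cpu_id)
  {
          return (new_mask != -1) && (old_mask != (int)BIT(cpu_id));
  }

With this check, an unpinned preempted thread (mask of -1) always
triggers the cascade when a restricted thread displaces it, while a
thread pinned only to the current CPU never does, since no other CPU
could legally run it.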

Signed-off-by: Peter Mitsis <peter.mitsis@intel.com>
diff --git a/kernel/Kconfig.smp b/kernel/Kconfig.smp
index da83d16..624d13c 100644
--- a/kernel/Kconfig.smp
+++ b/kernel/Kconfig.smp
@@ -63,6 +63,18 @@
 	  thread to take an interrupt, which can be arbitrarily far in the
 	  future).
 
+config SCHED_IPI_CASCADE
+	bool "Use cascading IPIs to correct localized scheduling"
+	depends on SCHED_CPU_MASK && !SCHED_CPU_MASK_PIN_ONLY
+	default n
+	help
+	  Threads that are preempted by a local thread (a thread that is
+	  restricted by its CPU mask to execute on a subset of all CPUs) may
+	  trigger additional IPIs when the preempted thread is of higher
+	  priority than a currently executing thread on another CPU. Although
+	  these cascading IPIs ensure that the system settles upon a valid
+	  set of high priority threads, they come at a performance cost.
+
 config TRACE_SCHED_IPI
 	bool "Test IPI"
 	help
diff --git a/kernel/sched.c b/kernel/sched.c
index 8cd401b..501b273 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -931,12 +931,15 @@
 		z_sched_usage_switch(new_thread);
 
 		if (old_thread != new_thread) {
+			uint8_t cpu_id;
+
 			update_metairq_preempt(new_thread);
 			z_sched_switch_spin(new_thread);
 			arch_cohere_stacks(old_thread, interrupted, new_thread);
 
 			_current_cpu->swap_ok = 0;
-			new_thread->base.cpu = arch_curr_cpu()->id;
+			cpu_id = arch_curr_cpu()->id;
+			new_thread->base.cpu = cpu_id;
 			set_current(new_thread);
 
 #ifdef CONFIG_TIMESLICING
@@ -959,6 +962,16 @@
 			 * will not return into it.
 			 */
 			if (z_is_thread_queued(old_thread)) {
+#ifdef CONFIG_SCHED_IPI_CASCADE
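+				/* The preempting thread is CPU-restricted and the
+				 * preempted thread may run on another CPU, so flag
+				 * a cascading IPI to prompt a re-schedule elsewhere.
+				 */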
+				if ((new_thread->base.cpu_mask != -1) &&
+				    (old_thread->base.cpu_mask != BIT(cpu_id))) {
+					flag_ipi(ipi_mask_create(old_thread));
+				}
+#endif
 				runq_add(old_thread);
 			}
 		}