ARC: fix SMP race in ASM ARC interrupt handling code

In the interrupt handler code we don't save the full current task
context on the stack (we don't save the callee-saved registers) before
the z_get_next_switch_handle() call, but we pass _current to it, so
z_get_next_switch_handle() stores the current task into its
switch_handle. That makes this CPU's current task visible to the other
CPUs, which can pick it up before we have fully stored its context on
this CPU.
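
To illustrate, here is a simplified sketch of what
z_get_next_switch_handle() does under SMP (not the literal
kernel/sched.c code; next_up() stands in for the scheduler's internal
selection of the next ready thread):

    void *z_get_next_switch_handle(void *interrupted)
    {
        struct k_thread *old_thread = _current;
        struct k_thread *new_thread = next_up(); /* next ready thread */

        if (new_thread != old_thread) {
            _current_cpu->current = new_thread;
            /* requeue old_thread if still runnable, etc. */
        }
        /* With interrupted == _current this publishes old_thread to
         * other CPUs right here, while its callee-saved registers are
         * still live in this CPU's register file.  With
         * interrupted == NULL the handle stays NULL and the arch code
         * publishes it itself, after _save_callee_saved_regs.
         */
        old_thread->switch_handle = interrupted;

        /* NULL when there is no switch and interrupted was NULL,
         * hence the CMPR r0, 0 checks below.
         */
        return new_thread->switch_handle;
    }

So the fix is to pass NULL, compare the returned handle against 0
instead of against _current, and store switch_handle from the assembly
only after the callee-saved registers are on the stack.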

Signed-off-by: Evgeniy Paltsev <PaltsevEvgeniy@gmail.com>
Signed-off-by: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
diff --git a/arch/arc/core/fast_irq.S b/arch/arc/core/fast_irq.S
index 08d498a..aebf141 100644
--- a/arch/arc/core/fast_irq.S
+++ b/arch/arc/core/fast_irq.S
@@ -145,14 +145,13 @@
 
 	jne _firq_no_switch
 
-	/* sp is struct k_thread **old of z_arc_switch_in_isr
-	 * which is a wrapper of z_get_next_switch_handle.
-	 * r0 contains the 1st thread in ready queue. if
-	 * it equals _current(r2) ,then do swap, or no swap.
+	/* sp is struct k_thread **old of z_arc_switch_in_isr which is a wrapper of
+	 * z_get_next_switch_handle. r0 contains the 1st thread in the ready queue. If it isn't
+	 * NULL, then switch to this thread.
 	 */
 	_get_next_switch_handle
 
-	cmp r0, r2
+	CMPR r0, 0
 	bne _firq_switch
 
 	/* fall to no switch */
@@ -239,10 +238,10 @@
 	ld r2, [r1, -8]
 #endif
 	/* r2 is old thread */
-	_irq_store_old_thread_callee_regs
-
 	st _CAUSE_FIRQ, [r2, _thread_offset_to_relinquish_cause]
 
+	_irq_store_old_thread_callee_regs
+
 	/* mov new thread (r0) to r2 */
 
 	mov r2, r0
diff --git a/arch/arc/core/fault_s.S b/arch/arc/core/fault_s.S
index 30d5430..a99003a 100644
--- a/arch/arc/core/fault_s.S
+++ b/arch/arc/core/fault_s.S
@@ -119,7 +119,7 @@
 
 	_get_next_switch_handle
 
-	BREQR r0, r2, _exc_return_from_exc
+	BREQR r0, 0, _exc_return_from_exc
 
 	MOVR r2, r0
 
diff --git a/arch/arc/core/regular_irq.S b/arch/arc/core/regular_irq.S
index c0afb5e..9248716 100644
--- a/arch/arc/core/regular_irq.S
+++ b/arch/arc/core/regular_irq.S
@@ -239,14 +239,13 @@
 
 	jne _rirq_no_switch
 
-	/* sp is struct k_thread **old of z_arc_switch_in_isr
-	 * which is a wrapper of z_get_next_switch_handle.
-	 * r0 contains the 1st thread in ready queue. if
-	 * it equals _current(r2) ,then do swap, or no swap.
+	/* sp is struct k_thread **old of z_arc_switch_in_isr which is a wrapper of
+	 * z_get_next_switch_handle. r0 contains the 1st thread in the ready queue. If it isn't
+	 * NULL, then switch to this thread.
 	 */
 	_get_next_switch_handle
 
-	CMPR r0, r2
+	CMPR r0, 0
 	beq _rirq_no_switch
 
 #ifdef CONFIG_ARC_SECURE_FIRMWARE
@@ -255,11 +254,12 @@
 	push_s r3
 #endif
 
-	/* r2 is old thread */
-	_irq_store_old_thread_callee_regs
+	/* r2 is old thread
+	 * _thread_arch.relinquish_cause is 32 bit regardless of platform bitness
+	 */
+	_st32_huge_offset _CAUSE_RIRQ, r2, _thread_offset_to_relinquish_cause, r1
 
-	/* _thread_arch.relinquish_cause is 32 bit despite of platform bittnes */
-	_st32_huge_offset _CAUSE_RIRQ, r2, _thread_offset_to_relinquish_cause, r2
+	_irq_store_old_thread_callee_regs
 
 	/* mov new thread (r0) to r2 */
 	MOVR r2, r0
diff --git a/arch/arc/core/thread.c b/arch/arc/core/thread.c
index 1f075e6..6dc4a2e 100644
--- a/arch/arc/core/thread.c
+++ b/arch/arc/core/thread.c
@@ -198,7 +198,7 @@
 {
 	*old_thread =  _current;
 
-	return z_get_next_switch_handle(*old_thread);
+	return z_get_next_switch_handle(NULL);
 }
 
 #ifdef CONFIG_USERSPACE
diff --git a/arch/arc/include/swap_macros.h b/arch/arc/include/swap_macros.h
index 6dcc421..49751ce 100644
--- a/arch/arc/include/swap_macros.h
+++ b/arch/arc/include/swap_macros.h
@@ -412,12 +412,15 @@
 .macro _store_old_thread_callee_regs
 
 	_save_callee_saved_regs
-#ifdef CONFIG_SMP
-	/* save old thread into switch handle which is required by
-	 * wait_for_switch
+	/* Save the old thread into its switch handle, which is required by wait_for_switch.
+	 * NOTE: we shouldn't save anything related to the old thread context after this point!
+	 * TODO: we should add an SMP write-after-write data memory barrier here, as we want all
+	 * previous writes completed before setting switch_handle, which is polled by other cores
+	 * in wait_for_switch in the SMP case. Though it's unlikely that this issue will
+	 * reproduce in the real world, as there is some gap between another core reading
+	 * switch_handle and it reading the rest of the data we've stored before.
 	 */
 	STR r2, r2, ___thread_t_switch_handle_OFFSET
-#endif
 .endm
 
 /* macro to store old thread call regs  in interrupt*/
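
A note on the TODO above: expressed in C, the publication of
switch_handle wants release ordering. A hypothetical sketch (not part
of this patch; it assumes the GCC/C11 atomic builtins available in the
Zephyr toolchains):

    /* Ensure all prior stores of the old thread's context are visible
     * before the thread is published; this pairs with the consuming
     * CPU polling switch_handle in wait_for_switch().
     */
    __atomic_store_n(&old_thread->switch_handle, old_thread,
                     __ATOMIC_RELEASE);

In the assembly macro the equivalent would be a data memory barrier
placed right before the STR to ___thread_t_switch_handle_OFFSET.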