arm: AArch64: Add support for nested exception handlers

In the current implementation both the SPSR and ELR registers are saved
with the callee-saved registers and restored by the context-switch
routine. To support nested IRQs we have to save them on the stack when
entering an ISR and restore them when exiting from it.

Since the values are now carried on the stack, we can add them to the
ESF and to the initial stack frame, taking care to restore them for new
threads in the thread entry wrapper routine.
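
For reference, a minimal sketch of the two frames involved after this
change. The structures mirror the ones touched below; the include is
only there for the u64_t typedef and the comments are illustrative,
not new API:

    #include <zephyr/types.h>   /* u64_t */

    /*
     * Exception stack frame: SPSR_ELn/ELR_ELn are now pushed and
     * popped by the exception entry/exit macros in macro.inc.
     */
    struct __esf {
            struct __basic_sf {
                    u64_t regs[20];
                    u64_t spsr;     /* SPSR_ELn */
                    u64_t elr;      /* ELR_ELn */
            } basic;
    };

    /*
     * Initial stack frame built by arch_new_thread(): SPSR/ELR sit on
     * top so z_thread_entry_wrapper can pop and restore them before
     * eret-ing into z_thread_entry() with IRQs unmasked.
     */
    struct init_stack_frame {
            u64_t spsr;
            u64_t elr;
            u64_t entry_point;
            u64_t arg1;
            u64_t arg2;
            u64_t arg3;
    };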

Signed-off-by: Carlo Caione <ccaione@baylibre.com>
diff --git a/arch/arm/core/aarch64/irq_manage.c b/arch/arm/core/aarch64/irq_manage.c
index ce5aaae..8219d01 100644
--- a/arch/arm/core/aarch64/irq_manage.c
+++ b/arch/arm/core/aarch64/irq_manage.c
@@ -53,6 +53,17 @@
 }
 #endif /* !CONFIG_ARM_CUSTOM_INTERRUPT_CONTROLLER */
 
+#ifdef CONFIG_DYNAMIC_INTERRUPTS
+int arch_irq_connect_dynamic(unsigned int irq, unsigned int priority,
+			     void (*routine)(void *parameter), void *parameter,
+			     u32_t flags)
+{
+	z_isr_install(irq, routine, parameter);
+	z_arm64_irq_priority_set(irq, priority, flags);
+	return irq;
+}
+#endif
+
 void z_irq_spurious(void *unused)
 {
 	ARG_UNUSED(unused);
diff --git a/arch/arm/core/aarch64/isr_wrapper.S b/arch/arm/core/aarch64/isr_wrapper.S
index cb6d251..cf18d8b 100644
--- a/arch/arm/core/aarch64/isr_wrapper.S
+++ b/arch/arm/core/aarch64/isr_wrapper.S
@@ -54,12 +54,18 @@
 	stp x0, x1, [sp, #-16]!
 	lsl x0, x0, #4 /* table is 16-byte wide */
 
-	/* Call interrupt service routine */
+	/* Retrieve the interrupt service routine */
 	ldr	x1, =_sw_isr_table
 	add	x1, x1, x0
 	ldp	x0, x3, [x1] /* arg in x0, ISR in x3 */
 
+	/*
+	 * Call the ISR. Unmask IRQs before the call and mask them again
+	 * afterwards to support nested exception handlers
+	 */
+	msr	daifclr, #(DAIFSET_IRQ)
 	blr	x3
+	msr	daifset, #(DAIFSET_IRQ)
 
 	/* Signal end-of-interrupt */
 	ldp x0, x1, [sp], #16
@@ -79,6 +85,9 @@
 	sub	x2, x2, #1
 	str	x2, [x1, #_kernel_offset_to_nested]
 
+	cmp	x2, #0
+	bne	exit
+
 	/* Check if we need to context switch */
 	ldr	x2, [x1, #_kernel_offset_to_current]
 	ldr	x3, [x1, #_kernel_offset_to_ready_q_cache]
diff --git a/arch/arm/core/aarch64/macro.inc b/arch/arm/core/aarch64/macro.inc
index 6140e0a..e54e445 100644
--- a/arch/arm/core/aarch64/macro.inc
+++ b/arch/arm/core/aarch64/macro.inc
@@ -48,6 +48,25 @@
 	stp	x14, x15, [sp, #-16]!
 	stp     x16, x17, [sp, #-16]!
 	stp     x18, x30, [sp, #-16]!
+
+	/*
+	 * Store SPSR_ELn and ELR_ELn. This is needed to support nested
+	 * exception handlers
+	 */
+	switch_el x3, 3f, 2f, 1f
+3:
+	mrs	x0, spsr_el3
+	mrs	x1, elr_el3
+	b	0f
+2:
+	mrs	x0, spsr_el2
+	mrs	x1, elr_el2
+	b	0f
+1:
+	mrs	x0, spsr_el1
+	mrs	x1, elr_el1
+0:
+	stp	x0, x1, [sp, #-16]!
 .endm
 
 /**
@@ -64,6 +83,24 @@
 
 .macro z_arm64_exit_exc
 	/*
+	 * Restore SPSR_ELn and ELR_ELn. This is needed to support nested
+	 * exception handlers
+	 */
+	ldp	x0, x1, [sp], #16
+	switch_el x3, 3f, 2f, 1f
+3:
+	msr	spsr_el3, x0
+	msr	elr_el3, x1
+	b	0f
+2:
+	msr	spsr_el2, x0
+	msr	elr_el2, x1
+	b	0f
+1:
+	msr	spsr_el1, x0
+	msr	elr_el1, x1
+0:
+	/*
 	 * In x30 we can have:
 	 *
 	 * - The address of irq_unlock() in swap.c when swapping in a thread
diff --git a/arch/arm/core/aarch64/swap_helper.S b/arch/arm/core/aarch64/swap_helper.S
index 9956d14..03803d0 100644
--- a/arch/arm/core/aarch64/swap_helper.S
+++ b/arch/arm/core/aarch64/swap_helper.S
@@ -46,7 +46,7 @@
 	ldr	x0, =_thread_offset_to_callee_saved
 	add	x0, x0, x2
 
-	/* Store rest of process context including x30, SPSR_ELn and ELR_ELn */
+	/* Store rest of process context including x30 */
 	stp	x19, x20, [x0], #16
 	stp	x21, x22, [x0], #16
 	stp	x23, x24, [x0], #16
@@ -54,21 +54,6 @@
 	stp	x27, x28, [x0], #16
 	stp	x29, x30, [x0], #16
 
-	switch_el x3, 3f, 2f, 1f
-3:
-	mrs	x4, spsr_el3
-	mrs	x5, elr_el3
-	b	0f
-2:
-	mrs	x4, spsr_el2
-	mrs	x5, elr_el2
-	b	0f
-1:
-	mrs	x4, spsr_el1
-	mrs	x5, elr_el1
-0:
-	stp	x4, x5, [x0], #16
-
 	/* Save the current SP */
 	mov	x6, sp
 	str	x6, [x0]
@@ -81,7 +66,7 @@
 	ldr	x0, =_thread_offset_to_callee_saved
 	add	x0, x0, x2
 
-	/* Restore x19-x29 plus x30, SPSR_ELn and ELR_ELn */
+	/* Restore x19-x29 plus x30 */
 	ldp	x19, x20, [x0], #16
 	ldp	x21, x22, [x0], #16
 	ldp	x23, x24, [x0], #16
@@ -89,21 +74,6 @@
 	ldp	x27, x28, [x0], #16
 	ldp	x29, x30, [x0], #16
 
-	ldp	x4, x5, [x0], #16
-
-	switch_el x3, 3f, 2f, 1f
-3:
-	msr	spsr_el3, x4
-	msr	elr_el3, x5
-	b	0f
-2:
-	msr	spsr_el2, x4
-	msr	elr_el2, x5
-	b	0f
-1:
-	msr	spsr_el1, x4
-	msr	elr_el1, x5
-0:
 	ldr	x6, [x0]
 	mov	sp, x6
 
@@ -137,6 +107,24 @@
 GTEXT(z_thread_entry_wrapper)
 SECTION_FUNC(TEXT, z_thread_entry_wrapper)
 	/*
+	 * Restore SPSR_ELn and ELR_ELn saved in the initial stack frame by
+	 * arch_new_thread()
+	 */
+	ldp	x0, x1, [sp], #16
+	switch_el x3, 3f, 2f, 1f
+3:
+	msr	spsr_el3, x0
+	msr	elr_el3, x1
+	b	0f
+2:
+	msr	spsr_el2, x0
+	msr	elr_el2, x1
+	b	0f
+1:
+	msr	spsr_el1, x0
+	msr	elr_el1, x1
+0:
+	/*
 	 * z_thread_entry_wrapper is called for every new thread upon the return
 	 * of arch_swap() or ISR. Its address, as well as its input function
 	 * arguments thread_entry_t, void *, void *, void * are restored from
diff --git a/arch/arm/core/aarch64/thread.c b/arch/arm/core/aarch64/thread.c
index d9cc518..6ae647e 100644
--- a/arch/arm/core/aarch64/thread.c
+++ b/arch/arm/core/aarch64/thread.c
@@ -41,6 +41,25 @@
 
 void z_thread_entry_wrapper(k_thread_entry_t k, void *p1, void *p2, void *p3);
 
+struct init_stack_frame {
+	/* top of the stack / most recently pushed */
+
+	/* SPSR_ELn and ELR_ELn */
+	u64_t spsr;
+	u64_t elr;
+
+	/*
+	 * Used by z_thread_entry_wrapper, which pulls these off the stack
+	 * and into argument registers before calling z_thread_entry()
+	 */
+	u64_t entry_point;
+	u64_t arg1;
+	u64_t arg2;
+	u64_t arg3;
+
+	/* least recently pushed */
+};
+
 void arch_new_thread(struct k_thread *thread, k_thread_stack_t *stack,
 		     size_t stackSize, k_thread_entry_t pEntry,
 		     void *parameter1, void *parameter2, void *parameter3,
@@ -48,19 +67,29 @@
 {
 	char *pStackMem = Z_THREAD_STACK_BUFFER(stack);
 	char *stackEnd;
-	struct __esf *pInitCtx;
+	struct init_stack_frame *pInitCtx;
 
 	stackEnd = pStackMem + stackSize;
 
 	z_new_thread_init(thread, pStackMem, stackSize, priority, options);
 
-	pInitCtx = (struct __esf *)(STACK_ROUND_DOWN(stackEnd -
-				    sizeof(struct __basic_sf)));
+	pInitCtx = (struct init_stack_frame *)(STACK_ROUND_DOWN(stackEnd -
+				    sizeof(struct init_stack_frame)));
 
-	pInitCtx->basic.regs[0] = (u64_t)pEntry;
-	pInitCtx->basic.regs[1] = (u64_t)parameter1;
-	pInitCtx->basic.regs[2] = (u64_t)parameter2;
-	pInitCtx->basic.regs[3] = (u64_t)parameter3;
+	pInitCtx->entry_point = (u64_t)pEntry;
+	pInitCtx->arg1 = (u64_t)parameter1;
+	pInitCtx->arg2 = (u64_t)parameter2;
+	pInitCtx->arg3 = (u64_t)parameter3;
+
+	/*
+	 * - ELR_ELn: to be used by eret in z_thread_entry_wrapper() to return
+	 *   to z_thread_entry() with pEntry in x0 (entry_point) and the
+	 *   parameters already in place in x1 (arg1), x2 (arg2), x3 (arg3).
+	 * - SPSR_ELn: to enable IRQs (we are masking debug exceptions, SError
+	 *   interrupts and FIQs).
+	 */
+	pInitCtx->elr = (u64_t)z_thread_entry;
+	pInitCtx->spsr = SPSR_MODE_EL1H | DAIF_FIQ;
 
 	/*
 	 * We are saving:
@@ -69,15 +98,8 @@
 	 *   z_thread_entry_wrapper().
 	 * - x30: to be used by ret in z_arm64_context_switch() when the new
 	 *   task is first scheduled.
-	 * - ELR_EL1: to be used by eret in z_thread_entry_wrapper() to return
-	 *   to z_thread_entry() with pEntry in x0 and the parameters already
-	 *   in place in x1, x2, x3.
-	 * - SPSR_EL1: to enable IRQs (we are masking debug exceptions, SError
-	 *   interrupts and FIQs).
 	 */
 
 	thread->callee_saved.sp = (u64_t)pInitCtx;
 	thread->callee_saved.x30 = (u64_t)z_thread_entry_wrapper;
-	thread->callee_saved.elr = (u64_t)z_thread_entry;
-	thread->callee_saved.spsr = SPSR_MODE_EL1H | DAIF_FIQ;
 }
diff --git a/boards/arm/qemu_cortex_a53/qemu_cortex_a53.yaml b/boards/arm/qemu_cortex_a53/qemu_cortex_a53.yaml
index dd37fc3..2a9c6be 100644
--- a/boards/arm/qemu_cortex_a53/qemu_cortex_a53.yaml
+++ b/boards/arm/qemu_cortex_a53/qemu_cortex_a53.yaml
@@ -9,5 +9,3 @@
 ram: 128
 testing:
   default: true
-  ignore_tags:
-    - interrupt
diff --git a/include/arch/arm/aarch64/exc.h b/include/arch/arm/aarch64/exc.h
index 3d1f2d0..dfc1100 100644
--- a/include/arch/arm/aarch64/exc.h
+++ b/include/arch/arm/aarch64/exc.h
@@ -27,6 +27,8 @@
 struct __esf {
 	struct __basic_sf {
 		u64_t regs[20];
+		u64_t spsr;
+		u64_t elr;
 	} basic;
 };
 
diff --git a/include/arch/arm/aarch64/thread.h b/include/arch/arm/aarch64/thread.h
index 145d7f7..08e31ff 100644
--- a/include/arch/arm/aarch64/thread.h
+++ b/include/arch/arm/aarch64/thread.h
@@ -35,8 +35,6 @@
 	u64_t x28;
 	u64_t x29; /* FP */
 	u64_t x30; /* LR */
-	u64_t spsr;
-	u64_t elr;
 	u64_t sp;
 };