| /* |
| * Copyright (c) 2021 BayLibre SAS |
| * Written by: Nicolas Pitre |
| * |
| * SPDX-License-Identifier: Apache-2.0 |
| */ |
| |
| #include <zephyr/kernel.h> |
| #include <zephyr/kernel_structs.h> |
| #include <kernel_arch_interface.h> |
| #include <zephyr/arch/cpu.h> |
| |
/*
 * FPU register save/restore primitives. Their implementation is
 * to be found in fpu.S.
 */
void z_arm64_fpu_save(struct z_arm64_fp_context *saved_fp_context);
void z_arm64_fpu_restore(struct z_arm64_fp_context *saved_fp_context);

/* Set to 1 to compile in the DBG() trace output, 0 to compile it out. */
#define FPU_DEBUG 0
| |
| #if FPU_DEBUG |
| |
/*
 * Debug traces have to be produced without printk() or any other function
 * using a va_list, as va_start() always copies the FPU registers that could
 * be used to pass float arguments, and that triggers an FPU access trap.
 */
| |
| #include <string.h> |
| |
| static void DBG(char *msg, struct k_thread *th) |
| { |
| char buf[80], *p; |
| unsigned int v; |
| |
| strcpy(buf, "CPU# exc# "); |
| buf[3] = '0' + _current_cpu->id; |
| buf[8] = '0' + arch_exception_depth(); |
| strcat(buf, _current->name); |
| strcat(buf, ": "); |
| strcat(buf, msg); |
| strcat(buf, " "); |
| strcat(buf, th->name); |
| |
| |
| v = *(unsigned char *)&th->arch.saved_fp_context; |
| p = buf + strlen(buf); |
| *p++ = ' '; |
| *p++ = ((v >> 4) < 10) ? ((v >> 4) + '0') : ((v >> 4) - 10 + 'a'); |
| *p++ = ((v & 15) < 10) ? ((v & 15) + '0') : ((v & 15) - 10 + 'a'); |
| *p++ = '\n'; |
| *p = 0; |
| |
| k_str_out(buf, p - buf); |
| } |
| |
| #else |
| |
/* Tracing is compiled out: same arguments as the debug version, no effect. */
static inline void DBG(char *msg, struct k_thread *t)
{
	(void)msg;
	(void)t;
}
| |
| #endif /* FPU_DEBUG */ |
| |
| /* |
| * Flush FPU content and disable access. |
| * This is called locally and also from flush_fpu_ipi_handler(). |
| */ |
| void z_arm64_flush_local_fpu(void) |
| { |
| __ASSERT(read_daif() & DAIF_IRQ_BIT, "must be called with IRQs disabled"); |
| |
| struct k_thread *owner = _current_cpu->arch.fpu_owner; |
| |
| if (owner != NULL) { |
| uint64_t cpacr = read_cpacr_el1(); |
| |
| /* turn on FPU access */ |
| write_cpacr_el1(cpacr | CPACR_EL1_FPEN_NOTRAP); |
| isb(); |
| |
| /* save current owner's content */ |
| z_arm64_fpu_save(&owner->arch.saved_fp_context); |
| /* make sure content made it to memory before releasing */ |
| dsb(); |
| /* release ownership */ |
| _current_cpu->arch.fpu_owner = NULL; |
| DBG("disable", owner); |
| |
| /* disable FPU access */ |
| write_cpacr_el1(cpacr & ~CPACR_EL1_FPEN_NOTRAP); |
| } |
| } |
| |
| #ifdef CONFIG_SMP |
| static void flush_owned_fpu(struct k_thread *thread) |
| { |
| __ASSERT(read_daif() & DAIF_IRQ_BIT, "must be called with IRQs disabled"); |
| |
| int i; |
| |
| /* search all CPUs for the owner we want */ |
| for (i = 0; i < CONFIG_MP_NUM_CPUS; i++) { |
| if (_kernel.cpus[i].arch.fpu_owner != thread) { |
| continue; |
| } |
| /* we found it live on CPU i */ |
| if (i == _current_cpu->id) { |
| z_arm64_flush_local_fpu(); |
| } else { |
| /* the FPU context is live on another CPU */ |
| z_arm64_flush_fpu_ipi(i); |
| |
| /* |
| * Wait for it only if this is about the thread |
| * currently running on this CPU. Otherwise the |
| * other CPU running some other thread could regain |
| * ownership the moment it is removed from it and |
| * we would be stuck here. |
| * |
| * Also, if this is for the thread running on this |
| * CPU, then we preemptively flush any live context |
| * on this CPU as well since we're likely to |
| * replace it, and this avoids a deadlock where |
| * two CPUs want to pull each other's FPU context. |
| */ |
| if (thread == _current) { |
| z_arm64_flush_local_fpu(); |
| while (_kernel.cpus[i].arch.fpu_owner == thread) { |
| dsb(); |
| } |
| } |
| } |
| break; |
| } |
| } |
| #endif |
| |
| void z_arm64_fpu_enter_exc(void) |
| { |
| __ASSERT(read_daif() & DAIF_IRQ_BIT, "must be called with IRQs disabled"); |
| |
| /* always deny FPU access whenever an exception is entered */ |
| write_cpacr_el1(read_cpacr_el1() & ~CPACR_EL1_FPEN_NOTRAP); |
| isb(); |
| } |
| |
| /* |
| * Simulate some FPU store instructions. |
| * |
| * In many cases, the FPU trap is triggered by va_start() that copies |
| * the content of FP registers used for floating point argument passing |
| * into the va_list object in case there were actual float arguments from |
| * the caller. In practice this is almost never the case, especially if |
| * FPU access is disabled and we're trapped while in exception context. |
| * Rather than flushing the FPU context to its owner and enabling access |
| * just to let the corresponding STR instructions execute, we simply |
| * simulate them and leave the FPU access disabled. This also avoids the |
| * need for disabling interrupts in syscalls and IRQ handlers as well. |
| */ |
| static bool simulate_str_q_insn(z_arch_esf_t *esf) |
| { |
| /* |
| * Support only the "FP in exception" cases for now. |
| * We know there is no saved FPU context to check nor any |
| * userspace stack memory to validate in that case. |
| */ |
| if (arch_exception_depth() <= 1) { |
| return false; |
| } |
| |
| uint32_t *pc = (uint32_t *)esf->elr; |
| /* The original (interrupted) sp is the top of the esf structure */ |
| uintptr_t sp = (uintptr_t)esf + sizeof(*esf); |
| |
| for (;;) { |
| uint32_t insn = *pc; |
| |
| /* |
| * We're looking for STR (immediate, SIMD&FP) of the form: |
| * |
| * STR Q<n>, [SP, #<pimm>] |
| * |
| * where 0 <= <n> <= 7 and <pimm> is a 12-bits multiple of 16. |
| */ |
| if ((insn & 0xffc003f8) != 0x3d8003e0) |
| break; |
| |
| uint32_t pimm = (insn >> 10) & 0xfff; |
| |
| /* Zero the location as the above STR would have done */ |
| *(__int128 *)(sp + pimm * 16) = 0; |
| |
| /* move to the next instruction */ |
| pc++; |
| } |
| |
| /* did we do something? */ |
| if (pc != (uint32_t *)esf->elr) { |
| /* resume execution past the simulated instructions */ |
| esf->elr = (uintptr_t)pc; |
| return true; |
| } |
| |
| return false; |
| } |
| |
| /* |
| * Process the FPU trap. |
| * |
| * This usually means that FP regs belong to another thread. Save them |
| * to that thread's save area and restore the current thread's content. |
| * |
| * We also get here when FP regs are used while in exception as FP access |
| * is always disabled by default in that case. If so we save the FPU content |
| * to the owning thread and simply enable FPU access. Exceptions should be |
| * short and don't have persistent register contexts when they're done so |
| * there is nothing to save/restore for that context... as long as we |
| * don't get interrupted that is. To ensure that we mask interrupts to |
| * the triggering exception context. |
| */ |
| void z_arm64_fpu_trap(z_arch_esf_t *esf) |
| { |
| __ASSERT(read_daif() & DAIF_IRQ_BIT, "must be called with IRQs disabled"); |
| |
| /* check if a quick simulation can do it */ |
| if (simulate_str_q_insn(esf)) { |
| return; |
| } |
| |
| /* turn on FPU access */ |
| write_cpacr_el1(read_cpacr_el1() | CPACR_EL1_FPEN_NOTRAP); |
| isb(); |
| |
| /* save current owner's content if any */ |
| struct k_thread *owner = _current_cpu->arch.fpu_owner; |
| |
| if (owner) { |
| z_arm64_fpu_save(&owner->arch.saved_fp_context); |
| dsb(); |
| _current_cpu->arch.fpu_owner = NULL; |
| DBG("save", owner); |
| } |
| |
| if (arch_exception_depth() > 1) { |
| /* |
| * We were already in exception when the FPU access trap. |
| * We give it access and prevent any further IRQ recursion |
| * by disabling IRQs as we wouldn't be able to preserve the |
| * interrupted exception's FPU context. |
| */ |
| esf->spsr |= DAIF_IRQ_BIT; |
| return; |
| } |
| |
| #ifdef CONFIG_SMP |
| /* |
| * Make sure the FPU context we need isn't live on another CPU. |
| * The current CPU's FPU context is NULL at this point. |
| */ |
| flush_owned_fpu(_current); |
| #endif |
| |
| /* become new owner */ |
| _current_cpu->arch.fpu_owner = _current; |
| |
| /* restore our content */ |
| z_arm64_fpu_restore(&_current->arch.saved_fp_context); |
| DBG("restore", _current); |
| } |
| |
| /* |
| * Perform lazy FPU context switching by simply granting or denying |
| * access to FP regs based on FPU ownership before leaving the last |
| * exception level in case of exceptions, or during a thread context |
| * switch with the exception level of the new thread being 0. |
| * If current thread doesn't own the FP regs then it will trap on its |
| * first access and then the actual FPU context switching will occur. |
| */ |
| static void fpu_access_update(unsigned int exc_update_level) |
| { |
| __ASSERT(read_daif() & DAIF_IRQ_BIT, "must be called with IRQs disabled"); |
| |
| uint64_t cpacr = read_cpacr_el1(); |
| |
| if (arch_exception_depth() == exc_update_level) { |
| /* We're about to execute non-exception code */ |
| if (_current_cpu->arch.fpu_owner == _current) { |
| /* turn on FPU access */ |
| write_cpacr_el1(cpacr | CPACR_EL1_FPEN_NOTRAP); |
| } else { |
| /* deny FPU access */ |
| write_cpacr_el1(cpacr & ~CPACR_EL1_FPEN_NOTRAP); |
| } |
| } else { |
| /* |
| * Any new exception level should always trap on FPU |
| * access as we want to make sure IRQs are disabled before |
| * granting it access (see z_arm64_fpu_trap() documentation). |
| */ |
| write_cpacr_el1(cpacr & ~CPACR_EL1_FPEN_NOTRAP); |
| } |
| } |
| |
/*
 * This is called on every exception exit except for z_arm64_fpu_trap().
 * The exception level of interest in that case is 1 (soon to be 0).
 */
void z_arm64_fpu_exit_exc(void)
{
	const unsigned int last_exc_level = 1;

	fpu_access_update(last_exc_level);
}
| |
/*
 * This is called from z_arm64_context_switch(). FPU access may be granted
 * only if the exception level is 0. When switching to a thread that is
 * still in some exception context, FPU access gets re-evaluated at
 * exception exit time via z_arm64_fpu_exit_exc().
 */
void z_arm64_fpu_thread_context_switch(void)
{
	const unsigned int thread_level = 0;

	fpu_access_update(thread_level);
}
| |
| int arch_float_disable(struct k_thread *thread) |
| { |
| if (thread != NULL) { |
| unsigned int key = arch_irq_lock(); |
| |
| #ifdef CONFIG_SMP |
| flush_owned_fpu(thread); |
| #else |
| if (thread == _current_cpu->arch.fpu_owner) { |
| z_arm64_flush_local_fpu(); |
| } |
| #endif |
| |
| arch_irq_unlock(key); |
| } |
| |
| return 0; |
| } |
| |
/*
 * Nothing to do here: floats always get enabled automatically at the
 * moment. Both arguments are ignored and 0 is always returned.
 */
int arch_float_enable(struct k_thread *thread, unsigned int options)
{
	(void)thread;
	(void)options;

	return 0;
}