| /* |
| * Copyright (c) 2019 Intel Corporation |
| * |
| * SPDX-License-Identifier: Apache-2.0 |
| */ |
| |
| #include <device.h> |
| #include <init.h> |
| #include <kernel.h> |
| #include <kernel_structs.h> |
| #include <toolchain.h> |
| #include <sys/__assert.h> |
| #include <sys/sys_io.h> |
| #include <string.h> |
| |
| #include <xtensa/config/core-isa.h> |
| |
| #include <logging/log.h> |
| LOG_MODULE_REGISTER(soc_mp, CONFIG_SOC_LOG_LEVEL); |
| |
| #include <cavs-idc.h> |
| #include <soc.h> |
| #include <arch/xtensa/cache.h> |
| #include <adsp/io.h> |
| |
| #include <soc/shim.h> |
| |
| #include <drivers/ipm.h> |
| #include <ipm/ipm_cavs_idc.h> |
| |
| extern void z_sched_ipi(void); |
| extern void z_smp_start_cpu(int id); |
| extern void z_reinit_idle_thread(int i); |
| |
| /* Wake version field parsed by the ROM during core wake-up. */ |
| #define IDC_ROM_WAKE_VERSION 0x2 |
| |
| /* IDC message type. */ |
| #define IDC_TYPE_SHIFT 24 |
| #define IDC_TYPE_MASK 0x7f |
| #define IDC_TYPE(x) (((x) & IDC_TYPE_MASK) << IDC_TYPE_SHIFT) |
| |
| /* IDC message header. */ |
| #define IDC_HEADER_MASK 0xffffff |
| #define IDC_HEADER(x) ((x) & IDC_HEADER_MASK) |
| |
| /* IDC message extension. */ |
| #define IDC_EXTENSION_MASK 0x3fffffff |
| #define IDC_EXTENSION(x) ((x) & IDC_EXTENSION_MASK) |
| |
| /* IDC power up message. */ |
| #define IDC_MSG_POWER_UP \ |
| (IDC_TYPE(0x1) | IDC_HEADER(IDC_ROM_WAKE_VERSION)) |
| |
| #define IDC_MSG_POWER_UP_EXT(x) IDC_EXTENSION((x) >> 2) |
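| |
| /* Worked example: IDC_MSG_POWER_UP == (0x1 << 24) | 0x2 == 0x01000002, |
| * and the extension word carries bits [31:2] of the entry address, |
| * which is why the entry stub below must be 4-byte aligned. |
| */ |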
| |
| struct cpustart_rec { |
| uint32_t cpu; /* logical ID of the CPU being started */ |
| |
| arch_cpustart_t fn; /* entry function for the new CPU */ |
| void *arg; /* argument passed to fn */ |
| uint32_t vecbase; /* VECBASE value copied from the boot CPU */ |
| |
| uint32_t alive; /* set to 1 by the new CPU once it reaches C code */ |
| }; |
| |
| static struct k_spinlock mplock; /* serializes soc_halt_cpu()/soc_relaunch_cpu() */ |
| |
| char *z_mp_stack_top; /* initial stack pointer consumed by the asm entry stub */ |
| |
| #ifdef CONFIG_KERNEL_COHERENCE |
| /* Coherence guarantees that normal .data will be coherent and that it |
| * won't overlap any cached memory. |
| */ |
| static struct { |
| struct cpustart_rec cpustart; |
| } cpustart_mem; |
| #else |
| /* If .data RAM is by default incoherent, then the start record goes |
| * into its own dedicated cache line(s). |
| */ |
| static __aligned(XCHAL_DCACHE_LINESIZE) union { |
| struct cpustart_rec cpustart; |
| char pad[XCHAL_DCACHE_LINESIZE]; |
| } cpustart_mem; |
| #endif |
| |
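| /* All access to the start record goes through this volatile, uncached |
| * alias so that stores made by the boot CPU are visible to the |
| * incoming CPU without explicit cache maintenance. |
| */ |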
| #define start_rec \ |
| (*((volatile struct cpustart_rec *) \ |
| z_soc_uncached_ptr(&cpustart_mem.cpustart))) |
| |
| /* Simple array of CPUs that are active and available for an IPI. The |
| * IDC interrupt is ALSO used to bring a CPU out of reset, so we need |
| * to be absolutely sure we don't try to IPI a CPU that isn't ready to |
| * start, or else we'll launch it into garbage and crash the DSP. |
| */ |
| static bool cpus_active[CONFIG_MP_NUM_CPUS]; |
| |
| /* Tiny assembly stub for calling z_mp_entry() on the auxiliary CPUs. |
| * Mask interrupts, clear the register window state and set the stack |
| * pointer. This represents the minimum work required to run C code |
| * safely. |
| * |
| * Note that alignment is absolutely required: the IDC protocol passes |
| * only the upper 30 bits of the address to the second CPU. |
| */ |
| void z_soc_mp_asm_entry(void); |
| __asm__(".align 4 \n\t" |
| ".global z_soc_mp_asm_entry \n\t" |
| "z_soc_mp_asm_entry: \n\t" |
| " movi a0, 0x40025 \n\t" /* WOE | UM | INTLEVEL(5) */ |
| " wsr a0, PS \n\t" |
| " movi a0, 0 \n\t" |
| " wsr a0, WINDOWBASE \n\t" |
| " movi a0, 1 \n\t" |
| " wsr a0, WINDOWSTART \n\t" |
| " rsync \n\t" |
| " movi a1, z_mp_stack_top \n\t" |
| " l32i a1, a1, 0 \n\t" |
| " call4 z_mp_entry \n\t"); |
| BUILD_ASSERT(XCHAL_EXCM_LEVEL == 5); /* the 0x40025 PS value above hardcodes INTLEVEL(5) */ |
| |
| int cavs_idc_smp_init(const struct device *dev); |
| |
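| /* SoC-specific, memory-mapped L1 cache capability and power control |
| * registers; these are not part of the architected Xtensa register |
| * set. |
| */ |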
| #define CxL1CCAP (*(volatile uint32_t *)0x9F080080) |
| #define CxL1CCFG (*(volatile uint32_t *)0x9F080084) |
| #define CxL1PCFG (*(volatile uint32_t *)0x9F080088) |
| |
| /* "Data/Instruction Cache Memory Way Count" fields */ |
| #define CxL1CCAP_DCMWC ((CxL1CCAP >> 16) & 7) |
| #define CxL1CCAP_ICMWC ((CxL1CCAP >> 20) & 7) |
| |
| static ALWAYS_INLINE void enable_l1_cache(void) |
| { |
| uint32_t reg; |
| |
| #ifdef CONFIG_SOC_SERIES_INTEL_CAVS_V25 |
| /* First, on cAVS 2.5 we need to power the cache SRAM banks |
| * on! Write a bit for each cache way in the bottom half of |
| * the L1CCFG register and poll the top half for them to turn |
| * on. |
| */ |
| uint32_t dmask = BIT(CxL1CCAP_DCMWC) - 1; |
| uint32_t imask = BIT(CxL1CCAP_ICMWC) - 1; |
| uint32_t waymask = (imask << 8) | dmask; |
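| /* e.g. with 3 data ways and 3 instruction ways: dmask == 0x7, |
| * imask == 0x7, waymask == 0x707. |
| */ |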
| |
| CxL1CCFG = waymask; |
| while (((CxL1CCFG >> 16) & waymask) != waymask) { |
| } |
| |
| /* Prefetcher also power gates, same interface */ |
| CxL1PCFG = 1; |
| while ((CxL1PCFG & 0x10000) == 0) { |
| } |
| #endif |
| |
| /* Now set up the Xtensa CPU to enable the cache logic. The |
| * details of the fields are somewhat complicated, but per the |
| * ISA ref: "Turning on caches at power-up usually consists of |
| * writing a constant with bits[31:8] all 1’s to MEMCTL.". |
| * Also set bit 0 to enable the LOOP extension instruction |
| * fetch buffer. |
| */ |
| #ifdef XCHAL_HAVE_ICACHE_DYN_ENABLE |
| reg = 0xffffff01; |
| __asm__ volatile("wsr %0, MEMCTL; rsync" :: "r"(reg)); |
| #endif |
| |
| /* Likewise enable prefetching. Sadly these values are not |
| * architecturally defined by Xtensa (they're just documented |
| * as priority hints), so this constant is just copied from |
| * SOF for now. If we care about prefetch priority tuning |
| * we're supposed to ask Cadence I guess. |
| */ |
| reg = IS_ENABLED(CONFIG_SOC_SERIES_INTEL_CAVS_V25) ? 0x1038 : 0; |
| __asm__ volatile("wsr %0, PREFCTL; rsync" :: "r"(reg)); |
| |
| /* Finally we need to enable the cache in the Region |
| * Protection Option "TLB" entries. The hardware defaults |
| * have this set to RW/uncached (2) everywhere. We want |
| * writeback caching (4) in the sixth mapping (the second of |
| * two RAM mappings) and to mark all unused regions |
| * inaccessible (15) for safety. Note that there is a HAL |
| * routine that does this (by emulating the older "cacheattr" |
| * hardware register), but it generates significantly larger |
| * code. |
| */ |
| #ifdef CONFIG_SOC_SERIES_INTEL_CAVS_V25 |
| /* On older (pre-2.5) hardware the ROM has already set this up. */ |
| const uint8_t attribs[] = { 2, 15, 15, 15, 2, 4, 15, 15 }; |
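| /* Each TLB entry covers a 512MB region (0x20000000 * index): entries |
| * 0 and 4 stay RW/uncached (2), entry 5 (the second RAM mapping noted |
| * above) is writeback-cached (4), and all others are inaccessible |
| * (15). |
| */ |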
| |
| for (int region = 0; region < 8; region++) { |
| reg = 0x20000000 * region; |
| __asm__ volatile("wdtlb %0, %1" :: "r"(attribs[region]), "r"(reg)); |
| } |
| #endif |
| } |
| |
| void z_mp_entry(void) |
| { |
| volatile int ie; |
| uint32_t reg; |
| |
| enable_l1_cache(); |
| |
| /* Fix ATOMCTL to match CPU0. Hardware defaults for S32C1I |
| * use internal operations (and are thus presumably atomic |
| * only WRT the local CPU!). We need external transactions on |
| * the shared bus. |
| */ |
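| /* (Each two-bit ATOMCTL field selects the S32C1I behavior for one |
| * memory access class; 0x15 sets them all to 1, which selects an RCW |
| * read-conditional-write transaction on the external bus.) |
| */ |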
| reg = 0x15; |
| __asm__ volatile("wsr %0, ATOMCTL" :: "r"(reg)); |
| |
| /* We don't know what the boot ROM (on pre-2.5 DSPs) might |
| * have touched and we don't care. Make sure it's not in our |
| * local cache to be flushed accidentally later. |
| * |
| * Note that technically this is dropping our own (cached) |
| * stack memory, which we don't have a guarantee the compiler |
| * isn't using yet. Manual inspection of generated code says |
| * we're safe, but really we need a better solution here. |
| */ |
| #ifndef CONFIG_SOC_SERIES_INTEL_CAVS_V25 |
| z_xtensa_cache_flush_inv_all(); |
| #endif |
| |
| /* Mask all interrupts, then copy over VECBASE from the main CPU as |
| * an initial value (this will need revisiting if we ever allow a |
| * user API to change interrupt vectors at runtime). |
| */ |
| ie = 0; |
| __asm__ volatile("wsr.INTENABLE %0" : : "r"(ie)); |
| __asm__ volatile("wsr.VECBASE %0" : : "r"(start_rec.vecbase)); |
| __asm__ volatile("rsync"); |
| |
| /* Set up the CPU pointer. */ |
| _cpu_t *cpu = &_kernel.cpus[start_rec.cpu]; |
| |
| __asm__ volatile( |
| "wsr." CONFIG_XTENSA_KERNEL_CPU_PTR_SR " %0" : : "r"(cpu)); |
| |
| /* We got here via an IDC interrupt. Clear the TFC high bit |
| * (by writing a one!) to acknowledge and clear the latched |
| * hardware interrupt (so we don't have to service it as a |
| * spurious IPI when we enter user code). Remember: this |
| * could have come from any core, clear all of them. |
| */ |
| for (int i = 0; i < CONFIG_MP_NUM_CPUS; i++) { |
| IDC[start_rec.cpu].core[i].tfc = BIT(31); |
| } |
| |
| /* Interrupt enabling is per-CPU state, so the IDC interrupt must be |
| * enabled here, while running on this core. |
| */ |
| irq_enable(DT_IRQN(DT_INST(0, intel_cavs_idc))); |
| |
| #ifdef CONFIG_SMP_BOOT_DELAY |
| cavs_idc_smp_init(NULL); |
| #endif |
| |
| cpus_active[start_rec.cpu] = true; |
| start_rec.alive = 1; |
| |
| start_rec.fn(start_rec.arg); |
| |
| #if CONFIG_MP_NUM_CPUS == 1 |
| /* In this configuration CPU#1 is under manual control and runs |
| * custom functions instead of participating in general thread |
| * execution. Those functions may return, so park the CPU in the |
| * idle loop here; z_mp_entry() itself must never return. |
| */ |
| for (;;) { |
| k_cpu_idle(); |
| } |
| #endif |
| } |
| |
| bool arch_cpu_active(int cpu_num) |
| { |
| return cpus_active[cpu_num]; |
| } |
| |
| static ALWAYS_INLINE uint32_t prid(void) |
| { |
| uint32_t prid; |
| |
| __asm__ volatile("rsr %0, PRID" : "=r"(prid)); |
| return prid; |
| } |
| |
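| /* Bringing up a core: publish the start record, unmask and power up |
| * the target, send the POWER_UP IDC (whose extension word encodes the |
| * entry stub address), then spin until the new core sets alive. |
| */ |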
| void arch_start_cpu(int cpu_num, k_thread_stack_t *stack, int sz, |
| arch_cpustart_t fn, void *arg) |
| { |
| uint32_t vecbase, curr_cpu = prid(); |
| |
| #ifdef CONFIG_SOC_SERIES_INTEL_CAVS_V25 |
| /* On cAVS v2.5, MP startup works differently. The core has |
| * no ROM, and starts running immediately upon receipt of an |
| * IDC interrupt at the start of LPSRAM at 0xbe800000. Note that |
| * this means we don't need to bother constructing a "message" |
| * below, it will be ignored. But it's left in place for |
| * simplicity and compatibility. |
| * |
| * All we need to do is place a single jump at that address to |
| * our existing MP entry point. Unfortunately Xtensa makes |
| * this difficult, as the region is beyond the range of a |
| * relative jump instruction, so we need an immediate, which |
| * can only be backwards-referenced. So we hand-assemble a |
| * tiny trampoline here ("jump over the immediate address, |
| * load it, jump to it"). |
| * |
| * Long term we want to have this in linkable LP-SRAM memory |
| * such that the standard system bootstrap out of IMR can |
| * place it there. But this is fine for now. |
| */ |
| void **lpsram = z_soc_uncached_ptr((void *)LP_SRAM_BASE); |
| uint8_t tramp[] = { |
| 0x06, 0x01, 0x00, /* J <PC+8> (jump to L32R) */ |
| 0, /* (padding to align entry_addr) */ |
| 0, 0, 0, 0, /* (entry_addr goes here) */ |
| 0x01, 0xff, 0xff, /* L32R a0, <entry_addr> */ |
| 0xa0, 0x00, 0x00, /* JX a0 */ |
| }; |
| |
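| /* Copy the stub into LP-SRAM, then patch word 1 (bytes 4..7, the |
| * entry_addr slot that the L32R reads) with the real entry point. |
| */ |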
| memcpy(lpsram, tramp, ARRAY_SIZE(tramp)); |
| lpsram[1] = z_soc_mp_asm_entry; |
| #endif |
| |
| __asm__ volatile("rsr.VECBASE %0\n\t" : "=r"(vecbase)); |
| |
| start_rec.cpu = cpu_num; |
| start_rec.fn = fn; |
| start_rec.arg = arg; |
| start_rec.vecbase = vecbase; |
| start_rec.alive = 0; |
| |
| z_mp_stack_top = Z_THREAD_STACK_BUFFER(stack) + sz; |
| |
| /* Pre-2.5 cAVS delivers the IDC to ROM code, so unmask it */ |
| CAVS_INTCTRL[cpu_num].l2.clear = CAVS_L2_IDC; |
| |
| /* Disable automatic power and clock gating for that CPU, so |
| * it won't just go back to sleep. Note that after startup, |
| * the cores are NOT power gated even if they're configured to |
| * be, so by default a core will launch successfully but then |
| * turn itself off when it gets to the WAITI instruction in |
| * the idle thread. |
| */ |
| volatile struct soc_dsp_shim_regs *shim = (void *)SOC_DSP_SHIM_REG_BASE; |
| |
| shim->pwrctl |= BIT(cpu_num); |
| if (!IS_ENABLED(CONFIG_SOC_SERIES_INTEL_CAVS_V15)) { |
| shim->clkctl |= BIT(16 + cpu_num); |
| } |
| |
| /* Send the power-up message to the other core. Writing ITC with the |
| * BUSY bit set is what actually latches the IDC toward the target. |
| */ |
| uint32_t ietc = IDC_MSG_POWER_UP_EXT((uintptr_t)z_soc_mp_asm_entry); |
| |
| IDC[curr_cpu].core[cpu_num].ietc = ietc; |
| IDC[curr_cpu].core[cpu_num].itc = IDC_MSG_POWER_UP | IPC_IDCITC_BUSY; |
| |
| #ifndef CONFIG_SOC_SERIES_INTEL_CAVS_V25 |
| /* Early DSPs have a ROM that actually receives the startup |
| * IDC as an interrupt, and we don't want that to be confused |
| * by IPIs sent by the OS elsewhere. Mask the IDC interrupt |
| * on the other core so IPIs won't cause it to jump to the ISR |
| * until the core is fully initialized. |
| */ |
| uint32_t idc_reg = idc_read(IPC_IDCCTL, cpu_num); |
| |
| idc_reg &= ~IPC_IDCCTL_IDCTBIE(0); |
| idc_write(IPC_IDCCTL, cpu_num, idc_reg); |
| sys_set_bit(DT_REG_ADDR(DT_NODELABEL(cavs0)) + 0x00 + |
| CAVS_ICTL_INT_CPU_OFFSET(cpu_num), 8); |
| |
| k_busy_wait(100); |
| |
| #ifdef CONFIG_SMP_BOOT_DELAY |
| cavs_idc_smp_init(NULL); |
| #endif |
| #endif |
| |
| /* Spin until the new CPU signals that it is up and running */ |
| while (!start_rec.alive) { |
| } |
| } |
| |
| void arch_sched_ipi(void) |
| { |
| #ifdef CONFIG_SOC_SERIES_INTEL_CAVS_V25 |
| uint32_t curr = prid(); |
| |
| for (int c = 0; c < CONFIG_MP_NUM_CPUS; c++) { |
| if (c != curr && cpus_active[c]) { |
| IDC[curr].core[c].itc = BIT(31); /* BUSY bit: latch an IPI */ |
| } |
| } |
| #else |
| /* Legacy implementation for cavs15 based on the 2-core-only |
| * IPM driver. To be replaced with the general one when |
| * validated. |
| */ |
| const struct device *idcdev = |
| device_get_binding(DT_LABEL(DT_INST(0, intel_cavs_idc))); |
| |
| ipm_send(idcdev, 0, IPM_CAVS_IDC_MSG_SCHED_IPI_ID, |
| IPM_CAVS_IDC_MSG_SCHED_IPI_DATA, 0); |
| #endif |
| } |
| |
| void idc_isr(void *param) |
| { |
| ARG_UNUSED(param); |
| |
| #ifdef CONFIG_SMP |
| /* Right now this interrupt is only used for IPIs */ |
| z_sched_ipi(); |
| #endif |
| |
| /* ACK the interrupt to all the possible sources. This is a |
| * level-sensitive interrupt triggered by a logical OR of each |
| * of the ITC/TFC high bits, INCLUDING the one "from this |
| * CPU". |
| */ |
| for (int i = 0; i < CONFIG_MP_NUM_CPUS; i++) { |
| IDC[prid()].core[i].tfc = BIT(31); |
| } |
| } |
| |
| #ifndef CONFIG_IPM_CAVS_IDC |
| /* Fallback stub for external SOF code */ |
| int cavs_idc_smp_init(const struct device *dev) |
| { |
| ARG_UNUSED(dev); |
| return 0; |
| } |
| #endif |
| |
| void soc_idc_init(void) |
| { |
| #ifndef CONFIG_IPM_CAVS_IDC |
| IRQ_CONNECT(DT_IRQN(DT_NODELABEL(idc)), 0, idc_isr, NULL, 0); |
| #endif |
| |
| /* Every CPU should be able to receive an IDC interrupt from |
| * every other CPU, but not to be back-interrupted when the |
| * target core clears the busy bit. |
| */ |
| for (int core = 0; core < CONFIG_MP_NUM_CPUS; core++) { |
| uint32_t coremask = BIT(CONFIG_MP_NUM_CPUS) - 1; |
| |
| IDC[core].busy_int |= coremask; /* take BUSY (request) interrupts */ |
| IDC[core].done_int &= ~coremask; /* ...but not DONE (ack) interrupts */ |
| |
| /* Also unmask the IDC interrupt for every core in the |
| * L2 mask register. |
| */ |
| CAVS_INTCTRL[core].l2.clear = CAVS_L2_IDC; |
| } |
| |
| /* Clear out any existing pending interrupts that might be present */ |
| for (int i = 0; i < CONFIG_MP_NUM_CPUS; i++) { |
| for (int j = 0; j < CONFIG_MP_NUM_CPUS; j++) { |
| IDC[i].core[j].tfc = BIT(31); |
| } |
| } |
| |
| cpus_active[0] = true; |
| } |
| |
| /** |
| * @brief Restart halted SMP CPU |
| * |
| * Relaunches a CPU that has entered an idle power state via |
| * soc_halt_cpu(). Returns -EINVAL if the CPU is not in a power-gated |
| * idle state. Upon successful return, the CPU is online and |
| * available to run any Zephyr thread. |
| * |
| * @param id CPU to start, in the range [1:CONFIG_MP_NUM_CPUS) |
| * @return 0 on success, -EINVAL if id is out of range or the CPU is |
| * not currently halted |
| */ |
| int soc_relaunch_cpu(int id) |
| { |
| volatile struct soc_dsp_shim_regs *shim = (void *)SOC_DSP_SHIM_REG_BASE; |
| int ret = 0; |
| k_spinlock_key_t k = k_spin_lock(&mplock); |
| |
| if (id < 1 || id >= CONFIG_MP_NUM_CPUS) { |
| ret = -EINVAL; |
| goto out; |
| } |
| |
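| /* Still powered up? Then the CPU was never halted. */ |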
| if (shim->pwrsts & BIT(id)) { |
| ret = -EINVAL; |
| goto out; |
| } |
| |
| CAVS_INTCTRL[id].l2.clear = CAVS_L2_IDC; |
| z_reinit_idle_thread(id); |
| z_smp_start_cpu(id); |
| |
| out: |
| k_spin_unlock(&mplock, k); |
| return ret; |
| } |
| |
| /** |
| * @brief Halts and offlines a running CPU |
| * |
| * Enables power gating on the specified CPU, which cannot be the |
| * current CPU or CPU 0. The CPU must be idle; no application threads |
| * may be runnable on it when this function is called (or at least the |
| * CPU must be guaranteed to reach idle in finite time without |
| * deadlock). Actual CPU shutdown can only happen in the context of |
| * the idle thread, and synchronization is an application |
| * responsibility. This function will hang if the other CPU fails to |
| * reach idle. |
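| * |
| * Typical usage pairs a halt with a later relaunch; here |
| * do_offline_work() stands in for arbitrary application code: |
| * |
| * if (soc_halt_cpu(1) == 0) { |
| * do_offline_work(); |
| * soc_relaunch_cpu(1); |
| * } |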
| * |
| * @param id CPU to halt, not current cpu or cpu 0 |
| * @return 0 on success, -EINVAL on error |
| */ |
| int soc_halt_cpu(int id) |
| { |
| volatile struct soc_dsp_shim_regs *shim = (void *)SOC_DSP_SHIM_REG_BASE; |
| int ret = 0; |
| k_spinlock_key_t k = k_spin_lock(&mplock); |
| |
| if (id == 0 || id == _current_cpu->id) { |
| ret = -EINVAL; |
| goto out; |
| } |
| |
| /* Turn off the "prevent power/clock gating" bits, enabling |
| * low power idle, and mask off IDC interrupts so it will not |
| * be woken up by scheduler IPIs |
| */ |
| CAVS_INTCTRL[id].l2.set = CAVS_L2_IDC; |
| shim->pwrctl &= ~BIT(id); |
| shim->clkctl &= ~BIT(16 + id); |
| |
| /* Wait for the CPU to actually reach the power-gated state before returning */ |
| while (shim->pwrsts & BIT(id)) { |
| } |
| |
| out: |
| k_spin_unlock(&mplock, k); |
| return ret; |
| } |