/*
 * Copyright 2023 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */
#include <stdint.h>
#include <stdbool.h>
#include <zephyr/kernel.h>
#include <xtensa/config/core-isa.h>
#include <xtensa_mmu_priv.h>
#include <zephyr/cache.h>

#ifdef CONFIG_USERSPACE
BUILD_ASSERT((CONFIG_PRIVILEGED_STACK_SIZE > 0) &&
	     (CONFIG_PRIVILEGED_STACK_SIZE % CONFIG_MMU_PAGE_SIZE) == 0);
#endif

#define ASID_INVALID 0

struct tlb_regs {
	uint32_t rasid;
	uint32_t ptevaddr;
	uint32_t ptepin_as;
	uint32_t ptepin_at;
	uint32_t vecpin_as;
	uint32_t vecpin_at;
};

static void compute_regs(uint32_t user_asid, uint32_t *l1_page, struct tlb_regs *regs)
{
	uint32_t vecbase = XTENSA_RSR("VECBASE");

	__ASSERT_NO_MSG((((uint32_t)l1_page) & 0xfff) == 0);
	__ASSERT_NO_MSG((user_asid == 0) || ((user_asid > 2) &&
			(user_asid < XTENSA_MMU_SHARED_ASID)));

	/* We don't use ring 1, ring 0 ASID must be 1 */
	regs->rasid = (XTENSA_MMU_SHARED_ASID << 24) |
		      (user_asid << 16) | 0x000201;
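	/* The RASID register packs one 8-bit ASID per ring (ring 0 in the
	 * low byte, ring 3 in the high byte), so the value above assigns
	 * ASID 1 to ring 0, ASID 2 to the unused ring 1, the per-domain
	 * ASID to ring 2 and the shared ASID to ring 3.
	 */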

	/* Derive PTEVADDR from ASID so each domain gets its own PTE area */
	regs->ptevaddr = CONFIG_XTENSA_MMU_PTEVADDR + user_asid * 0x400000;
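	/* (With 4k pages and 4-byte PTEs, a table covering 4G of virtual
	 * space is exactly 4M, hence the 0x400000 stride between the
	 * per-ASID PTE windows.)
	 */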

	/* The ptables code doesn't add the mapping for the l1 page itself */
	l1_page[XTENSA_MMU_L1_POS(regs->ptevaddr)] =
		(uint32_t)l1_page | XTENSA_MMU_PAGE_TABLE_ATTR;

	regs->ptepin_at = (uint32_t)l1_page;
	regs->ptepin_as = XTENSA_MMU_PTE_ENTRY_VADDR(regs->ptevaddr, regs->ptevaddr)
			  | XTENSA_MMU_PTE_WAY;
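	/* The *_as/*_at pairs computed here feed the "wdtlb at, as"
	 * instructions below: "as" selects the virtual address and TLB
	 * way to write, "at" supplies the translation and attributes.
	 */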

	/* Pin mapping for refilling the vector address into the ITLB
	 * (for handling TLB miss exceptions). Note: this is NOT an
	 * instruction TLB entry for the vector code itself, it's a
	 * DATA TLB entry for the page containing the vector mapping
	 * so the refill on instruction fetch can find it. The
	 * hardware doesn't have a 4k pinnable instruction TLB way,
	 * frustratingly.
	 */
	uint32_t vb_pte = l1_page[XTENSA_MMU_L1_POS(vecbase)];

	regs->vecpin_at = vb_pte;
	regs->vecpin_as = XTENSA_MMU_PTE_ENTRY_VADDR(regs->ptevaddr, vecbase)
			  | XTENSA_MMU_VECBASE_WAY;
}

/* Switch to a new page table. There are four items we have to set in
 * the hardware: the PTE virtual address, the ring/ASID mapping
 * register, and two pinned entries in the data TLB handling refills
 * for the page tables and the vector handlers.
 *
 * These can be done in any order, provided that we ensure that no
 * memory access that causes a TLB miss can happen during the process.
 * This means that we must work entirely within registers in a single
 * asm block. Also note that instruction fetches are memory accesses
 * too, which means we cannot cross a page boundary which might reach
 * a new page not in the TLB (a single jump to an aligned address that
 * holds our five instructions is sufficient to guarantee that: I
 * couldn't think of a way to do the alignment statically that also
 * interoperated well with inline assembly).
 */
void xtensa_set_paging(uint32_t user_asid, uint32_t *l1_page)
{
	/* Optimization note: the registers computed here are pure
	 * functions of the two arguments. With a minor API tweak,
	 * they could be cached in e.g. a thread struct instead of
	 * being recomputed. This is called on context switch paths
	 * and is performance-sensitive.
	 */
	struct tlb_regs regs;

	compute_regs(user_asid, l1_page, &regs);

	__asm__ volatile("j 1f\n"
			 ".align 16\n" /* enough for 5 insns */
			 "1:\n"
			 "wsr %0, PTEVADDR\n"
			 "wsr %1, RASID\n"
			 "wdtlb %2, %3\n"
			 "wdtlb %4, %5\n"
			 "isync"
			 :: "r"(regs.ptevaddr), "r"(regs.rasid),
			    "r"(regs.ptepin_at), "r"(regs.ptepin_as),
			    "r"(regs.vecpin_at), "r"(regs.vecpin_as));
}
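/* Illustrative call site (hypothetical; the field names below are not
 * actual Zephyr structures): a context switch path would pass the
 * incoming thread's ASID and L1 page table, e.g.
 *
 *	xtensa_set_paging(domain->asid, domain->l1_table);
 */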

/* This is effectively the same algorithm as in xtensa_set_paging(),
 * but it also disables the hardware-initialized 512M TLB entries in
 * way 6 (because the hardware disallows duplicate TLB mappings). For
 * instruction fetches this produces a critical ordering constraint:
 * the instruction following the invalidation of the ITLB entry mapping
 * the current PC will by definition create a refill condition, which
 * will (because the data TLB was invalidated) cause a refill
 * exception. Therefore this step must be the very last one, once
 * everything else is set up and working, which includes the
 * invalidation of the virtual PTEVADDR area so that the resulting
 * refill can complete.
 *
 * Note that we can't guarantee that the compiler won't insert a data
 * fetch from our stack memory after exit from the asm block (while it
 * might be double-mapped), so we invalidate that data TLB entry inside
 * the asm for correctness. The other 13 entries get invalidated in a C
 * loop at the end.
 */
void xtensa_init_paging(uint32_t *l1_page)
{
	extern char z_xt_init_pc; /* defined in asm below */
	struct tlb_regs regs;
	unsigned int initial_rasid;

	/* The initial RASID after hardware initialization is 0x04030201.
	 * ASID 1 is hardwired to ring 0; the other slots must be different
	 * from each other and must not be 0.
	 *
	 * For our initial implementation we just set the 4th slot (ring 3)
	 * to the ASID value used for memory that is shared with all threads.
	 */
	initial_rasid = 0xff030201;
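	/* Byte layout of the value above: ring 0 = 1, ring 1 = 2,
	 * ring 2 = 3 (just a distinct non-zero value, no user domain is
	 * active yet), ring 3 = 0xff, the shared ASID described above.
	 */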

#if CONFIG_MP_MAX_NUM_CPUS > 1
	/* The incoherent cache can get into terrible trouble if it's
	 * allowed to cache PTEs differently across CPUs. We require
	 * that all page tables supplied by the OS have exclusively
	 * uncached mappings for the page table data, but we can't do
	 * anything about earlier code/firmware. Dump the cache to be safe.
	 */
	sys_cache_data_flush_and_invd_all();
#endif

	compute_regs(ASID_INVALID, l1_page, &regs);

	uint32_t idtlb_pte = (regs.ptevaddr & 0xe0000000) | XCHAL_SPANNING_WAY;
	uint32_t idtlb_stk = (((uint32_t)&regs) & ~0xfff) | XCHAL_SPANNING_WAY;
	uint32_t iitlb_pc = (((uint32_t)&z_xt_init_pc) & ~0xfff) | XCHAL_SPANNING_WAY;
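	/* These are the entry-specifier operands for the idtlb/iitlb
	 * instructions below: the address bits select the 512M region
	 * whose hardware-initialized entry we want to drop, and the low
	 * bits select the spanning way.
	 */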

	/* Note: the jump is mostly pedantry, as it's almost
	 * inconceivable that a hardware memory region at boot is
	 * going to cross a 512M page boundary. But we need the entry
	 * symbol to get the address above, so the jump is here for
	 * symmetry with the set_paging() code.
	 */
	__asm__ volatile("j z_xt_init_pc\n"
			 ".align 32\n" /* room for 10 insns */
			 ".globl z_xt_init_pc\n"
			 "z_xt_init_pc:\n"
			 "wsr %0, PTEVADDR\n"
			 "wsr %1, RASID\n"
			 "wdtlb %2, %3\n"
			 "wdtlb %4, %5\n"
			 "idtlb %6\n" /* invalidate pte */
			 "idtlb %7\n" /* invalidate stk */
			 "isync\n"
			 "iitlb %8\n" /* invalidate pc */
			 "isync\n" /* <--- traps an ITLB miss */
			 :: "r"(regs.ptevaddr), "r"(initial_rasid),
			    "r"(regs.ptepin_at), "r"(regs.ptepin_as),
			    "r"(regs.vecpin_at), "r"(regs.vecpin_as),
			    "r"(idtlb_pte), "r"(idtlb_stk), "r"(iitlb_pc));

	/* Invalidate the remaining (unused by this function)
	 * initialization entries. Now we're flying free with our own
	 * page table.
	 */
	for (uint32_t i = 0; i < 8; i++) {
		uint32_t ixtlb = (i * 0x20000000) | XCHAL_SPANNING_WAY;
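		/* Each spanning-way entry covers 512M of the 4G address
		 * space, hence the eight iterations and 0x20000000 stride.
		 */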

		if (ixtlb != iitlb_pc) {
			__asm__ volatile("iitlb %0" :: "r"(ixtlb));
		}
		if (ixtlb != idtlb_stk && ixtlb != idtlb_pte) {
			__asm__ volatile("idtlb %0" :: "r"(ixtlb));
		}
	}
	__asm__ volatile("isync");
}