| /* |
| * Copyright (c) 2011-2014 Wind River Systems, Inc. |
| * Copyright (c) 2017 Intel Corporation |
| * |
| * SPDX-License-Identifier: Apache-2.0 |
| */ |
| #include <kernel.h> |
| #include <ia32/mmustructs.h> |
| #include <linker/linker-defs.h> |
| #include <kernel_internal.h> |
| #include <kernel_structs.h> |
| #include <init.h> |
| #include <ctype.h> |
| #include <string.h> |
| |
| /* Despite our use of PAE page tables, we do not (and will never) actually |
| * support PAE. Use a 64-bit x86 target if you have that much RAM. |
| */ |
| BUILD_ASSERT(DT_PHYS_RAM_ADDR + (DT_RAM_SIZE * 1024ULL) - 1ULL <= |
| (unsigned long long)UINTPTR_MAX); |
| |
| /* Common regions for all x86 processors. |
| * Peripheral I/O ranges are configured at the SoC level. |
| */ |
| |
| /* Mark text and rodata as read-only. |
| * Userspace may read all text and rodata. |
| */ |
| MMU_BOOT_REGION((u32_t)&_image_text_start, (u32_t)&_image_text_size, |
| MMU_ENTRY_READ | MMU_ENTRY_USER); |
| |
| MMU_BOOT_REGION((u32_t)&_image_rodata_start, (u32_t)&_image_rodata_size, |
| MMU_ENTRY_READ | MMU_ENTRY_USER | MMU_ENTRY_EXECUTE_DISABLE); |
| |
| #ifdef CONFIG_USERSPACE |
| MMU_BOOT_REGION((u32_t)&_app_smem_start, (u32_t)&_app_smem_size, |
| MMU_ENTRY_WRITE | MMU_ENTRY_EXECUTE_DISABLE); |
| #endif |
| |
| #ifdef CONFIG_COVERAGE_GCOV |
| MMU_BOOT_REGION((u32_t)&__gcov_bss_start, (u32_t)&__gcov_bss_size, |
| MMU_ENTRY_WRITE | MMU_ENTRY_USER | MMU_ENTRY_EXECUTE_DISABLE); |
| #endif |
| |
| /* __kernel_ram_size includes all unused memory, which is used for heaps. |
| * User threads cannot access this region unless access is granted at |
| * runtime; this is done automatically for thread stacks. |
| */ |
| MMU_BOOT_REGION((u32_t)&__kernel_ram_start, (u32_t)&__kernel_ram_size, |
| MMU_ENTRY_WRITE | MMU_ENTRY_EXECUTE_DISABLE); |
| |
| /* Return one character summarizing a paging structure entry's permissions. |
| * This works for PDPT, PD, and PT entries alike; the bits checked here are |
| * the same in all of them. |
| * |
| * Not trying to capture every flag, just the most interesting ones: |
| * present, write, XD, and user, in typically encountered combinations. |
| * Uppercase indicates the entry is accessible from user mode. |
| */ |
| static char get_entry_code(u64_t value) |
| { |
| char ret; |
| |
| if ((value & MMU_ENTRY_PRESENT) == 0) { |
| ret = '.'; |
| } else { |
| if ((value & MMU_ENTRY_WRITE) != 0) { |
| /* Writable page */ |
| if ((value & MMU_ENTRY_EXECUTE_DISABLE) != 0) { |
| /* RW */ |
| ret = 'w'; |
| } else { |
| /* RWX */ |
| ret = 'a'; |
| } |
| } else { |
| if ((value & MMU_ENTRY_EXECUTE_DISABLE) != 0) { |
| /* R */ |
| ret = 'r'; |
| } else { |
| /* RX */ |
| ret = 'x'; |
| } |
| } |
| |
| if ((value & MMU_ENTRY_USER) != 0) { |
| /* Uppercase indicates user mode access */ |
| ret = toupper(ret); |
| } |
| } |
| |
| return ret; |
| } |
| |
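| /* Print a one-character permission summary for every PTE in this page |
| * table, 64 entries per line (see get_entry_code() for the encoding). |
| */ |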
| static void z_x86_dump_pt(struct x86_mmu_pt *pt, uintptr_t base, int index) |
| { |
| int column = 0; |
| |
| printk("Page table %d for 0x%08lX - 0x%08lX at %p\n", |
| index, base, base + Z_X86_PT_AREA - 1, pt); |
| |
| for (int i = 0; i < Z_X86_NUM_PT_ENTRIES; i++) { |
| printk("%c", get_entry_code(pt->entry[i].value)); |
| |
| column++; |
| if (column == 64) { |
| column = 0; |
| printk("\n"); |
| } |
| } |
| } |
| |
| static void z_x86_dump_pd(struct x86_mmu_pd *pd, uintptr_t base, int index) |
| { |
| int column = 0; |
| |
| printk("Page directory %d for 0x%08lX - 0x%08lX at %p\n", |
| index, base, base + Z_X86_PD_AREA - 1, pd); |
| |
| for (int i = 0; i < Z_X86_NUM_PD_ENTRIES; i++) { |
| printk("%c", get_entry_code(pd->entry[i].pt.value)); |
| |
| column++; |
| if (column == 64) { |
| column = 0; |
| printk("\n"); |
| } |
| } |
| |
| for (int i = 0; i < Z_X86_NUM_PD_ENTRIES; i++) { |
| struct x86_mmu_pt *pt; |
| union x86_mmu_pde_pt *pde = &pd->entry[i].pt; |
| |
| if (pde->p == 0 || pde->ps == 1) { |
| /* Skip non-present or 2MB directory entries; there is |
| * no page table to examine |
| */ |
| continue; |
| } |
| pt = (struct x86_mmu_pt *)(pde->pt << MMU_PAGE_SHIFT); |
| |
| z_x86_dump_pt(pt, base + (i * Z_X86_PT_AREA), i); |
| } |
| } |
| |
| static void z_x86_dump_pdpt(struct x86_mmu_pdpt *pdpt, uintptr_t base, |
| int index) |
| { |
| printk("Page directory pointer table %d for 0x%08lX - 0x%08lX at %p\n", |
| index, base, base + Z_X86_PDPT_AREA - 1, pdpt); |
| |
| for (int i = 0; i < Z_X86_NUM_PDPT_ENTRIES; i++) { |
| printk("%c", get_entry_code(pdpt->entry[i].value)); |
| } |
| printk("\n"); |
| for (int i = 0; i < Z_X86_NUM_PDPT_ENTRIES; i++) { |
| struct x86_mmu_pd *pd; |
| |
| if (pdpt->entry[i].p == 0) { |
| continue; |
| } |
| pd = (struct x86_mmu_pd *)(pdpt->entry[i].pd << MMU_PAGE_SHIFT); |
| |
| z_x86_dump_pd(pd, base + (i * Z_X86_PD_AREA), i); |
| } |
| } |
| |
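| /* Dump a summary of an entire set of page tables, starting at the PDPT; |
| * for example, z_x86_dump_page_tables(&z_x86_kernel_pdpt) dumps the |
| * kernel's boot-time tables. |
| */ |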
| void z_x86_dump_page_tables(struct x86_mmu_pdpt *pdpt) |
| { |
| z_x86_dump_pdpt(pdpt, 0, 0); |
| } |
| |
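| /* Fetch the flag bits (everything outside the address field) of the PDE |
| * and PTE that map 'addr'. If the PDE is not present, *pte_flags is set |
| * to zero, since there is no page table to consult. |
| */ |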
| void z_x86_mmu_get_flags(struct x86_mmu_pdpt *pdpt, void *addr, |
| x86_page_entry_data_t *pde_flags, |
| x86_page_entry_data_t *pte_flags) |
| { |
| *pde_flags = |
| (x86_page_entry_data_t)(X86_MMU_GET_PDE(pdpt, addr)->value & |
| ~(x86_page_entry_data_t)MMU_PDE_PAGE_TABLE_MASK); |
| |
| if ((*pde_flags & MMU_ENTRY_PRESENT) != 0) { |
| *pte_flags = (x86_page_entry_data_t) |
| (X86_MMU_GET_PTE(pdpt, addr)->value & |
| ~(x86_page_entry_data_t)MMU_PTE_PAGE_MASK); |
| } else { |
| *pte_flags = 0; |
| } |
| } |
| |
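| /* Check whether the buffer at 'addr' of 'size' bytes is accessible from |
| * user mode (and writable, if 'write' is non-zero) according to the |
| * provided page tables. Returns 0 if the entire buffer is accessible, |
| * -EPERM otherwise. |
| */ |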
| int z_x86_mmu_validate(struct x86_mmu_pdpt *pdpt, void *addr, size_t size, |
| int write) |
| { |
| u32_t start_pde_num; |
| u32_t end_pde_num; |
| u32_t starting_pte_num; |
| u32_t ending_pte_num; |
| u32_t pde; |
| u32_t pte; |
| union x86_mmu_pte pte_value; |
| u32_t start_pdpte_num = MMU_PDPTE_NUM(addr); |
| u32_t end_pdpte_num = MMU_PDPTE_NUM((char *)addr + size - 1); |
| u32_t pdpte; |
| struct x86_mmu_pt *pte_address; |
| int ret = -EPERM; |
| |
| start_pde_num = MMU_PDE_NUM(addr); |
| end_pde_num = MMU_PDE_NUM((char *)addr + size - 1); |
| starting_pte_num = MMU_PAGE_NUM((char *)addr); |
| |
| for (pdpte = start_pdpte_num; pdpte <= end_pdpte_num; pdpte++) { |
| if (pdpte != start_pdpte_num) { |
| start_pde_num = 0U; |
| } |
| |
| if (pdpte != end_pdpte_num) { |
| end_pde_num = 511U; |
| } else { |
| end_pde_num = MMU_PDE_NUM((char *)addr + size - 1); |
| } |
| |
| /* Ensure page directory pointer table entry is present */ |
| if (X86_MMU_GET_PDPTE_INDEX(pdpt, pdpte)->p == 0) { |
| goto out; |
| } |
| |
| struct x86_mmu_pd *pd_address = |
| X86_MMU_GET_PD_ADDR_INDEX(pdpt, pdpte); |
| |
| /* Iterate over all the PDEs the buffer might span, which |
| * depends on the buffer's size and start address. |
| */ |
| for (pde = start_pde_num; pde <= end_pde_num; pde++) { |
| union x86_mmu_pde_pt pde_value = |
| pd_address->entry[pde].pt; |
| |
| if ((pde_value.p) == 0 || |
| (pde_value.us) == 0 || |
| ((write != 0) && (pde_value.rw == 0))) { |
| goto out; |
| } |
| |
| pte_address = (struct x86_mmu_pt *) |
| (pde_value.pt << MMU_PAGE_SHIFT); |
| |
| /* Determine the range of PTEs to check within this page |
| * table. For every PDE except the last one, the buffer |
| * covers the table through its final entry (511); for the |
| * last PDE, the ending PTE is derived from the buffer's |
| * last byte. |
| */ |
| if (pde != end_pde_num) { |
| ending_pte_num = 511U; |
| } else { |
| ending_pte_num = |
| MMU_PAGE_NUM((char *)addr + size - 1); |
| } |
| |
| /* Every PDE other than the starting one begins checking |
| * at PTE number zero. |
| */ |
| if (pde != start_pde_num) { |
| starting_pte_num = 0U; |
| } |
| |
| pte_value.value = 0xFFFFFFFFU; |
| |
| /* Bitwise AND all the PTE values together so the |
| * permission bits only need to be compared once. |
| */ |
| for (pte = starting_pte_num; |
| pte <= ending_pte_num; |
| pte++) { |
| pte_value.value &= |
| pte_address->entry[pte].value; |
| } |
| |
| if ((pte_value.p) == 0 || |
| (pte_value.us) == 0 || |
| ((write != 0) && (pte_value.rw == 0))) { |
| goto out; |
| } |
| } |
| } |
| ret = 0; |
| out: |
| #ifdef CONFIG_X86_BOUNDS_CHECK_BYPASS_MITIGATION |
| __asm__ volatile ("lfence" : : : "memory"); |
| #endif |
| |
| return ret; |
| } |
| |
| static inline void tlb_flush_page(void *addr) |
| { |
| /* Invalidate TLB entries corresponding to the page containing the |
| * specified address |
| */ |
| char *page = (char *)addr; |
| |
| __asm__ ("invlpg %0" :: "m" (*page)); |
| } |
| |
| #define PDPTE_FLAGS_MASK MMU_ENTRY_PRESENT |
| |
| #define PDE_FLAGS_MASK (MMU_ENTRY_WRITE | MMU_ENTRY_USER | \ |
| PDPTE_FLAGS_MASK) |
| |
| #define PTE_FLAGS_MASK (PDE_FLAGS_MASK | MMU_ENTRY_EXECUTE_DISABLE | \ |
| MMU_ENTRY_WRITE_THROUGH | \ |
| MMU_ENTRY_CACHING_DISABLE) |
| |
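| /* Update the flags of every page in the region ['ptr', 'ptr' + 'size'), |
| * replacing the bits selected by 'mask' with 'flags' in each PTE and |
| * propagating the relevant flags up to the PDE and PDPTE. Optionally |
| * flushes the TLB for each page touched. |
| */ |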
| void z_x86_mmu_set_flags(struct x86_mmu_pdpt *pdpt, void *ptr, size_t size, |
| x86_page_entry_data_t flags, |
| x86_page_entry_data_t mask, bool flush) |
| { |
| u32_t addr = (u32_t)ptr; |
| |
| __ASSERT((addr & MMU_PAGE_MASK) == 0U, "unaligned address provided"); |
| __ASSERT((size & MMU_PAGE_MASK) == 0U, "unaligned size provided"); |
| |
| /* L1TF mitigation: non-present PTEs will have address fields |
| * zeroed. Expand the mask to include address bits if we are changing |
| * the present bit. |
| */ |
| if ((mask & MMU_PTE_P_MASK) != 0) { |
| mask |= MMU_PTE_PAGE_MASK; |
| } |
| |
| while (size != 0) { |
| union x86_mmu_pte *pte; |
| union x86_mmu_pde_pt *pde; |
| union x86_mmu_pdpte *pdpte; |
| x86_page_entry_data_t cur_flags = flags; |
| |
| pdpte = X86_MMU_GET_PDPTE(pdpt, addr); |
| __ASSERT(pdpte->p == 1, "set flags on non-present PDPTE"); |
| pdpte->value |= (flags & PDPTE_FLAGS_MASK); |
| |
| pde = X86_MMU_GET_PDE(pdpt, addr); |
| __ASSERT(pde->p == 1, "set flags on non-present PDE"); |
| pde->value |= (flags & PDE_FLAGS_MASK); |
| /* If any flags enable execution, clear execute disable at the |
| * page directory level |
| */ |
| if ((flags & MMU_ENTRY_EXECUTE_DISABLE) == 0) { |
| pde->value &= ~MMU_ENTRY_EXECUTE_DISABLE; |
| } |
| |
| pte = X86_MMU_GET_PTE(pdpt, addr); |
| /* If we're setting the present bit, restore the address |
| * field. If we're clearing it, then the address field |
| * will be zeroed instead, mapping the PTE to the NULL page. |
| */ |
| if (((mask & MMU_PTE_P_MASK) != 0) && |
| ((flags & MMU_ENTRY_PRESENT) != 0)) { |
| cur_flags |= addr; |
| } |
| |
| pte->value = (pte->value & ~mask) | cur_flags; |
| if (flush) { |
| tlb_flush_page((void *)addr); |
| } |
| |
| size -= MMU_PAGE_SIZE; |
| addr += MMU_PAGE_SIZE; |
| } |
| } |
| |
| static char __aligned(MMU_PAGE_SIZE) |
| page_pool[MMU_PAGE_SIZE * CONFIG_X86_MMU_PAGE_POOL_PAGES]; |
| |
| static char *page_pos = page_pool + sizeof(page_pool); |
| |
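| /* Hand out page-table pages from a static pool, working backwards from |
| * the end of page_pool. Pages are never returned to the pool. |
| */ |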
| static void *get_page(void) |
| { |
| page_pos -= MMU_PAGE_SIZE; |
| |
| __ASSERT(page_pos >= page_pool, "out of MMU pages\n"); |
| |
| return page_pos; |
| } |
| |
| __aligned(0x20) struct x86_mmu_pdpt z_x86_kernel_pdpt; |
| #ifdef CONFIG_X86_KPTI |
| __aligned(0x20) struct x86_mmu_pdpt z_x86_user_pdpt; |
| #endif |
| |
| extern char z_shared_kernel_page_start[]; |
| |
| static inline bool is_within_system_ram(uintptr_t addr) |
| { |
| return (addr >= DT_PHYS_RAM_ADDR) && |
| (addr < (DT_PHYS_RAM_ADDR + (DT_RAM_SIZE * 1024U))); |
| } |
| |
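| /* Install an identity mapping for a single page at 'addr', creating the |
| * page directory and page table on demand, and OR-ing the provided flags |
| * into the relevant PDPTE, PDE, and PTE. |
| */ |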
| static void add_mmu_region_page(struct x86_mmu_pdpt *pdpt, uintptr_t addr, |
| u64_t flags, bool user_table) |
| { |
| union x86_mmu_pdpte *pdpte; |
| struct x86_mmu_pd *pd; |
| union x86_mmu_pde_pt *pde; |
| struct x86_mmu_pt *pt; |
| union x86_mmu_pte *pte; |
| |
| #ifdef CONFIG_X86_KPTI |
| /* If we are generating page tables for user mode, and this address |
| * does not have the user flag set, and it falls outside of system |
| * RAM, then don't bother generating any tables for it; we will never |
| * need them later, as memory domains are limited to regions within |
| * system RAM. |
| */ |
| if (user_table && (flags & MMU_ENTRY_USER) == 0 && |
| !is_within_system_ram(addr)) { |
| return; |
| } |
| #endif |
| |
| /* Set up the PDPTE entry for the address, creating a page directory |
| * if one didn't exist |
| */ |
| pdpte = &pdpt->entry[MMU_PDPTE_NUM(addr)]; |
| if (pdpte->p == 0) { |
| pd = get_page(); |
| pdpte->pd = ((uintptr_t)pd) >> MMU_PAGE_SHIFT; |
| } else { |
| pd = (struct x86_mmu_pd *)(pdpte->pd << MMU_PAGE_SHIFT); |
| } |
| pdpte->value |= (flags & PDPTE_FLAGS_MASK); |
| |
| /* Set up the PDE entry for the address, creating a page table |
| * if necessary |
| */ |
| pde = &pd->entry[MMU_PDE_NUM(addr)].pt; |
| if (pde->p == 0) { |
| pt = get_page(); |
| pde->pt = ((uintptr_t)pt) >> MMU_PAGE_SHIFT; |
| } else { |
| pt = (struct x86_mmu_pt *)(pde->pt << MMU_PAGE_SHIFT); |
| } |
| pde->value |= (flags & PDE_FLAGS_MASK); |
| |
| /* The execute-disable bit needs special handling: only set it at the |
| * page directory level if ALL pages covered by the entry have XD set, |
| * not just one of them. |
| * |
| * Use the 'ignored2' field to record whether any configured region |
| * allows execution; the CPU never looks at or modifies this field. |
| */ |
| if ((flags & MMU_ENTRY_EXECUTE_DISABLE) == 0) { |
| pde->ignored2 = 1; |
| pde->value &= ~MMU_ENTRY_EXECUTE_DISABLE; |
| } else if (pde->ignored2 == 0) { |
| pde->value |= MMU_ENTRY_EXECUTE_DISABLE; |
| } |
| |
| #ifdef CONFIG_X86_KPTI |
| if (user_table && (flags & MMU_ENTRY_USER) == 0 && |
| addr != (uintptr_t)(&z_shared_kernel_page_start)) { |
| /* All non-user accessible pages except the shared page |
| * are marked non-present in the page table. |
| */ |
| return; |
| } |
| #else |
| ARG_UNUSED(user_table); |
| #endif |
| |
| /* Finally set up the page table entry */ |
| pte = &pt->entry[MMU_PAGE_NUM(addr)]; |
| pte->page = addr >> MMU_PAGE_SHIFT; |
| pte->value |= (flags & PTE_FLAGS_MASK); |
| } |
| |
| static void add_mmu_region(struct x86_mmu_pdpt *pdpt, struct mmu_region *rgn, |
| bool user_table) |
| { |
| size_t size; |
| u64_t flags; |
| uintptr_t addr; |
| |
| __ASSERT((rgn->address & MMU_PAGE_MASK) == 0U, |
| "unaligned address provided"); |
| __ASSERT((rgn->size & MMU_PAGE_MASK) == 0U, |
| "unaligned size provided"); |
| |
| addr = rgn->address; |
| |
| /* Add the present flag, and filter out 'runtime user' since this |
| * has no meaning to the actual MMU |
| */ |
| flags = rgn->flags | MMU_ENTRY_PRESENT; |
| |
| /* Iterate through the region a page at a time, creating entries as |
| * necessary. |
| */ |
| size = rgn->size; |
| while (size > 0) { |
| add_mmu_region_page(pdpt, addr, flags, user_table); |
| |
| size -= MMU_PAGE_SIZE; |
| addr += MMU_PAGE_SIZE; |
| } |
| } |
| |
| extern struct mmu_region z_x86_mmulist_start[]; |
| extern struct mmu_region z_x86_mmulist_end[]; |
| |
| /* Called from x86's kernel_arch_init() */ |
| void z_x86_paging_init(void) |
| { |
| size_t pages_free; |
| |
| for (struct mmu_region *rgn = z_x86_mmulist_start; |
| rgn < z_x86_mmulist_end; rgn++) { |
| add_mmu_region(&z_x86_kernel_pdpt, rgn, false); |
| #ifdef CONFIG_X86_KPTI |
| add_mmu_region(&z_x86_user_pdpt, rgn, true); |
| #endif |
| } |
| |
| pages_free = (page_pos - page_pool) / MMU_PAGE_SIZE; |
| |
| if (pages_free != 0) { |
| printk("Optimal CONFIG_X86_MMU_PAGE_POOL_PAGES %zu\n", |
| CONFIG_X86_MMU_PAGE_POOL_PAGES - pages_free); |
| } |
| |
| z_x86_enable_paging(); |
| } |
| |
| #ifdef CONFIG_X86_USERSPACE |
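| /* System call buffer validation hook: check 'addr'/'size' against the |
| * page tables of the current thread. |
| */ |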
| int z_arch_buffer_validate(void *addr, size_t size, int write) |
| { |
| return z_x86_mmu_validate(z_x86_pdpt_get(_current), addr, size, write); |
| } |
| |
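| /* Copy each page directory from the master tables into the per-thread |
| * area starting at 'pages', and point the per-thread PDPT entries at the |
| * copies. Returns the address of the next free page in the area. |
| */ |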
| static uintptr_t thread_pd_create(uintptr_t pages, |
| struct x86_mmu_pdpt *thread_pdpt, |
| struct x86_mmu_pdpt *master_pdpt) |
| { |
| uintptr_t pos = pages, phys_addr = Z_X86_PD_START; |
| |
| for (int i = 0; i < Z_X86_NUM_PD; i++, phys_addr += Z_X86_PD_AREA) { |
| union x86_mmu_pdpte *pdpte; |
| struct x86_mmu_pd *master_pd, *dest_pd; |
| |
| /* Obtain PD in master tables for the address range and copy |
| * into the per-thread PD for this range |
| */ |
| master_pd = X86_MMU_GET_PD_ADDR(master_pdpt, phys_addr); |
| dest_pd = (struct x86_mmu_pd *)pos; |
| |
| (void)memcpy(dest_pd, master_pd, sizeof(struct x86_mmu_pd)); |
| |
| /* Update pointer in per-thread pdpt to point to the per-thread |
| * directory we just copied |
| */ |
| pdpte = X86_MMU_GET_PDPTE(thread_pdpt, phys_addr); |
| pdpte->pd = pos >> MMU_PAGE_SHIFT; |
| pos += MMU_PAGE_SIZE; |
| } |
| |
| return pos; |
| } |
| |
| /* thread_pdpt must be initialized, as well as all the page directories */ |
| static uintptr_t thread_pt_create(uintptr_t pages, |
| struct x86_mmu_pdpt *thread_pdpt, |
| struct x86_mmu_pdpt *master_pdpt) |
| { |
| uintptr_t pos = pages, phys_addr = Z_X86_PT_START; |
| |
| for (int i = 0; i < Z_X86_NUM_PT; i++, phys_addr += Z_X86_PT_AREA) { |
| union x86_mmu_pde_pt *pde; |
| struct x86_mmu_pt *master_pt, *dest_pt; |
| |
| /* Same as we did with the directories, obtain PT in master |
| * tables for the address range and copy into per-thread PT |
| * for this range |
| */ |
| master_pt = X86_MMU_GET_PT_ADDR(master_pdpt, phys_addr); |
| dest_pt = (struct x86_mmu_pt *)pos; |
| (void)memcpy(dest_pt, master_pt, sizeof(struct x86_mmu_pt)); |
| |
| /* And then wire this up to the relevant per-thread |
| * page directory entry |
| */ |
| pde = X86_MMU_GET_PDE(thread_pdpt, phys_addr); |
| pde->pt = pos >> MMU_PAGE_SHIFT; |
| pos += MMU_PAGE_SIZE; |
| } |
| |
| return pos; |
| } |
| |
| /* Initialize the page tables for a thread. Once this is done, they will |
| * contain the boot-time memory configuration for a user thread's page |
| * tables. There are no pre-conditions on the existing state of the |
| * per-thread tables. |
| */ |
| static void copy_page_tables(struct k_thread *thread, |
| struct x86_mmu_pdpt *master_pdpt) |
| { |
| uintptr_t pos, start; |
| struct x86_mmu_pdpt *thread_pdpt = z_x86_pdpt_get(thread); |
| struct z_x86_thread_stack_header *header = |
| (struct z_x86_thread_stack_header *)thread->stack_obj; |
| |
| __ASSERT(thread->stack_obj != NULL, "no stack object assigned"); |
| __ASSERT(z_x86_page_tables_get() != thread_pdpt, "PDPT is active"); |
| __ASSERT(((uintptr_t)thread_pdpt & 0x1f) == 0, "unaligned pdpt at %p", |
| thread_pdpt); |
| |
| (void)memcpy(thread_pdpt, master_pdpt, sizeof(struct x86_mmu_pdpt)); |
| |
| /* pos represents the page we are working with in the reserved area |
| * in the stack buffer for per-thread tables. As we create tables in |
| * this area, pos is incremented to the next free page. |
| * |
| * The layout of the stack object, when this is done: |
| * |
| * +---------------------------+ <- thread->stack_obj |
| * | PDE(0) | |
| * +---------------------------+ |
| * | ... | |
| * +---------------------------+ |
| * | PDE(Z_X86_NUM_PD - 1) | |
| * +---------------------------+ |
| * | PTE(0) | |
| * +---------------------------+ |
| * | ... | |
| * +---------------------------+ |
| * | PTE(Z_X86_NUM_PT - 1) | |
| * +---------------------------+ <- pos once this logic completes |
| * | Stack guard | |
| * +---------------------------+ |
| * | Privilege elevation stack | |
| * | PDPT | |
| * +---------------------------+ <- thread->stack_info.start |
| * | Thread stack | |
| * | ... | |
| * |
| */ |
| start = (uintptr_t)(&header->page_tables); |
| pos = thread_pd_create(start, thread_pdpt, master_pdpt); |
| pos = thread_pt_create(pos, thread_pdpt, master_pdpt); |
| |
| __ASSERT(pos == (start + Z_X86_THREAD_PT_AREA), |
| "wrong amount of stack object memory used"); |
| } |
| |
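| /* Restore a partition's address range in a thread's page tables to the |
| * boot-time policy found in USER_PDPT. |
| */ |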
| static void reset_mem_partition(struct x86_mmu_pdpt *thread_pdpt, |
| struct k_mem_partition *partition) |
| { |
| uintptr_t addr = partition->start; |
| size_t size = partition->size; |
| |
| __ASSERT((addr & MMU_PAGE_MASK) == 0U, "unaligned address provided"); |
| __ASSERT((size & MMU_PAGE_MASK) == 0U, "unaligned size provided"); |
| |
| while (size != 0) { |
| union x86_mmu_pte *thread_pte, *master_pte; |
| |
| thread_pte = X86_MMU_GET_PTE(thread_pdpt, addr); |
| master_pte = X86_MMU_GET_PTE(&USER_PDPT, addr); |
| |
| (void)memcpy(thread_pte, master_pte, sizeof(union x86_mmu_pte)); |
| |
| size -= MMU_PAGE_SIZE; |
| addr += MMU_PAGE_SIZE; |
| } |
| } |
| |
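| /* Apply a memory partition's access attributes to the given page tables. |
| * With KPTI enabled, the present bit is set as well, since non-user pages |
| * are marked non-present in the user copy of the tables. |
| */ |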
| static void apply_mem_partition(struct x86_mmu_pdpt *pdpt, |
| struct k_mem_partition *partition) |
| { |
| x86_page_entry_data_t x86_attr; |
| x86_page_entry_data_t mask; |
| |
| if (IS_ENABLED(CONFIG_X86_KPTI)) { |
| x86_attr = partition->attr | MMU_ENTRY_PRESENT; |
| mask = K_MEM_PARTITION_PERM_MASK | MMU_PTE_P_MASK; |
| } else { |
| x86_attr = partition->attr; |
| mask = K_MEM_PARTITION_PERM_MASK; |
| } |
| |
| __ASSERT(partition->start >= DT_PHYS_RAM_ADDR, |
| "region at %08lx[%u] extends below system ram start 0x%08x", |
| partition->start, partition->size, DT_PHYS_RAM_ADDR); |
| __ASSERT(((partition->start + partition->size) <= |
| (DT_PHYS_RAM_ADDR + (DT_RAM_SIZE * 1024U))), |
| "region at %08lx[%u] end at %08lx extends beyond system ram end 0x%08x", |
| partition->start, partition->size, |
| partition->start + partition->size, |
| (DT_PHYS_RAM_ADDR + (DT_RAM_SIZE * 1024U))); |
| |
| z_x86_mmu_set_flags(pdpt, (void *)partition->start, partition->size, |
| x86_attr, mask, false); |
| } |
| |
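| /* Apply every non-empty partition in a memory domain to the given page |
| * tables. |
| */ |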
| void z_x86_apply_mem_domain(struct x86_mmu_pdpt *pdpt, |
| struct k_mem_domain *mem_domain) |
| { |
| for (int i = 0, pcount = 0; pcount < mem_domain->num_partitions; i++) { |
| struct k_mem_partition *partition; |
| |
| partition = &mem_domain->partitions[i]; |
| if (partition->size == 0) { |
| continue; |
| } |
| pcount++; |
| |
| apply_mem_partition(pdpt, partition); |
| } |
| } |
| |
| /* Called on creation of a user thread or when a supervisor thread drops to |
| * user mode. |
| * |
| * Sets up the per-thread page tables, such that when they are activated on |
| * context switch, everything is ready to go. |
| */ |
| void z_x86_thread_pt_init(struct k_thread *thread) |
| { |
| struct x86_mmu_pdpt *pdpt = z_x86_pdpt_get(thread); |
| |
| /* USER_PDPT contains the page tables with the boot time memory |
| * policy. We use it as a template to set up the per-thread page |
| * tables. |
| * |
| * With KPTI, this is a set of tables (z_x86_user_pdpt) distinct from |
| * the kernel page tables in z_x86_kernel_pdpt; in it, all pages that |
| * are not user-accessible, except the trampoline page, are marked |
| * non-present. Without KPTI, they are the same object. |
| */ |
| copy_page_tables(thread, &USER_PDPT); |
| |
| /* Enable access to the thread's own stack buffer */ |
| z_x86_mmu_set_flags(pdpt, (void *)thread->stack_info.start, |
| ROUND_UP(thread->stack_info.size, MMU_PAGE_SIZE), |
| MMU_ENTRY_PRESENT | K_MEM_PARTITION_P_RW_U_RW, |
| MMU_PTE_P_MASK | K_MEM_PARTITION_PERM_MASK, |
| false); |
| } |
| |
| /* |
| * Memory domain interface |
| * |
| * In all cases, if one of these APIs is called on a supervisor thread, |
| * we don't need to do anything. If the thread later drops to user mode, |
| * the per-thread page tables will be generated and the memory domain |
| * configuration applied. |
| */ |
| void z_arch_mem_domain_partition_remove(struct k_mem_domain *domain, |
| u32_t partition_id) |
| { |
| sys_dnode_t *node, *next_node; |
| |
| /* Removing a partition. Need to reset the relevant memory range |
| * to the defaults in USER_PDPT for each thread. |
| */ |
| SYS_DLIST_FOR_EACH_NODE_SAFE(&domain->mem_domain_q, node, next_node) { |
| struct k_thread *thread = |
| CONTAINER_OF(node, struct k_thread, mem_domain_info); |
| |
| if ((thread->base.user_options & K_USER) == 0) { |
| continue; |
| } |
| |
| reset_mem_partition(z_x86_pdpt_get(thread), |
| &domain->partitions[partition_id]); |
| } |
| } |
| |
| void z_arch_mem_domain_destroy(struct k_mem_domain *domain) |
| { |
| for (int i = 0, pcount = 0; pcount < domain->num_partitions; i++) { |
| struct k_mem_partition *partition; |
| |
| partition = &domain->partitions[i]; |
| if (partition->size == 0) { |
| continue; |
| } |
| pcount++; |
| |
| z_arch_mem_domain_partition_remove(domain, i); |
| } |
| } |
| |
| void z_arch_mem_domain_thread_remove(struct k_thread *thread) |
| { |
| struct k_mem_domain *domain = thread->mem_domain_info.mem_domain; |
| |
| /* Non-user threads don't have per-thread page tables set up */ |
| if ((thread->base.user_options & K_USER) == 0) { |
| return; |
| } |
| |
| for (int i = 0, pcount = 0; pcount < domain->num_partitions; i++) { |
| struct k_mem_partition *partition; |
| |
| partition = &domain->partitions[i]; |
| if (partition->size == 0) { |
| continue; |
| } |
| pcount++; |
| |
| reset_mem_partition(z_x86_pdpt_get(thread), partition); |
| } |
| } |
| |
| void z_arch_mem_domain_partition_add(struct k_mem_domain *domain, |
| u32_t partition_id) |
| { |
| sys_dnode_t *node, *next_node; |
| |
| SYS_DLIST_FOR_EACH_NODE_SAFE(&domain->mem_domain_q, node, next_node) { |
| struct k_thread *thread = |
| CONTAINER_OF(node, struct k_thread, mem_domain_info); |
| |
| if ((thread->base.user_options & K_USER) == 0) { |
| continue; |
| } |
| |
| apply_mem_partition(z_x86_pdpt_get(thread), |
| &domain->partitions[partition_id]); |
| } |
| } |
| |
| void z_arch_mem_domain_thread_add(struct k_thread *thread) |
| { |
| if ((thread->base.user_options & K_USER) == 0) { |
| return; |
| } |
| |
| z_x86_apply_mem_domain(z_x86_pdpt_get(thread), |
| thread->mem_domain_info.mem_domain); |
| } |
| |
| int z_arch_mem_domain_max_partitions_get(void) |
| { |
| return CONFIG_MAX_DOMAIN_PARTITIONS; |
| } |
| #endif /* CONFIG_X86_USERSPACE */ |