| /* |
| * Copyright (c) 2020 Intel Corporation |
| * |
| * SPDX-License-Identifier: Apache-2.0 |
| * |
| * Routines for managing virtual address spaces |
| */ |
| |
| #include <stdint.h> |
| #include <kernel_arch_interface.h> |
| #include <zephyr/spinlock.h> |
| #include <mmu.h> |
| #include <zephyr/init.h> |
| #include <kernel_internal.h> |
| #include <zephyr/internal/syscall_handler.h> |
| #include <zephyr/toolchain.h> |
| #include <zephyr/linker/linker-defs.h> |
| #include <zephyr/sys/bitarray.h> |
| #include <zephyr/sys/check.h> |
| #include <zephyr/sys/math_extras.h> |
| #include <zephyr/timing/timing.h> |
| #include <zephyr/logging/log.h> |
| LOG_MODULE_DECLARE(os, CONFIG_KERNEL_LOG_LEVEL); |
| |
| #ifdef CONFIG_DEMAND_PAGING |
| #include <zephyr/kernel/mm/demand_paging.h> |
| #endif /* CONFIG_DEMAND_PAGING */ |
| |
| /* |
| * General terminology: |
| * - A page frame is a page-sized physical memory region in RAM. It is a |
| * container where a data page may be placed. It is always referred to by |
| * physical address. We have a convention of using uintptr_t for physical |
| * addresses. We instantiate a struct k_mem_page_frame to store metadata for |
| * every page frame. |
| * |
| * - A data page is a page-sized region of data. It may exist in a page frame, |
| * or be paged out to some backing store. Its location can always be looked |
| * up in the CPU's page tables (or equivalent) by virtual address. |
| * The data type will always be void * or in some cases uint8_t * when we |
| * want to do pointer arithmetic. |
| */ |
| |
| /* Spinlock to protect any globals in this file and serialize page table |
| * updates in arch code |
| */ |
| struct k_spinlock z_mm_lock; |
| |
| /* |
| * General page frame management |
| */ |
| |
| /* Database of all RAM page frames */ |
| struct k_mem_page_frame k_mem_page_frames[K_MEM_NUM_PAGE_FRAMES]; |
| |
| #if __ASSERT_ON |
| /* Indicator that k_mem_page_frames has been initialized, many of these APIs do |
| * not work before POST_KERNEL |
| */ |
| static bool page_frames_initialized; |
| #endif |
| |
| /* Add colors to page table dumps to indicate mapping type */ |
| #define COLOR_PAGE_FRAMES 1 |
| |
| #if COLOR_PAGE_FRAMES |
| #define ANSI_DEFAULT "\x1B" "[0m" |
| #define ANSI_RED "\x1B" "[1;31m" |
| #define ANSI_GREEN "\x1B" "[1;32m" |
| #define ANSI_YELLOW "\x1B" "[1;33m" |
| #define ANSI_BLUE "\x1B" "[1;34m" |
| #define ANSI_MAGENTA "\x1B" "[1;35m" |
| #define ANSI_CYAN "\x1B" "[1;36m" |
| #define ANSI_GREY "\x1B" "[1;90m" |
| |
| #define COLOR(x) printk(_CONCAT(ANSI_, x)) |
| #else |
| #define COLOR(x) do { } while (false) |
| #endif /* COLOR_PAGE_FRAMES */ |
| |
| /* LCOV_EXCL_START */ |
| static void page_frame_dump(struct k_mem_page_frame *pf) |
| { |
| if (k_mem_page_frame_is_free(pf)) { |
| COLOR(GREY); |
| printk("-"); |
| } else if (k_mem_page_frame_is_reserved(pf)) { |
| COLOR(CYAN); |
| printk("R"); |
| } else if (k_mem_page_frame_is_busy(pf)) { |
| COLOR(MAGENTA); |
| printk("B"); |
| } else if (k_mem_page_frame_is_pinned(pf)) { |
| COLOR(YELLOW); |
| printk("P"); |
| } else if (k_mem_page_frame_is_available(pf)) { |
| COLOR(GREY); |
| printk("."); |
| } else if (k_mem_page_frame_is_mapped(pf)) { |
| COLOR(DEFAULT); |
| printk("M"); |
| } else { |
| COLOR(RED); |
| printk("?"); |
| } |
| } |
| |
| void k_mem_page_frames_dump(void) |
| { |
| int column = 0; |
| |
| __ASSERT(page_frames_initialized, "%s called too early", __func__); |
| printk("Physical memory from 0x%lx to 0x%lx\n", |
| K_MEM_PHYS_RAM_START, K_MEM_PHYS_RAM_END); |
| |
| for (int i = 0; i < K_MEM_NUM_PAGE_FRAMES; i++) { |
| struct k_mem_page_frame *pf = &k_mem_page_frames[i]; |
| |
| page_frame_dump(pf); |
| |
| column++; |
| if (column == 64) { |
| column = 0; |
| printk("\n"); |
| } |
| } |
| |
| COLOR(DEFAULT); |
| if (column != 0) { |
| printk("\n"); |
| } |
| } |
| /* LCOV_EXCL_STOP */ |
| |
| #define VIRT_FOREACH(_base, _size, _pos) \ |
| for ((_pos) = (_base); \ |
| (_pos) < ((uint8_t *)(_base) + (_size)); (_pos) += CONFIG_MMU_PAGE_SIZE) |
| |
| #define PHYS_FOREACH(_base, _size, _pos) \ |
| for ((_pos) = (_base); \ |
| (_pos) < ((uintptr_t)(_base) + (_size)); (_pos) += CONFIG_MMU_PAGE_SIZE) |
| |
| |
| /* |
| * Virtual address space management |
| * |
| * Call all of these functions with z_mm_lock held. |
| * |
| * Overall virtual memory map: When the kernel starts, it resides in |
| * virtual memory in the region K_MEM_KERNEL_VIRT_START to |
| * K_MEM_KERNEL_VIRT_END. Unused virtual memory past this, up to the limit |
| * noted by CONFIG_KERNEL_VM_SIZE may be used for runtime memory mappings. |
| * |
| * If CONFIG_ARCH_MAPS_ALL_RAM is set, we do not just map the kernel image, |
| * but have a mapping for all RAM in place. This is for special architectural |
| * purposes and does not otherwise affect page frame accounting or flags; |
| * the only guarantee is that such RAM mapping outside of the Zephyr image |
| * won't be disturbed by subsequent memory mapping calls. |
| * |
| * +--------------+ <- K_MEM_VIRT_RAM_START |
| * | Undefined VM | <- May contain ancillary regions like x86_64's locore |
| * +--------------+ <- K_MEM_KERNEL_VIRT_START (often == K_MEM_VIRT_RAM_START) |
| * | Mapping for | |
| * | main kernel | |
| * | image | |
| * | | |
| * | | |
| * +--------------+ <- K_MEM_VM_FREE_START |
| * | | |
| * | Unused, | |
| * | Available VM | |
| * | | |
| * |..............| <- mapping_pos (grows downward as more mappings are made) |
| * | Mapping | |
| * +--------------+ |
| * | Mapping | |
| * +--------------+ |
| * | ... | |
| * +--------------+ |
| * | Mapping | |
| * +--------------+ <- mappings start here |
| * | Reserved | <- special purpose virtual page(s) of size K_MEM_VM_RESERVED |
| * +--------------+ <- K_MEM_VIRT_RAM_END |
| */ |
| |
| /* Bitmap of virtual addresses where one bit corresponds to one page. |
| * This is being used for virt_region_alloc() to figure out which |
| * region of virtual addresses can be used for memory mapping. |
| * |
| * Note that bit #0 is the highest address so that allocation is |
| * done in reverse from highest address. |
| */ |
| SYS_BITARRAY_DEFINE_STATIC(virt_region_bitmap, |
| CONFIG_KERNEL_VM_SIZE / CONFIG_MMU_PAGE_SIZE); |
| |
| static bool virt_region_inited; |
| |
| #define Z_VIRT_REGION_START_ADDR K_MEM_VM_FREE_START |
| #define Z_VIRT_REGION_END_ADDR (K_MEM_VIRT_RAM_END - K_MEM_VM_RESERVED) |
| |
| static inline uintptr_t virt_from_bitmap_offset(size_t offset, size_t size) |
| { |
| return POINTER_TO_UINT(K_MEM_VIRT_RAM_END) |
| - (offset * CONFIG_MMU_PAGE_SIZE) - size; |
| } |
| |
| static inline size_t virt_to_bitmap_offset(void *vaddr, size_t size) |
| { |
| return (POINTER_TO_UINT(K_MEM_VIRT_RAM_END) |
| - POINTER_TO_UINT(vaddr) - size) / CONFIG_MMU_PAGE_SIZE; |
| } |
| |
| static void virt_region_init(void) |
| { |
| size_t offset, num_bits; |
| |
| /* There are regions where we should never map via |
| * k_mem_map() and k_mem_map_phys_bare(). Mark them as |
| * already allocated so they will never be used. |
| */ |
| |
| if (K_MEM_VM_RESERVED > 0) { |
| /* Mark reserved region at end of virtual address space */ |
| num_bits = K_MEM_VM_RESERVED / CONFIG_MMU_PAGE_SIZE; |
| (void)sys_bitarray_set_region(&virt_region_bitmap, |
| num_bits, 0); |
| } |
| |
| /* Mark all bits up to Z_FREE_VM_START as allocated */ |
| num_bits = POINTER_TO_UINT(K_MEM_VM_FREE_START) |
| - POINTER_TO_UINT(K_MEM_VIRT_RAM_START); |
| offset = virt_to_bitmap_offset(K_MEM_VIRT_RAM_START, num_bits); |
| num_bits /= CONFIG_MMU_PAGE_SIZE; |
| (void)sys_bitarray_set_region(&virt_region_bitmap, |
| num_bits, offset); |
| |
| virt_region_inited = true; |
| } |
| |
| static void virt_region_free(void *vaddr, size_t size) |
| { |
| size_t offset, num_bits; |
| uint8_t *vaddr_u8 = (uint8_t *)vaddr; |
| |
| if (unlikely(!virt_region_inited)) { |
| virt_region_init(); |
| } |
| |
| #ifndef CONFIG_KERNEL_DIRECT_MAP |
| /* Without the need to support K_MEM_DIRECT_MAP, the region must be |
| * able to be represented in the bitmap. So this case is |
| * simple. |
| */ |
| |
| __ASSERT((vaddr_u8 >= Z_VIRT_REGION_START_ADDR) |
| && ((vaddr_u8 + size - 1) < Z_VIRT_REGION_END_ADDR), |
| "invalid virtual address region %p (%zu)", vaddr_u8, size); |
| if (!((vaddr_u8 >= Z_VIRT_REGION_START_ADDR) |
| && ((vaddr_u8 + size - 1) < Z_VIRT_REGION_END_ADDR))) { |
| return; |
| } |
| |
| offset = virt_to_bitmap_offset(vaddr, size); |
| num_bits = size / CONFIG_MMU_PAGE_SIZE; |
| (void)sys_bitarray_free(&virt_region_bitmap, num_bits, offset); |
| #else /* !CONFIG_KERNEL_DIRECT_MAP */ |
| /* With K_MEM_DIRECT_MAP, the region can be outside of the virtual |
| * memory space, wholly within it, or overlap partially. |
| * So additional processing is needed to make sure we only |
| * mark the pages within the bitmap. |
| */ |
| if (((vaddr_u8 >= Z_VIRT_REGION_START_ADDR) && |
| (vaddr_u8 < Z_VIRT_REGION_END_ADDR)) || |
| (((vaddr_u8 + size - 1) >= Z_VIRT_REGION_START_ADDR) && |
| ((vaddr_u8 + size - 1) < Z_VIRT_REGION_END_ADDR))) { |
| uint8_t *adjusted_start = MAX(vaddr_u8, Z_VIRT_REGION_START_ADDR); |
| uint8_t *adjusted_end = MIN(vaddr_u8 + size, |
| Z_VIRT_REGION_END_ADDR); |
| size_t adjusted_sz = adjusted_end - adjusted_start; |
| |
| offset = virt_to_bitmap_offset(adjusted_start, adjusted_sz); |
| num_bits = adjusted_sz / CONFIG_MMU_PAGE_SIZE; |
| (void)sys_bitarray_free(&virt_region_bitmap, num_bits, offset); |
| } |
| #endif /* !CONFIG_KERNEL_DIRECT_MAP */ |
| } |
| |
| static void *virt_region_alloc(size_t size, size_t align) |
| { |
| uintptr_t dest_addr; |
| size_t alloc_size; |
| size_t offset; |
| size_t num_bits; |
| int ret; |
| |
| if (unlikely(!virt_region_inited)) { |
| virt_region_init(); |
| } |
| |
| /* Possibly request more pages to ensure we can get an aligned virtual address */ |
| num_bits = (size + align - CONFIG_MMU_PAGE_SIZE) / CONFIG_MMU_PAGE_SIZE; |
| alloc_size = num_bits * CONFIG_MMU_PAGE_SIZE; |
| ret = sys_bitarray_alloc(&virt_region_bitmap, num_bits, &offset); |
| if (ret != 0) { |
| LOG_ERR("insufficient virtual address space (requested %zu)", |
| size); |
| return NULL; |
| } |
| |
| /* Remember that bit #0 in bitmap corresponds to the highest |
| * virtual address. So here we need to go downwards (backwards?) |
| * to get the starting address of the allocated region. |
| */ |
| dest_addr = virt_from_bitmap_offset(offset, alloc_size); |
| |
| if (alloc_size > size) { |
| uintptr_t aligned_dest_addr = ROUND_UP(dest_addr, align); |
| |
| /* Here is the memory organization when trying to get an aligned |
| * virtual address: |
| * |
| * +--------------+ <- K_MEM_VIRT_RAM_START |
| * | Undefined VM | |
| * +--------------+ <- K_MEM_KERNEL_VIRT_START (often == K_MEM_VIRT_RAM_START) |
| * | Mapping for | |
| * | main kernel | |
| * | image | |
| * | | |
| * | | |
| * +--------------+ <- K_MEM_VM_FREE_START |
| * | ... | |
| * +==============+ <- dest_addr |
| * | Unused | |
| * |..............| <- aligned_dest_addr |
| * | | |
| * | Aligned | |
| * | Mapping | |
| * | | |
| * |..............| <- aligned_dest_addr + size |
| * | Unused | |
| * +==============+ <- offset from K_MEM_VIRT_RAM_END == dest_addr + alloc_size |
| * | ... | |
| * +--------------+ |
| * | Mapping | |
| * +--------------+ |
| * | Reserved | |
| * +--------------+ <- K_MEM_VIRT_RAM_END |
| */ |
| |
| /* Free the two unused regions */ |
| virt_region_free(UINT_TO_POINTER(dest_addr), |
| aligned_dest_addr - dest_addr); |
| if (((dest_addr + alloc_size) - (aligned_dest_addr + size)) > 0) { |
| virt_region_free(UINT_TO_POINTER(aligned_dest_addr + size), |
| (dest_addr + alloc_size) - (aligned_dest_addr + size)); |
| } |
| |
| dest_addr = aligned_dest_addr; |
| } |
| |
| /* Need to make sure this does not step into kernel memory */ |
| if (dest_addr < POINTER_TO_UINT(Z_VIRT_REGION_START_ADDR)) { |
| (void)sys_bitarray_free(&virt_region_bitmap, size, offset); |
| return NULL; |
| } |
| |
| return UINT_TO_POINTER(dest_addr); |
| } |
| |
| /* |
| * Free page frames management |
| * |
| * Call all of these functions with z_mm_lock held. |
| */ |
| |
| /* Linked list of unused and available page frames. |
| * |
| * TODO: This is very simple and treats all free page frames as being equal. |
| * However, there are use-cases to consolidate free pages such that entire |
| * SRAM banks can be switched off to save power, and so obtaining free pages |
| * may require a more complex ontology which prefers page frames in RAM banks |
| * which are still active. |
| * |
| * This implies in the future there may be multiple slists managing physical |
| * pages. Each page frame will still just have one snode link. |
| */ |
| static sys_sflist_t free_page_frame_list; |
| |
| /* Number of unused and available free page frames. |
| * This information may go stale immediately. |
| */ |
| static size_t z_free_page_count; |
| |
| #define PF_ASSERT(pf, expr, fmt, ...) \ |
| __ASSERT(expr, "page frame 0x%lx: " fmt, k_mem_page_frame_to_phys(pf), \ |
| ##__VA_ARGS__) |
| |
| /* Get an unused page frame. don't care which one, or NULL if there are none */ |
| static struct k_mem_page_frame *free_page_frame_list_get(void) |
| { |
| sys_sfnode_t *node; |
| struct k_mem_page_frame *pf = NULL; |
| |
| node = sys_sflist_get(&free_page_frame_list); |
| if (node != NULL) { |
| z_free_page_count--; |
| pf = CONTAINER_OF(node, struct k_mem_page_frame, node); |
| PF_ASSERT(pf, k_mem_page_frame_is_free(pf), |
| "on free list but not free"); |
| pf->va_and_flags = 0; |
| } |
| |
| return pf; |
| } |
| |
| /* Release a page frame back into the list of free pages */ |
| static void free_page_frame_list_put(struct k_mem_page_frame *pf) |
| { |
| PF_ASSERT(pf, k_mem_page_frame_is_available(pf), |
| "unavailable page put on free list"); |
| |
| sys_sfnode_init(&pf->node, K_MEM_PAGE_FRAME_FREE); |
| sys_sflist_append(&free_page_frame_list, &pf->node); |
| z_free_page_count++; |
| } |
| |
| static void free_page_frame_list_init(void) |
| { |
| sys_sflist_init(&free_page_frame_list); |
| } |
| |
| static void page_frame_free_locked(struct k_mem_page_frame *pf) |
| { |
| pf->va_and_flags = 0; |
| free_page_frame_list_put(pf); |
| } |
| |
| /* |
| * Memory Mapping |
| */ |
| |
| /* Called after the frame is mapped in the arch layer, to update our |
| * local ontology (and do some assertions while we're at it) |
| */ |
| static void frame_mapped_set(struct k_mem_page_frame *pf, void *addr) |
| { |
| PF_ASSERT(pf, !k_mem_page_frame_is_free(pf), |
| "attempted to map a page frame on the free list"); |
| PF_ASSERT(pf, !k_mem_page_frame_is_reserved(pf), |
| "attempted to map a reserved page frame"); |
| |
| /* We do allow multiple mappings for pinned page frames |
| * since we will never need to reverse map them. |
| * This is uncommon, use-cases are for things like the |
| * Zephyr equivalent of VSDOs |
| */ |
| PF_ASSERT(pf, !k_mem_page_frame_is_mapped(pf) || k_mem_page_frame_is_pinned(pf), |
| "non-pinned and already mapped to %p", |
| k_mem_page_frame_to_virt(pf)); |
| |
| uintptr_t flags_mask = CONFIG_MMU_PAGE_SIZE - 1; |
| uintptr_t va = (uintptr_t)addr & ~flags_mask; |
| |
| pf->va_and_flags &= flags_mask; |
| pf->va_and_flags |= va | K_MEM_PAGE_FRAME_MAPPED; |
| } |
| |
| /* LCOV_EXCL_START */ |
| /* Go through page frames to find the physical address mapped |
| * by a virtual address. |
| * |
| * @param[in] virt Virtual Address |
| * @param[out] phys Physical address mapped to the input virtual address |
| * if such mapping exists. |
| * |
| * @retval 0 if mapping is found and valid |
| * @retval -EFAULT if virtual address is not mapped |
| */ |
| static int virt_to_page_frame(void *virt, uintptr_t *phys) |
| { |
| uintptr_t paddr; |
| struct k_mem_page_frame *pf; |
| int ret = -EFAULT; |
| |
| K_MEM_PAGE_FRAME_FOREACH(paddr, pf) { |
| if (k_mem_page_frame_is_mapped(pf)) { |
| if (virt == k_mem_page_frame_to_virt(pf)) { |
| ret = 0; |
| if (phys != NULL) { |
| *phys = k_mem_page_frame_to_phys(pf); |
| } |
| break; |
| } |
| } |
| } |
| |
| return ret; |
| } |
| /* LCOV_EXCL_STOP */ |
| |
| __weak FUNC_ALIAS(virt_to_page_frame, arch_page_phys_get, int); |
| |
| #ifdef CONFIG_DEMAND_PAGING |
| static int page_frame_prepare_locked(struct k_mem_page_frame *pf, bool *dirty_ptr, |
| bool page_in, uintptr_t *location_ptr); |
| |
| static inline void do_backing_store_page_in(uintptr_t location); |
| static inline void do_backing_store_page_out(uintptr_t location); |
| #endif /* CONFIG_DEMAND_PAGING */ |
| |
| /* Allocate a free page frame, and map it to a specified virtual address |
| * |
| * TODO: Add optional support for copy-on-write mappings to a zero page instead |
| * of allocating, in which case page frames will be allocated lazily as |
| * the mappings to the zero page get touched. This will avoid expensive |
| * page-ins as memory is mapped and physical RAM or backing store storage will |
| * not be used if the mapped memory is unused. The cost is an empty physical |
| * page of zeroes. |
| */ |
| static int map_anon_page(void *addr, uint32_t flags) |
| { |
| struct k_mem_page_frame *pf; |
| uintptr_t phys; |
| bool lock = (flags & K_MEM_MAP_LOCK) != 0U; |
| |
| pf = free_page_frame_list_get(); |
| if (pf == NULL) { |
| #ifdef CONFIG_DEMAND_PAGING |
| uintptr_t location; |
| bool dirty; |
| int ret; |
| |
| pf = k_mem_paging_eviction_select(&dirty); |
| __ASSERT(pf != NULL, "failed to get a page frame"); |
| LOG_DBG("evicting %p at 0x%lx", |
| k_mem_page_frame_to_virt(pf), |
| k_mem_page_frame_to_phys(pf)); |
| ret = page_frame_prepare_locked(pf, &dirty, false, &location); |
| if (ret != 0) { |
| return -ENOMEM; |
| } |
| if (dirty) { |
| do_backing_store_page_out(location); |
| } |
| pf->va_and_flags = 0; |
| #else |
| return -ENOMEM; |
| #endif /* CONFIG_DEMAND_PAGING */ |
| } |
| |
| phys = k_mem_page_frame_to_phys(pf); |
| arch_mem_map(addr, phys, CONFIG_MMU_PAGE_SIZE, flags | K_MEM_CACHE_WB); |
| |
| if (lock) { |
| k_mem_page_frame_set(pf, K_MEM_PAGE_FRAME_PINNED); |
| } |
| frame_mapped_set(pf, addr); |
| #ifdef CONFIG_DEMAND_PAGING |
| if (!lock) { |
| k_mem_paging_eviction_add(pf); |
| } |
| #endif |
| |
| LOG_DBG("memory mapping anon page %p -> 0x%lx", addr, phys); |
| |
| return 0; |
| } |
| |
| void *k_mem_map_phys_guard(uintptr_t phys, size_t size, uint32_t flags, bool is_anon) |
| { |
| uint8_t *dst; |
| size_t total_size; |
| int ret; |
| k_spinlock_key_t key; |
| uint8_t *pos; |
| bool uninit = (flags & K_MEM_MAP_UNINIT) != 0U; |
| |
| __ASSERT(!(((flags & K_MEM_PERM_USER) != 0U) && |
| ((flags & K_MEM_MAP_UNINIT) != 0U)), |
| "user access to anonymous uninitialized pages is forbidden"); |
| __ASSERT(size % CONFIG_MMU_PAGE_SIZE == 0U, |
| "unaligned size %zu passed to %s", size, __func__); |
| __ASSERT(size != 0, "zero sized memory mapping"); |
| __ASSERT(!is_anon || (is_anon && page_frames_initialized), |
| "%s called too early", __func__); |
| __ASSERT((flags & K_MEM_CACHE_MASK) == 0U, |
| "%s does not support explicit cache settings", __func__); |
| |
| CHECKIF(size_add_overflow(size, CONFIG_MMU_PAGE_SIZE * 2, &total_size)) { |
| LOG_ERR("too large size %zu passed to %s", size, __func__); |
| return NULL; |
| } |
| |
| key = k_spin_lock(&z_mm_lock); |
| |
| /* Need extra for the guard pages (before and after) which we |
| * won't map. |
| */ |
| total_size = size + (CONFIG_MMU_PAGE_SIZE * 2); |
| |
| dst = virt_region_alloc(total_size, CONFIG_MMU_PAGE_SIZE); |
| if (dst == NULL) { |
| /* Address space has no free region */ |
| goto out; |
| } |
| |
| /* Unmap both guard pages to make sure accessing them |
| * will generate fault. |
| */ |
| arch_mem_unmap(dst, CONFIG_MMU_PAGE_SIZE); |
| arch_mem_unmap(dst + CONFIG_MMU_PAGE_SIZE + size, |
| CONFIG_MMU_PAGE_SIZE); |
| |
| /* Skip over the "before" guard page in returned address. */ |
| dst += CONFIG_MMU_PAGE_SIZE; |
| |
| if (is_anon) { |
| /* Mapping from annoymous memory */ |
| VIRT_FOREACH(dst, size, pos) { |
| ret = map_anon_page(pos, flags); |
| |
| if (ret != 0) { |
| /* TODO: call k_mem_unmap(dst, pos - dst) when |
| * implemented in #28990 and release any guard virtual |
| * page as well. |
| */ |
| dst = NULL; |
| goto out; |
| } |
| } |
| } else { |
| /* Mapping known physical memory. |
| * |
| * arch_mem_map() is a void function and does not return |
| * anything. Arch code usually uses ASSERT() to catch |
| * mapping errors. Assume this works correctly for now. |
| */ |
| arch_mem_map(dst, phys, size, flags); |
| } |
| |
| if (!uninit) { |
| /* If we later implement mappings to a copy-on-write |
| * zero page, won't need this step |
| */ |
| memset(dst, 0, size); |
| } |
| |
| out: |
| k_spin_unlock(&z_mm_lock, key); |
| return dst; |
| } |
| |
| void k_mem_unmap_phys_guard(void *addr, size_t size, bool is_anon) |
| { |
| uintptr_t phys; |
| uint8_t *pos; |
| struct k_mem_page_frame *pf; |
| k_spinlock_key_t key; |
| size_t total_size; |
| int ret; |
| |
| /* Need space for the "before" guard page */ |
| __ASSERT_NO_MSG(POINTER_TO_UINT(addr) >= CONFIG_MMU_PAGE_SIZE); |
| |
| /* Make sure address range is still valid after accounting |
| * for two guard pages. |
| */ |
| pos = (uint8_t *)addr - CONFIG_MMU_PAGE_SIZE; |
| k_mem_assert_virtual_region(pos, size + (CONFIG_MMU_PAGE_SIZE * 2)); |
| |
| key = k_spin_lock(&z_mm_lock); |
| |
| /* Check if both guard pages are unmapped. |
| * Bail if not, as this is probably a region not mapped |
| * using k_mem_map(). |
| */ |
| pos = addr; |
| ret = arch_page_phys_get(pos - CONFIG_MMU_PAGE_SIZE, NULL); |
| if (ret == 0) { |
| __ASSERT(ret == 0, |
| "%s: cannot find preceding guard page for (%p, %zu)", |
| __func__, addr, size); |
| goto out; |
| } |
| |
| ret = arch_page_phys_get(pos + size, NULL); |
| if (ret == 0) { |
| __ASSERT(ret == 0, |
| "%s: cannot find succeeding guard page for (%p, %zu)", |
| __func__, addr, size); |
| goto out; |
| } |
| |
| if (is_anon) { |
| /* Unmapping anonymous memory */ |
| VIRT_FOREACH(addr, size, pos) { |
| #ifdef CONFIG_DEMAND_PAGING |
| enum arch_page_location status; |
| uintptr_t location; |
| |
| status = arch_page_location_get(pos, &location); |
| switch (status) { |
| case ARCH_PAGE_LOCATION_PAGED_OUT: |
| /* |
| * No pf is associated with this mapping. |
| * Simply get rid of the MMU entry and free |
| * corresponding backing store. |
| */ |
| arch_mem_unmap(pos, CONFIG_MMU_PAGE_SIZE); |
| k_mem_paging_backing_store_location_free(location); |
| continue; |
| case ARCH_PAGE_LOCATION_PAGED_IN: |
| /* |
| * The page is in memory but it may not be |
| * accessible in order to manage tracking |
| * of the ARCH_DATA_PAGE_ACCESSED flag |
| * meaning arch_page_phys_get() could fail. |
| * Still, we know the actual phys address. |
| */ |
| phys = location; |
| ret = 0; |
| break; |
| default: |
| ret = arch_page_phys_get(pos, &phys); |
| break; |
| } |
| #else |
| ret = arch_page_phys_get(pos, &phys); |
| #endif |
| __ASSERT(ret == 0, |
| "%s: cannot unmap an unmapped address %p", |
| __func__, pos); |
| if (ret != 0) { |
| /* Found an address not mapped. Do not continue. */ |
| goto out; |
| } |
| |
| __ASSERT(k_mem_is_page_frame(phys), |
| "%s: 0x%lx is not a page frame", __func__, phys); |
| if (!k_mem_is_page_frame(phys)) { |
| /* Physical address has no corresponding page frame |
| * description in the page frame array. |
| * This should not happen. Do not continue. |
| */ |
| goto out; |
| } |
| |
| /* Grab the corresponding page frame from physical address */ |
| pf = k_mem_phys_to_page_frame(phys); |
| |
| __ASSERT(k_mem_page_frame_is_mapped(pf), |
| "%s: 0x%lx is not a mapped page frame", __func__, phys); |
| if (!k_mem_page_frame_is_mapped(pf)) { |
| /* Page frame is not marked mapped. |
| * This should not happen. Do not continue. |
| */ |
| goto out; |
| } |
| |
| arch_mem_unmap(pos, CONFIG_MMU_PAGE_SIZE); |
| #ifdef CONFIG_DEMAND_PAGING |
| if (!k_mem_page_frame_is_pinned(pf)) { |
| k_mem_paging_eviction_remove(pf); |
| } |
| #endif |
| |
| /* Put the page frame back into free list */ |
| page_frame_free_locked(pf); |
| } |
| } else { |
| /* |
| * Unmapping previous mapped memory with specific physical address. |
| * |
| * Note that we don't have to unmap the guard pages, as they should |
| * have been unmapped. We just need to unmapped the in-between |
| * region [addr, (addr + size)). |
| */ |
| arch_mem_unmap(addr, size); |
| } |
| |
| /* There are guard pages just before and after the mapped |
| * region. So we also need to free them from the bitmap. |
| */ |
| pos = (uint8_t *)addr - CONFIG_MMU_PAGE_SIZE; |
| total_size = size + (CONFIG_MMU_PAGE_SIZE * 2); |
| virt_region_free(pos, total_size); |
| |
| out: |
| k_spin_unlock(&z_mm_lock, key); |
| } |
| |
| size_t k_mem_free_get(void) |
| { |
| size_t ret; |
| k_spinlock_key_t key; |
| |
| __ASSERT(page_frames_initialized, "%s called too early", __func__); |
| |
| key = k_spin_lock(&z_mm_lock); |
| #ifdef CONFIG_DEMAND_PAGING |
| if (z_free_page_count > CONFIG_DEMAND_PAGING_PAGE_FRAMES_RESERVE) { |
| ret = z_free_page_count - CONFIG_DEMAND_PAGING_PAGE_FRAMES_RESERVE; |
| } else { |
| ret = 0; |
| } |
| #else |
| ret = z_free_page_count; |
| #endif /* CONFIG_DEMAND_PAGING */ |
| k_spin_unlock(&z_mm_lock, key); |
| |
| return ret * (size_t)CONFIG_MMU_PAGE_SIZE; |
| } |
| |
| /* Get the default virtual region alignment, here the default MMU page size |
| * |
| * @param[in] phys Physical address of region to be mapped, aligned to MMU_PAGE_SIZE |
| * @param[in] size Size of region to be mapped, aligned to MMU_PAGE_SIZE |
| * |
| * @retval alignment to apply on the virtual address of this region |
| */ |
| static size_t virt_region_align(uintptr_t phys, size_t size) |
| { |
| ARG_UNUSED(phys); |
| ARG_UNUSED(size); |
| |
| return CONFIG_MMU_PAGE_SIZE; |
| } |
| |
| __weak FUNC_ALIAS(virt_region_align, arch_virt_region_align, size_t); |
| |
| /* This may be called from arch early boot code before z_cstart() is invoked. |
| * Data will be copied and BSS zeroed, but this must not rely on any |
| * initialization functions being called prior to work correctly. |
| */ |
| void k_mem_map_phys_bare(uint8_t **virt_ptr, uintptr_t phys, size_t size, uint32_t flags) |
| { |
| uintptr_t aligned_phys, addr_offset; |
| size_t aligned_size, align_boundary; |
| k_spinlock_key_t key; |
| uint8_t *dest_addr; |
| size_t num_bits; |
| size_t offset; |
| |
| #ifndef CONFIG_KERNEL_DIRECT_MAP |
| __ASSERT(!(flags & K_MEM_DIRECT_MAP), "The direct-map is not enabled"); |
| #endif /* CONFIG_KERNEL_DIRECT_MAP */ |
| addr_offset = k_mem_region_align(&aligned_phys, &aligned_size, |
| phys, size, |
| CONFIG_MMU_PAGE_SIZE); |
| __ASSERT(aligned_size != 0U, "0-length mapping at 0x%lx", aligned_phys); |
| __ASSERT(aligned_phys < (aligned_phys + (aligned_size - 1)), |
| "wraparound for physical address 0x%lx (size %zu)", |
| aligned_phys, aligned_size); |
| |
| align_boundary = arch_virt_region_align(aligned_phys, aligned_size); |
| |
| key = k_spin_lock(&z_mm_lock); |
| |
| if (IS_ENABLED(CONFIG_KERNEL_DIRECT_MAP) && |
| (flags & K_MEM_DIRECT_MAP)) { |
| dest_addr = (uint8_t *)aligned_phys; |
| |
| /* Mark the region of virtual memory bitmap as used |
| * if the region overlaps the virtual memory space. |
| * |
| * Basically if either end of region is within |
| * virtual memory space, we need to mark the bits. |
| */ |
| |
| if (IN_RANGE(aligned_phys, |
| (uintptr_t)K_MEM_VIRT_RAM_START, |
| (uintptr_t)(K_MEM_VIRT_RAM_END - 1)) || |
| IN_RANGE(aligned_phys + aligned_size - 1, |
| (uintptr_t)K_MEM_VIRT_RAM_START, |
| (uintptr_t)(K_MEM_VIRT_RAM_END - 1))) { |
| uint8_t *adjusted_start = MAX(dest_addr, K_MEM_VIRT_RAM_START); |
| uint8_t *adjusted_end = MIN(dest_addr + aligned_size, |
| K_MEM_VIRT_RAM_END); |
| size_t adjusted_sz = adjusted_end - adjusted_start; |
| |
| num_bits = adjusted_sz / CONFIG_MMU_PAGE_SIZE; |
| offset = virt_to_bitmap_offset(adjusted_start, adjusted_sz); |
| if (sys_bitarray_test_and_set_region( |
| &virt_region_bitmap, num_bits, offset, true)) |
| goto fail; |
| } |
| } else { |
| /* Obtain an appropriately sized chunk of virtual memory */ |
| dest_addr = virt_region_alloc(aligned_size, align_boundary); |
| if (!dest_addr) { |
| goto fail; |
| } |
| } |
| |
| /* If this fails there's something amiss with virt_region_get */ |
| __ASSERT((uintptr_t)dest_addr < |
| ((uintptr_t)dest_addr + (size - 1)), |
| "wraparound for virtual address %p (size %zu)", |
| dest_addr, size); |
| |
| LOG_DBG("arch_mem_map(%p, 0x%lx, %zu, %x) offset %lu", dest_addr, |
| aligned_phys, aligned_size, flags, addr_offset); |
| |
| arch_mem_map(dest_addr, aligned_phys, aligned_size, flags); |
| k_spin_unlock(&z_mm_lock, key); |
| |
| *virt_ptr = dest_addr + addr_offset; |
| return; |
| fail: |
| /* May re-visit this in the future, but for now running out of |
| * virtual address space or failing the arch_mem_map() call is |
| * an unrecoverable situation. |
| * |
| * Other problems not related to resource exhaustion we leave as |
| * assertions since they are clearly programming mistakes. |
| */ |
| LOG_ERR("memory mapping 0x%lx (size %zu, flags 0x%x) failed", |
| phys, size, flags); |
| k_panic(); |
| } |
| |
| void k_mem_unmap_phys_bare(uint8_t *virt, size_t size) |
| { |
| uintptr_t aligned_virt, addr_offset; |
| size_t aligned_size; |
| k_spinlock_key_t key; |
| |
| addr_offset = k_mem_region_align(&aligned_virt, &aligned_size, |
| POINTER_TO_UINT(virt), size, |
| CONFIG_MMU_PAGE_SIZE); |
| __ASSERT(aligned_size != 0U, "0-length mapping at 0x%lx", aligned_virt); |
| __ASSERT(aligned_virt < (aligned_virt + (aligned_size - 1)), |
| "wraparound for virtual address 0x%lx (size %zu)", |
| aligned_virt, aligned_size); |
| |
| key = k_spin_lock(&z_mm_lock); |
| |
| LOG_DBG("arch_mem_unmap(0x%lx, %zu) offset %lu", |
| aligned_virt, aligned_size, addr_offset); |
| |
| arch_mem_unmap(UINT_TO_POINTER(aligned_virt), aligned_size); |
| virt_region_free(UINT_TO_POINTER(aligned_virt), aligned_size); |
| k_spin_unlock(&z_mm_lock, key); |
| } |
| |
| /* |
| * Miscellaneous |
| */ |
| |
| size_t k_mem_region_align(uintptr_t *aligned_addr, size_t *aligned_size, |
| uintptr_t addr, size_t size, size_t align) |
| { |
| size_t addr_offset; |
| |
| /* The actual mapped region must be page-aligned. Round down the |
| * physical address and pad the region size appropriately |
| */ |
| *aligned_addr = ROUND_DOWN(addr, align); |
| addr_offset = addr - *aligned_addr; |
| *aligned_size = ROUND_UP(size + addr_offset, align); |
| |
| return addr_offset; |
| } |
| |
| #if defined(CONFIG_LINKER_USE_BOOT_SECTION) || defined(CONFIG_LINKER_USE_PINNED_SECTION) |
| static void mark_linker_section_pinned(void *start_addr, void *end_addr, |
| bool pin) |
| { |
| struct k_mem_page_frame *pf; |
| uint8_t *addr; |
| |
| uintptr_t pinned_start = ROUND_DOWN(POINTER_TO_UINT(start_addr), |
| CONFIG_MMU_PAGE_SIZE); |
| uintptr_t pinned_end = ROUND_UP(POINTER_TO_UINT(end_addr), |
| CONFIG_MMU_PAGE_SIZE); |
| size_t pinned_size = pinned_end - pinned_start; |
| |
| VIRT_FOREACH(UINT_TO_POINTER(pinned_start), pinned_size, addr) |
| { |
| pf = k_mem_phys_to_page_frame(K_MEM_BOOT_VIRT_TO_PHYS(addr)); |
| frame_mapped_set(pf, addr); |
| |
| if (pin) { |
| k_mem_page_frame_set(pf, K_MEM_PAGE_FRAME_PINNED); |
| } else { |
| k_mem_page_frame_clear(pf, K_MEM_PAGE_FRAME_PINNED); |
| #ifdef CONFIG_DEMAND_PAGING |
| if (k_mem_page_frame_is_evictable(pf)) { |
| k_mem_paging_eviction_add(pf); |
| } |
| #endif |
| } |
| } |
| } |
| #endif /* CONFIG_LINKER_USE_BOOT_SECTION) || CONFIG_LINKER_USE_PINNED_SECTION */ |
| |
| void z_mem_manage_init(void) |
| { |
| uintptr_t phys; |
| uint8_t *addr; |
| struct k_mem_page_frame *pf; |
| k_spinlock_key_t key = k_spin_lock(&z_mm_lock); |
| |
| free_page_frame_list_init(); |
| |
| ARG_UNUSED(addr); |
| |
| #ifdef CONFIG_ARCH_HAS_RESERVED_PAGE_FRAMES |
| /* If some page frames are unavailable for use as memory, arch |
| * code will mark K_MEM_PAGE_FRAME_RESERVED in their flags |
| */ |
| arch_reserved_pages_update(); |
| #endif /* CONFIG_ARCH_HAS_RESERVED_PAGE_FRAMES */ |
| |
| #ifdef CONFIG_LINKER_GENERIC_SECTIONS_PRESENT_AT_BOOT |
| /* All pages composing the Zephyr image are mapped at boot in a |
| * predictable way. This can change at runtime. |
| */ |
| VIRT_FOREACH(K_MEM_KERNEL_VIRT_START, K_MEM_KERNEL_VIRT_SIZE, addr) |
| { |
| pf = k_mem_phys_to_page_frame(K_MEM_BOOT_VIRT_TO_PHYS(addr)); |
| frame_mapped_set(pf, addr); |
| |
| /* TODO: for now we pin the whole Zephyr image. Demand paging |
| * currently tested with anonymously-mapped pages which are not |
| * pinned. |
| * |
| * We will need to setup linker regions for a subset of kernel |
| * code/data pages which are pinned in memory and |
| * may not be evicted. This will contain critical CPU data |
| * structures, and any code used to perform page fault |
| * handling, page-ins, etc. |
| */ |
| k_mem_page_frame_set(pf, K_MEM_PAGE_FRAME_PINNED); |
| } |
| #endif /* CONFIG_LINKER_GENERIC_SECTIONS_PRESENT_AT_BOOT */ |
| |
| #ifdef CONFIG_LINKER_USE_BOOT_SECTION |
| /* Pin the boot section to prevent it from being swapped out during |
| * boot process. Will be un-pinned once boot process completes. |
| */ |
| mark_linker_section_pinned(lnkr_boot_start, lnkr_boot_end, true); |
| #endif /* CONFIG_LINKER_USE_BOOT_SECTION */ |
| |
| #ifdef CONFIG_LINKER_USE_PINNED_SECTION |
| /* Pin the page frames correspondng to the pinned symbols */ |
| mark_linker_section_pinned(lnkr_pinned_start, lnkr_pinned_end, true); |
| #endif /* CONFIG_LINKER_USE_PINNED_SECTION */ |
| |
| /* Any remaining pages that aren't mapped, reserved, or pinned get |
| * added to the free pages list |
| */ |
| K_MEM_PAGE_FRAME_FOREACH(phys, pf) { |
| if (k_mem_page_frame_is_available(pf)) { |
| free_page_frame_list_put(pf); |
| } |
| } |
| LOG_DBG("free page frames: %zu", z_free_page_count); |
| |
| #ifdef CONFIG_DEMAND_PAGING |
| #ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM |
| z_paging_histogram_init(); |
| #endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */ |
| k_mem_paging_backing_store_init(); |
| k_mem_paging_eviction_init(); |
| /* start tracking evictable page installed above if any */ |
| K_MEM_PAGE_FRAME_FOREACH(phys, pf) { |
| if (k_mem_page_frame_is_evictable(pf)) { |
| k_mem_paging_eviction_add(pf); |
| } |
| } |
| #endif /* CONFIG_DEMAND_PAGING */ |
| #if __ASSERT_ON |
| page_frames_initialized = true; |
| #endif |
| k_spin_unlock(&z_mm_lock, key); |
| |
| #ifndef CONFIG_LINKER_GENERIC_SECTIONS_PRESENT_AT_BOOT |
| /* If BSS section is not present in memory at boot, |
| * it would not have been cleared. This needs to be |
| * done now since paging mechanism has been initialized |
| * and the BSS pages can be brought into physical |
| * memory to be cleared. |
| */ |
| z_bss_zero(); |
| #endif /* CONFIG_LINKER_GENERIC_SECTIONS_PRESENT_AT_BOOT */ |
| } |
| |
| void z_mem_manage_boot_finish(void) |
| { |
| #ifdef CONFIG_LINKER_USE_BOOT_SECTION |
| /* At the end of boot process, unpin the boot sections |
| * as they don't need to be in memory all the time anymore. |
| */ |
| mark_linker_section_pinned(lnkr_boot_start, lnkr_boot_end, false); |
| #endif /* CONFIG_LINKER_USE_BOOT_SECTION */ |
| } |
| |
| #ifdef CONFIG_DEMAND_PAGING |
| |
| #ifdef CONFIG_DEMAND_PAGING_STATS |
| struct k_mem_paging_stats_t paging_stats; |
| extern struct k_mem_paging_histogram_t z_paging_histogram_eviction; |
| extern struct k_mem_paging_histogram_t z_paging_histogram_backing_store_page_in; |
| extern struct k_mem_paging_histogram_t z_paging_histogram_backing_store_page_out; |
| #endif /* CONFIG_DEMAND_PAGING_STATS */ |
| |
| static inline void do_backing_store_page_in(uintptr_t location) |
| { |
| #ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM |
| uint32_t time_diff; |
| |
| #ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS |
| timing_t time_start, time_end; |
| |
| time_start = timing_counter_get(); |
| #else |
| uint32_t time_start; |
| |
| time_start = k_cycle_get_32(); |
| #endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */ |
| #endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */ |
| |
| k_mem_paging_backing_store_page_in(location); |
| |
| #ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM |
| #ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS |
| time_end = timing_counter_get(); |
| time_diff = (uint32_t)timing_cycles_get(&time_start, &time_end); |
| #else |
| time_diff = k_cycle_get_32() - time_start; |
| #endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */ |
| |
| z_paging_histogram_inc(&z_paging_histogram_backing_store_page_in, |
| time_diff); |
| #endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */ |
| } |
| |
| static inline void do_backing_store_page_out(uintptr_t location) |
| { |
| #ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM |
| uint32_t time_diff; |
| |
| #ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS |
| timing_t time_start, time_end; |
| |
| time_start = timing_counter_get(); |
| #else |
| uint32_t time_start; |
| |
| time_start = k_cycle_get_32(); |
| #endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */ |
| #endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */ |
| |
| k_mem_paging_backing_store_page_out(location); |
| |
| #ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM |
| #ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS |
| time_end = timing_counter_get(); |
| time_diff = (uint32_t)timing_cycles_get(&time_start, &time_end); |
| #else |
| time_diff = k_cycle_get_32() - time_start; |
| #endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */ |
| |
| z_paging_histogram_inc(&z_paging_histogram_backing_store_page_out, |
| time_diff); |
| #endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */ |
| } |
| |
| /* Current implementation relies on interrupt locking to any prevent page table |
| * access, which falls over if other CPUs are active. Addressing this is not |
| * as simple as using spinlocks as regular memory reads/writes constitute |
| * "access" in this sense. |
| * |
| * Current needs for demand paging are on uniprocessor systems. |
| */ |
| BUILD_ASSERT(!IS_ENABLED(CONFIG_SMP)); |
| |
| static void virt_region_foreach(void *addr, size_t size, |
| void (*func)(void *)) |
| { |
| k_mem_assert_virtual_region(addr, size); |
| |
| for (size_t offset = 0; offset < size; offset += CONFIG_MMU_PAGE_SIZE) { |
| func((uint8_t *)addr + offset); |
| } |
| } |
| |
| /* |
| * Perform some preparatory steps before paging out. The provided page frame |
| * must be evicted to the backing store immediately after this is called |
| * with a call to k_mem_paging_backing_store_page_out() if it contains |
| * a data page. |
| * |
| * - Map page frame to scratch area if requested. This always is true if we're |
| * doing a page fault, but is only set on manual evictions if the page is |
| * dirty. |
| * - If mapped: |
| * - obtain backing store location and populate location parameter |
| * - Update page tables with location |
| * - Mark page frame as busy |
| * |
| * Returns -ENOMEM if the backing store is full |
| */ |
| static int page_frame_prepare_locked(struct k_mem_page_frame *pf, bool *dirty_ptr, |
| bool page_fault, uintptr_t *location_ptr) |
| { |
| uintptr_t phys; |
| int ret; |
| bool dirty = *dirty_ptr; |
| |
| phys = k_mem_page_frame_to_phys(pf); |
| __ASSERT(!k_mem_page_frame_is_pinned(pf), "page frame 0x%lx is pinned", |
| phys); |
| |
| /* If the backing store doesn't have a copy of the page, even if it |
| * wasn't modified, treat as dirty. This can happen for a few |
| * reasons: |
| * 1) Page has never been swapped out before, and the backing store |
| * wasn't pre-populated with this data page. |
| * 2) Page was swapped out before, but the page contents were not |
| * preserved after swapping back in. |
| * 3) Page contents were preserved when swapped back in, but were later |
| * evicted from the backing store to make room for other evicted |
| * pages. |
| */ |
| if (k_mem_page_frame_is_mapped(pf)) { |
| dirty = dirty || !k_mem_page_frame_is_backed(pf); |
| } |
| |
| if (dirty || page_fault) { |
| arch_mem_scratch(phys); |
| } |
| |
| if (k_mem_page_frame_is_mapped(pf)) { |
| ret = k_mem_paging_backing_store_location_get(pf, location_ptr, |
| page_fault); |
| if (ret != 0) { |
| LOG_ERR("out of backing store memory"); |
| return -ENOMEM; |
| } |
| arch_mem_page_out(k_mem_page_frame_to_virt(pf), *location_ptr); |
| k_mem_paging_eviction_remove(pf); |
| } else { |
| /* Shouldn't happen unless this function is mis-used */ |
| __ASSERT(!dirty, "un-mapped page determined to be dirty"); |
| } |
| #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ |
| /* Mark as busy so that k_mem_page_frame_is_evictable() returns false */ |
| __ASSERT(!k_mem_page_frame_is_busy(pf), "page frame 0x%lx is already busy", |
| phys); |
| k_mem_page_frame_set(pf, K_MEM_PAGE_FRAME_BUSY); |
| #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */ |
| /* Update dirty parameter, since we set to true if it wasn't backed |
| * even if otherwise clean |
| */ |
| *dirty_ptr = dirty; |
| |
| return 0; |
| } |
| |
| static int do_mem_evict(void *addr) |
| { |
| bool dirty; |
| struct k_mem_page_frame *pf; |
| uintptr_t location; |
| int key, ret; |
| uintptr_t flags, phys; |
| |
| #if CONFIG_DEMAND_PAGING_ALLOW_IRQ |
| __ASSERT(!k_is_in_isr(), |
| "%s is unavailable in ISRs with CONFIG_DEMAND_PAGING_ALLOW_IRQ", |
| __func__); |
| k_sched_lock(); |
| #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */ |
| key = irq_lock(); |
| flags = arch_page_info_get(addr, &phys, false); |
| __ASSERT((flags & ARCH_DATA_PAGE_NOT_MAPPED) == 0, |
| "address %p isn't mapped", addr); |
| if ((flags & ARCH_DATA_PAGE_LOADED) == 0) { |
| /* Un-mapped or already evicted. Nothing to do */ |
| ret = 0; |
| goto out; |
| } |
| |
| dirty = (flags & ARCH_DATA_PAGE_DIRTY) != 0; |
| pf = k_mem_phys_to_page_frame(phys); |
| __ASSERT(k_mem_page_frame_to_virt(pf) == addr, "page frame address mismatch"); |
| ret = page_frame_prepare_locked(pf, &dirty, false, &location); |
| if (ret != 0) { |
| goto out; |
| } |
| |
| __ASSERT(ret == 0, "failed to prepare page frame"); |
| #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ |
| irq_unlock(key); |
| #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */ |
| if (dirty) { |
| do_backing_store_page_out(location); |
| } |
| #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ |
| key = irq_lock(); |
| #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */ |
| page_frame_free_locked(pf); |
| out: |
| irq_unlock(key); |
| #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ |
| k_sched_unlock(); |
| #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */ |
| return ret; |
| } |
| |
| int k_mem_page_out(void *addr, size_t size) |
| { |
| __ASSERT(page_frames_initialized, "%s called on %p too early", __func__, |
| addr); |
| k_mem_assert_virtual_region(addr, size); |
| |
| for (size_t offset = 0; offset < size; offset += CONFIG_MMU_PAGE_SIZE) { |
| void *pos = (uint8_t *)addr + offset; |
| int ret; |
| |
| ret = do_mem_evict(pos); |
| if (ret != 0) { |
| return ret; |
| } |
| } |
| |
| return 0; |
| } |
| |
| int k_mem_page_frame_evict(uintptr_t phys) |
| { |
| int key, ret; |
| struct k_mem_page_frame *pf; |
| bool dirty; |
| uintptr_t flags; |
| uintptr_t location; |
| |
| __ASSERT(page_frames_initialized, "%s called on 0x%lx too early", |
| __func__, phys); |
| |
| /* Implementation is similar to do_page_fault() except there is no |
| * data page to page-in, see comments in that function. |
| */ |
| |
| #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ |
| __ASSERT(!k_is_in_isr(), |
| "%s is unavailable in ISRs with CONFIG_DEMAND_PAGING_ALLOW_IRQ", |
| __func__); |
| k_sched_lock(); |
| #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */ |
| key = irq_lock(); |
| pf = k_mem_phys_to_page_frame(phys); |
| if (!k_mem_page_frame_is_mapped(pf)) { |
| /* Nothing to do, free page */ |
| ret = 0; |
| goto out; |
| } |
| flags = arch_page_info_get(k_mem_page_frame_to_virt(pf), NULL, false); |
| /* Shouldn't ever happen */ |
| __ASSERT((flags & ARCH_DATA_PAGE_LOADED) != 0, "data page not loaded"); |
| dirty = (flags & ARCH_DATA_PAGE_DIRTY) != 0; |
| ret = page_frame_prepare_locked(pf, &dirty, false, &location); |
| if (ret != 0) { |
| goto out; |
| } |
| |
| #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ |
| irq_unlock(key); |
| #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */ |
| if (dirty) { |
| do_backing_store_page_out(location); |
| } |
| #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ |
| key = irq_lock(); |
| #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */ |
| page_frame_free_locked(pf); |
| out: |
| irq_unlock(key); |
| #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ |
| k_sched_unlock(); |
| #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */ |
| return ret; |
| } |
| |
| static inline void paging_stats_faults_inc(struct k_thread *faulting_thread, |
| int key) |
| { |
| #ifdef CONFIG_DEMAND_PAGING_STATS |
| bool is_irq_unlocked = arch_irq_unlocked(key); |
| |
| paging_stats.pagefaults.cnt++; |
| |
| if (is_irq_unlocked) { |
| paging_stats.pagefaults.irq_unlocked++; |
| } else { |
| paging_stats.pagefaults.irq_locked++; |
| } |
| |
| #ifdef CONFIG_DEMAND_PAGING_THREAD_STATS |
| faulting_thread->paging_stats.pagefaults.cnt++; |
| |
| if (is_irq_unlocked) { |
| faulting_thread->paging_stats.pagefaults.irq_unlocked++; |
| } else { |
| faulting_thread->paging_stats.pagefaults.irq_locked++; |
| } |
| #else |
| ARG_UNUSED(faulting_thread); |
| #endif /* CONFIG_DEMAND_PAGING_THREAD_STATS */ |
| |
| #ifndef CONFIG_DEMAND_PAGING_ALLOW_IRQ |
| if (k_is_in_isr()) { |
| paging_stats.pagefaults.in_isr++; |
| |
| #ifdef CONFIG_DEMAND_PAGING_THREAD_STATS |
| faulting_thread->paging_stats.pagefaults.in_isr++; |
| #endif /* CONFIG_DEMAND_PAGING_THREAD_STATS */ |
| } |
| #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */ |
| #endif /* CONFIG_DEMAND_PAGING_STATS */ |
| } |
| |
| static inline void paging_stats_eviction_inc(struct k_thread *faulting_thread, |
| bool dirty) |
| { |
| #ifdef CONFIG_DEMAND_PAGING_STATS |
| if (dirty) { |
| paging_stats.eviction.dirty++; |
| } else { |
| paging_stats.eviction.clean++; |
| } |
| #ifdef CONFIG_DEMAND_PAGING_THREAD_STATS |
| if (dirty) { |
| faulting_thread->paging_stats.eviction.dirty++; |
| } else { |
| faulting_thread->paging_stats.eviction.clean++; |
| } |
| #else |
| ARG_UNUSED(faulting_thread); |
| #endif /* CONFIG_DEMAND_PAGING_THREAD_STATS */ |
| #endif /* CONFIG_DEMAND_PAGING_STATS */ |
| } |
| |
| static inline struct k_mem_page_frame *do_eviction_select(bool *dirty) |
| { |
| struct k_mem_page_frame *pf; |
| |
| #ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM |
| uint32_t time_diff; |
| |
| #ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS |
| timing_t time_start, time_end; |
| |
| time_start = timing_counter_get(); |
| #else |
| uint32_t time_start; |
| |
| time_start = k_cycle_get_32(); |
| #endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */ |
| #endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */ |
| |
| pf = k_mem_paging_eviction_select(dirty); |
| |
| #ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM |
| #ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS |
| time_end = timing_counter_get(); |
| time_diff = (uint32_t)timing_cycles_get(&time_start, &time_end); |
| #else |
| time_diff = k_cycle_get_32() - time_start; |
| #endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */ |
| |
| z_paging_histogram_inc(&z_paging_histogram_eviction, time_diff); |
| #endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */ |
| |
| return pf; |
| } |
| |
| static bool do_page_fault(void *addr, bool pin) |
| { |
| struct k_mem_page_frame *pf; |
| int key, ret; |
| uintptr_t page_in_location, page_out_location; |
| enum arch_page_location status; |
| bool result; |
| bool dirty = false; |
| struct k_thread *faulting_thread = _current_cpu->current; |
| |
| __ASSERT(page_frames_initialized, "page fault at %p happened too early", |
| addr); |
| |
| LOG_DBG("page fault at %p", addr); |
| |
| /* |
| * TODO: Add performance accounting: |
| * - k_mem_paging_eviction_select() metrics |
| * * periodic timer execution time histogram (if implemented) |
| */ |
| |
| #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ |
| /* We lock the scheduler so that other threads are never scheduled |
| * during the page-in/out operation. |
| * |
| * We do however re-enable interrupts during the page-in/page-out |
| * operation if and only if interrupts were enabled when the exception |
| * was taken; in this configuration page faults in an ISR are a bug; |
| * all their code/data must be pinned. |
| * |
| * If interrupts were disabled when the exception was taken, the |
| * arch code is responsible for keeping them that way when entering |
| * this function. |
| * |
| * If this is not enabled, then interrupts are always locked for the |
| * entire operation. This is far worse for system interrupt latency |
| * but requires less pinned pages and ISRs may also take page faults. |
| * |
| * Support for allowing k_mem_paging_backing_store_page_out() and |
| * k_mem_paging_backing_store_page_in() to also sleep and allow |
| * other threads to run (such as in the case where the transfer is |
| * async DMA) is not implemented. Even if limited to thread context, |
| * arbitrary memory access triggering exceptions that put a thread to |
| * sleep on a contended page fault operation will break scheduling |
| * assumptions of cooperative threads or threads that implement |
| * crticial sections with spinlocks or disabling IRQs. |
| */ |
| k_sched_lock(); |
| __ASSERT(!k_is_in_isr(), "ISR page faults are forbidden"); |
| #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */ |
| |
| key = irq_lock(); |
| status = arch_page_location_get(addr, &page_in_location); |
| if (status == ARCH_PAGE_LOCATION_BAD) { |
| /* Return false to treat as a fatal error */ |
| result = false; |
| goto out; |
| } |
| result = true; |
| |
| if (status == ARCH_PAGE_LOCATION_PAGED_IN) { |
| if (pin) { |
| /* It's a physical memory address */ |
| uintptr_t phys = page_in_location; |
| |
| pf = k_mem_phys_to_page_frame(phys); |
| if (!k_mem_page_frame_is_pinned(pf)) { |
| k_mem_paging_eviction_remove(pf); |
| k_mem_page_frame_set(pf, K_MEM_PAGE_FRAME_PINNED); |
| } |
| } |
| |
| /* This if-block is to pin the page if it is |
| * already present in physical memory. There is |
| * no need to go through the following code to |
| * pull in the data pages. So skip to the end. |
| */ |
| goto out; |
| } |
| __ASSERT(status == ARCH_PAGE_LOCATION_PAGED_OUT, |
| "unexpected status value %d", status); |
| |
| paging_stats_faults_inc(faulting_thread, key); |
| |
| pf = free_page_frame_list_get(); |
| if (pf == NULL) { |
| /* Need to evict a page frame */ |
| pf = do_eviction_select(&dirty); |
| __ASSERT(pf != NULL, "failed to get a page frame"); |
| LOG_DBG("evicting %p at 0x%lx", |
| k_mem_page_frame_to_virt(pf), |
| k_mem_page_frame_to_phys(pf)); |
| |
| paging_stats_eviction_inc(faulting_thread, dirty); |
| } |
| ret = page_frame_prepare_locked(pf, &dirty, true, &page_out_location); |
| __ASSERT(ret == 0, "failed to prepare page frame"); |
| |
| #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ |
| irq_unlock(key); |
| /* Interrupts are now unlocked if they were not locked when we entered |
| * this function, and we may service ISRs. The scheduler is still |
| * locked. |
| */ |
| #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */ |
| if (dirty) { |
| do_backing_store_page_out(page_out_location); |
| } |
| do_backing_store_page_in(page_in_location); |
| |
| #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ |
| key = irq_lock(); |
| k_mem_page_frame_clear(pf, K_MEM_PAGE_FRAME_BUSY); |
| #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */ |
| k_mem_page_frame_clear(pf, K_MEM_PAGE_FRAME_MAPPED); |
| frame_mapped_set(pf, addr); |
| if (pin) { |
| k_mem_page_frame_set(pf, K_MEM_PAGE_FRAME_PINNED); |
| } |
| |
| arch_mem_page_in(addr, k_mem_page_frame_to_phys(pf)); |
| k_mem_paging_backing_store_page_finalize(pf, page_in_location); |
| if (!pin) { |
| k_mem_paging_eviction_add(pf); |
| } |
| out: |
| irq_unlock(key); |
| #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ |
| k_sched_unlock(); |
| #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */ |
| |
| return result; |
| } |
| |
| static void do_page_in(void *addr) |
| { |
| bool ret; |
| |
| ret = do_page_fault(addr, false); |
| __ASSERT(ret, "unmapped memory address %p", addr); |
| (void)ret; |
| } |
| |
| void k_mem_page_in(void *addr, size_t size) |
| { |
| __ASSERT(!IS_ENABLED(CONFIG_DEMAND_PAGING_ALLOW_IRQ) || !k_is_in_isr(), |
| "%s may not be called in ISRs if CONFIG_DEMAND_PAGING_ALLOW_IRQ is enabled", |
| __func__); |
| virt_region_foreach(addr, size, do_page_in); |
| } |
| |
| static void do_mem_pin(void *addr) |
| { |
| bool ret; |
| |
| ret = do_page_fault(addr, true); |
| __ASSERT(ret, "unmapped memory address %p", addr); |
| (void)ret; |
| } |
| |
| void k_mem_pin(void *addr, size_t size) |
| { |
| __ASSERT(!IS_ENABLED(CONFIG_DEMAND_PAGING_ALLOW_IRQ) || !k_is_in_isr(), |
| "%s may not be called in ISRs if CONFIG_DEMAND_PAGING_ALLOW_IRQ is enabled", |
| __func__); |
| virt_region_foreach(addr, size, do_mem_pin); |
| } |
| |
| bool k_mem_page_fault(void *addr) |
| { |
| return do_page_fault(addr, false); |
| } |
| |
| static void do_mem_unpin(void *addr) |
| { |
| struct k_mem_page_frame *pf; |
| unsigned int key; |
| uintptr_t flags, phys; |
| |
| key = irq_lock(); |
| flags = arch_page_info_get(addr, &phys, false); |
| __ASSERT((flags & ARCH_DATA_PAGE_NOT_MAPPED) == 0, |
| "invalid data page at %p", addr); |
| if ((flags & ARCH_DATA_PAGE_LOADED) != 0) { |
| pf = k_mem_phys_to_page_frame(phys); |
| if (k_mem_page_frame_is_pinned(pf)) { |
| k_mem_page_frame_clear(pf, K_MEM_PAGE_FRAME_PINNED); |
| k_mem_paging_eviction_add(pf); |
| } |
| } |
| irq_unlock(key); |
| } |
| |
| void k_mem_unpin(void *addr, size_t size) |
| { |
| __ASSERT(page_frames_initialized, "%s called on %p too early", __func__, |
| addr); |
| virt_region_foreach(addr, size, do_mem_unpin); |
| } |
| |
| #endif /* CONFIG_DEMAND_PAGING */ |