/*
 * Copyright (c) 2020 Intel Corporation
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Routines for managing virtual address spaces
 */

#include <stdint.h>
#include <kernel_arch_interface.h>
#include <zephyr/spinlock.h>
#include <mmu.h>
#include <zephyr/init.h>
#include <kernel_internal.h>
#include <zephyr/internal/syscall_handler.h>
#include <zephyr/toolchain.h>
#include <zephyr/linker/linker-defs.h>
#include <zephyr/sys/bitarray.h>
#include <zephyr/sys/check.h>
#include <zephyr/sys/math_extras.h>
#include <zephyr/timing/timing.h>
#include <zephyr/logging/log.h>
LOG_MODULE_DECLARE(os, CONFIG_KERNEL_LOG_LEVEL);

#ifdef CONFIG_DEMAND_PAGING
#include <zephyr/kernel/mm/demand_paging.h>
#endif /* CONFIG_DEMAND_PAGING */

/*
 * General terminology:
 * - A page frame is a page-sized physical memory region in RAM. It is a
 *   container where a data page may be placed. It is always referred to by
 *   physical address. We have a convention of using uintptr_t for physical
 *   addresses. We instantiate a struct k_mem_page_frame to store metadata for
 *   every page frame.
 *
 * - A data page is a page-sized region of data. It may exist in a page frame,
 *   or be paged out to some backing store. Its location can always be looked
 *   up in the CPU's page tables (or equivalent) by virtual address.
 *   The data type will always be void * or in some cases uint8_t * when we
 *   want to do pointer arithmetic.
 */
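
/*
 * Illustrative sketch (not used by this file): given a physical address in
 * RAM, the page frame metadata and any current virtual mapping can be looked
 * up with the helpers used throughout this file (declared in mmu.h), e.g.
 *
 *	struct k_mem_page_frame *pf = k_mem_phys_to_page_frame(phys);
 *	uintptr_t back = k_mem_page_frame_to_phys(pf);
 *	void *va = k_mem_page_frame_is_mapped(pf) ?
 *		   k_mem_page_frame_to_virt(pf) : NULL;
 *
 * The snippet only demonstrates the phys <-> page frame <-> virt
 * relationships described above.
 */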

/* Spinlock to protect any globals in this file and serialize page table
 * updates in arch code
 */
struct k_spinlock z_mm_lock;

/*
 * General page frame management
 */

/* Database of all RAM page frames */
struct k_mem_page_frame k_mem_page_frames[K_MEM_NUM_PAGE_FRAMES];

#if __ASSERT_ON
/* Indicator that k_mem_page_frames has been initialized, many of these APIs do
 * not work before POST_KERNEL
 */
static bool page_frames_initialized;
#endif

/* Add colors to page table dumps to indicate mapping type */
#define COLOR_PAGE_FRAMES 1

#if COLOR_PAGE_FRAMES
#define ANSI_DEFAULT "\x1B" "[0m"
#define ANSI_RED     "\x1B" "[1;31m"
#define ANSI_GREEN   "\x1B" "[1;32m"
#define ANSI_YELLOW  "\x1B" "[1;33m"
#define ANSI_BLUE    "\x1B" "[1;34m"
#define ANSI_MAGENTA "\x1B" "[1;35m"
#define ANSI_CYAN    "\x1B" "[1;36m"
#define ANSI_GREY    "\x1B" "[1;90m"

#define COLOR(x) printk(_CONCAT(ANSI_, x))
#else
#define COLOR(x) do { } while (false)
#endif /* COLOR_PAGE_FRAMES */

/* LCOV_EXCL_START */
static void page_frame_dump(struct k_mem_page_frame *pf)
{
	if (k_mem_page_frame_is_free(pf)) {
		COLOR(GREY);
		printk("-");
	} else if (k_mem_page_frame_is_reserved(pf)) {
		COLOR(CYAN);
		printk("R");
	} else if (k_mem_page_frame_is_busy(pf)) {
		COLOR(MAGENTA);
		printk("B");
	} else if (k_mem_page_frame_is_pinned(pf)) {
		COLOR(YELLOW);
		printk("P");
	} else if (k_mem_page_frame_is_available(pf)) {
		COLOR(GREY);
		printk(".");
	} else if (k_mem_page_frame_is_mapped(pf)) {
		COLOR(DEFAULT);
		printk("M");
	} else {
		COLOR(RED);
		printk("?");
	}
}

void k_mem_page_frames_dump(void)
{
	int column = 0;

	__ASSERT(page_frames_initialized, "%s called too early", __func__);
	printk("Physical memory from 0x%lx to 0x%lx\n",
	       K_MEM_PHYS_RAM_START, K_MEM_PHYS_RAM_END);

	for (int i = 0; i < K_MEM_NUM_PAGE_FRAMES; i++) {
		struct k_mem_page_frame *pf = &k_mem_page_frames[i];

		page_frame_dump(pf);

		column++;
		if (column == 64) {
			column = 0;
			printk("\n");
		}
	}

	COLOR(DEFAULT);
	if (column != 0) {
		printk("\n");
	}
}
/* LCOV_EXCL_STOP */

#define VIRT_FOREACH(_base, _size, _pos) \
	for ((_pos) = (_base); \
	     (_pos) < ((uint8_t *)(_base) + (_size)); (_pos) += CONFIG_MMU_PAGE_SIZE)

#define PHYS_FOREACH(_base, _size, _pos) \
	for ((_pos) = (_base); \
	     (_pos) < ((uintptr_t)(_base) + (_size)); (_pos) += CONFIG_MMU_PAGE_SIZE)


/*
 * Virtual address space management
 *
 * Call all of these functions with z_mm_lock held.
 *
 * Overall virtual memory map: When the kernel starts, it resides in
 * virtual memory in the region K_MEM_KERNEL_VIRT_START to
 * K_MEM_KERNEL_VIRT_END. Unused virtual memory past this, up to the limit
 * noted by CONFIG_KERNEL_VM_SIZE may be used for runtime memory mappings.
 *
 * If CONFIG_ARCH_MAPS_ALL_RAM is set, we do not just map the kernel image,
 * but have a mapping for all RAM in place. This is for special architectural
 * purposes and does not otherwise affect page frame accounting or flags;
 * the only guarantee is that such RAM mapping outside of the Zephyr image
 * won't be disturbed by subsequent memory mapping calls.
 *
 * +--------------+ <- K_MEM_VIRT_RAM_START
 * | Undefined VM | <- May contain ancillary regions like x86_64's locore
 * +--------------+ <- K_MEM_KERNEL_VIRT_START (often == K_MEM_VIRT_RAM_START)
 * | Mapping for  |
 * | main kernel  |
 * | image        |
 * |              |
 * |              |
 * +--------------+ <- K_MEM_VM_FREE_START
 * |              |
 * | Unused,      |
 * | Available VM |
 * |              |
 * |..............| <- mapping_pos (grows downward as more mappings are made)
 * | Mapping      |
 * +--------------+
 * | Mapping      |
 * +--------------+
 * | ...          |
 * +--------------+
 * | Mapping      |
 * +--------------+ <- mappings start here
 * | Reserved     | <- special purpose virtual page(s) of size K_MEM_VM_RESERVED
 * +--------------+ <- K_MEM_VIRT_RAM_END
 */

/* Bitmap of virtual addresses where one bit corresponds to one page.
 * This is being used for virt_region_alloc() to figure out which
 * region of virtual addresses can be used for memory mapping.
 *
 * Note that bit #0 is the highest address so that allocation is
 * done in reverse from highest address.
 */
SYS_BITARRAY_DEFINE_STATIC(virt_region_bitmap,
			   CONFIG_KERNEL_VM_SIZE / CONFIG_MMU_PAGE_SIZE);

static bool virt_region_inited;

#define Z_VIRT_REGION_START_ADDR K_MEM_VM_FREE_START
#define Z_VIRT_REGION_END_ADDR (K_MEM_VIRT_RAM_END - K_MEM_VM_RESERVED)

static inline uintptr_t virt_from_bitmap_offset(size_t offset, size_t size)
{
	return POINTER_TO_UINT(K_MEM_VIRT_RAM_END)
	       - (offset * CONFIG_MMU_PAGE_SIZE) - size;
}

static inline size_t virt_to_bitmap_offset(void *vaddr, size_t size)
{
	return (POINTER_TO_UINT(K_MEM_VIRT_RAM_END)
		- POINTER_TO_UINT(vaddr) - size) / CONFIG_MMU_PAGE_SIZE;
}

static void virt_region_init(void)
{
	size_t offset, num_bits;

	/* There are regions where we should never map via
	 * k_mem_map() and k_mem_map_phys_bare(). Mark them as
	 * already allocated so they will never be used.
	 */

	if (K_MEM_VM_RESERVED > 0) {
		/* Mark reserved region at end of virtual address space */
		num_bits = K_MEM_VM_RESERVED / CONFIG_MMU_PAGE_SIZE;
		(void)sys_bitarray_set_region(&virt_region_bitmap,
					      num_bits, 0);
	}

	/* Mark all bits up to K_MEM_VM_FREE_START as allocated */
	num_bits = POINTER_TO_UINT(K_MEM_VM_FREE_START)
		   - POINTER_TO_UINT(K_MEM_VIRT_RAM_START);
	offset = virt_to_bitmap_offset(K_MEM_VIRT_RAM_START, num_bits);
	num_bits /= CONFIG_MMU_PAGE_SIZE;
	(void)sys_bitarray_set_region(&virt_region_bitmap,
				      num_bits, offset);

	virt_region_inited = true;
}

static void virt_region_free(void *vaddr, size_t size)
{
	size_t offset, num_bits;
	uint8_t *vaddr_u8 = (uint8_t *)vaddr;

	if (unlikely(!virt_region_inited)) {
		virt_region_init();
	}

#ifndef CONFIG_KERNEL_DIRECT_MAP
	/* Without the need to support K_MEM_DIRECT_MAP, the region must be
	 * able to be represented in the bitmap. So this case is
	 * simple.
	 */

	__ASSERT((vaddr_u8 >= Z_VIRT_REGION_START_ADDR)
		 && ((vaddr_u8 + size - 1) < Z_VIRT_REGION_END_ADDR),
		 "invalid virtual address region %p (%zu)", vaddr_u8, size);
	if (!((vaddr_u8 >= Z_VIRT_REGION_START_ADDR)
	      && ((vaddr_u8 + size - 1) < Z_VIRT_REGION_END_ADDR))) {
		return;
	}

	offset = virt_to_bitmap_offset(vaddr, size);
	num_bits = size / CONFIG_MMU_PAGE_SIZE;
	(void)sys_bitarray_free(&virt_region_bitmap, num_bits, offset);
#else /* !CONFIG_KERNEL_DIRECT_MAP */
	/* With K_MEM_DIRECT_MAP, the region can be outside of the virtual
	 * memory space, wholly within it, or overlap partially.
	 * So additional processing is needed to make sure we only
	 * mark the pages within the bitmap.
	 */
	if (((vaddr_u8 >= Z_VIRT_REGION_START_ADDR) &&
	     (vaddr_u8 < Z_VIRT_REGION_END_ADDR)) ||
	    (((vaddr_u8 + size - 1) >= Z_VIRT_REGION_START_ADDR) &&
	     ((vaddr_u8 + size - 1) < Z_VIRT_REGION_END_ADDR))) {
		uint8_t *adjusted_start = MAX(vaddr_u8, Z_VIRT_REGION_START_ADDR);
		uint8_t *adjusted_end = MIN(vaddr_u8 + size,
					    Z_VIRT_REGION_END_ADDR);
		size_t adjusted_sz = adjusted_end - adjusted_start;

		offset = virt_to_bitmap_offset(adjusted_start, adjusted_sz);
		num_bits = adjusted_sz / CONFIG_MMU_PAGE_SIZE;
		(void)sys_bitarray_free(&virt_region_bitmap, num_bits, offset);
	}
#endif /* !CONFIG_KERNEL_DIRECT_MAP */
}

static void *virt_region_alloc(size_t size, size_t align)
{
	uintptr_t dest_addr;
	size_t alloc_size;
	size_t offset;
	size_t num_bits;
	int ret;

	if (unlikely(!virt_region_inited)) {
		virt_region_init();
	}

	/* Possibly request more pages to ensure we can get an aligned virtual address */
	num_bits = (size + align - CONFIG_MMU_PAGE_SIZE) / CONFIG_MMU_PAGE_SIZE;
	alloc_size = num_bits * CONFIG_MMU_PAGE_SIZE;
	ret = sys_bitarray_alloc(&virt_region_bitmap, num_bits, &offset);
	if (ret != 0) {
		LOG_ERR("insufficient virtual address space (requested %zu)",
			size);
		return NULL;
	}

	/* Remember that bit #0 in bitmap corresponds to the highest
	 * virtual address. So here we need to go downwards (backwards?)
	 * to get the starting address of the allocated region.
	 */
	dest_addr = virt_from_bitmap_offset(offset, alloc_size);

	if (alloc_size > size) {
		uintptr_t aligned_dest_addr = ROUND_UP(dest_addr, align);

		/* Here is the memory organization when trying to get an aligned
		 * virtual address:
		 *
		 * +--------------+ <- K_MEM_VIRT_RAM_START
		 * | Undefined VM |
		 * +--------------+ <- K_MEM_KERNEL_VIRT_START (often == K_MEM_VIRT_RAM_START)
		 * | Mapping for  |
		 * | main kernel  |
		 * | image        |
		 * |              |
		 * |              |
		 * +--------------+ <- K_MEM_VM_FREE_START
		 * | ...          |
		 * +==============+ <- dest_addr
		 * | Unused       |
		 * |..............| <- aligned_dest_addr
		 * |              |
		 * | Aligned      |
		 * | Mapping      |
		 * |              |
		 * |..............| <- aligned_dest_addr + size
		 * | Unused       |
		 * +==============+ <- offset from K_MEM_VIRT_RAM_END == dest_addr + alloc_size
		 * | ...          |
		 * +--------------+
		 * | Mapping      |
		 * +--------------+
		 * | Reserved     |
		 * +--------------+ <- K_MEM_VIRT_RAM_END
		 */

		/* Free the two unused regions */
		virt_region_free(UINT_TO_POINTER(dest_addr),
				 aligned_dest_addr - dest_addr);
		if (((dest_addr + alloc_size) - (aligned_dest_addr + size)) > 0) {
			virt_region_free(UINT_TO_POINTER(aligned_dest_addr + size),
					 (dest_addr + alloc_size) - (aligned_dest_addr + size));
		}

		dest_addr = aligned_dest_addr;
	}

	/* Need to make sure this does not step into kernel memory */
	if (dest_addr < POINTER_TO_UINT(Z_VIRT_REGION_START_ADDR)) {
		(void)sys_bitarray_free(&virt_region_bitmap, size, offset);
		return NULL;
	}

	return UINT_TO_POINTER(dest_addr);
}
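
/*
 * Illustrative sketch (hypothetical values): a request for two pages with a
 * 64 KiB alignment, e.g.
 *
 *	void *va = virt_region_alloc(2 * CONFIG_MMU_PAGE_SIZE, KB(64));
 *
 * over-allocates enough pages to guarantee a 64 KiB-aligned address inside
 * the allocated chunk, then returns the excess head/tail pages to the bitmap
 * exactly as shown in the diagram above.
 */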

/*
 * Free page frames management
 *
 * Call all of these functions with z_mm_lock held.
 */

/* Linked list of unused and available page frames.
 *
 * TODO: This is very simple and treats all free page frames as being equal.
 * However, there are use-cases to consolidate free pages such that entire
 * SRAM banks can be switched off to save power, and so obtaining free pages
 * may require a more complex ontology which prefers page frames in RAM banks
 * which are still active.
 *
 * This implies in the future there may be multiple slists managing physical
 * pages. Each page frame will still just have one snode link.
 */
static sys_sflist_t free_page_frame_list;

/* Number of unused and available free page frames.
 * This information may go stale immediately.
 */
static size_t z_free_page_count;

#define PF_ASSERT(pf, expr, fmt, ...) \
	__ASSERT(expr, "page frame 0x%lx: " fmt, k_mem_page_frame_to_phys(pf), \
		 ##__VA_ARGS__)

/* Get an unused page frame; we don't care which one. Returns NULL if there are none. */
static struct k_mem_page_frame *free_page_frame_list_get(void)
{
	sys_sfnode_t *node;
	struct k_mem_page_frame *pf = NULL;

	node = sys_sflist_get(&free_page_frame_list);
	if (node != NULL) {
		z_free_page_count--;
		pf = CONTAINER_OF(node, struct k_mem_page_frame, node);
		PF_ASSERT(pf, k_mem_page_frame_is_free(pf),
			  "on free list but not free");
		pf->va_and_flags = 0;
	}

	return pf;
}

/* Release a page frame back into the list of free pages */
static void free_page_frame_list_put(struct k_mem_page_frame *pf)
{
	PF_ASSERT(pf, k_mem_page_frame_is_available(pf),
		  "unavailable page put on free list");

	sys_sfnode_init(&pf->node, K_MEM_PAGE_FRAME_FREE);
	sys_sflist_append(&free_page_frame_list, &pf->node);
	z_free_page_count++;
}

static void free_page_frame_list_init(void)
{
	sys_sflist_init(&free_page_frame_list);
}

static void page_frame_free_locked(struct k_mem_page_frame *pf)
{
	pf->va_and_flags = 0;
	free_page_frame_list_put(pf);
}

/*
 * Memory Mapping
 */

/* Called after the frame is mapped in the arch layer, to update our
 * local ontology (and do some assertions while we're at it)
 */
static void frame_mapped_set(struct k_mem_page_frame *pf, void *addr)
{
	PF_ASSERT(pf, !k_mem_page_frame_is_free(pf),
		  "attempted to map a page frame on the free list");
	PF_ASSERT(pf, !k_mem_page_frame_is_reserved(pf),
		  "attempted to map a reserved page frame");

	/* We do allow multiple mappings for pinned page frames
	 * since we will never need to reverse map them.
	 * This is uncommon; use-cases are for things like the
	 * Zephyr equivalent of vDSOs
	 */
	PF_ASSERT(pf, !k_mem_page_frame_is_mapped(pf) || k_mem_page_frame_is_pinned(pf),
		  "non-pinned and already mapped to %p",
		  k_mem_page_frame_to_virt(pf));

	uintptr_t flags_mask = CONFIG_MMU_PAGE_SIZE - 1;
	uintptr_t va = (uintptr_t)addr & ~flags_mask;

	pf->va_and_flags &= flags_mask;
	pf->va_and_flags |= va | K_MEM_PAGE_FRAME_MAPPED;
}

/* LCOV_EXCL_START */
/* Go through page frames to find the physical address mapped
 * by a virtual address.
 *
 * @param[in] virt Virtual Address
 * @param[out] phys Physical address mapped to the input virtual address
 *                  if such mapping exists.
 *
 * @retval 0 if mapping is found and valid
 * @retval -EFAULT if virtual address is not mapped
 */
static int virt_to_page_frame(void *virt, uintptr_t *phys)
{
	uintptr_t paddr;
	struct k_mem_page_frame *pf;
	int ret = -EFAULT;

	K_MEM_PAGE_FRAME_FOREACH(paddr, pf) {
		if (k_mem_page_frame_is_mapped(pf)) {
			if (virt == k_mem_page_frame_to_virt(pf)) {
				ret = 0;
				if (phys != NULL) {
					*phys = k_mem_page_frame_to_phys(pf);
				}
				break;
			}
		}
	}

	return ret;
}
/* LCOV_EXCL_STOP */

__weak FUNC_ALIAS(virt_to_page_frame, arch_page_phys_get, int);

#ifdef CONFIG_DEMAND_PAGING
static int page_frame_prepare_locked(struct k_mem_page_frame *pf, bool *dirty_ptr,
				     bool page_in, uintptr_t *location_ptr);

static inline void do_backing_store_page_in(uintptr_t location);
static inline void do_backing_store_page_out(uintptr_t location);
#endif /* CONFIG_DEMAND_PAGING */

/* Allocate a free page frame, and map it to a specified virtual address
 *
 * TODO: Add optional support for copy-on-write mappings to a zero page instead
 * of allocating, in which case page frames will be allocated lazily as
 * the mappings to the zero page get touched. This will avoid expensive
 * page-ins as memory is mapped and physical RAM or backing store storage will
 * not be used if the mapped memory is unused. The cost is an empty physical
 * page of zeroes.
 */
static int map_anon_page(void *addr, uint32_t flags)
{
	struct k_mem_page_frame *pf;
	uintptr_t phys;
	bool lock = (flags & K_MEM_MAP_LOCK) != 0U;

	pf = free_page_frame_list_get();
	if (pf == NULL) {
#ifdef CONFIG_DEMAND_PAGING
		uintptr_t location;
		bool dirty;
		int ret;

		pf = k_mem_paging_eviction_select(&dirty);
		__ASSERT(pf != NULL, "failed to get a page frame");
		LOG_DBG("evicting %p at 0x%lx",
			k_mem_page_frame_to_virt(pf),
			k_mem_page_frame_to_phys(pf));
		ret = page_frame_prepare_locked(pf, &dirty, false, &location);
		if (ret != 0) {
			return -ENOMEM;
		}
		if (dirty) {
			do_backing_store_page_out(location);
		}
		pf->va_and_flags = 0;
#else
		return -ENOMEM;
#endif /* CONFIG_DEMAND_PAGING */
	}

	phys = k_mem_page_frame_to_phys(pf);
	arch_mem_map(addr, phys, CONFIG_MMU_PAGE_SIZE, flags);

	if (lock) {
		k_mem_page_frame_set(pf, K_MEM_PAGE_FRAME_PINNED);
	}
	frame_mapped_set(pf, addr);
#ifdef CONFIG_DEMAND_PAGING
	if (!lock) {
		k_mem_paging_eviction_add(pf);
	}
#endif

	LOG_DBG("memory mapping anon page %p -> 0x%lx", addr, phys);

	return 0;
}

void *k_mem_map_phys_guard(uintptr_t phys, size_t size, uint32_t flags, bool is_anon)
{
	uint8_t *dst;
	size_t total_size;
	int ret;
	k_spinlock_key_t key;
	uint8_t *pos;
	bool uninit = (flags & K_MEM_MAP_UNINIT) != 0U;

	__ASSERT(!is_anon || (is_anon && page_frames_initialized),
		 "%s called too early", __func__);
	__ASSERT((flags & K_MEM_CACHE_MASK) == 0U,
		 "%s does not support explicit cache settings", __func__);

	if (((flags & K_MEM_PERM_USER) != 0U) &&
	    ((flags & K_MEM_MAP_UNINIT) != 0U)) {
		LOG_ERR("user access to anonymous uninitialized pages is forbidden");
		return NULL;
	}
	if ((size % CONFIG_MMU_PAGE_SIZE) != 0U) {
		LOG_ERR("unaligned size %zu passed to %s", size, __func__);
		return NULL;
	}
	if (size == 0) {
		LOG_ERR("zero sized memory mapping");
		return NULL;
	}

	/* Need extra for the guard pages (before and after) which we
	 * won't map.
	 */
	if (size_add_overflow(size, CONFIG_MMU_PAGE_SIZE * 2, &total_size)) {
		LOG_ERR("too large size %zu passed to %s", size, __func__);
		return NULL;
	}

	key = k_spin_lock(&z_mm_lock);

	dst = virt_region_alloc(total_size, CONFIG_MMU_PAGE_SIZE);
	if (dst == NULL) {
		/* Address space has no free region */
		goto out;
	}

	/* Unmap both guard pages to make sure accessing them
	 * will generate fault.
	 */
	arch_mem_unmap(dst, CONFIG_MMU_PAGE_SIZE);
	arch_mem_unmap(dst + CONFIG_MMU_PAGE_SIZE + size,
		       CONFIG_MMU_PAGE_SIZE);

	/* Skip over the "before" guard page in returned address. */
	dst += CONFIG_MMU_PAGE_SIZE;

	if (is_anon) {
		/* Mapping from anonymous memory */
		flags |= K_MEM_CACHE_WB;
#ifdef CONFIG_DEMAND_MAPPING
		if ((flags & K_MEM_MAP_LOCK) == 0) {
			flags |= K_MEM_MAP_UNPAGED;
			VIRT_FOREACH(dst, size, pos) {
				arch_mem_map(pos,
					     uninit ? ARCH_UNPAGED_ANON_UNINIT
						    : ARCH_UNPAGED_ANON_ZERO,
					     CONFIG_MMU_PAGE_SIZE, flags);
			}
			LOG_DBG("memory mapping anon pages %p to %p unpaged", dst, pos-1);
			/* skip the memset() below */
			uninit = true;
		} else
#endif
		{
			VIRT_FOREACH(dst, size, pos) {
				ret = map_anon_page(pos, flags);

				if (ret != 0) {
					/* TODO:
					 * call k_mem_unmap(dst, pos - dst)
					 * when implemented in #28990 and
					 * release any guard virtual page as well.
					 */
					dst = NULL;
					goto out;
				}
			}
		}
	} else {
		/* Mapping known physical memory.
		 *
		 * arch_mem_map() is a void function and does not return
		 * anything. Arch code usually uses ASSERT() to catch
		 * mapping errors. Assume this works correctly for now.
		 */
		arch_mem_map(dst, phys, size, flags);
	}

out:
	k_spin_unlock(&z_mm_lock, key);

	if (dst != NULL && !uninit) {
		/* If we later implement mappings to a copy-on-write
		 * zero page, won't need this step
		 */
		memset(dst, 0, size);
	}

	return dst;
}
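
/*
 * Minimal usage sketch for the anonymous-mapping path above (illustrative,
 * assuming the public k_mem_map()/k_mem_unmap() wrappers that funnel into
 * k_mem_map_phys_guard()/k_mem_unmap_phys_guard() with is_anon == true):
 *
 *	uint8_t *buf = k_mem_map(4 * CONFIG_MMU_PAGE_SIZE, K_MEM_PERM_RW);
 *
 *	if (buf != NULL) {
 *		... use buf[0] .. buf[4 * CONFIG_MMU_PAGE_SIZE - 1] ...
 *		k_mem_unmap(buf, 4 * CONFIG_MMU_PAGE_SIZE);
 *	}
 *
 * The pages immediately before and after buf stay unmapped on purpose, so
 * out-of-bounds accesses fault instead of corrupting neighboring mappings.
 */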

void k_mem_unmap_phys_guard(void *addr, size_t size, bool is_anon)
{
	uintptr_t phys;
	uint8_t *pos;
	struct k_mem_page_frame *pf;
	k_spinlock_key_t key;
	size_t total_size;
	int ret;

	/* Need space for the "before" guard page */
	__ASSERT_NO_MSG(POINTER_TO_UINT(addr) >= CONFIG_MMU_PAGE_SIZE);

	/* Make sure address range is still valid after accounting
	 * for two guard pages.
	 */
	pos = (uint8_t *)addr - CONFIG_MMU_PAGE_SIZE;
	k_mem_assert_virtual_region(pos, size + (CONFIG_MMU_PAGE_SIZE * 2));

	key = k_spin_lock(&z_mm_lock);

	/* Check if both guard pages are unmapped.
	 * Bail if not, as this is probably a region not mapped
	 * using k_mem_map().
	 */
	pos = addr;
	ret = arch_page_phys_get(pos - CONFIG_MMU_PAGE_SIZE, NULL);
	if (ret == 0) {
		__ASSERT(ret == 0,
			 "%s: cannot find preceding guard page for (%p, %zu)",
			 __func__, addr, size);
		goto out;
	}

	ret = arch_page_phys_get(pos + size, NULL);
	if (ret == 0) {
		__ASSERT(ret == 0,
			 "%s: cannot find succeeding guard page for (%p, %zu)",
			 __func__, addr, size);
		goto out;
	}

	if (is_anon) {
		/* Unmapping anonymous memory */
		VIRT_FOREACH(addr, size, pos) {
#ifdef CONFIG_DEMAND_PAGING
			enum arch_page_location status;
			uintptr_t location;

			status = arch_page_location_get(pos, &location);
			switch (status) {
			case ARCH_PAGE_LOCATION_PAGED_OUT:
				/*
				 * No pf is associated with this mapping.
				 * Simply get rid of the MMU entry and free
				 * corresponding backing store.
				 */
				arch_mem_unmap(pos, CONFIG_MMU_PAGE_SIZE);
				k_mem_paging_backing_store_location_free(location);
				continue;
			case ARCH_PAGE_LOCATION_PAGED_IN:
				/*
				 * The page is in memory, but it may not be
				 * accessible right now (access can be withheld
				 * to track the ARCH_DATA_PAGE_ACCESSED flag),
				 * meaning arch_page_phys_get() could fail.
				 * Still, we know the actual phys address.
				 */
				phys = location;
				ret = 0;
				break;
			default:
				ret = arch_page_phys_get(pos, &phys);
				break;
			}
#else
			ret = arch_page_phys_get(pos, &phys);
#endif
			__ASSERT(ret == 0,
				 "%s: cannot unmap an unmapped address %p",
				 __func__, pos);
			if (ret != 0) {
				/* Found an address not mapped. Do not continue. */
				goto out;
			}

			__ASSERT(k_mem_is_page_frame(phys),
				 "%s: 0x%lx is not a page frame", __func__, phys);
			if (!k_mem_is_page_frame(phys)) {
				/* Physical address has no corresponding page frame
				 * description in the page frame array.
				 * This should not happen. Do not continue.
				 */
				goto out;
			}

			/* Grab the corresponding page frame from physical address */
			pf = k_mem_phys_to_page_frame(phys);

			__ASSERT(k_mem_page_frame_is_mapped(pf),
				 "%s: 0x%lx is not a mapped page frame", __func__, phys);
			if (!k_mem_page_frame_is_mapped(pf)) {
				/* Page frame is not marked mapped.
				 * This should not happen. Do not continue.
				 */
				goto out;
			}

			arch_mem_unmap(pos, CONFIG_MMU_PAGE_SIZE);
#ifdef CONFIG_DEMAND_PAGING
			if (!k_mem_page_frame_is_pinned(pf)) {
				k_mem_paging_eviction_remove(pf);
			}
#endif

			/* Put the page frame back into free list */
			page_frame_free_locked(pf);
		}
	} else {
		/*
		 * Unmapping previously mapped memory with a specific physical address.
		 *
		 * Note that we don't have to unmap the guard pages, as they should
		 * have been unmapped. We just need to unmap the in-between
		 * region [addr, (addr + size)).
		 */
		arch_mem_unmap(addr, size);
	}

	/* There are guard pages just before and after the mapped
	 * region. So we also need to free them from the bitmap.
	 */
	pos = (uint8_t *)addr - CONFIG_MMU_PAGE_SIZE;
	total_size = size + (CONFIG_MMU_PAGE_SIZE * 2);
	virt_region_free(pos, total_size);

out:
	k_spin_unlock(&z_mm_lock, key);
}

int k_mem_update_flags(void *addr, size_t size, uint32_t flags)
{
	uintptr_t phys;
	k_spinlock_key_t key;
	int ret;

	k_mem_assert_virtual_region(addr, size);

	key = k_spin_lock(&z_mm_lock);

	/*
	 * We can achieve the desired result without explicit architecture support
	 * by unmapping and remapping the same physical memory using new flags.
	 */

	ret = arch_page_phys_get(addr, &phys);
	if (ret < 0) {
		goto out;
	}

	/* TODO: detect and handle paged-out memory as well */

	arch_mem_unmap(addr, size);
	arch_mem_map(addr, phys, size, flags);

out:
	k_spin_unlock(&z_mm_lock, key);
	return ret;
}
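
/*
 * Illustrative sketch of the remap-with-new-flags approach above
 * (hypothetical region; assumes the mapping is currently paged in):
 *
 *	extern uint8_t *table_va;   // a page-aligned, page-sized mapping
 *
 *	int rc = k_mem_update_flags(table_va, CONFIG_MMU_PAGE_SIZE,
 *				    K_MEM_PERM_RW | K_MEM_CACHE_WB);
 *
 * On success the same physical pages stay in place; only the page table
 * attributes change.
 */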

size_t k_mem_free_get(void)
{
	size_t ret;
	k_spinlock_key_t key;

	__ASSERT(page_frames_initialized, "%s called too early", __func__);

	key = k_spin_lock(&z_mm_lock);
#ifdef CONFIG_DEMAND_PAGING
	if (z_free_page_count > CONFIG_DEMAND_PAGING_PAGE_FRAMES_RESERVE) {
		ret = z_free_page_count - CONFIG_DEMAND_PAGING_PAGE_FRAMES_RESERVE;
	} else {
		ret = 0;
	}
#else
	ret = z_free_page_count;
#endif /* CONFIG_DEMAND_PAGING */
	k_spin_unlock(&z_mm_lock, key);

	return ret * (size_t)CONFIG_MMU_PAGE_SIZE;
}
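
/*
 * Illustrative use (not part of this file): callers typically size anonymous
 * mappings against this figure, e.g.
 *
 *	size_t avail = k_mem_free_get();
 *
 *	if (avail >= 16 * CONFIG_MMU_PAGE_SIZE) {
 *		... a 16-page k_mem_map() request is likely to succeed ...
 *	}
 *
 * keeping in mind the count can go stale as soon as the lock is dropped.
 */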

/* Get the default virtual region alignment, here the default MMU page size
 *
 * @param[in] phys Physical address of region to be mapped, aligned to MMU_PAGE_SIZE
 * @param[in] size Size of region to be mapped, aligned to MMU_PAGE_SIZE
 *
 * @retval alignment to apply on the virtual address of this region
 */
static size_t virt_region_align(uintptr_t phys, size_t size)
{
	ARG_UNUSED(phys);
	ARG_UNUSED(size);

	return CONFIG_MMU_PAGE_SIZE;
}

__weak FUNC_ALIAS(virt_region_align, arch_virt_region_align, size_t);
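
/*
 * Architectures may provide a strong arch_virt_region_align() to request a
 * larger virtual alignment, for example so that phys and virt share the same
 * offset within a large page and bigger hardware mappings can be used.
 * A hypothetical override could look like:
 *
 *	size_t arch_virt_region_align(uintptr_t phys, size_t size)
 *	{
 *		return (size >= MB(2)) ? MB(2) : CONFIG_MMU_PAGE_SIZE;
 *	}
 *
 * (illustrative only; actual arch implementations live under arch/.)
 */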

/* This may be called from arch early boot code before z_cstart() is invoked.
 * Data will be copied and BSS zeroed, but this must not rely on any
 * initialization functions having been called beforehand in order to work
 * correctly.
 */
void k_mem_map_phys_bare(uint8_t **virt_ptr, uintptr_t phys, size_t size, uint32_t flags)
{
	uintptr_t aligned_phys, addr_offset;
	size_t aligned_size, align_boundary;
	k_spinlock_key_t key;
	uint8_t *dest_addr;
	size_t num_bits;
	size_t offset;

#ifndef CONFIG_KERNEL_DIRECT_MAP
	__ASSERT(!(flags & K_MEM_DIRECT_MAP), "The direct-map is not enabled");
#endif /* CONFIG_KERNEL_DIRECT_MAP */
	addr_offset = k_mem_region_align(&aligned_phys, &aligned_size,
					 phys, size,
					 CONFIG_MMU_PAGE_SIZE);
	__ASSERT(aligned_size != 0U, "0-length mapping at 0x%lx", aligned_phys);
	__ASSERT(aligned_phys < (aligned_phys + (aligned_size - 1)),
		 "wraparound for physical address 0x%lx (size %zu)",
		 aligned_phys, aligned_size);

	align_boundary = arch_virt_region_align(aligned_phys, aligned_size);

	key = k_spin_lock(&z_mm_lock);

	if (IS_ENABLED(CONFIG_KERNEL_DIRECT_MAP) &&
	    (flags & K_MEM_DIRECT_MAP)) {
		dest_addr = (uint8_t *)aligned_phys;

		/* Mark the region of virtual memory bitmap as used
		 * if the region overlaps the virtual memory space.
		 *
		 * Basically if either end of region is within
		 * virtual memory space, we need to mark the bits.
		 */

		if (IN_RANGE(aligned_phys,
			     (uintptr_t)K_MEM_VIRT_RAM_START,
			     (uintptr_t)(K_MEM_VIRT_RAM_END - 1)) ||
		    IN_RANGE(aligned_phys + aligned_size - 1,
			     (uintptr_t)K_MEM_VIRT_RAM_START,
			     (uintptr_t)(K_MEM_VIRT_RAM_END - 1))) {
			uint8_t *adjusted_start = MAX(dest_addr, K_MEM_VIRT_RAM_START);
			uint8_t *adjusted_end = MIN(dest_addr + aligned_size,
						    K_MEM_VIRT_RAM_END);
			size_t adjusted_sz = adjusted_end - adjusted_start;

			num_bits = adjusted_sz / CONFIG_MMU_PAGE_SIZE;
			offset = virt_to_bitmap_offset(adjusted_start, adjusted_sz);
			if (sys_bitarray_test_and_set_region(
				&virt_region_bitmap, num_bits, offset, true)) {
				goto fail;
			}
		}
	} else {
		/* Obtain an appropriately sized chunk of virtual memory */
		dest_addr = virt_region_alloc(aligned_size, align_boundary);
		if (!dest_addr) {
			goto fail;
		}
	}

	/* If this fails there's something amiss with virt_region_alloc() */
	__ASSERT((uintptr_t)dest_addr <
		 ((uintptr_t)dest_addr + (size - 1)),
		 "wraparound for virtual address %p (size %zu)",
		 dest_addr, size);

	LOG_DBG("arch_mem_map(%p, 0x%lx, %zu, %x) offset %lu", dest_addr,
		aligned_phys, aligned_size, flags, addr_offset);

	arch_mem_map(dest_addr, aligned_phys, aligned_size, flags);
	k_spin_unlock(&z_mm_lock, key);

	*virt_ptr = dest_addr + addr_offset;
	return;
fail:
	/* May re-visit this in the future, but for now running out of
	 * virtual address space or failing the arch_mem_map() call is
	 * an unrecoverable situation.
	 *
	 * Other problems not related to resource exhaustion we leave as
	 * assertions since they are clearly programming mistakes.
	 */
	LOG_ERR("memory mapping 0x%lx (size %zu, flags 0x%x) failed",
		phys, size, flags);
	k_panic();
}

void k_mem_unmap_phys_bare(uint8_t *virt, size_t size)
{
	uintptr_t aligned_virt, addr_offset;
	size_t aligned_size;
	k_spinlock_key_t key;

	addr_offset = k_mem_region_align(&aligned_virt, &aligned_size,
					 POINTER_TO_UINT(virt), size,
					 CONFIG_MMU_PAGE_SIZE);
	__ASSERT(aligned_size != 0U, "0-length mapping at 0x%lx", aligned_virt);
	__ASSERT(aligned_virt < (aligned_virt + (aligned_size - 1)),
		 "wraparound for virtual address 0x%lx (size %zu)",
		 aligned_virt, aligned_size);

	key = k_spin_lock(&z_mm_lock);

	LOG_DBG("arch_mem_unmap(0x%lx, %zu) offset %lu",
		aligned_virt, aligned_size, addr_offset);

	arch_mem_unmap(UINT_TO_POINTER(aligned_virt), aligned_size);
	virt_region_free(UINT_TO_POINTER(aligned_virt), aligned_size);
	k_spin_unlock(&z_mm_lock, key);
}

/*
 * Miscellaneous
 */

size_t k_mem_region_align(uintptr_t *aligned_addr, size_t *aligned_size,
			  uintptr_t addr, size_t size, size_t align)
{
	size_t addr_offset;

	/* The actual mapped region must be page-aligned. Round down the
	 * physical address and pad the region size appropriately
	 */
	*aligned_addr = ROUND_DOWN(addr, align);
	addr_offset = addr - *aligned_addr;
	*aligned_size = ROUND_UP(size + addr_offset, align);

	return addr_offset;
}
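
/*
 * Worked example (illustrative numbers, assuming a 4096-byte alignment):
 * k_mem_region_align(&a, &s, 0x10001234, 0x100, 0x1000) yields
 * a == 0x10001000, s == 0x1000 and returns 0x234, so the caller maps the
 * whole enclosing page and adds the returned offset back to the virtual
 * address it hands out.
 */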
1020
Daniel Leunge88afd22021-07-15 13:15:29 -07001021#if defined(CONFIG_LINKER_USE_BOOT_SECTION) || defined(CONFIG_LINKER_USE_PINNED_SECTION)
1022static void mark_linker_section_pinned(void *start_addr, void *end_addr,
1023 bool pin)
1024{
Daniel Leung54af5dd2024-06-07 10:55:06 -07001025 struct k_mem_page_frame *pf;
Daniel Leunge88afd22021-07-15 13:15:29 -07001026 uint8_t *addr;
1027
1028 uintptr_t pinned_start = ROUND_DOWN(POINTER_TO_UINT(start_addr),
1029 CONFIG_MMU_PAGE_SIZE);
1030 uintptr_t pinned_end = ROUND_UP(POINTER_TO_UINT(end_addr),
1031 CONFIG_MMU_PAGE_SIZE);
1032 size_t pinned_size = pinned_end - pinned_start;
1033
1034 VIRT_FOREACH(UINT_TO_POINTER(pinned_start), pinned_size, addr)
1035 {
Daniel Leung54af5dd2024-06-07 10:55:06 -07001036 pf = k_mem_phys_to_page_frame(K_MEM_BOOT_VIRT_TO_PHYS(addr));
Daniel Leunge88afd22021-07-15 13:15:29 -07001037 frame_mapped_set(pf, addr);
1038
1039 if (pin) {
Daniel Leung54af5dd2024-06-07 10:55:06 -07001040 k_mem_page_frame_set(pf, K_MEM_PAGE_FRAME_PINNED);
Daniel Leunge88afd22021-07-15 13:15:29 -07001041 } else {
Daniel Leung54af5dd2024-06-07 10:55:06 -07001042 k_mem_page_frame_clear(pf, K_MEM_PAGE_FRAME_PINNED);
Nicolas Pitre6a3aa3b2024-06-04 23:27:37 -04001043#ifdef CONFIG_DEMAND_PAGING
1044 if (k_mem_page_frame_is_evictable(pf)) {
1045 k_mem_paging_eviction_add(pf);
1046 }
1047#endif
Daniel Leunge88afd22021-07-15 13:15:29 -07001048 }
1049 }
1050}
1051#endif /* CONFIG_LINKER_USE_BOOT_SECTION) || CONFIG_LINKER_USE_PINNED_SECTION */
1052
Nicolas Pitre1e4fd232024-07-30 14:58:17 -04001053#ifdef CONFIG_LINKER_USE_ONDEMAND_SECTION
1054static void z_paging_ondemand_section_map(void)
1055{
1056 uint8_t *addr;
1057 size_t size;
1058 uintptr_t location;
1059 uint32_t flags;
1060
1061 size = (uintptr_t)lnkr_ondemand_text_size;
1062 flags = K_MEM_MAP_UNPAGED | K_MEM_PERM_EXEC | K_MEM_CACHE_WB;
1063 VIRT_FOREACH(lnkr_ondemand_text_start, size, addr) {
1064 k_mem_paging_backing_store_location_query(addr, &location);
1065 arch_mem_map(addr, location, CONFIG_MMU_PAGE_SIZE, flags);
1066 sys_bitarray_set_region(&virt_region_bitmap, 1,
1067 virt_to_bitmap_offset(addr, CONFIG_MMU_PAGE_SIZE));
1068 }
1069
1070 size = (uintptr_t)lnkr_ondemand_rodata_size;
1071 flags = K_MEM_MAP_UNPAGED | K_MEM_CACHE_WB;
1072 VIRT_FOREACH(lnkr_ondemand_rodata_start, size, addr) {
1073 k_mem_paging_backing_store_location_query(addr, &location);
1074 arch_mem_map(addr, location, CONFIG_MMU_PAGE_SIZE, flags);
1075 sys_bitarray_set_region(&virt_region_bitmap, 1,
1076 virt_to_bitmap_offset(addr, CONFIG_MMU_PAGE_SIZE));
1077 }
1078}
1079#endif /* CONFIG_LINKER_USE_ONDEMAND_SECTION */
1080
Andrew Boiee35f1792020-12-09 12:18:40 -08001081void z_mem_manage_init(void)
Andrew Boie06cf6d22020-06-26 16:17:00 -07001082{
Andrew Boiee35f1792020-12-09 12:18:40 -08001083 uintptr_t phys;
1084 uint8_t *addr;
Daniel Leung54af5dd2024-06-07 10:55:06 -07001085 struct k_mem_page_frame *pf;
Andrew Boiee35f1792020-12-09 12:18:40 -08001086 k_spinlock_key_t key = k_spin_lock(&z_mm_lock);
Andrew Boie06cf6d22020-06-26 16:17:00 -07001087
Andrew Boiee35f1792020-12-09 12:18:40 -08001088 free_page_frame_list_init();
Andrew Boie06cf6d22020-06-26 16:17:00 -07001089
Daniel Leunge88afd22021-07-15 13:15:29 -07001090 ARG_UNUSED(addr);
1091
Andrew Boiee35f1792020-12-09 12:18:40 -08001092#ifdef CONFIG_ARCH_HAS_RESERVED_PAGE_FRAMES
1093 /* If some page frames are unavailable for use as memory, arch
Daniel Leung54af5dd2024-06-07 10:55:06 -07001094 * code will mark K_MEM_PAGE_FRAME_RESERVED in their flags
Andrew Boie06cf6d22020-06-26 16:17:00 -07001095 */
Andrew Boiee35f1792020-12-09 12:18:40 -08001096 arch_reserved_pages_update();
1097#endif /* CONFIG_ARCH_HAS_RESERVED_PAGE_FRAMES */
1098
Daniel Leunge88afd22021-07-15 13:15:29 -07001099#ifdef CONFIG_LINKER_GENERIC_SECTIONS_PRESENT_AT_BOOT
Andrew Boiee35f1792020-12-09 12:18:40 -08001100 /* All pages composing the Zephyr image are mapped at boot in a
1101 * predictable way. This can change at runtime.
1102 */
Daniel Leungb2784c92024-06-07 09:48:08 -07001103 VIRT_FOREACH(K_MEM_KERNEL_VIRT_START, K_MEM_KERNEL_VIRT_SIZE, addr)
Andrew Boiee35f1792020-12-09 12:18:40 -08001104 {
Daniel Leung54af5dd2024-06-07 10:55:06 -07001105 pf = k_mem_phys_to_page_frame(K_MEM_BOOT_VIRT_TO_PHYS(addr));
Andrew Boie611b6262020-12-18 16:06:09 -08001106 frame_mapped_set(pf, addr);
1107
1108 /* TODO: for now we pin the whole Zephyr image. Demand paging
1109 * currently tested with anonymously-mapped pages which are not
1110 * pinned.
1111 *
1112 * We will need to setup linker regions for a subset of kernel
1113 * code/data pages which are pinned in memory and
1114 * may not be evicted. This will contain critical CPU data
1115 * structures, and any code used to perform page fault
1116 * handling, page-ins, etc.
1117 */
Daniel Leung54af5dd2024-06-07 10:55:06 -07001118 k_mem_page_frame_set(pf, K_MEM_PAGE_FRAME_PINNED);
Andrew Boiee4334942020-07-15 14:56:24 -07001119 }
Daniel Leunge88afd22021-07-15 13:15:29 -07001120#endif /* CONFIG_LINKER_GENERIC_SECTIONS_PRESENT_AT_BOOT */
1121
1122#ifdef CONFIG_LINKER_USE_BOOT_SECTION
1123 /* Pin the boot section to prevent it from being swapped out during
1124 * boot process. Will be un-pinned once boot process completes.
1125 */
1126 mark_linker_section_pinned(lnkr_boot_start, lnkr_boot_end, true);
Simon Heinbcd1d192024-03-08 12:00:10 +01001127#endif /* CONFIG_LINKER_USE_BOOT_SECTION */
Andrew Boie06cf6d22020-06-26 16:17:00 -07001128
Daniel Leung1310ad62021-02-23 13:33:38 -08001129#ifdef CONFIG_LINKER_USE_PINNED_SECTION
1130 /* Pin the page frames correspondng to the pinned symbols */
Daniel Leunge88afd22021-07-15 13:15:29 -07001131 mark_linker_section_pinned(lnkr_pinned_start, lnkr_pinned_end, true);
Simon Heinbcd1d192024-03-08 12:00:10 +01001132#endif /* CONFIG_LINKER_USE_PINNED_SECTION */
Daniel Leung1310ad62021-02-23 13:33:38 -08001133
Andrew Boiee35f1792020-12-09 12:18:40 -08001134 /* Any remaining pages that aren't mapped, reserved, or pinned get
1135 * added to the free pages list
1136 */
Daniel Leung54af5dd2024-06-07 10:55:06 -07001137 K_MEM_PAGE_FRAME_FOREACH(phys, pf) {
1138 if (k_mem_page_frame_is_available(pf)) {
Andrew Boiee35f1792020-12-09 12:18:40 -08001139 free_page_frame_list_put(pf);
1140 }
Andrew Boie06cf6d22020-06-26 16:17:00 -07001141 }
Andrew Boiee35f1792020-12-09 12:18:40 -08001142 LOG_DBG("free page frames: %zu", z_free_page_count);
Andrew Boiea5cb8782020-12-10 13:02:24 -08001143
1144#ifdef CONFIG_DEMAND_PAGING
Daniel Leung8eea5112021-03-30 14:38:00 -07001145#ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
1146 z_paging_histogram_init();
Simon Heinbcd1d192024-03-08 12:00:10 +01001147#endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */
Daniel Leungdfa4b7e2021-05-13 11:57:54 -07001148 k_mem_paging_backing_store_init();
Daniel Leung31c362d2021-05-13 11:02:56 -07001149 k_mem_paging_eviction_init();
Nicolas Pitre6a3aa3b2024-06-04 23:27:37 -04001150 /* start tracking evictable page installed above if any */
1151 K_MEM_PAGE_FRAME_FOREACH(phys, pf) {
1152 if (k_mem_page_frame_is_evictable(pf)) {
1153 k_mem_paging_eviction_add(pf);
1154 }
1155 }
Simon Heinbcd1d192024-03-08 12:00:10 +01001156#endif /* CONFIG_DEMAND_PAGING */
Nicolas Pitre1e4fd232024-07-30 14:58:17 -04001157
1158#ifdef CONFIG_LINKER_USE_ONDEMAND_SECTION
1159 z_paging_ondemand_section_map();
1160#endif
1161
Andrew Boiee35f1792020-12-09 12:18:40 -08001162#if __ASSERT_ON
1163 page_frames_initialized = true;
1164#endif
1165 k_spin_unlock(&z_mm_lock, key);
Daniel Leungf32ea442021-07-12 11:17:56 -07001166
1167#ifndef CONFIG_LINKER_GENERIC_SECTIONS_PRESENT_AT_BOOT
1168 /* If BSS section is not present in memory at boot,
1169 * it would not have been cleared. This needs to be
1170 * done now since paging mechanism has been initialized
1171 * and the BSS pages can be brought into physical
1172 * memory to be cleared.
1173 */
1174 z_bss_zero();
Simon Heinbcd1d192024-03-08 12:00:10 +01001175#endif /* CONFIG_LINKER_GENERIC_SECTIONS_PRESENT_AT_BOOT */
Andrew Boie06cf6d22020-06-26 16:17:00 -07001176}
Andrew Boiea5cb8782020-12-10 13:02:24 -08001177
Daniel Leunge88afd22021-07-15 13:15:29 -07001178void z_mem_manage_boot_finish(void)
1179{
1180#ifdef CONFIG_LINKER_USE_BOOT_SECTION
1181 /* At the end of boot process, unpin the boot sections
1182 * as they don't need to be in memory all the time anymore.
1183 */
1184 mark_linker_section_pinned(lnkr_boot_start, lnkr_boot_end, false);
Simon Heinbcd1d192024-03-08 12:00:10 +01001185#endif /* CONFIG_LINKER_USE_BOOT_SECTION */
Daniel Leunge88afd22021-07-15 13:15:29 -07001186}
1187
Andrew Boiea5cb8782020-12-10 13:02:24 -08001188#ifdef CONFIG_DEMAND_PAGING
Daniel Leungae865192021-03-26 12:03:42 -07001189
1190#ifdef CONFIG_DEMAND_PAGING_STATS
1191struct k_mem_paging_stats_t paging_stats;
Daniel Leung8eea5112021-03-30 14:38:00 -07001192extern struct k_mem_paging_histogram_t z_paging_histogram_eviction;
1193extern struct k_mem_paging_histogram_t z_paging_histogram_backing_store_page_in;
1194extern struct k_mem_paging_histogram_t z_paging_histogram_backing_store_page_out;
Simon Heinbcd1d192024-03-08 12:00:10 +01001195#endif /* CONFIG_DEMAND_PAGING_STATS */
Andrew Boie60d30662021-01-14 12:22:18 -08001196
Daniel Leung8eea5112021-03-30 14:38:00 -07001197static inline void do_backing_store_page_in(uintptr_t location)
1198{
Nicolas Pitrec9aa98e2024-07-17 00:06:42 -04001199#ifdef CONFIG_DEMAND_MAPPING
1200 /* Check for special cases */
1201 switch (location) {
1202 case ARCH_UNPAGED_ANON_ZERO:
1203 memset(K_MEM_SCRATCH_PAGE, 0, CONFIG_MMU_PAGE_SIZE);
1204 __fallthrough;
1205 case ARCH_UNPAGED_ANON_UNINIT:
1206 /* nothing else to do */
1207 return;
1208 default:
1209 break;
1210 }
1211#endif /* CONFIG_DEMAND_MAPPING */
1212
Daniel Leung8eea5112021-03-30 14:38:00 -07001213#ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
1214 uint32_t time_diff;
Daniel Leung09e8db32021-03-31 13:56:05 -07001215
1216#ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS
1217 timing_t time_start, time_end;
1218
1219 time_start = timing_counter_get();
1220#else
Daniel Leung8eea5112021-03-30 14:38:00 -07001221 uint32_t time_start;
1222
1223 time_start = k_cycle_get_32();
Daniel Leung09e8db32021-03-31 13:56:05 -07001224#endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */
Daniel Leung8eea5112021-03-30 14:38:00 -07001225#endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */
1226
Daniel Leungdfa4b7e2021-05-13 11:57:54 -07001227 k_mem_paging_backing_store_page_in(location);
Daniel Leung8eea5112021-03-30 14:38:00 -07001228
1229#ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
Daniel Leung09e8db32021-03-31 13:56:05 -07001230#ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS
1231 time_end = timing_counter_get();
1232 time_diff = (uint32_t)timing_cycles_get(&time_start, &time_end);
1233#else
Daniel Leung8eea5112021-03-30 14:38:00 -07001234 time_diff = k_cycle_get_32() - time_start;
Daniel Leung09e8db32021-03-31 13:56:05 -07001235#endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */
Daniel Leung8eea5112021-03-30 14:38:00 -07001236
1237 z_paging_histogram_inc(&z_paging_histogram_backing_store_page_in,
1238 time_diff);
1239#endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */
1240}
1241
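/*
 * Wrapper for k_mem_paging_backing_store_page_out() which, when
 * CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM is enabled, records how long the
 * page-out transfer took.
 */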
1242static inline void do_backing_store_page_out(uintptr_t location)
1243{
1244#ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
1245 uint32_t time_diff;
Daniel Leung09e8db32021-03-31 13:56:05 -07001246
1247#ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS
1248 timing_t time_start, time_end;
1249
1250 time_start = timing_counter_get();
1251#else
Daniel Leung8eea5112021-03-30 14:38:00 -07001252 uint32_t time_start;
1253
1254 time_start = k_cycle_get_32();
Daniel Leung09e8db32021-03-31 13:56:05 -07001255#endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */
Daniel Leung8eea5112021-03-30 14:38:00 -07001256#endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */
1257
Daniel Leungdfa4b7e2021-05-13 11:57:54 -07001258 k_mem_paging_backing_store_page_out(location);
Daniel Leung8eea5112021-03-30 14:38:00 -07001259
1260#ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
Daniel Leung09e8db32021-03-31 13:56:05 -07001261#ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS
1262 time_end = timing_counter_get();
1263 time_diff = (uint32_t)timing_cycles_get(&time_start, &time_end);
1264#else
Daniel Leung8eea5112021-03-30 14:38:00 -07001265 time_diff = k_cycle_get_32() - time_start;
Daniel Leung09e8db32021-03-31 13:56:05 -07001266#endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */
Daniel Leung8eea5112021-03-30 14:38:00 -07001267
1268 z_paging_histogram_inc(&z_paging_histogram_backing_store_page_out,
1269 time_diff);
1270#endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */
1271}
1272
Nicolas Pitre6b3fff32024-07-09 18:04:31 -04001273#if defined(CONFIG_SMP) && defined(CONFIG_DEMAND_PAGING_ALLOW_IRQ)
1274/*
1275 * SMP support is deliberately simple. Some resources such as the scratch page
1276 * could be made per-CPU, backing store driver execution could be confined to the
1277 * faulting CPU, statistics could be made concurrency-safe, etc. But in the
1278 * end we're dealing with memory transfer to/from some external storage which
1279 * is inherently slow and whose access is most likely serialized anyway.
1280 * So let's simply enforce global demand paging serialization across all CPUs
1281 * with a mutex as there is no real gain from added parallelism here.
Andrew Boiea5cb8782020-12-10 13:02:24 -08001282 */
Nicolas Pitre6b3fff32024-07-09 18:04:31 -04001283static K_MUTEX_DEFINE(z_mm_paging_lock);
1284#endif
Andrew Boiea5cb8782020-12-10 13:02:24 -08001285
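/* Invoke func() on every page-sized chunk of the given virtual region */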
1286static void virt_region_foreach(void *addr, size_t size,
1287 void (*func)(void *))
1288{
Daniel Leung54af5dd2024-06-07 10:55:06 -07001289 k_mem_assert_virtual_region(addr, size);
Andrew Boiea5cb8782020-12-10 13:02:24 -08001290
1291 for (size_t offset = 0; offset < size; offset += CONFIG_MMU_PAGE_SIZE) {
1292 func((uint8_t *)addr + offset);
1293 }
1294}
1295
Andrew Boiea5cb8782020-12-10 13:02:24 -08001296/*
1297 * Perform some preparatory steps before paging out. If the provided page
1298 * frame contains a data page, it must be evicted to the backing store
1299 * immediately after this function returns, with a call to
1300 * k_mem_paging_backing_store_page_out().
1301 *
1302 * - Map the page frame to the scratch area if requested. This is always
1303 *   done for page faults, but for manual evictions only if the page is
1304 *   dirty.
1305 * - If mapped:
1306 * - obtain backing store location and populate location parameter
1307 * - Update page tables with location
1308 * - Mark page frame as busy
1309 *
1310 * Returns -ENOMEM if the backing store is full
1311 */
Daniel Leung54af5dd2024-06-07 10:55:06 -07001312static int page_frame_prepare_locked(struct k_mem_page_frame *pf, bool *dirty_ptr,
Andrew Boiec7be5dd2021-01-15 12:07:45 -08001313 bool page_fault, uintptr_t *location_ptr)
Andrew Boiea5cb8782020-12-10 13:02:24 -08001314{
1315 uintptr_t phys;
1316 int ret;
1317 bool dirty = *dirty_ptr;
1318
Daniel Leung54af5dd2024-06-07 10:55:06 -07001319 phys = k_mem_page_frame_to_phys(pf);
1320 __ASSERT(!k_mem_page_frame_is_pinned(pf), "page frame 0x%lx is pinned",
Andrew Boiea5cb8782020-12-10 13:02:24 -08001321 phys);
1322
1323 /* If the backing store doesn't have a copy of the page, even if it
1324 * wasn't modified, treat as dirty. This can happen for a few
1325 * reasons:
1326 * 1) Page has never been swapped out before, and the backing store
1327 * wasn't pre-populated with this data page.
1328 * 2) Page was swapped out before, but the page contents were not
1329 * preserved after swapping back in.
1330 * 3) Page contents were preserved when swapped back in, but were later
1331 * evicted from the backing store to make room for other evicted
1332 * pages.
1333 */
Daniel Leung54af5dd2024-06-07 10:55:06 -07001334 if (k_mem_page_frame_is_mapped(pf)) {
1335 dirty = dirty || !k_mem_page_frame_is_backed(pf);
Andrew Boiea5cb8782020-12-10 13:02:24 -08001336 }
1337
Andrew Boiec7be5dd2021-01-15 12:07:45 -08001338 if (dirty || page_fault) {
Andrew Boiea5cb8782020-12-10 13:02:24 -08001339 arch_mem_scratch(phys);
1340 }
1341
Daniel Leung54af5dd2024-06-07 10:55:06 -07001342 if (k_mem_page_frame_is_mapped(pf)) {
Daniel Leungdfa4b7e2021-05-13 11:57:54 -07001343 ret = k_mem_paging_backing_store_location_get(pf, location_ptr,
1344 page_fault);
Andrew Boiea5cb8782020-12-10 13:02:24 -08001345 if (ret != 0) {
1346 LOG_ERR("out of backing store memory");
1347 return -ENOMEM;
1348 }
Daniel Leung54af5dd2024-06-07 10:55:06 -07001349 arch_mem_page_out(k_mem_page_frame_to_virt(pf), *location_ptr);
Nicolas Pitre6a3aa3b2024-06-04 23:27:37 -04001350 k_mem_paging_eviction_remove(pf);
Andrew Boiea5cb8782020-12-10 13:02:24 -08001351 } else {
1352 /* Shouldn't happen unless this function is mis-used */
1353 __ASSERT(!dirty, "un-mapped page determined to be dirty");
1354 }
1355#ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
Daniel Leung54af5dd2024-06-07 10:55:06 -07001356 /* Mark as busy so that k_mem_page_frame_is_evictable() returns false */
1357 __ASSERT(!k_mem_page_frame_is_busy(pf), "page frame 0x%lx is already busy",
Andrew Boiea5cb8782020-12-10 13:02:24 -08001358 phys);
Daniel Leung54af5dd2024-06-07 10:55:06 -07001359 k_mem_page_frame_set(pf, K_MEM_PAGE_FRAME_BUSY);
Simon Heinbcd1d192024-03-08 12:00:10 +01001360#endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
Andrew Boiea5cb8782020-12-10 13:02:24 -08001361 /* Update dirty parameter, since we set to true if it wasn't backed
1362 * even if otherwise clean
1363 */
1364 *dirty_ptr = dirty;
1365
1366 return 0;
1367}
1368
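/*
 * Evict the data page mapped at the given virtual address: write it out to
 * the backing store if dirty, then return its page frame to the free list.
 * Returns 0 on success (including when the page was not resident) or
 * -ENOMEM if the backing store is full.
 */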
1369static int do_mem_evict(void *addr)
1370{
1371 bool dirty;
Daniel Leung54af5dd2024-06-07 10:55:06 -07001372 struct k_mem_page_frame *pf;
Andrew Boiea5cb8782020-12-10 13:02:24 -08001373 uintptr_t location;
Nicolas Pitre6b3fff32024-07-09 18:04:31 -04001374 k_spinlock_key_t key;
Andrew Boiea5cb8782020-12-10 13:02:24 -08001375 uintptr_t flags, phys;
Nicolas Pitre6b3fff32024-07-09 18:04:31 -04001376 int ret;
Andrew Boiea5cb8782020-12-10 13:02:24 -08001377
1378#ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1379 __ASSERT(!k_is_in_isr(),
1380 "%s is unavailable in ISRs with CONFIG_DEMAND_PAGING_ALLOW_IRQ",
1381 __func__);
Nicolas Pitre6b3fff32024-07-09 18:04:31 -04001382#ifdef CONFIG_SMP
1383 k_mutex_lock(&z_mm_paging_lock, K_FOREVER);
1384#else
Andrew Boiea5cb8782020-12-10 13:02:24 -08001385 k_sched_lock();
Nicolas Pitre6b3fff32024-07-09 18:04:31 -04001386#endif
Andrew Boiea5cb8782020-12-10 13:02:24 -08001387#endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
Nicolas Pitre6b3fff32024-07-09 18:04:31 -04001388 key = k_spin_lock(&z_mm_lock);
Andrew Boiea5cb8782020-12-10 13:02:24 -08001389 flags = arch_page_info_get(addr, &phys, false);
1390 __ASSERT((flags & ARCH_DATA_PAGE_NOT_MAPPED) == 0,
1391 "address %p isn't mapped", addr);
1392 if ((flags & ARCH_DATA_PAGE_LOADED) == 0) {
1393 /* Un-mapped or already evicted. Nothing to do */
1394 ret = 0;
1395 goto out;
1396 }
1397
1398 dirty = (flags & ARCH_DATA_PAGE_DIRTY) != 0;
Daniel Leung54af5dd2024-06-07 10:55:06 -07001399 pf = k_mem_phys_to_page_frame(phys);
1400 __ASSERT(k_mem_page_frame_to_virt(pf) == addr, "page frame address mismatch");
Andrew Boiea5cb8782020-12-10 13:02:24 -08001401 ret = page_frame_prepare_locked(pf, &dirty, false, &location);
1402 if (ret != 0) {
1403 goto out;
1404 }
1405
1406 __ASSERT(ret == 0, "failed to prepare page frame");
1407#ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
Nicolas Pitre6b3fff32024-07-09 18:04:31 -04001408 k_spin_unlock(&z_mm_lock, key);
Andrew Boiea5cb8782020-12-10 13:02:24 -08001409#endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1410 if (dirty) {
Daniel Leung8eea5112021-03-30 14:38:00 -07001411 do_backing_store_page_out(location);
Andrew Boiea5cb8782020-12-10 13:02:24 -08001412 }
1413#ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
Nicolas Pitre6b3fff32024-07-09 18:04:31 -04001414 key = k_spin_lock(&z_mm_lock);
Andrew Boiea5cb8782020-12-10 13:02:24 -08001415#endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1416 page_frame_free_locked(pf);
1417out:
Nicolas Pitre6b3fff32024-07-09 18:04:31 -04001418 k_spin_unlock(&z_mm_lock, key);
Andrew Boiea5cb8782020-12-10 13:02:24 -08001419#ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
Nicolas Pitre6b3fff32024-07-09 18:04:31 -04001420#ifdef CONFIG_SMP
1421 k_mutex_unlock(&z_mm_paging_lock);
1422#else
Andrew Boiea5cb8782020-12-10 13:02:24 -08001423 k_sched_unlock();
Nicolas Pitre6b3fff32024-07-09 18:04:31 -04001424#endif
Andrew Boiea5cb8782020-12-10 13:02:24 -08001425#endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1426 return ret;
1427}
1428
Andrew Boie6c97ab32021-01-20 17:03:13 -08001429int k_mem_page_out(void *addr, size_t size)
Andrew Boiea5cb8782020-12-10 13:02:24 -08001430{
1431 __ASSERT(page_frames_initialized, "%s called on %p too early", __func__,
1432 addr);
Daniel Leung54af5dd2024-06-07 10:55:06 -07001433 k_mem_assert_virtual_region(addr, size);
Andrew Boiea5cb8782020-12-10 13:02:24 -08001434
1435 for (size_t offset = 0; offset < size; offset += CONFIG_MMU_PAGE_SIZE) {
1436 void *pos = (uint8_t *)addr + offset;
1437 int ret;
1438
1439 ret = do_mem_evict(pos);
1440 if (ret != 0) {
1441 return ret;
1442 }
1443 }
1444
1445 return 0;
1446}
1447
Daniel Leung54af5dd2024-06-07 10:55:06 -07001448int k_mem_page_frame_evict(uintptr_t phys)
Andrew Boiea5cb8782020-12-10 13:02:24 -08001449{
Nicolas Pitre6b3fff32024-07-09 18:04:31 -04001450 k_spinlock_key_t key;
Daniel Leung54af5dd2024-06-07 10:55:06 -07001451 struct k_mem_page_frame *pf;
Andrew Boiea5cb8782020-12-10 13:02:24 -08001452 bool dirty;
1453 uintptr_t flags;
1454 uintptr_t location;
Nicolas Pitre6b3fff32024-07-09 18:04:31 -04001455 int ret;
Andrew Boiea5cb8782020-12-10 13:02:24 -08001456
1457 __ASSERT(page_frames_initialized, "%s called on 0x%lx too early",
1458 __func__, phys);
1459
1460	/* The implementation is similar to do_page_fault(), except there is
1461	 * no data page to page in; see the comments in that function.
1462	 */
1463
1464#ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1465 __ASSERT(!k_is_in_isr(),
1466 "%s is unavailable in ISRs with CONFIG_DEMAND_PAGING_ALLOW_IRQ",
1467 __func__);
Nicolas Pitre6b3fff32024-07-09 18:04:31 -04001468#ifdef CONFIG_SMP
1469 k_mutex_lock(&z_mm_paging_lock, K_FOREVER);
1470#else
Andrew Boiea5cb8782020-12-10 13:02:24 -08001471 k_sched_lock();
Nicolas Pitre6b3fff32024-07-09 18:04:31 -04001472#endif
Andrew Boiea5cb8782020-12-10 13:02:24 -08001473#endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
Nicolas Pitre6b3fff32024-07-09 18:04:31 -04001474 key = k_spin_lock(&z_mm_lock);
Daniel Leung54af5dd2024-06-07 10:55:06 -07001475 pf = k_mem_phys_to_page_frame(phys);
1476 if (!k_mem_page_frame_is_mapped(pf)) {
Andrew Boiea5cb8782020-12-10 13:02:24 -08001477 /* Nothing to do, free page */
1478 ret = 0;
1479 goto out;
1480 }
Daniel Leung54af5dd2024-06-07 10:55:06 -07001481 flags = arch_page_info_get(k_mem_page_frame_to_virt(pf), NULL, false);
Andrew Boiea5cb8782020-12-10 13:02:24 -08001482 /* Shouldn't ever happen */
1483 __ASSERT((flags & ARCH_DATA_PAGE_LOADED) != 0, "data page not loaded");
1484 dirty = (flags & ARCH_DATA_PAGE_DIRTY) != 0;
1485 ret = page_frame_prepare_locked(pf, &dirty, false, &location);
1486 if (ret != 0) {
1487 goto out;
1488 }
1489
1490#ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
Nicolas Pitre6b3fff32024-07-09 18:04:31 -04001491 k_spin_unlock(&z_mm_lock, key);
Andrew Boiea5cb8782020-12-10 13:02:24 -08001492#endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1493 if (dirty) {
Daniel Leung8eea5112021-03-30 14:38:00 -07001494 do_backing_store_page_out(location);
Andrew Boiea5cb8782020-12-10 13:02:24 -08001495 }
1496#ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
Nicolas Pitre6b3fff32024-07-09 18:04:31 -04001497	key = k_spin_lock(&z_mm_lock);
Andrew Boiea5cb8782020-12-10 13:02:24 -08001498#endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1499 page_frame_free_locked(pf);
1500out:
Nicolas Pitre6b3fff32024-07-09 18:04:31 -04001501 k_spin_unlock(&z_mm_lock, key);
Andrew Boiea5cb8782020-12-10 13:02:24 -08001502#ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
Nicolas Pitre6b3fff32024-07-09 18:04:31 -04001503#ifdef CONFIG_SMP
1504 k_mutex_unlock(&z_mm_paging_lock);
1505#else
Andrew Boiea5cb8782020-12-10 13:02:24 -08001506 k_sched_unlock();
Nicolas Pitre6b3fff32024-07-09 18:04:31 -04001507#endif
Andrew Boiea5cb8782020-12-10 13:02:24 -08001508#endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1509 return ret;
1510}
1511
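/*
 * Account for a page fault in the global statistics and, when
 * CONFIG_DEMAND_PAGING_THREAD_STATS is enabled, in the faulting thread's
 * statistics. Faults are classified by whether IRQs were unlocked when the
 * fault was taken, and (without CONFIG_DEMAND_PAGING_ALLOW_IRQ) whether the
 * fault happened in an ISR.
 */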
Daniel Leungae865192021-03-26 12:03:42 -07001512static inline void paging_stats_faults_inc(struct k_thread *faulting_thread,
1513 int key)
1514{
1515#ifdef CONFIG_DEMAND_PAGING_STATS
1516 bool is_irq_unlocked = arch_irq_unlocked(key);
1517
1518 paging_stats.pagefaults.cnt++;
1519
1520 if (is_irq_unlocked) {
1521 paging_stats.pagefaults.irq_unlocked++;
1522 } else {
1523 paging_stats.pagefaults.irq_locked++;
1524 }
1525
1526#ifdef CONFIG_DEMAND_PAGING_THREAD_STATS
1527 faulting_thread->paging_stats.pagefaults.cnt++;
1528
1529 if (is_irq_unlocked) {
1530 faulting_thread->paging_stats.pagefaults.irq_unlocked++;
1531 } else {
1532 faulting_thread->paging_stats.pagefaults.irq_locked++;
1533 }
1534#else
1535 ARG_UNUSED(faulting_thread);
Simon Heinbcd1d192024-03-08 12:00:10 +01001536#endif /* CONFIG_DEMAND_PAGING_THREAD_STATS */
Daniel Leungae865192021-03-26 12:03:42 -07001537
1538#ifndef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1539 if (k_is_in_isr()) {
1540 paging_stats.pagefaults.in_isr++;
1541
1542#ifdef CONFIG_DEMAND_PAGING_THREAD_STATS
1543 faulting_thread->paging_stats.pagefaults.in_isr++;
Simon Heinbcd1d192024-03-08 12:00:10 +01001544#endif /* CONFIG_DEMAND_PAGING_THREAD_STATS */
Daniel Leungae865192021-03-26 12:03:42 -07001545 }
1546#endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1547#endif /* CONFIG_DEMAND_PAGING_STATS */
1548}
1549
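/*
 * Account for a page frame eviction in the global (and, if enabled,
 * per-thread) statistics, classified as clean or dirty.
 */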
1550static inline void paging_stats_eviction_inc(struct k_thread *faulting_thread,
1551 bool dirty)
1552{
1553#ifdef CONFIG_DEMAND_PAGING_STATS
1554 if (dirty) {
1555 paging_stats.eviction.dirty++;
1556 } else {
1557 paging_stats.eviction.clean++;
1558 }
1559#ifdef CONFIG_DEMAND_PAGING_THREAD_STATS
1560 if (dirty) {
1561 faulting_thread->paging_stats.eviction.dirty++;
1562 } else {
1563 faulting_thread->paging_stats.eviction.clean++;
1564 }
1565#else
1566 ARG_UNUSED(faulting_thread);
1567#endif /* CONFIG_DEMAND_PAGING_THREAD_STATS */
1568#endif /* CONFIG_DEMAND_PAGING_STATS */
1569}
1570
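/*
 * Ask the eviction algorithm to select a victim page frame via
 * k_mem_paging_eviction_select(), recording how long the selection took when
 * CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM is enabled.
 */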
Daniel Leung54af5dd2024-06-07 10:55:06 -07001571static inline struct k_mem_page_frame *do_eviction_select(bool *dirty)
Daniel Leung8eea5112021-03-30 14:38:00 -07001572{
Daniel Leung54af5dd2024-06-07 10:55:06 -07001573 struct k_mem_page_frame *pf;
Daniel Leung8eea5112021-03-30 14:38:00 -07001574
1575#ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
Daniel Leung8eea5112021-03-30 14:38:00 -07001576 uint32_t time_diff;
Daniel Leung09e8db32021-03-31 13:56:05 -07001577
1578#ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS
1579 timing_t time_start, time_end;
1580
1581 time_start = timing_counter_get();
1582#else
1583 uint32_t time_start;
1584
1585 time_start = k_cycle_get_32();
1586#endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */
Daniel Leung8eea5112021-03-30 14:38:00 -07001587#endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */
1588
Daniel Leung31c362d2021-05-13 11:02:56 -07001589 pf = k_mem_paging_eviction_select(dirty);
Daniel Leung8eea5112021-03-30 14:38:00 -07001590
1591#ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
Daniel Leung09e8db32021-03-31 13:56:05 -07001592#ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS
1593 time_end = timing_counter_get();
1594 time_diff = (uint32_t)timing_cycles_get(&time_start, &time_end);
1595#else
Daniel Leung8eea5112021-03-30 14:38:00 -07001596 time_diff = k_cycle_get_32() - time_start;
Daniel Leung09e8db32021-03-31 13:56:05 -07001597#endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */
1598
Daniel Leung8eea5112021-03-30 14:38:00 -07001599 z_paging_histogram_inc(&z_paging_histogram_eviction, time_diff);
1600#endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */
1601
1602 return pf;
1603}
1604
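/*
 * Core page fault and page-in/pin handler. Locate the data page backing
 * addr, bring it into a page frame if it is paged out (evicting and possibly
 * paging out a victim frame when no free frame is available), and pin the
 * frame if requested. Returns false when arch_page_location_get() reports
 * the address as bad, which callers treat as a fatal error.
 */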
Andrew Boiea5cb8782020-12-10 13:02:24 -08001605static bool do_page_fault(void *addr, bool pin)
1606{
Daniel Leung54af5dd2024-06-07 10:55:06 -07001607 struct k_mem_page_frame *pf;
Nicolas Pitre6b3fff32024-07-09 18:04:31 -04001608 k_spinlock_key_t key;
Andrew Boiea5cb8782020-12-10 13:02:24 -08001609 uintptr_t page_in_location, page_out_location;
1610 enum arch_page_location status;
1611 bool result;
1612 bool dirty = false;
Nicolas Pitre6b3fff32024-07-09 18:04:31 -04001613 struct k_thread *faulting_thread;
1614 int ret;
Andrew Boiea5cb8782020-12-10 13:02:24 -08001615
1616 __ASSERT(page_frames_initialized, "page fault at %p happened too early",
1617 addr);
1618
1619 LOG_DBG("page fault at %p", addr);
1620
1621 /*
1622 * TODO: Add performance accounting:
Daniel Leung31c362d2021-05-13 11:02:56 -07001623 * - k_mem_paging_eviction_select() metrics
Andrew Boiea5cb8782020-12-10 13:02:24 -08001624 * * periodic timer execution time histogram (if implemented)
Andrew Boiea5cb8782020-12-10 13:02:24 -08001625 */
1626
1627#ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
Nicolas Pitre6b3fff32024-07-09 18:04:31 -04001628 /*
1629 * We do re-enable interrupts during the page-in/page-out operation
1630 * if and only if interrupts were enabled when the exception was
1631 * taken; in this configuration page faults in an ISR are a bug; all
1632 * their code/data must be pinned.
Andrew Boiea5cb8782020-12-10 13:02:24 -08001633 *
1634 * If interrupts were disabled when the exception was taken, the
1635 * arch code is responsible for keeping them that way when entering
1636 * this function.
1637 *
1638 * If this is not enabled, then interrupts are always locked for the
1639 * entire operation. This is far worse for system interrupt latency
1640	 * but requires fewer pinned pages, and ISRs may also take page faults.
1641 *
Nicolas Pitre6b3fff32024-07-09 18:04:31 -04001642 * On UP we lock the scheduler so that other threads are never
1643 * scheduled during the page-in/out operation. Support for
1644 * allowing k_mem_paging_backing_store_page_out() and
Daniel Leungdfa4b7e2021-05-13 11:57:54 -07001645 * k_mem_paging_backing_store_page_in() to also sleep and allow
1646 * other threads to run (such as in the case where the transfer is
Nicolas Pitre6b3fff32024-07-09 18:04:31 -04001647 * async DMA) is not supported on UP. Even if limited to thread
1648 * context, arbitrary memory access triggering exceptions that put
1649 * a thread to sleep on a contended page fault operation will break
1650 * scheduling assumptions of cooperative threads or threads that
1651 * implement critical sections with spinlocks or disabling IRQs.
1652 *
1653 * On SMP, though, exclusivity cannot be assumed solely from being
1654 * a cooperative thread. Another thread with any prio may be running
1655 * on another CPU so exclusion must already be enforced by other
1656 * means. Therefore trying to prevent scheduling on SMP is pointless,
1657 * and k_sched_lock() is equivalent to a no-op on SMP anyway.
1658 * As a result, sleeping/rescheduling in the SMP case is fine.
Andrew Boiea5cb8782020-12-10 13:02:24 -08001659 */
Andrew Boiea5cb8782020-12-10 13:02:24 -08001660 __ASSERT(!k_is_in_isr(), "ISR page faults are forbidden");
Nicolas Pitre6b3fff32024-07-09 18:04:31 -04001661#ifdef CONFIG_SMP
1662 k_mutex_lock(&z_mm_paging_lock, K_FOREVER);
1663#else
1664 k_sched_lock();
1665#endif
Andrew Boiea5cb8782020-12-10 13:02:24 -08001666#endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1667
Nicolas Pitre6b3fff32024-07-09 18:04:31 -04001668 key = k_spin_lock(&z_mm_lock);
1669 faulting_thread = _current_cpu->current;
1670
Andrew Boiea5cb8782020-12-10 13:02:24 -08001671 status = arch_page_location_get(addr, &page_in_location);
1672 if (status == ARCH_PAGE_LOCATION_BAD) {
1673 /* Return false to treat as a fatal error */
1674 result = false;
1675 goto out;
1676 }
1677 result = true;
Daniel Leungae865192021-03-26 12:03:42 -07001678
Andrew Boiea5cb8782020-12-10 13:02:24 -08001679 if (status == ARCH_PAGE_LOCATION_PAGED_IN) {
1680 if (pin) {
1681 /* It's a physical memory address */
1682 uintptr_t phys = page_in_location;
1683
Daniel Leung54af5dd2024-06-07 10:55:06 -07001684 pf = k_mem_phys_to_page_frame(phys);
Nicolas Pitre6a3aa3b2024-06-04 23:27:37 -04001685 if (!k_mem_page_frame_is_pinned(pf)) {
1686 k_mem_paging_eviction_remove(pf);
1687 k_mem_page_frame_set(pf, K_MEM_PAGE_FRAME_PINNED);
1688 }
Andrew Boiea5cb8782020-12-10 13:02:24 -08001689 }
Daniel Leung7771d272021-07-13 14:08:05 -07001690
1691		/* The data page is already present in physical memory
1692		 * (and was pinned above if requested). There is no need
1693		 * to go through the page-in code below to pull in the
1694		 * data page, so skip to the end.
1695		 */
Andrew Boiea5cb8782020-12-10 13:02:24 -08001696 goto out;
1697 }
1698 __ASSERT(status == ARCH_PAGE_LOCATION_PAGED_OUT,
1699 "unexpected status value %d", status);
1700
Nicolas Pitre6b3fff32024-07-09 18:04:31 -04001701 paging_stats_faults_inc(faulting_thread, key.key);
Daniel Leung7771d272021-07-13 14:08:05 -07001702
Andrew Boiea5cb8782020-12-10 13:02:24 -08001703 pf = free_page_frame_list_get();
1704 if (pf == NULL) {
1705 /* Need to evict a page frame */
Daniel Leung8eea5112021-03-30 14:38:00 -07001706 pf = do_eviction_select(&dirty);
Andrew Boiea5cb8782020-12-10 13:02:24 -08001707 __ASSERT(pf != NULL, "failed to get a page frame");
Nicolas Pitre57305972024-05-01 18:28:20 -04001708 LOG_DBG("evicting %p at 0x%lx",
Daniel Leung54af5dd2024-06-07 10:55:06 -07001709 k_mem_page_frame_to_virt(pf),
1710 k_mem_page_frame_to_phys(pf));
Daniel Leungae865192021-03-26 12:03:42 -07001711
1712 paging_stats_eviction_inc(faulting_thread, dirty);
Andrew Boiea5cb8782020-12-10 13:02:24 -08001713 }
1714 ret = page_frame_prepare_locked(pf, &dirty, true, &page_out_location);
1715 __ASSERT(ret == 0, "failed to prepare page frame");
1716
1717#ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
Nicolas Pitre6b3fff32024-07-09 18:04:31 -04001718 k_spin_unlock(&z_mm_lock, key);
Andrew Boiea5cb8782020-12-10 13:02:24 -08001719 /* Interrupts are now unlocked if they were not locked when we entered
1720 * this function, and we may service ISRs. The scheduler is still
1721 * locked.
1722 */
1723#endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1724 if (dirty) {
Daniel Leung8eea5112021-03-30 14:38:00 -07001725 do_backing_store_page_out(page_out_location);
Andrew Boiea5cb8782020-12-10 13:02:24 -08001726 }
Daniel Leung8eea5112021-03-30 14:38:00 -07001727 do_backing_store_page_in(page_in_location);
Andrew Boiea5cb8782020-12-10 13:02:24 -08001728
1729#ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
Nicolas Pitre6b3fff32024-07-09 18:04:31 -04001730 key = k_spin_lock(&z_mm_lock);
Daniel Leung54af5dd2024-06-07 10:55:06 -07001731 k_mem_page_frame_clear(pf, K_MEM_PAGE_FRAME_BUSY);
Andrew Boiea5cb8782020-12-10 13:02:24 -08001732#endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
Daniel Leung54af5dd2024-06-07 10:55:06 -07001733 k_mem_page_frame_clear(pf, K_MEM_PAGE_FRAME_MAPPED);
Nicolas Pitre57305972024-05-01 18:28:20 -04001734 frame_mapped_set(pf, addr);
Andrew Boiea5cb8782020-12-10 13:02:24 -08001735 if (pin) {
Daniel Leung54af5dd2024-06-07 10:55:06 -07001736 k_mem_page_frame_set(pf, K_MEM_PAGE_FRAME_PINNED);
Andrew Boiea5cb8782020-12-10 13:02:24 -08001737 }
Daniel Leungc38634f2021-07-16 15:27:27 -07001738
Daniel Leung54af5dd2024-06-07 10:55:06 -07001739 arch_mem_page_in(addr, k_mem_page_frame_to_phys(pf));
Daniel Leungdfa4b7e2021-05-13 11:57:54 -07001740 k_mem_paging_backing_store_page_finalize(pf, page_in_location);
Nicolas Pitre6a3aa3b2024-06-04 23:27:37 -04001741 if (!pin) {
1742 k_mem_paging_eviction_add(pf);
1743 }
Andrew Boiea5cb8782020-12-10 13:02:24 -08001744out:
Nicolas Pitre6b3fff32024-07-09 18:04:31 -04001745 k_spin_unlock(&z_mm_lock, key);
Andrew Boiea5cb8782020-12-10 13:02:24 -08001746#ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
Nicolas Pitre6b3fff32024-07-09 18:04:31 -04001747#ifdef CONFIG_SMP
1748 k_mutex_unlock(&z_mm_paging_lock);
1749#else
Andrew Boiea5cb8782020-12-10 13:02:24 -08001750 k_sched_unlock();
Nicolas Pitre6b3fff32024-07-09 18:04:31 -04001751#endif
Andrew Boiea5cb8782020-12-10 13:02:24 -08001752#endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1753
1754 return result;
1755}
1756
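/* Page in the data page at addr without pinning it */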
1757static void do_page_in(void *addr)
1758{
1759 bool ret;
1760
1761 ret = do_page_fault(addr, false);
1762 __ASSERT(ret, "unmapped memory address %p", addr);
1763 (void)ret;
1764}
1765
Andrew Boie6c97ab32021-01-20 17:03:13 -08001766void k_mem_page_in(void *addr, size_t size)
Andrew Boiea5cb8782020-12-10 13:02:24 -08001767{
1768 __ASSERT(!IS_ENABLED(CONFIG_DEMAND_PAGING_ALLOW_IRQ) || !k_is_in_isr(),
1769 "%s may not be called in ISRs if CONFIG_DEMAND_PAGING_ALLOW_IRQ is enabled",
1770 __func__);
1771 virt_region_foreach(addr, size, do_page_in);
1772}
1773
1774static void do_mem_pin(void *addr)
1775{
1776 bool ret;
1777
1778 ret = do_page_fault(addr, true);
1779 __ASSERT(ret, "unmapped memory address %p", addr);
1780 (void)ret;
1781}
1782
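/*
 * Pin a virtual region so it stays resident and is never selected for
 * eviction. Illustrative usage only (the buffer name is hypothetical, not
 * from this file): a driver could pin a buffer for the duration of a
 * transfer that must not take page faults, then release it afterwards:
 *
 *	k_mem_pin(xfer_buf, sizeof(xfer_buf));
 *	... perform the transfer ...
 *	k_mem_unpin(xfer_buf, sizeof(xfer_buf));
 */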
Andrew Boie6c97ab32021-01-20 17:03:13 -08001783void k_mem_pin(void *addr, size_t size)
Andrew Boiea5cb8782020-12-10 13:02:24 -08001784{
1785 __ASSERT(!IS_ENABLED(CONFIG_DEMAND_PAGING_ALLOW_IRQ) || !k_is_in_isr(),
1786 "%s may not be called in ISRs if CONFIG_DEMAND_PAGING_ALLOW_IRQ is enabled",
1787 __func__);
1788 virt_region_foreach(addr, size, do_mem_pin);
1789}
1790
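/*
 * Called by architecture page fault handling code to resolve a fault at
 * addr. A false return means the fault could not be resolved and should be
 * escalated. Illustrative call site (arch-specific, names hypothetical):
 *
 *	if (!k_mem_page_fault(fault_addr)) {
 *		... report a fatal unhandled page fault ...
 *	}
 */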
Daniel Leung564ca112024-06-07 11:49:21 -07001791bool k_mem_page_fault(void *addr)
Andrew Boiea5cb8782020-12-10 13:02:24 -08001792{
Daniel Leungae865192021-03-26 12:03:42 -07001793 return do_page_fault(addr, false);
Andrew Boiea5cb8782020-12-10 13:02:24 -08001794}
1795
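/*
 * If the data page at addr is resident and its page frame is pinned, clear
 * the pinned bit and hand the frame back to the eviction algorithm.
 */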
1796static void do_mem_unpin(void *addr)
1797{
Daniel Leung54af5dd2024-06-07 10:55:06 -07001798 struct k_mem_page_frame *pf;
Nicolas Pitre6b3fff32024-07-09 18:04:31 -04001799 k_spinlock_key_t key;
Andrew Boiea5cb8782020-12-10 13:02:24 -08001800 uintptr_t flags, phys;
1801
Nicolas Pitre6b3fff32024-07-09 18:04:31 -04001802 key = k_spin_lock(&z_mm_lock);
Andrew Boiea5cb8782020-12-10 13:02:24 -08001803 flags = arch_page_info_get(addr, &phys, false);
1804 __ASSERT((flags & ARCH_DATA_PAGE_NOT_MAPPED) == 0,
1805 "invalid data page at %p", addr);
1806 if ((flags & ARCH_DATA_PAGE_LOADED) != 0) {
Daniel Leung54af5dd2024-06-07 10:55:06 -07001807 pf = k_mem_phys_to_page_frame(phys);
Nicolas Pitre6a3aa3b2024-06-04 23:27:37 -04001808 if (k_mem_page_frame_is_pinned(pf)) {
1809 k_mem_page_frame_clear(pf, K_MEM_PAGE_FRAME_PINNED);
1810 k_mem_paging_eviction_add(pf);
1811 }
Andrew Boiea5cb8782020-12-10 13:02:24 -08001812 }
Nicolas Pitre6b3fff32024-07-09 18:04:31 -04001813 k_spin_unlock(&z_mm_lock, key);
Andrew Boiea5cb8782020-12-10 13:02:24 -08001814}
1815
Andrew Boie6c97ab32021-01-20 17:03:13 -08001816void k_mem_unpin(void *addr, size_t size)
Andrew Boiea5cb8782020-12-10 13:02:24 -08001817{
1818 __ASSERT(page_frames_initialized, "%s called on %p too early", __func__,
1819 addr);
1820 virt_region_foreach(addr, size, do_mem_unpin);
1821}
Daniel Leungae865192021-03-26 12:03:42 -07001822
Andrew Boiea5cb8782020-12-10 13:02:24 -08001823#endif /* CONFIG_DEMAND_PAGING */