Andrew Boie06cf6d22020-06-26 16:17:00 -07001/*
2 * Copyright (c) 2020 Intel Corporation
3 *
4 * SPDX-License-Identifier: Apache-2.0
5 *
6 * Routines for managing virtual address spaces
7 */
8
Krzysztof Chruscinski3ed80832020-11-26 19:32:34 +01009#include <stdint.h>
10#include <kernel_arch_interface.h>
11#include <spinlock.h>
Andrew Boiee35f1792020-12-09 12:18:40 -080012#include <mmu.h>
13#include <init.h>
14#include <kernel_internal.h>
Daniel Leungae865192021-03-26 12:03:42 -070015#include <syscall_handler.h>
Daniel Leung085d3762021-04-15 18:44:56 -070016#include <toolchain.h>
Andrew Boiee35f1792020-12-09 12:18:40 -080017#include <linker/linker-defs.h>
Daniel Leungc3182902021-04-26 10:53:44 -070018#include <sys/bitarray.h>
Daniel Leung09e8db32021-03-31 13:56:05 -070019#include <timing/timing.h>
Andrew Boie06cf6d22020-06-26 16:17:00 -070020#include <logging/log.h>
Krzysztof Chruscinski3ed80832020-11-26 19:32:34 +010021LOG_MODULE_DECLARE(os, CONFIG_KERNEL_LOG_LEVEL);
Andrew Boie06cf6d22020-06-26 16:17:00 -070022
Andrew Boiee35f1792020-12-09 12:18:40 -080023/*
24 * General terminology:
25 * - A page frame is a page-sized physical memory region in RAM. It is a
26 * container where a data page may be placed. It is always referred to by
27 * physical address. We have a convention of using uintptr_t for physical
28 * addresses. We instantiate a struct z_page_frame to store metadata for
29 * every page frame.
30 *
31 * - A data page is a page-sized region of data. It may exist in a page frame,
32 * or be paged out to some backing store. Its location can always be looked
33 * up in the CPU's page tables (or equivalent) by virtual address.
34 * The data type will always be void * or in some cases uint8_t * when we
35 * want to do pointer arithmetic.
36 */
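/* Illustrative sketch (not part of the build): the helpers declared in mmu.h
 * convert between the two views described above. The physical address below
 * is an assumption for the example only; it must lie in RAM and be
 * page-aligned.
 *
 *   uintptr_t phys = 0x40001000;
 *   struct z_page_frame *pf = z_phys_to_page_frame(phys);
 *
 *   __ASSERT(z_page_frame_to_phys(pf) == phys, "round trip");
 *   void *data_page = pf->addr;   // virtual address, valid only if mapped
 */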
37
Andrew Boie06cf6d22020-06-26 16:17:00 -070038/* Spinlock to protect any globals in this file and serialize page table
39 * updates in arch code
40 */
Andrew Boiee35f1792020-12-09 12:18:40 -080041struct k_spinlock z_mm_lock;
Andrew Boie06cf6d22020-06-26 16:17:00 -070042
Andrew Boiee4334942020-07-15 14:56:24 -070043/*
Andrew Boiee35f1792020-12-09 12:18:40 -080044 * General page frame management
45 */
46
47/* Database of all RAM page frames */
48struct z_page_frame z_page_frames[Z_NUM_PAGE_FRAMES];
49
50#if __ASSERT_ON
51/* Indicator that z_page_frames has been initialized, many of these APIs do
52 * not work before POST_KERNEL
53 */
54static bool page_frames_initialized;
55#endif
56
57/* Add colors to page table dumps to indicate mapping type */
58#define COLOR_PAGE_FRAMES 1
59
60#if COLOR_PAGE_FRAMES
61#define ANSI_DEFAULT "\x1B[0m"
62#define ANSI_RED "\x1B[1;31m"
63#define ANSI_GREEN "\x1B[1;32m"
64#define ANSI_YELLOW "\x1B[1;33m"
65#define ANSI_BLUE "\x1B[1;34m"
66#define ANSI_MAGENTA "\x1B[1;35m"
67#define ANSI_CYAN "\x1B[1;36m"
68#define ANSI_GREY "\x1B[1;90m"
69
70#define COLOR(x) printk(_CONCAT(ANSI_, x))
71#else
72#define COLOR(x) do { } while (0)
73#endif
74
75static void page_frame_dump(struct z_page_frame *pf)
76{
77 if (z_page_frame_is_reserved(pf)) {
78 COLOR(CYAN);
79 printk("R");
80 } else if (z_page_frame_is_busy(pf)) {
81 COLOR(MAGENTA);
82 printk("B");
83 } else if (z_page_frame_is_pinned(pf)) {
84 COLOR(YELLOW);
85 printk("P");
86 } else if (z_page_frame_is_available(pf)) {
87 COLOR(GREY);
88 printk(".");
89 } else if (z_page_frame_is_mapped(pf)) {
90 COLOR(DEFAULT);
91 printk("M");
92 } else {
93 COLOR(RED);
94 printk("?");
95 }
96}
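/* Legend for the characters emitted above: 'R' reserved, 'B' busy,
 * 'P' pinned, '.' available, 'M' mapped, '?' unknown/unexpected state.
 */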
97
98void z_page_frames_dump(void)
99{
100 int column = 0;
101
102 __ASSERT(page_frames_initialized, "%s called too early", __func__);
103 printk("Physical memory from 0x%lx to 0x%lx\n",
104 Z_PHYS_RAM_START, Z_PHYS_RAM_END);
105
106 for (int i = 0; i < Z_NUM_PAGE_FRAMES; i++) {
107 struct z_page_frame *pf = &z_page_frames[i];
108
109 page_frame_dump(pf);
110
111 column++;
112 if (column == 64) {
113 column = 0;
114 printk("\n");
115 }
116 }
117
118 COLOR(DEFAULT);
119 if (column != 0) {
120 printk("\n");
121 }
122}
123
124#define VIRT_FOREACH(_base, _size, _pos) \
125 for (_pos = _base; \
126 _pos < ((uint8_t *)_base + _size); _pos += CONFIG_MMU_PAGE_SIZE)
127
128#define PHYS_FOREACH(_base, _size, _pos) \
129 for (_pos = _base; \
130 _pos < ((uintptr_t)_base + _size); _pos += CONFIG_MMU_PAGE_SIZE)
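/* Example (sketch only): both macros walk a region one page at a time. The
 * same pattern appears in z_mem_manage_init() below; the region used here is
 * just for illustration.
 *
 *   uint8_t *pos;
 *
 *   VIRT_FOREACH(Z_KERNEL_VIRT_START, Z_KERNEL_VIRT_SIZE, pos) {
 *           // 'pos' visits each page-aligned address in the kernel image
 *   }
 */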
131
Andrew Boie14c5d1f2021-01-23 14:08:12 -0800132
Andrew Boiee35f1792020-12-09 12:18:40 -0800133/*
134 * Virtual address space management
Andrew Boiee4334942020-07-15 14:56:24 -0700135 *
Andrew Boiee35f1792020-12-09 12:18:40 -0800136 * Call all of these functions with z_mm_lock held.
137 *
138 * Overall virtual memory map: When the kernel starts, it resides in
Andrew Boie14c5d1f2021-01-23 14:08:12 -0800139 * virtual memory in the region Z_KERNEL_VIRT_START to
140 * Z_KERNEL_VIRT_END. Unused virtual memory past this, up to the limit
Andrew Boiee35f1792020-12-09 12:18:40 -0800141 * noted by CONFIG_KERNEL_VM_SIZE may be used for runtime memory mappings.
142 *
Andrew Boie14c5d1f2021-01-23 14:08:12 -0800143 * If CONFIG_ARCH_MAPS_ALL_RAM is set, we do not just map the kernel image,
144 * but have a mapping for all RAM in place. This is for special architectural
145 * purposes and does not otherwise affect page frame accounting or flags;
146 * the only guarantee is that such RAM mapping outside of the Zephyr image
147 * won't be disturbed by subsequent memory mapping calls.
148 *
Carlo Caione302a36a2021-02-04 10:01:18 +0100149 * +--------------+ <- Z_VIRT_RAM_START
Andrew Boiee35f1792020-12-09 12:18:40 -0800150 * | Undefined VM | <- May contain ancillary regions like x86_64's locore
Carlo Caione302a36a2021-02-04 10:01:18 +0100151 * +--------------+ <- Z_KERNEL_VIRT_START (often == Z_VIRT_RAM_START)
Andrew Boiee4334942020-07-15 14:56:24 -0700152 * | Mapping for |
Andrew Boiee35f1792020-12-09 12:18:40 -0800153 * | main kernel |
154 * | image |
155 * | |
156 * | |
Andrew Boie14c5d1f2021-01-23 14:08:12 -0800157 * +--------------+ <- Z_FREE_VM_START
Andrew Boiee4334942020-07-15 14:56:24 -0700158 * | |
Andrew Boiee35f1792020-12-09 12:18:40 -0800159 * | Unused, |
160 * | Available VM |
Andrew Boiee4334942020-07-15 14:56:24 -0700161 * | |
162 * |..............| <- mapping_pos (grows downward as more mappings are made)
163 * | Mapping |
164 * +--------------+
165 * | Mapping |
166 * +--------------+
167 * | ... |
168 * +--------------+
169 * | Mapping |
Andrew Boiee35f1792020-12-09 12:18:40 -0800170 * +--------------+ <- mappings start here
171 * | Reserved | <- special purpose virtual page(s) of size Z_VM_RESERVED
172 * +--------------+ <- Z_VIRT_RAM_END
Andrew Boiee35f1792020-12-09 12:18:40 -0800173 */
Andrew Boiee35f1792020-12-09 12:18:40 -0800174
Daniel Leungc3182902021-04-26 10:53:44 -0700175/* Bitmap of virtual addresses where one bit corresponds to one page.
 176 * This is used by virt_region_alloc() to figure out which
177 * region of virtual addresses can be used for memory mapping.
Andrew Boiee35f1792020-12-09 12:18:40 -0800178 *
Daniel Leungc3182902021-04-26 10:53:44 -0700179 * Note that bit #0 corresponds to the highest address, so allocation is
 180 * done in reverse, from the highest address downwards.
Andrew Boiee35f1792020-12-09 12:18:40 -0800181 */
Daniel Leungc3182902021-04-26 10:53:44 -0700182SYS_BITARRAY_DEFINE(virt_region_bitmap,
183 CONFIG_KERNEL_VM_SIZE / CONFIG_MMU_PAGE_SIZE);
184
185static bool virt_region_inited;
186
187#define Z_VIRT_REGION_START_ADDR Z_FREE_VM_START
188#define Z_VIRT_REGION_END_ADDR (Z_VIRT_RAM_END - Z_VM_RESERVED)
189
190static inline uintptr_t virt_from_bitmap_offset(size_t offset, size_t size)
Andrew Boiee35f1792020-12-09 12:18:40 -0800191{
Daniel Leungc3182902021-04-26 10:53:44 -0700192 return POINTER_TO_UINT(Z_VIRT_RAM_END)
193 - (offset * CONFIG_MMU_PAGE_SIZE) - size;
194}
Andrew Boiee35f1792020-12-09 12:18:40 -0800195
Daniel Leungc3182902021-04-26 10:53:44 -0700196static inline size_t virt_to_bitmap_offset(void *vaddr, size_t size)
197{
198 return (POINTER_TO_UINT(Z_VIRT_RAM_END)
199 - POINTER_TO_UINT(vaddr) - size) / CONFIG_MMU_PAGE_SIZE;
200}
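/* Worked example (assumed values, purely for illustration: 4 KiB pages and
 * Z_VIRT_RAM_END == 0x90000000):
 *
 *   virt_to_bitmap_offset((void *)0x8fffe000, 0x2000) == 0
 *   virt_from_bitmap_offset(0, 0x2000)                == 0x8fffe000
 *
 * i.e. the topmost two pages of the address space correspond to bit
 * offset 0, matching the "bit #0 is the highest address" convention.
 */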
201
202static void virt_region_init(void)
203{
204 size_t offset, num_bits;
205
206 /* There are regions where we should never map via
207 * k_mem_map() and z_phys_map(). Mark them as
208 * already allocated so they will never be used.
209 */
210
211 if (Z_VM_RESERVED > 0) {
212 /* Mark reserved region at end of virtual address space */
Daniel Leungc3182902021-04-26 10:53:44 -0700213 num_bits = Z_VM_RESERVED / CONFIG_MMU_PAGE_SIZE;
214 (void)sys_bitarray_set_region(&virt_region_bitmap,
215 num_bits, 0);
216 }
217
218 /* Mark all bits up to Z_FREE_VM_START as allocated */
219 num_bits = POINTER_TO_UINT(Z_FREE_VM_START)
220 - POINTER_TO_UINT(Z_VIRT_RAM_START);
221 offset = virt_to_bitmap_offset(Z_VIRT_RAM_START, num_bits);
222 num_bits /= CONFIG_MMU_PAGE_SIZE;
223 (void)sys_bitarray_set_region(&virt_region_bitmap,
224 num_bits, offset);
225
226 virt_region_inited = true;
227}
228
229static void *virt_region_alloc(size_t size)
230{
231 uintptr_t dest_addr;
232 size_t offset;
233 size_t num_bits;
234 int ret;
235
236 if (unlikely(!virt_region_inited)) {
237 virt_region_init();
238 }
239
240 num_bits = size / CONFIG_MMU_PAGE_SIZE;
241 ret = sys_bitarray_alloc(&virt_region_bitmap, num_bits, &offset);
242 if (ret != 0) {
Andrew Boiee35f1792020-12-09 12:18:40 -0800243 LOG_ERR("insufficient virtual address space (requested %zu)",
244 size);
245 return NULL;
246 }
247
Daniel Leungc3182902021-04-26 10:53:44 -0700248 /* Remember that bit #0 in bitmap corresponds to the highest
 249 * virtual address. So here we need to go downwards
250 * to get the starting address of the allocated region.
251 */
252 dest_addr = virt_from_bitmap_offset(offset, size);
Andrew Boiee35f1792020-12-09 12:18:40 -0800253
Daniel Leungc3182902021-04-26 10:53:44 -0700254 /* Need to make sure this does not step into kernel memory */
255 if (dest_addr < POINTER_TO_UINT(Z_VIRT_REGION_START_ADDR)) {
 256 (void)sys_bitarray_free(&virt_region_bitmap, num_bits, offset);
257 return NULL;
258 }
259
260 return UINT_TO_POINTER(dest_addr);
261}
262
263static void virt_region_free(void *vaddr, size_t size)
264{
265 size_t offset, num_bits;
266 uint8_t *vaddr_u8 = (uint8_t *)vaddr;
267
268 if (unlikely(!virt_region_inited)) {
269 virt_region_init();
270 }
271
272 __ASSERT((vaddr_u8 >= Z_VIRT_REGION_START_ADDR)
273 && ((vaddr_u8 + size) < Z_VIRT_REGION_END_ADDR),
274 "invalid virtual address region %p (%zu)", vaddr_u8, size);
275 if (!((vaddr_u8 >= Z_VIRT_REGION_START_ADDR)
276 && ((vaddr_u8 + size) < Z_VIRT_REGION_END_ADDR))) {
277 return;
278 }
279
280 offset = virt_to_bitmap_offset(vaddr, size);
281 num_bits = size / CONFIG_MMU_PAGE_SIZE;
282 (void)sys_bitarray_free(&virt_region_bitmap, num_bits, offset);
Andrew Boiee35f1792020-12-09 12:18:40 -0800283}
284
285/*
286 * Free page frames management
287 *
288 * Call all of these functions with z_mm_lock held.
Andrew Boiee4334942020-07-15 14:56:24 -0700289 */
290
Andrew Boiee35f1792020-12-09 12:18:40 -0800291/* Linked list of unused and available page frames.
292 *
293 * TODO: This is very simple and treats all free page frames as being equal.
294 * However, there are use-cases to consolidate free pages such that entire
295 * SRAM banks can be switched off to save power, and so obtaining free pages
296 * may require a more complex ontology which prefers page frames in RAM banks
297 * which are still active.
298 *
299 * This implies in the future there may be multiple slists managing physical
300 * pages. Each page frame will still just have one snode link.
Andrew Boiee4334942020-07-15 14:56:24 -0700301 */
Andrew Boiee35f1792020-12-09 12:18:40 -0800302static sys_slist_t free_page_frame_list;
Andrew Boiee4334942020-07-15 14:56:24 -0700303
Andrew Boiee35f1792020-12-09 12:18:40 -0800304/* Number of unused and available free page frames */
305size_t z_free_page_count;
306
307#define PF_ASSERT(pf, expr, fmt, ...) \
308 __ASSERT(expr, "page frame 0x%lx: " fmt, z_page_frame_to_phys(pf), \
309 ##__VA_ARGS__)
310
311/* Get an unused page frame; don't care which one. Returns NULL if there are none */
312static struct z_page_frame *free_page_frame_list_get(void)
313{
314 sys_snode_t *node;
315 struct z_page_frame *pf = NULL;
316
317 node = sys_slist_get(&free_page_frame_list);
318 if (node != NULL) {
319 z_free_page_count--;
320 pf = CONTAINER_OF(node, struct z_page_frame, node);
321 PF_ASSERT(pf, z_page_frame_is_available(pf),
322 "unavailable but somehow on free list");
323 }
324
325 return pf;
326}
327
328/* Release a page frame back into the list of free pages */
329static void free_page_frame_list_put(struct z_page_frame *pf)
330{
331 PF_ASSERT(pf, z_page_frame_is_available(pf),
332 "unavailable page put on free list");
333 sys_slist_append(&free_page_frame_list, &pf->node);
334 z_free_page_count++;
335}
336
337static void free_page_frame_list_init(void)
338{
339 sys_slist_init(&free_page_frame_list);
340}
341
Daniel Leungc254c582021-04-15 12:38:20 -0700342static void page_frame_free_locked(struct z_page_frame *pf)
343{
344 pf->flags = 0;
345 free_page_frame_list_put(pf);
346}
347
Andrew Boiee35f1792020-12-09 12:18:40 -0800348/*
349 * Memory Mapping
350 */
351
352/* Called after the frame is mapped in the arch layer, to update our
353 * local ontology (and do some assertions while we're at it)
354 */
355static void frame_mapped_set(struct z_page_frame *pf, void *addr)
Andrew Boie8ccec8e2020-12-16 18:56:02 -0800356{
Andrew Boiee35f1792020-12-09 12:18:40 -0800357 PF_ASSERT(pf, !z_page_frame_is_reserved(pf),
358 "attempted to map a reserved page frame");
359
360 /* We do allow multiple mappings for pinned page frames
361 * since we will never need to reverse map them.
362 * This is uncommon, use-cases are for things like the
 363 * Zephyr equivalent of VDSOs
364 */
365 PF_ASSERT(pf, !z_page_frame_is_mapped(pf) || z_page_frame_is_pinned(pf),
366 "non-pinned and already mapped to %p", pf->addr);
367
368 pf->flags |= Z_PAGE_FRAME_MAPPED;
369 pf->addr = addr;
Andrew Boiee35f1792020-12-09 12:18:40 -0800370}
371
Daniel Leung085d3762021-04-15 18:44:56 -0700372/* Go through page frames to find the physical address mapped
373 * by a virtual address.
374 *
375 * @param[in] virt Virtual Address
376 * @param[out] phys Physical address mapped to the input virtual address
377 * if such mapping exists.
378 *
379 * @retval 0 if mapping is found and valid
380 * @retval -EFAULT if virtual address is not mapped
381 */
382static int virt_to_page_frame(void *virt, uintptr_t *phys)
383{
384 uintptr_t paddr;
385 struct z_page_frame *pf;
386 int ret = -EFAULT;
387
388 Z_PAGE_FRAME_FOREACH(paddr, pf) {
389 if (z_page_frame_is_mapped(pf)) {
390 if (virt == pf->addr) {
391 ret = 0;
392 *phys = z_page_frame_to_phys(pf);
393 break;
394 }
395 }
396 }
397
398 return ret;
399}
400__weak FUNC_ALIAS(virt_to_page_frame, arch_page_phys_get, int);
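/* Example (sketch): how callers use the alias above to query the physical
 * address backing a mapped virtual address; 'vaddr' is hypothetical.
 *
 *   uintptr_t phys;
 *
 *   if (arch_page_phys_get(vaddr, &phys) == 0) {
 *           // vaddr is currently mapped at physical address 'phys'
 *   } else {
 *           // -EFAULT: no mapping exists for vaddr
 *   }
 */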
401
Andrew Boiea5cb8782020-12-10 13:02:24 -0800402#ifdef CONFIG_DEMAND_PAGING
403static int page_frame_prepare_locked(struct z_page_frame *pf, bool *dirty_ptr,
404 bool page_in, uintptr_t *location_ptr);
Daniel Leung8eea5112021-03-30 14:38:00 -0700405
406static inline void do_backing_store_page_in(uintptr_t location);
407static inline void do_backing_store_page_out(uintptr_t location);
Andrew Boiea5cb8782020-12-10 13:02:24 -0800408#endif /* CONFIG_DEMAND_PAGING */
409
Andrew Boie8ccec8e2020-12-16 18:56:02 -0800410/* Allocate a free page frame, and map it to a specified virtual address
411 *
412 * TODO: Add optional support for copy-on-write mappings to a zero page instead
413 * of allocating, in which case page frames will be allocated lazily as
Andrew Boiea5cb8782020-12-10 13:02:24 -0800414 * the mappings to the zero page get touched. This will avoid expensive
415 * page-ins as memory is mapped and physical RAM or backing store storage will
416 * not be used if the mapped memory is unused. The cost is an empty physical
417 * page of zeroes.
Andrew Boie8ccec8e2020-12-16 18:56:02 -0800418 */
419static int map_anon_page(void *addr, uint32_t flags)
420{
Andrew Boie8ccec8e2020-12-16 18:56:02 -0800421 struct z_page_frame *pf;
422 uintptr_t phys;
Anas Nashifbbbc38b2021-03-29 10:03:49 -0400423 bool lock = (flags & K_MEM_MAP_LOCK) != 0U;
424 bool uninit = (flags & K_MEM_MAP_UNINIT) != 0U;
Andrew Boie8ccec8e2020-12-16 18:56:02 -0800425
426 pf = free_page_frame_list_get();
427 if (pf == NULL) {
Andrew Boiea5cb8782020-12-10 13:02:24 -0800428#ifdef CONFIG_DEMAND_PAGING
429 uintptr_t location;
430 bool dirty;
431 int ret;
432
433 pf = z_eviction_select(&dirty);
434 __ASSERT(pf != NULL, "failed to get a page frame");
435 LOG_DBG("evicting %p at 0x%lx", pf->addr,
436 z_page_frame_to_phys(pf));
437 ret = page_frame_prepare_locked(pf, &dirty, false, &location);
438 if (ret != 0) {
439 return -ENOMEM;
440 }
441 if (dirty) {
Daniel Leung8eea5112021-03-30 14:38:00 -0700442 do_backing_store_page_out(location);
Andrew Boiea5cb8782020-12-10 13:02:24 -0800443 }
444 pf->flags = 0;
445#else
Andrew Boie8ccec8e2020-12-16 18:56:02 -0800446 return -ENOMEM;
Andrew Boiea5cb8782020-12-10 13:02:24 -0800447#endif /* CONFIG_DEMAND_PAGING */
Andrew Boie8ccec8e2020-12-16 18:56:02 -0800448 }
Andrew Boie8ccec8e2020-12-16 18:56:02 -0800449
Andrew Boie299a2cf2020-12-18 12:01:31 -0800450 phys = z_page_frame_to_phys(pf);
451 arch_mem_map(addr, phys, CONFIG_MMU_PAGE_SIZE, flags | K_MEM_CACHE_WB);
452
Andrew Boie8ccec8e2020-12-16 18:56:02 -0800453 if (lock) {
454 pf->flags |= Z_PAGE_FRAME_PINNED;
455 }
456 frame_mapped_set(pf, addr);
457
Andrew Boiea5cb8782020-12-10 13:02:24 -0800458 LOG_DBG("memory mapping anon page %p -> 0x%lx", addr, phys);
459
460 if (!uninit) {
461 /* If we later implement mappings to a copy-on-write
462 * zero page, won't need this step
463 */
464 memset(addr, 0, CONFIG_MMU_PAGE_SIZE);
465 }
466
Andrew Boie8ccec8e2020-12-16 18:56:02 -0800467 return 0;
468}
469
470void *k_mem_map(size_t size, uint32_t flags)
Carlo Caione302a36a2021-02-04 10:01:18 +0100471{
Andrew Boie8ccec8e2020-12-16 18:56:02 -0800472 uint8_t *dst;
Daniel Leungfe48f5a2021-04-14 11:55:47 -0700473 size_t total_size;
Andrew Boie8ccec8e2020-12-16 18:56:02 -0800474 int ret;
475 k_spinlock_key_t key;
Andrew Boie8ccec8e2020-12-16 18:56:02 -0800476 uint8_t *pos;
477
Anas Nashifbbbc38b2021-03-29 10:03:49 -0400478 __ASSERT(!(((flags & K_MEM_PERM_USER) != 0U) &&
479 ((flags & K_MEM_MAP_UNINIT) != 0U)),
Andrew Boie8ccec8e2020-12-16 18:56:02 -0800480 "user access to anonymous uninitialized pages is forbidden");
Anas Nashifbbbc38b2021-03-29 10:03:49 -0400481 __ASSERT(size % CONFIG_MMU_PAGE_SIZE == 0U,
Andrew Boie8ccec8e2020-12-16 18:56:02 -0800482 "unaligned size %zu passed to %s", size, __func__);
483 __ASSERT(size != 0, "zero sized memory mapping");
484 __ASSERT(page_frames_initialized, "%s called too early", __func__);
Anas Nashifbbbc38b2021-03-29 10:03:49 -0400485 __ASSERT((flags & K_MEM_CACHE_MASK) == 0U,
Andrew Boie8ccec8e2020-12-16 18:56:02 -0800486 "%s does not support explicit cache settings", __func__);
487
488 key = k_spin_lock(&z_mm_lock);
489
Daniel Leungfe48f5a2021-04-14 11:55:47 -0700490 /* Need extra for the guard pages (before and after) which we
491 * won't map.
492 */
493 total_size = size + CONFIG_MMU_PAGE_SIZE * 2;
Andrew Boie8ccec8e2020-12-16 18:56:02 -0800494
Daniel Leungc3182902021-04-26 10:53:44 -0700495 dst = virt_region_alloc(total_size);
Andrew Boie8ccec8e2020-12-16 18:56:02 -0800496 if (dst == NULL) {
497 /* Address space has no free region */
498 goto out;
499 }
Daniel Leungfe48f5a2021-04-14 11:55:47 -0700500
501 /* Unmap both guard pages to make sure accessing them
502 * will generate fault.
503 */
504 arch_mem_unmap(dst, CONFIG_MMU_PAGE_SIZE);
505 arch_mem_unmap(dst + CONFIG_MMU_PAGE_SIZE + size,
506 CONFIG_MMU_PAGE_SIZE);
507
508 /* Skip over the "before" guard page in returned address. */
509 dst += CONFIG_MMU_PAGE_SIZE;
Andrew Boie8ccec8e2020-12-16 18:56:02 -0800510
511 VIRT_FOREACH(dst, size, pos) {
512 ret = map_anon_page(pos, flags);
513
514 if (ret != 0) {
515 /* TODO: call k_mem_unmap(dst, pos - dst) when
 516 * implemented in #28990 and release any guard virtual
517 * page as well.
518 */
519 dst = NULL;
520 goto out;
521 }
522 }
Andrew Boie8ccec8e2020-12-16 18:56:02 -0800523out:
524 k_spin_unlock(&z_mm_lock, key);
525 return dst;
526}
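/* Example usage (sketch, assuming K_MEM_PERM_RW is the desired permission):
 * map two anonymous, zero-filled pages and release them with k_mem_unmap()
 * (defined below) when done.
 *
 *   void *buf = k_mem_map(2 * CONFIG_MMU_PAGE_SIZE, K_MEM_PERM_RW);
 *
 *   if (buf != NULL) {
 *           // ... use buf ...
 *           k_mem_unmap(buf, 2 * CONFIG_MMU_PAGE_SIZE);
 *   }
 */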
Andrew Boiee35f1792020-12-09 12:18:40 -0800527
Daniel Leungc254c582021-04-15 12:38:20 -0700528void k_mem_unmap(void *addr, size_t size)
529{
530 uintptr_t phys;
531 uint8_t *pos;
532 struct z_page_frame *pf;
533 k_spinlock_key_t key;
Daniel Leungc3182902021-04-26 10:53:44 -0700534 size_t total_size;
Daniel Leungc254c582021-04-15 12:38:20 -0700535 int ret;
536
537 /* Need space for the "before" guard page */
538 __ASSERT_NO_MSG(POINTER_TO_UINT(addr) >= CONFIG_MMU_PAGE_SIZE);
539
540 /* Make sure address range is still valid after accounting
541 * for two guard pages.
542 */
543 pos = (uint8_t *)addr - CONFIG_MMU_PAGE_SIZE;
544 z_mem_assert_virtual_region(pos, size + (CONFIG_MMU_PAGE_SIZE * 2));
545
546 key = k_spin_lock(&z_mm_lock);
547
548 /* Check if both guard pages are unmapped.
549 * Bail if not, as this is probably a region not mapped
550 * using k_mem_map().
551 */
552 pos = addr;
553 ret = arch_page_phys_get(pos - CONFIG_MMU_PAGE_SIZE, NULL);
554 if (ret == 0) {
 555 __ASSERT(ret != 0,
556 "%s: cannot find preceding guard page for (%p, %zu)",
557 __func__, addr, size);
558 goto out;
559 }
560
561 ret = arch_page_phys_get(pos + size, NULL);
562 if (ret == 0) {
 563 __ASSERT(ret != 0,
564 "%s: cannot find succeeding guard page for (%p, %zu)",
565 __func__, addr, size);
566 goto out;
567 }
568
569 VIRT_FOREACH(addr, size, pos) {
570 ret = arch_page_phys_get(pos, &phys);
571
572 __ASSERT(ret == 0,
573 "%s: cannot unmap an unmapped address %p",
574 __func__, pos);
575 if (ret != 0) {
576 /* Found an address not mapped. Do not continue. */
577 goto out;
578 }
579
580 __ASSERT(z_is_page_frame(phys),
581 "%s: 0x%lx is not a page frame", __func__, phys);
582 if (!z_is_page_frame(phys)) {
583 /* Physical address has no corresponding page frame
584 * description in the page frame array.
585 * This should not happen. Do not continue.
586 */
587 goto out;
588 }
589
590 /* Grab the corresponding page frame from physical address */
591 pf = z_phys_to_page_frame(phys);
592
593 __ASSERT(z_page_frame_is_mapped(pf),
594 "%s: 0x%lx is not a mapped page frame", __func__, phys);
595 if (!z_page_frame_is_mapped(pf)) {
596 /* Page frame is not marked mapped.
597 * This should not happen. Do not continue.
598 */
599 goto out;
600 }
601
602 arch_mem_unmap(pos, CONFIG_MMU_PAGE_SIZE);
603
604 /* Put the page frame back into free list */
605 page_frame_free_locked(pf);
606 }
607
Daniel Leungc3182902021-04-26 10:53:44 -0700608 /* There are guard pages just before and after the mapped
609 * region. So we also need to free them from the bitmap.
610 */
611 pos = (uint8_t *)addr - CONFIG_MMU_PAGE_SIZE;
612 total_size = size + CONFIG_MMU_PAGE_SIZE * 2;
613 virt_region_free(pos, total_size);
614
Daniel Leungc254c582021-04-15 12:38:20 -0700615out:
616 k_spin_unlock(&z_mm_lock, key);
617}
618
Andrew Boie5db615b2020-12-18 11:50:58 -0800619size_t k_mem_free_get(void)
620{
621 size_t ret;
622 k_spinlock_key_t key;
623
624 __ASSERT(page_frames_initialized, "%s called too early", __func__);
625
626 key = k_spin_lock(&z_mm_lock);
627 ret = z_free_page_count;
628 k_spin_unlock(&z_mm_lock, key);
629
Anas Nashifbbbc38b2021-03-29 10:03:49 -0400630 return ret * (size_t)CONFIG_MMU_PAGE_SIZE;
Andrew Boie5db615b2020-12-18 11:50:58 -0800631}
632
Andrew Boiee35f1792020-12-09 12:18:40 -0800633/* This may be called from arch early boot code before z_cstart() is invoked.
634 * Data will be copied and BSS zeroed, but this must not rely on any
 635 * initialization functions having been called beforehand in order to work correctly.
636 */
637void z_phys_map(uint8_t **virt_ptr, uintptr_t phys, size_t size, uint32_t flags)
638{
639 uintptr_t aligned_phys, addr_offset;
640 size_t aligned_size;
Andrew Boiee35f1792020-12-09 12:18:40 -0800641 k_spinlock_key_t key;
642 uint8_t *dest_addr;
643
644 addr_offset = k_mem_region_align(&aligned_phys, &aligned_size,
645 phys, size,
646 CONFIG_MMU_PAGE_SIZE);
Anas Nashifbbbc38b2021-03-29 10:03:49 -0400647 __ASSERT(aligned_size != 0U, "0-length mapping at 0x%lx", aligned_phys);
Andrew Boiee35f1792020-12-09 12:18:40 -0800648 __ASSERT(aligned_phys < (aligned_phys + (aligned_size - 1)),
649 "wraparound for physical address 0x%lx (size %zu)",
650 aligned_phys, aligned_size);
651
652 key = k_spin_lock(&z_mm_lock);
653 /* Obtain an appropriately sized chunk of virtual memory */
Daniel Leungc3182902021-04-26 10:53:44 -0700654 dest_addr = virt_region_alloc(aligned_size);
Andrew Boiee35f1792020-12-09 12:18:40 -0800655 if (!dest_addr) {
656 goto fail;
657 }
658
 659 /* If this fails there's something amiss with virt_region_alloc() */
660 __ASSERT((uintptr_t)dest_addr <
661 ((uintptr_t)dest_addr + (size - 1)),
662 "wraparound for virtual address %p (size %zu)",
663 dest_addr, size);
664
665 LOG_DBG("arch_mem_map(%p, 0x%lx, %zu, %x) offset %lu", dest_addr,
666 aligned_phys, aligned_size, flags, addr_offset);
667
Andrew Boie299a2cf2020-12-18 12:01:31 -0800668 arch_mem_map(dest_addr, aligned_phys, aligned_size, flags);
Andrew Boiee35f1792020-12-09 12:18:40 -0800669 k_spin_unlock(&z_mm_lock, key);
670
671 *virt_ptr = dest_addr + addr_offset;
672 return;
673fail:
674 /* May re-visit this in the future, but for now running out of
675 * virtual address space or failing the arch_mem_map() call is
676 * an unrecoverable situation.
677 *
678 * Other problems not related to resource exhaustion we leave as
679 * assertions since they are clearly programming mistakes.
680 */
681 LOG_ERR("memory mapping 0x%lx (size %zu, flags 0x%x) failed",
682 phys, size, flags);
683 k_panic();
684}
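/* Example (sketch): map a device MMIO region into virtual memory; the
 * physical base address and size below are placeholders, not a real device.
 *
 *   uint8_t *regs;
 *
 *   z_phys_map(&regs, 0xE0000000UL, 0x1000,
 *              K_MEM_PERM_RW | K_MEM_CACHE_NONE);
 *   // 'regs' now points at the start of the newly mapped region
 */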
685
Daniel Leunge6df25f2021-04-14 12:57:03 -0700686void z_phys_unmap(uint8_t *virt, size_t size)
687{
688 uintptr_t aligned_virt, addr_offset;
689 size_t aligned_size;
690 k_spinlock_key_t key;
691
692 addr_offset = k_mem_region_align(&aligned_virt, &aligned_size,
693 POINTER_TO_UINT(virt), size,
694 CONFIG_MMU_PAGE_SIZE);
695 __ASSERT(aligned_size != 0U, "0-length mapping at 0x%lx", aligned_virt);
696 __ASSERT(aligned_virt < (aligned_virt + (aligned_size - 1)),
697 "wraparound for virtual address 0x%lx (size %zu)",
698 aligned_virt, aligned_size);
699
700 key = k_spin_lock(&z_mm_lock);
701 arch_mem_unmap(UINT_TO_POINTER(aligned_virt), aligned_size);
Daniel Leungc3182902021-04-26 10:53:44 -0700702 virt_region_free(virt, size);
Daniel Leunge6df25f2021-04-14 12:57:03 -0700703 k_spin_unlock(&z_mm_lock, key);
704}
705
Andrew Boiee35f1792020-12-09 12:18:40 -0800706/*
707 * Miscellaneous
708 */
709
Anas Nashif25c87db2021-03-29 10:54:23 -0400710size_t k_mem_region_align(uintptr_t *aligned_addr, size_t *aligned_size,
711 uintptr_t addr, size_t size, size_t align)
Andrew Boie06cf6d22020-06-26 16:17:00 -0700712{
713 size_t addr_offset;
714
715 /* The actual mapped region must be page-aligned. Round down the
716 * physical address and pad the region size appropriately
717 */
Anas Nashif25c87db2021-03-29 10:54:23 -0400718 *aligned_addr = ROUND_DOWN(addr, align);
719 addr_offset = addr - *aligned_addr;
Andrew Boie06cf6d22020-06-26 16:17:00 -0700720 *aligned_size = ROUND_UP(size + addr_offset, align);
721
722 return addr_offset;
723}
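/* Worked example with 4 KiB pages (illustrative values):
 *
 *   k_mem_region_align(&aligned_addr, &aligned_size,
 *                      0x12345678, 0x1000, 0x1000)
 *
 * yields aligned_addr == 0x12345000, aligned_size == 0x2000 and returns
 * 0x678, since the unaligned 4 KiB request straddles two pages once the
 * base is rounded down.
 */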
724
Andrew Boiee35f1792020-12-09 12:18:40 -0800725void z_mem_manage_init(void)
Andrew Boie06cf6d22020-06-26 16:17:00 -0700726{
Andrew Boiee35f1792020-12-09 12:18:40 -0800727 uintptr_t phys;
728 uint8_t *addr;
729 struct z_page_frame *pf;
730 k_spinlock_key_t key = k_spin_lock(&z_mm_lock);
Andrew Boie06cf6d22020-06-26 16:17:00 -0700731
Andrew Boiee35f1792020-12-09 12:18:40 -0800732 free_page_frame_list_init();
Andrew Boie06cf6d22020-06-26 16:17:00 -0700733
Andrew Boiee35f1792020-12-09 12:18:40 -0800734#ifdef CONFIG_ARCH_HAS_RESERVED_PAGE_FRAMES
735 /* If some page frames are unavailable for use as memory, arch
736 * code will mark Z_PAGE_FRAME_RESERVED in their flags
Andrew Boie06cf6d22020-06-26 16:17:00 -0700737 */
Andrew Boiee35f1792020-12-09 12:18:40 -0800738 arch_reserved_pages_update();
739#endif /* CONFIG_ARCH_HAS_RESERVED_PAGE_FRAMES */
740
741 /* All pages composing the Zephyr image are mapped at boot in a
742 * predictable way. This can change at runtime.
743 */
744 VIRT_FOREACH(Z_KERNEL_VIRT_START, Z_KERNEL_VIRT_SIZE, addr)
745 {
Andrew Boie14c5d1f2021-01-23 14:08:12 -0800746 pf = z_phys_to_page_frame(Z_BOOT_VIRT_TO_PHYS(addr));
Andrew Boie611b6262020-12-18 16:06:09 -0800747 frame_mapped_set(pf, addr);
748
749 /* TODO: for now we pin the whole Zephyr image. Demand paging
750 * currently tested with anonymously-mapped pages which are not
751 * pinned.
752 *
753 * We will need to setup linker regions for a subset of kernel
754 * code/data pages which are pinned in memory and
755 * may not be evicted. This will contain critical CPU data
756 * structures, and any code used to perform page fault
757 * handling, page-ins, etc.
758 */
759 pf->flags |= Z_PAGE_FRAME_PINNED;
Andrew Boiee4334942020-07-15 14:56:24 -0700760 }
Andrew Boie06cf6d22020-06-26 16:17:00 -0700761
Daniel Leung1310ad62021-02-23 13:33:38 -0800762#ifdef CONFIG_LINKER_USE_PINNED_SECTION
 763 /* Pin the page frames corresponding to the pinned symbols */
764 uintptr_t pinned_start = ROUND_DOWN(POINTER_TO_UINT(lnkr_pinned_start),
765 CONFIG_MMU_PAGE_SIZE);
766 uintptr_t pinned_end = ROUND_UP(POINTER_TO_UINT(lnkr_pinned_end),
767 CONFIG_MMU_PAGE_SIZE);
768 size_t pinned_size = pinned_end - pinned_start;
769
770 VIRT_FOREACH(UINT_TO_POINTER(pinned_start), pinned_size, addr)
771 {
772 pf = z_phys_to_page_frame(Z_BOOT_VIRT_TO_PHYS(addr));
773 frame_mapped_set(pf, addr);
774
775 pf->flags |= Z_PAGE_FRAME_PINNED;
776 }
777#endif
778
Andrew Boiee35f1792020-12-09 12:18:40 -0800779 /* Any remaining pages that aren't mapped, reserved, or pinned get
780 * added to the free pages list
781 */
782 Z_PAGE_FRAME_FOREACH(phys, pf) {
783 if (z_page_frame_is_available(pf)) {
784 free_page_frame_list_put(pf);
785 }
Andrew Boie06cf6d22020-06-26 16:17:00 -0700786 }
Andrew Boiee35f1792020-12-09 12:18:40 -0800787 LOG_DBG("free page frames: %zu", z_free_page_count);
Andrew Boiea5cb8782020-12-10 13:02:24 -0800788
789#ifdef CONFIG_DEMAND_PAGING
Daniel Leung8eea5112021-03-30 14:38:00 -0700790#ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
791 z_paging_histogram_init();
792#endif
Andrew Boiea5cb8782020-12-10 13:02:24 -0800793 z_backing_store_init();
794 z_eviction_init();
795#endif
Andrew Boiee35f1792020-12-09 12:18:40 -0800796#if __ASSERT_ON
797 page_frames_initialized = true;
798#endif
799 k_spin_unlock(&z_mm_lock, key);
Andrew Boie06cf6d22020-06-26 16:17:00 -0700800}
Andrew Boiea5cb8782020-12-10 13:02:24 -0800801
802#ifdef CONFIG_DEMAND_PAGING
Daniel Leungae865192021-03-26 12:03:42 -0700803
804#ifdef CONFIG_DEMAND_PAGING_STATS
805struct k_mem_paging_stats_t paging_stats;
Daniel Leung8eea5112021-03-30 14:38:00 -0700806extern struct k_mem_paging_histogram_t z_paging_histogram_eviction;
807extern struct k_mem_paging_histogram_t z_paging_histogram_backing_store_page_in;
808extern struct k_mem_paging_histogram_t z_paging_histogram_backing_store_page_out;
Daniel Leungae865192021-03-26 12:03:42 -0700809#endif
Andrew Boie60d30662021-01-14 12:22:18 -0800810
Daniel Leung8eea5112021-03-30 14:38:00 -0700811static inline void do_backing_store_page_in(uintptr_t location)
812{
813#ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
814 uint32_t time_diff;
Daniel Leung09e8db32021-03-31 13:56:05 -0700815
816#ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS
817 timing_t time_start, time_end;
818
819 time_start = timing_counter_get();
820#else
Daniel Leung8eea5112021-03-30 14:38:00 -0700821 uint32_t time_start;
822
823 time_start = k_cycle_get_32();
Daniel Leung09e8db32021-03-31 13:56:05 -0700824#endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */
Daniel Leung8eea5112021-03-30 14:38:00 -0700825#endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */
826
827 z_backing_store_page_in(location);
828
829#ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
Daniel Leung09e8db32021-03-31 13:56:05 -0700830#ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS
831 time_end = timing_counter_get();
832 time_diff = (uint32_t)timing_cycles_get(&time_start, &time_end);
833#else
Daniel Leung8eea5112021-03-30 14:38:00 -0700834 time_diff = k_cycle_get_32() - time_start;
Daniel Leung09e8db32021-03-31 13:56:05 -0700835#endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */
Daniel Leung8eea5112021-03-30 14:38:00 -0700836
837 z_paging_histogram_inc(&z_paging_histogram_backing_store_page_in,
838 time_diff);
839#endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */
840}
841
842static inline void do_backing_store_page_out(uintptr_t location)
843{
844#ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
845 uint32_t time_diff;
Daniel Leung09e8db32021-03-31 13:56:05 -0700846
847#ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS
848 timing_t time_start, time_end;
849
850 time_start = timing_counter_get();
851#else
Daniel Leung8eea5112021-03-30 14:38:00 -0700852 uint32_t time_start;
853
854 time_start = k_cycle_get_32();
Daniel Leung09e8db32021-03-31 13:56:05 -0700855#endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */
Daniel Leung8eea5112021-03-30 14:38:00 -0700856#endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */
857
858 z_backing_store_page_out(location);
859
860#ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
Daniel Leung09e8db32021-03-31 13:56:05 -0700861#ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS
862 time_end = timing_counter_get();
863 time_diff = (uint32_t)timing_cycles_get(&time_start, &time_end);
864#else
Daniel Leung8eea5112021-03-30 14:38:00 -0700865 time_diff = k_cycle_get_32() - time_start;
Daniel Leung09e8db32021-03-31 13:56:05 -0700866#endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */
Daniel Leung8eea5112021-03-30 14:38:00 -0700867
868 z_paging_histogram_inc(&z_paging_histogram_backing_store_page_out,
869 time_diff);
870#endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */
871}
872
Andrew Boiea5cb8782020-12-10 13:02:24 -0800873/* Current implementation relies on interrupt locking to prevent any page table
874 * access, which falls over if other CPUs are active. Addressing this is not
875 * as simple as using spinlocks as regular memory reads/writes constitute
876 * "access" in this sense.
877 *
878 * Current needs for demand paging are on uniprocessor systems.
879 */
880BUILD_ASSERT(!IS_ENABLED(CONFIG_SMP));
881
882static void virt_region_foreach(void *addr, size_t size,
883 void (*func)(void *))
884{
885 z_mem_assert_virtual_region(addr, size);
886
887 for (size_t offset = 0; offset < size; offset += CONFIG_MMU_PAGE_SIZE) {
888 func((uint8_t *)addr + offset);
889 }
890}
891
Andrew Boiea5cb8782020-12-10 13:02:24 -0800892/*
893 * Perform some preparatory steps before paging out. The provided page frame
 894 * must be evicted to the backing store immediately afterwards, via a call
 895 * to z_backing_store_page_out(), if it contains a data page.
896 *
897 * - Map page frame to scratch area if requested. This always is true if we're
898 * doing a page fault, but is only set on manual evictions if the page is
899 * dirty.
900 * - If mapped:
901 * - obtain backing store location and populate location parameter
902 * - Update page tables with location
903 * - Mark page frame as busy
904 *
905 * Returns -ENOMEM if the backing store is full
906 */
907static int page_frame_prepare_locked(struct z_page_frame *pf, bool *dirty_ptr,
Andrew Boiec7be5dd2021-01-15 12:07:45 -0800908 bool page_fault, uintptr_t *location_ptr)
Andrew Boiea5cb8782020-12-10 13:02:24 -0800909{
910 uintptr_t phys;
911 int ret;
912 bool dirty = *dirty_ptr;
913
914 phys = z_page_frame_to_phys(pf);
915 __ASSERT(!z_page_frame_is_pinned(pf), "page frame 0x%lx is pinned",
916 phys);
917
918 /* If the backing store doesn't have a copy of the page, even if it
919 * wasn't modified, treat as dirty. This can happen for a few
920 * reasons:
921 * 1) Page has never been swapped out before, and the backing store
922 * wasn't pre-populated with this data page.
923 * 2) Page was swapped out before, but the page contents were not
924 * preserved after swapping back in.
925 * 3) Page contents were preserved when swapped back in, but were later
926 * evicted from the backing store to make room for other evicted
927 * pages.
928 */
929 if (z_page_frame_is_mapped(pf)) {
930 dirty = dirty || !z_page_frame_is_backed(pf);
931 }
932
Andrew Boiec7be5dd2021-01-15 12:07:45 -0800933 if (dirty || page_fault) {
Andrew Boiea5cb8782020-12-10 13:02:24 -0800934 arch_mem_scratch(phys);
935 }
936
937 if (z_page_frame_is_mapped(pf)) {
Andrew Boiec7be5dd2021-01-15 12:07:45 -0800938 ret = z_backing_store_location_get(pf, location_ptr,
939 page_fault);
Andrew Boiea5cb8782020-12-10 13:02:24 -0800940 if (ret != 0) {
941 LOG_ERR("out of backing store memory");
942 return -ENOMEM;
943 }
944 arch_mem_page_out(pf->addr, *location_ptr);
945 } else {
946 /* Shouldn't happen unless this function is mis-used */
947 __ASSERT(!dirty, "un-mapped page determined to be dirty");
948 }
949#ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
950 /* Mark as busy so that z_page_frame_is_evictable() returns false */
951 __ASSERT(!z_page_frame_is_busy(pf), "page frame 0x%lx is already busy",
952 phys);
953 pf->flags |= Z_PAGE_FRAME_BUSY;
954#endif
955 /* Update dirty parameter, since we set to true if it wasn't backed
956 * even if otherwise clean
957 */
958 *dirty_ptr = dirty;
959
960 return 0;
961}
962
963static int do_mem_evict(void *addr)
964{
965 bool dirty;
966 struct z_page_frame *pf;
967 uintptr_t location;
968 int key, ret;
969 uintptr_t flags, phys;
970
971#if CONFIG_DEMAND_PAGING_ALLOW_IRQ
972 __ASSERT(!k_is_in_isr(),
973 "%s is unavailable in ISRs with CONFIG_DEMAND_PAGING_ALLOW_IRQ",
974 __func__);
975 k_sched_lock();
976#endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
977 key = irq_lock();
978 flags = arch_page_info_get(addr, &phys, false);
979 __ASSERT((flags & ARCH_DATA_PAGE_NOT_MAPPED) == 0,
980 "address %p isn't mapped", addr);
981 if ((flags & ARCH_DATA_PAGE_LOADED) == 0) {
982 /* Un-mapped or already evicted. Nothing to do */
983 ret = 0;
984 goto out;
985 }
986
987 dirty = (flags & ARCH_DATA_PAGE_DIRTY) != 0;
988 pf = z_phys_to_page_frame(phys);
989 __ASSERT(pf->addr == addr, "page frame address mismatch");
990 ret = page_frame_prepare_locked(pf, &dirty, false, &location);
991 if (ret != 0) {
992 goto out;
993 }
994
995 __ASSERT(ret == 0, "failed to prepare page frame");
996#ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
997 irq_unlock(key);
998#endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
999 if (dirty) {
Daniel Leung8eea5112021-03-30 14:38:00 -07001000 do_backing_store_page_out(location);
Andrew Boiea5cb8782020-12-10 13:02:24 -08001001 }
1002#ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1003 key = irq_lock();
1004#endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1005 page_frame_free_locked(pf);
1006out:
1007 irq_unlock(key);
1008#ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1009 k_sched_unlock();
1010#endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1011 return ret;
1012}
1013
Andrew Boie6c97ab32021-01-20 17:03:13 -08001014int k_mem_page_out(void *addr, size_t size)
Andrew Boiea5cb8782020-12-10 13:02:24 -08001015{
1016 __ASSERT(page_frames_initialized, "%s called on %p too early", __func__,
1017 addr);
1018 z_mem_assert_virtual_region(addr, size);
1019
1020 for (size_t offset = 0; offset < size; offset += CONFIG_MMU_PAGE_SIZE) {
1021 void *pos = (uint8_t *)addr + offset;
1022 int ret;
1023
1024 ret = do_mem_evict(pos);
1025 if (ret != 0) {
1026 return ret;
1027 }
1028 }
1029
1030 return 0;
1031}
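/* Example (sketch): proactively evict a rarely-used buffer and fault it back
 * in before the next use; 'buf' and 'len' are hypothetical and must both be
 * page-aligned.
 *
 *   if (k_mem_page_out(buf, len) != 0) {
 *           // backing store full; the pages remain resident
 *   }
 *   // ... later ...
 *   k_mem_page_in(buf, len);   // page everything back in ahead of use
 */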
1032
1033int z_page_frame_evict(uintptr_t phys)
1034{
1035 int key, ret;
1036 struct z_page_frame *pf;
1037 bool dirty;
1038 uintptr_t flags;
1039 uintptr_t location;
1040
1041 __ASSERT(page_frames_initialized, "%s called on 0x%lx too early",
1042 __func__, phys);
1043
1044 /* Implementation is similar to do_page_fault() except there is no
1045 * data page to page-in, see comments in that function.
1046 */
1047
1048#ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1049 __ASSERT(!k_is_in_isr(),
1050 "%s is unavailable in ISRs with CONFIG_DEMAND_PAGING_ALLOW_IRQ",
1051 __func__);
1052 k_sched_lock();
1053#endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1054 key = irq_lock();
1055 pf = z_phys_to_page_frame(phys);
1056 if (!z_page_frame_is_mapped(pf)) {
1057 /* Nothing to do, free page */
1058 ret = 0;
1059 goto out;
1060 }
1061 flags = arch_page_info_get(pf->addr, NULL, false);
1062 /* Shouldn't ever happen */
1063 __ASSERT((flags & ARCH_DATA_PAGE_LOADED) != 0, "data page not loaded");
1064 dirty = (flags & ARCH_DATA_PAGE_DIRTY) != 0;
1065 ret = page_frame_prepare_locked(pf, &dirty, false, &location);
1066 if (ret != 0) {
1067 goto out;
1068 }
1069
1070#ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1071 irq_unlock(key);
1072#endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1073 if (dirty) {
Daniel Leung8eea5112021-03-30 14:38:00 -07001074 do_backing_store_page_out(location);
Andrew Boiea5cb8782020-12-10 13:02:24 -08001075 }
1076#ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1077 key = irq_lock();
1078#endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1079 page_frame_free_locked(pf);
1080out:
1081 irq_unlock(key);
1082#ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1083 k_sched_unlock();
1084#endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1085 return ret;
1086}
1087
Daniel Leungae865192021-03-26 12:03:42 -07001088static inline void paging_stats_faults_inc(struct k_thread *faulting_thread,
1089 int key)
1090{
1091#ifdef CONFIG_DEMAND_PAGING_STATS
1092 bool is_irq_unlocked = arch_irq_unlocked(key);
1093
1094 paging_stats.pagefaults.cnt++;
1095
1096 if (is_irq_unlocked) {
1097 paging_stats.pagefaults.irq_unlocked++;
1098 } else {
1099 paging_stats.pagefaults.irq_locked++;
1100 }
1101
1102#ifdef CONFIG_DEMAND_PAGING_THREAD_STATS
1103 faulting_thread->paging_stats.pagefaults.cnt++;
1104
1105 if (is_irq_unlocked) {
1106 faulting_thread->paging_stats.pagefaults.irq_unlocked++;
1107 } else {
1108 faulting_thread->paging_stats.pagefaults.irq_locked++;
1109 }
1110#else
1111 ARG_UNUSED(faulting_thread);
1112#endif
1113
1114#ifndef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1115 if (k_is_in_isr()) {
1116 paging_stats.pagefaults.in_isr++;
1117
1118#ifdef CONFIG_DEMAND_PAGING_THREAD_STATS
1119 faulting_thread->paging_stats.pagefaults.in_isr++;
1120#endif
1121 }
1122#endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1123#endif /* CONFIG_DEMAND_PAGING_STATS */
1124}
1125
1126static inline void paging_stats_eviction_inc(struct k_thread *faulting_thread,
1127 bool dirty)
1128{
1129#ifdef CONFIG_DEMAND_PAGING_STATS
1130 if (dirty) {
1131 paging_stats.eviction.dirty++;
1132 } else {
1133 paging_stats.eviction.clean++;
1134 }
1135#ifdef CONFIG_DEMAND_PAGING_THREAD_STATS
1136 if (dirty) {
1137 faulting_thread->paging_stats.eviction.dirty++;
1138 } else {
1139 faulting_thread->paging_stats.eviction.clean++;
1140 }
1141#else
1142 ARG_UNUSED(faulting_thread);
1143#endif /* CONFIG_DEMAND_PAGING_THREAD_STATS */
1144#endif /* CONFIG_DEMAND_PAGING_STATS */
1145}
1146
Daniel Leung8eea5112021-03-30 14:38:00 -07001147static inline struct z_page_frame *do_eviction_select(bool *dirty)
1148{
1149 struct z_page_frame *pf;
1150
1151#ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
Daniel Leung8eea5112021-03-30 14:38:00 -07001152 uint32_t time_diff;
Daniel Leung09e8db32021-03-31 13:56:05 -07001153
1154#ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS
1155 timing_t time_start, time_end;
1156
1157 time_start = timing_counter_get();
1158#else
1159 uint32_t time_start;
1160
1161 time_start = k_cycle_get_32();
1162#endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */
Daniel Leung8eea5112021-03-30 14:38:00 -07001163#endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */
1164
1165 pf = z_eviction_select(dirty);
1166
1167#ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
Daniel Leung09e8db32021-03-31 13:56:05 -07001168#ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS
1169 time_end = timing_counter_get();
1170 time_diff = (uint32_t)timing_cycles_get(&time_start, &time_end);
1171#else
Daniel Leung8eea5112021-03-30 14:38:00 -07001172 time_diff = k_cycle_get_32() - time_start;
Daniel Leung09e8db32021-03-31 13:56:05 -07001173#endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */
1174
Daniel Leung8eea5112021-03-30 14:38:00 -07001175 z_paging_histogram_inc(&z_paging_histogram_eviction, time_diff);
1176#endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */
1177
1178 return pf;
1179}
1180
Andrew Boiea5cb8782020-12-10 13:02:24 -08001181static bool do_page_fault(void *addr, bool pin)
1182{
1183 struct z_page_frame *pf;
1184 int key, ret;
1185 uintptr_t page_in_location, page_out_location;
1186 enum arch_page_location status;
1187 bool result;
1188 bool dirty = false;
Daniel Leungae865192021-03-26 12:03:42 -07001189 struct k_thread *faulting_thread = _current_cpu->current;
Andrew Boiea5cb8782020-12-10 13:02:24 -08001190
1191 __ASSERT(page_frames_initialized, "page fault at %p happened too early",
1192 addr);
1193
1194 LOG_DBG("page fault at %p", addr);
1195
1196 /*
1197 * TODO: Add performance accounting:
Andrew Boiea5cb8782020-12-10 13:02:24 -08001198 * - z_eviction_select() metrics
Andrew Boiea5cb8782020-12-10 13:02:24 -08001199 * * periodic timer execution time histogram (if implemented)
Andrew Boiea5cb8782020-12-10 13:02:24 -08001200 */
1201
1202#ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1203 /* We lock the scheduler so that other threads are never scheduled
1204 * during the page-in/out operation.
1205 *
1206 * We do however re-enable interrupts during the page-in/page-out
1207 * operation iff interrupts were enabled when the exception was taken;
1208 * in this configuration page faults in an ISR are a bug; all their
1209 * code/data must be pinned.
1210 *
1211 * If interrupts were disabled when the exception was taken, the
1212 * arch code is responsible for keeping them that way when entering
1213 * this function.
1214 *
1215 * If this is not enabled, then interrupts are always locked for the
1216 * entire operation. This is far worse for system interrupt latency
1217 * but requires less pinned pages and ISRs may also take page faults.
1218 *
1219 * Support for allowing z_backing_store_page_out() and
1220 * z_backing_store_page_in() to also sleep and allow other threads to
1221 * run (such as in the case where the transfer is async DMA) is not
1222 * implemented. Even if limited to thread context, arbitrary memory
1223 * access triggering exceptions that put a thread to sleep on a
1224 * contended page fault operation will break scheduling assumptions of
 1225 * cooperative threads or threads that implement critical sections with
1226 * spinlocks or disabling IRQs.
1227 */
1228 k_sched_lock();
1229 __ASSERT(!k_is_in_isr(), "ISR page faults are forbidden");
1230#endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1231
1232 key = irq_lock();
1233 status = arch_page_location_get(addr, &page_in_location);
1234 if (status == ARCH_PAGE_LOCATION_BAD) {
1235 /* Return false to treat as a fatal error */
1236 result = false;
1237 goto out;
1238 }
1239 result = true;
Daniel Leungae865192021-03-26 12:03:42 -07001240
1241 paging_stats_faults_inc(faulting_thread, key);
1242
Andrew Boiea5cb8782020-12-10 13:02:24 -08001243 if (status == ARCH_PAGE_LOCATION_PAGED_IN) {
1244 if (pin) {
1245 /* It's a physical memory address */
1246 uintptr_t phys = page_in_location;
1247
1248 pf = z_phys_to_page_frame(phys);
1249 pf->flags |= Z_PAGE_FRAME_PINNED;
1250 }
1251 /* We raced before locking IRQs, re-try */
1252 goto out;
1253 }
1254 __ASSERT(status == ARCH_PAGE_LOCATION_PAGED_OUT,
1255 "unexpected status value %d", status);
1256
1257 pf = free_page_frame_list_get();
1258 if (pf == NULL) {
1259 /* Need to evict a page frame */
Daniel Leung8eea5112021-03-30 14:38:00 -07001260 pf = do_eviction_select(&dirty);
Andrew Boiea5cb8782020-12-10 13:02:24 -08001261 __ASSERT(pf != NULL, "failed to get a page frame");
1262 LOG_DBG("evicting %p at 0x%lx", pf->addr,
1263 z_page_frame_to_phys(pf));
Daniel Leungae865192021-03-26 12:03:42 -07001264
1265 paging_stats_eviction_inc(faulting_thread, dirty);
Andrew Boiea5cb8782020-12-10 13:02:24 -08001266 }
1267 ret = page_frame_prepare_locked(pf, &dirty, true, &page_out_location);
1268 __ASSERT(ret == 0, "failed to prepare page frame");
1269
1270#ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1271 irq_unlock(key);
1272 /* Interrupts are now unlocked if they were not locked when we entered
1273 * this function, and we may service ISRs. The scheduler is still
1274 * locked.
1275 */
1276#endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1277 if (dirty) {
Daniel Leung8eea5112021-03-30 14:38:00 -07001278 do_backing_store_page_out(page_out_location);
Andrew Boiea5cb8782020-12-10 13:02:24 -08001279 }
Daniel Leung8eea5112021-03-30 14:38:00 -07001280 do_backing_store_page_in(page_in_location);
Andrew Boiea5cb8782020-12-10 13:02:24 -08001281
1282#ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1283 key = irq_lock();
1284 pf->flags &= ~Z_PAGE_FRAME_BUSY;
1285#endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1286 if (pin) {
1287 pf->flags |= Z_PAGE_FRAME_PINNED;
1288 }
1289 pf->flags |= Z_PAGE_FRAME_MAPPED;
1290 pf->addr = addr;
1291 arch_mem_page_in(addr, z_page_frame_to_phys(pf));
1292 z_backing_store_page_finalize(pf, page_in_location);
1293out:
1294 irq_unlock(key);
1295#ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1296 k_sched_unlock();
1297#endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1298
1299 return result;
1300}
1301
1302static void do_page_in(void *addr)
1303{
1304 bool ret;
1305
1306 ret = do_page_fault(addr, false);
1307 __ASSERT(ret, "unmapped memory address %p", addr);
1308 (void)ret;
1309}
1310
Andrew Boie6c97ab32021-01-20 17:03:13 -08001311void k_mem_page_in(void *addr, size_t size)
Andrew Boiea5cb8782020-12-10 13:02:24 -08001312{
1313 __ASSERT(!IS_ENABLED(CONFIG_DEMAND_PAGING_ALLOW_IRQ) || !k_is_in_isr(),
1314 "%s may not be called in ISRs if CONFIG_DEMAND_PAGING_ALLOW_IRQ is enabled",
1315 __func__);
1316 virt_region_foreach(addr, size, do_page_in);
1317}
1318
1319static void do_mem_pin(void *addr)
1320{
1321 bool ret;
1322
1323 ret = do_page_fault(addr, true);
1324 __ASSERT(ret, "unmapped memory address %p", addr);
1325 (void)ret;
1326}
1327
Andrew Boie6c97ab32021-01-20 17:03:13 -08001328void k_mem_pin(void *addr, size_t size)
Andrew Boiea5cb8782020-12-10 13:02:24 -08001329{
1330 __ASSERT(!IS_ENABLED(CONFIG_DEMAND_PAGING_ALLOW_IRQ) || !k_is_in_isr(),
1331 "%s may not be called in ISRs if CONFIG_DEMAND_PAGING_ALLOW_IRQ is enabled",
1332 __func__);
1333 virt_region_foreach(addr, size, do_mem_pin);
1334}
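/* Example (sketch): keep a page-aligned buffer resident while it is accessed
 * from a context that must not take page faults, then release it;
 * 'dma_buf' is hypothetical.
 *
 *   k_mem_pin(dma_buf, sizeof(dma_buf));
 *   // ... the buffer may now be touched without risking a page fault ...
 *   k_mem_unpin(dma_buf, sizeof(dma_buf));
 */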
1335
1336bool z_page_fault(void *addr)
1337{
Daniel Leungae865192021-03-26 12:03:42 -07001338 return do_page_fault(addr, false);
Andrew Boiea5cb8782020-12-10 13:02:24 -08001339}
1340
1341static void do_mem_unpin(void *addr)
1342{
1343 struct z_page_frame *pf;
1344 int key;
1345 uintptr_t flags, phys;
1346
1347 key = irq_lock();
1348 flags = arch_page_info_get(addr, &phys, false);
1349 __ASSERT((flags & ARCH_DATA_PAGE_NOT_MAPPED) == 0,
1350 "invalid data page at %p", addr);
1351 if ((flags & ARCH_DATA_PAGE_LOADED) != 0) {
1352 pf = z_phys_to_page_frame(phys);
1353 pf->flags &= ~Z_PAGE_FRAME_PINNED;
1354 }
1355 irq_unlock(key);
1356}
1357
Andrew Boie6c97ab32021-01-20 17:03:13 -08001358void k_mem_unpin(void *addr, size_t size)
Andrew Boiea5cb8782020-12-10 13:02:24 -08001359{
1360 __ASSERT(page_frames_initialized, "%s called on %p too early", __func__,
1361 addr);
1362 virt_region_foreach(addr, size, do_mem_unpin);
1363}
Daniel Leungae865192021-03-26 12:03:42 -07001364
Andrew Boiea5cb8782020-12-10 13:02:24 -08001365#endif /* CONFIG_DEMAND_PAGING */