/*
 * Copyright (c) 2020 Intel Corporation
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Routines for managing virtual address spaces
 */

#include <stdint.h>
#include <kernel_arch_interface.h>
#include <spinlock.h>
#include <mmu.h>
#include <init.h>
#include <kernel_internal.h>
#include <linker/linker-defs.h>
#include <logging/log.h>
LOG_MODULE_DECLARE(os, CONFIG_KERNEL_LOG_LEVEL);

/*
 * General terminology:
 * - A page frame is a page-sized physical memory region in RAM. It is a
 *   container where a data page may be placed. It is always referred to by
 *   physical address. We have a convention of using uintptr_t for physical
 *   addresses. We instantiate a struct z_page_frame to store metadata for
 *   every page frame.
 *
 * - A data page is a page-sized region of data. It may exist in a page frame,
 *   or be paged out to some backing store. Its location can always be looked
 *   up in the CPU's page tables (or equivalent) by virtual address.
 *   The data type will always be void * or in some cases uint8_t * when we
 *   want to do pointer arithmetic.
 */
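
/* Illustrative sketch (not part of this file's logic): the mmu.h helpers
 * convert between the two representations, and the conversion round-trips
 * for any page-aligned physical address in RAM:
 *
 *	uintptr_t phys = Z_PHYS_RAM_START;
 *	struct z_page_frame *pf = z_phys_to_page_frame(phys);
 *
 *	__ASSERT(z_page_frame_to_phys(pf) == phys, "round trip expected");
 */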

/* Spinlock to protect any globals in this file and serialize page table
 * updates in arch code
 */
struct k_spinlock z_mm_lock;

/*
 * General page frame management
 */

/* Database of all RAM page frames */
struct z_page_frame z_page_frames[Z_NUM_PAGE_FRAMES];

#if __ASSERT_ON
/* Indicator that z_page_frames has been initialized; many of these APIs do
 * not work before POST_KERNEL
 */
static bool page_frames_initialized;
#endif

/* Add colors to page table dumps to indicate mapping type */
#define COLOR_PAGE_FRAMES	1

#if COLOR_PAGE_FRAMES
#define ANSI_DEFAULT	"\x1B[0m"
#define ANSI_RED	"\x1B[1;31m"
#define ANSI_GREEN	"\x1B[1;32m"
#define ANSI_YELLOW	"\x1B[1;33m"
#define ANSI_BLUE	"\x1B[1;34m"
#define ANSI_MAGENTA	"\x1B[1;35m"
#define ANSI_CYAN	"\x1B[1;36m"
#define ANSI_GREY	"\x1B[1;90m"

#define COLOR(x)	printk(_CONCAT(ANSI_, x))
#else
#define COLOR(x)	do { } while (0)
#endif

static void page_frame_dump(struct z_page_frame *pf)
{
	if (z_page_frame_is_reserved(pf)) {
		COLOR(CYAN);
		printk("R");
	} else if (z_page_frame_is_busy(pf)) {
		COLOR(MAGENTA);
		printk("B");
	} else if (z_page_frame_is_pinned(pf)) {
		COLOR(YELLOW);
		printk("P");
	} else if (z_page_frame_is_available(pf)) {
		COLOR(GREY);
		printk(".");
	} else if (z_page_frame_is_mapped(pf)) {
		COLOR(DEFAULT);
		printk("M");
	} else {
		COLOR(RED);
		printk("?");
	}
}

void z_page_frames_dump(void)
{
	int column = 0;

	__ASSERT(page_frames_initialized, "%s called too early", __func__);
	printk("Physical memory from 0x%lx to 0x%lx\n",
	       Z_PHYS_RAM_START, Z_PHYS_RAM_END);

	for (int i = 0; i < Z_NUM_PAGE_FRAMES; i++) {
		struct z_page_frame *pf = &z_page_frames[i];

		page_frame_dump(pf);

		column++;
		if (column == 64) {
			column = 0;
			printk("\n");
		}
	}

	COLOR(DEFAULT);
	if (column != 0) {
		printk("\n");
	}
}

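/* For reference, the dump produced above looks roughly like this on a
 * hypothetical 4MB system (R = reserved, M = mapped, . = available;
 * the exact contents are made up for illustration):
 *
 *	Physical memory from 0x100000 to 0x500000
 *	RRRRRRRRMMMMMMMMMMMMMMMMMMMMMMMM................................
 *	................................................................
 *	...
 */
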
#define VIRT_FOREACH(_base, _size, _pos) \
	for (_pos = _base; \
	     _pos < ((uint8_t *)_base + _size); _pos += CONFIG_MMU_PAGE_SIZE)

#define PHYS_FOREACH(_base, _size, _pos) \
	for (_pos = _base; \
	     _pos < ((uintptr_t)_base + _size); _pos += CONFIG_MMU_PAGE_SIZE)

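/* Illustrative use of the iterators above, e.g. counting the pages spanned
 * by the kernel image (mirrors the VIRT_FOREACH call in z_mem_manage_init()):
 *
 *	uint8_t *pos;
 *	size_t pages = 0;
 *
 *	VIRT_FOREACH(Z_KERNEL_VIRT_START, Z_KERNEL_VIRT_SIZE, pos) {
 *		pages++;
 *	}
 */
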
/*
 * Virtual address space management
 *
 * Call all of these functions with z_mm_lock held.
 *
 * Overall virtual memory map: When the kernel starts, it resides in
 * virtual memory in the region Z_BOOT_KERNEL_VIRT_START to
 * Z_BOOT_KERNEL_VIRT_END. Unused virtual memory past this, up to the limit
 * noted by CONFIG_KERNEL_VM_SIZE, may be used for runtime memory mappings.
 *
 * +--------------+ <- Z_VIRT_ADDR_START
 * | Undefined VM | <- May contain ancillary regions like x86_64's locore
 * +--------------+ <- Z_BOOT_KERNEL_VIRT_START (often == Z_VIRT_ADDR_START)
 * | Mapping for  |
 * | main kernel  |
 * | image        |
 * |              |
 * |              |
 * +--------------+ <- Z_BOOT_KERNEL_VIRT_END
 * |              |
 * | Unused,      |
 * | Available VM |
 * |              |
 * |..............| <- mapping_pos (grows downward as more mappings are made)
 * | Mapping      |
 * +--------------+
 * | Mapping      |
 * +--------------+
 * | ...          |
 * +--------------+
 * | Mapping      |
 * +--------------+ <- mappings start here
 * | Reserved     | <- special purpose virtual page(s) of size Z_VM_RESERVED
 * +--------------+ <- Z_VIRT_RAM_END
 *
 * At the moment we just have one downward-growing area for mappings.
 * There is currently no support for un-mapping memory, see #28900.
 */
static uint8_t *mapping_pos = Z_VIRT_RAM_END - Z_VM_RESERVED;

/* Get a chunk of virtual memory and mark it as being in-use.
 *
 * This may be called from arch early boot code before z_cstart() is invoked.
 * Data will have been copied and BSS zeroed, but this function must not rely
 * on any initialization functions having been called in order to work
 * correctly.
 */
static void *virt_region_get(size_t size)
{
	uint8_t *dest_addr;

	if ((mapping_pos - size) < Z_KERNEL_VIRT_END) {
		LOG_ERR("insufficient virtual address space (requested %zu)",
			size);
		return NULL;
	}

	mapping_pos -= size;
	dest_addr = mapping_pos;

	return dest_addr;
}

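/* Behavior sketch (illustrative only): with z_mm_lock held, successive
 * requests carve descending regions out of the unused area:
 *
 *	void *a = virt_region_get(CONFIG_MMU_PAGE_SIZE);
 *	void *b = virt_region_get(CONFIG_MMU_PAGE_SIZE);
 *
 * Assuming both succeed, b == (uint8_t *)a - CONFIG_MMU_PAGE_SIZE.
 */
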
/*
 * Free page frames management
 *
 * Call all of these functions with z_mm_lock held.
 */

/* Linked list of unused and available page frames.
 *
 * TODO: This is very simple and treats all free page frames as being equal.
 * However, there are use-cases to consolidate free pages such that entire
 * SRAM banks can be switched off to save power, and so obtaining free pages
 * may require a more complex ontology that prefers page frames in RAM banks
 * which are still active.
 *
 * This implies that in the future there may be multiple slists managing
 * physical pages. Each page frame will still just have one snode link.
 */
static sys_slist_t free_page_frame_list;

/* Number of unused and available free page frames */
size_t z_free_page_count;

#define PF_ASSERT(pf, expr, fmt, ...) \
	__ASSERT(expr, "page frame 0x%lx: " fmt, z_page_frame_to_phys(pf), \
		 ##__VA_ARGS__)

/* Get an unused page frame; we don't care which one. Returns NULL if there
 * are no free page frames.
 */
static struct z_page_frame *free_page_frame_list_get(void)
{
	sys_snode_t *node;
	struct z_page_frame *pf = NULL;

	node = sys_slist_get(&free_page_frame_list);
	if (node != NULL) {
		z_free_page_count--;
		pf = CONTAINER_OF(node, struct z_page_frame, node);
		PF_ASSERT(pf, z_page_frame_is_available(pf),
			  "unavailable but somehow on free list");
	}

	return pf;
}

/* Release a page frame back into the list of free pages */
static void free_page_frame_list_put(struct z_page_frame *pf)
{
	PF_ASSERT(pf, z_page_frame_is_available(pf),
		  "unavailable page put on free list");
	sys_slist_append(&free_page_frame_list, &pf->node);
	z_free_page_count++;
}

static void free_page_frame_list_init(void)
{
	sys_slist_init(&free_page_frame_list);
}

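/* Illustrative pairing of the list operations above (sketch only, not a
 * caller that exists in this file):
 *
 *	k_spinlock_key_t key = k_spin_lock(&z_mm_lock);
 *	struct z_page_frame *pf = free_page_frame_list_get();
 *
 *	if (pf != NULL) {
 *		... use the frame, or return it unchanged ...
 *		free_page_frame_list_put(pf);
 *	}
 *	k_spin_unlock(&z_mm_lock, key);
 */
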
/*
 * Memory Mapping
 */

/* Called after the frame is mapped in the arch layer, to update our
 * local ontology (and do some assertions while we're at it)
 */
static void frame_mapped_set(struct z_page_frame *pf, void *addr)
{
	PF_ASSERT(pf, !z_page_frame_is_reserved(pf),
		  "attempted to map a reserved page frame");

	/* We do allow multiple mappings for pinned page frames
	 * since we will never need to reverse map them.
	 * This is uncommon; use-cases are things like the Zephyr
	 * equivalent of the Linux vDSO.
	 */
	PF_ASSERT(pf, !z_page_frame_is_mapped(pf) || z_page_frame_is_pinned(pf),
		  "non-pinned and already mapped to %p", pf->addr);

	pf->flags |= Z_PAGE_FRAME_MAPPED;
	pf->addr = addr;
	pf->refcount++;
}

/* This may be called from arch early boot code before z_cstart() is invoked.
 * Data will have been copied and BSS zeroed, but this function must not rely
 * on any initialization functions having been called in order to work
 * correctly.
 */
void z_phys_map(uint8_t **virt_ptr, uintptr_t phys, size_t size, uint32_t flags)
{
	uintptr_t aligned_phys, addr_offset;
	size_t aligned_size;
	int ret;
	k_spinlock_key_t key;
	uint8_t *dest_addr;

	addr_offset = k_mem_region_align(&aligned_phys, &aligned_size,
					 phys, size,
					 CONFIG_MMU_PAGE_SIZE);
	__ASSERT(aligned_size != 0, "0-length mapping at 0x%lx", aligned_phys);
	__ASSERT(aligned_phys < (aligned_phys + (aligned_size - 1)),
		 "wraparound for physical address 0x%lx (size %zu)",
		 aligned_phys, aligned_size);

	key = k_spin_lock(&z_mm_lock);
	/* Obtain an appropriately sized chunk of virtual memory */
	dest_addr = virt_region_get(aligned_size);
	if (dest_addr == NULL) {
		goto fail;
	}

	/* If this fails, there's something amiss with virt_region_get() */
	__ASSERT((uintptr_t)dest_addr <
		 ((uintptr_t)dest_addr + (size - 1)),
		 "wraparound for virtual address %p (size %zu)",
		 dest_addr, size);

	LOG_DBG("arch_mem_map(%p, 0x%lx, %zu, %x) offset %lu", dest_addr,
		aligned_phys, aligned_size, flags, addr_offset);

	ret = arch_mem_map(dest_addr, aligned_phys, aligned_size, flags);
	if (ret != 0) {
		LOG_ERR("arch_mem_map() failed with %d", ret);
		goto fail;
	}
	k_spin_unlock(&z_mm_lock, key);

	*virt_ptr = dest_addr + addr_offset;
	return;
fail:
	/* May re-visit this in the future, but for now running out of
	 * virtual address space or failing the arch_mem_map() call is
	 * an unrecoverable situation.
	 *
	 * Other problems not related to resource exhaustion we leave as
	 * assertions since they are clearly programming mistakes.
	 */
	LOG_ERR("memory mapping 0x%lx (size %zu, flags 0x%x) failed",
		phys, size, flags);
	k_panic();
}

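/* Usage sketch: mapping a hypothetical 4KB MMIO region for read/write,
 * uncached access. The flag names come from <sys/mem_manage.h>; the
 * physical address is made up for illustration:
 *
 *	uint8_t *virt;
 *
 *	z_phys_map(&virt, 0xFEC00000UL, 0x1000,
 *		   K_MEM_PERM_RW | K_MEM_CACHE_NONE);
 *	sys_write32(0x1, (mem_addr_t)virt);
 */
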
/*
 * Miscellaneous
 */

size_t k_mem_region_align(uintptr_t *aligned_phys, size_t *aligned_size,
			  uintptr_t phys_addr, size_t size, size_t align)
{
	size_t addr_offset;

	/* The actual mapped region must be page-aligned. Round down the
	 * physical address and pad the region size appropriately
	 */
	*aligned_phys = ROUND_DOWN(phys_addr, align);
	addr_offset = phys_addr - *aligned_phys;
	*aligned_size = ROUND_UP(size + addr_offset, align);

	return addr_offset;
}

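/* Worked example with made-up numbers: phys_addr = 0x1234, size = 0x2100,
 * align = 0x1000 gives *aligned_phys = 0x1000, a return value (addr_offset)
 * of 0x234, and *aligned_size = ROUND_UP(0x2334, 0x1000) = 0x3000, so the
 * aligned region fully covers the requested bytes.
 */
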
#define VM_OFFSET	((CONFIG_KERNEL_VM_BASE + CONFIG_KERNEL_VM_OFFSET) - \
			 CONFIG_SRAM_BASE_ADDRESS)

/* Only applies to boot RAM mappings within the Zephyr image that have never
 * been remapped or paged out. Never use this unless you know exactly what you
 * are doing.
 */
#define BOOT_VIRT_TO_PHYS(virt) ((uintptr_t)(((uint8_t *)virt) - VM_OFFSET))

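/* Numeric sketch with assumed config values: if CONFIG_KERNEL_VM_BASE is
 * 0xC0000000, CONFIG_KERNEL_VM_OFFSET is 0 and CONFIG_SRAM_BASE_ADDRESS is
 * 0x10000000, then VM_OFFSET is 0xB0000000 and boot virtual address
 * 0xC0001000 converts to physical address 0x10001000.
 */
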
void z_mem_manage_init(void)
{
	uintptr_t phys;
	uint8_t *addr;
	struct z_page_frame *pf;
	k_spinlock_key_t key = k_spin_lock(&z_mm_lock);

	free_page_frame_list_init();

#ifdef CONFIG_ARCH_HAS_RESERVED_PAGE_FRAMES
	/* If some page frames are unavailable for use as memory, arch
	 * code will set Z_PAGE_FRAME_RESERVED in their flags
	 */
	arch_reserved_pages_update();
#endif /* CONFIG_ARCH_HAS_RESERVED_PAGE_FRAMES */

	/* All pages composing the Zephyr image are mapped at boot in a
	 * predictable way. This can change at runtime.
	 */
	VIRT_FOREACH(Z_KERNEL_VIRT_START, Z_KERNEL_VIRT_SIZE, addr)
	{
		frame_mapped_set(z_phys_to_page_frame(BOOT_VIRT_TO_PHYS(addr)),
				 addr);
	}

	/* Any remaining pages that aren't mapped, reserved, or pinned get
	 * added to the free pages list
	 */
	Z_PAGE_FRAME_FOREACH(phys, pf) {
		if (z_page_frame_is_available(pf)) {
			free_page_frame_list_put(pf);
		}
	}
	LOG_DBG("free page frames: %zu", z_free_page_count);
#if __ASSERT_ON
	page_frames_initialized = true;
#endif
	k_spin_unlock(&z_mm_lock, key);
}