kernel/mmu.c - third_party/github/zephyrproject-rtos/zephyr - Git at Google

 /*
  * Copyright (c) 2020 Intel Corporation
  *
  * SPDX-License-Identifier: Apache-2.0
  *
  * Routines for managing virtual address spaces
  */

 #include <stdint.h>
 #include <kernel_arch_interface.h>
 #include <zephyr/spinlock.h>
 #include <mmu.h>
 #include <zephyr/init.h>
 #include <kernel_internal.h>
 #include <zephyr/internal/syscall_handler.h>
 #include <zephyr/toolchain.h>
 #include <zephyr/linker/linker-defs.h>
 #include <zephyr/sys/bitarray.h>
 #include <zephyr/timing/timing.h>
 #include <zephyr/logging/log.h>
 LOG_MODULE_DECLARE(os, CONFIG_KERNEL_LOG_LEVEL);

 #ifdef CONFIG_DEMAND_PAGING
 #include <zephyr/kernel/mm/demand_paging.h>
 #endif

 /*
  * General terminology:
  * - A page frame is a page-sized physical memory region in RAM. It is a
  *   container where a data page may be placed. It is always referred to by
  *   physical address. We have a convention of using uintptr_t for physical
  *   addresses. We instantiate a struct z_page_frame to store metadata for
  *   every page frame.
  *
  * - A data page is a page-sized region of data. It may exist in a page frame,
  *   or be paged out to some backing store. Its location can always be looked
  *   up in the CPU's page tables (or equivalent) by virtual address.
  *   The data type will always be void * or in some cases uint8_t * when we
  *   want to do pointer arithmetic.
  */

 /* Spinlock to protect any globals in this file and serialize page table
  * updates in arch code.
  *
  * Public entry points in this file such as k_mem_map(), k_mem_unmap(),
  * z_phys_map() and z_phys_unmap() acquire it themselves; the static helpers
  * documented as "call with z_mm_lock held" rely on their caller owning it.
  */
 struct k_spinlock z_mm_lock;

 /*
  * General page frame management
  */

 /* Database of all RAM page frames (Z_NUM_PAGE_FRAMES entries). Entries are
  * classified and the unused ones are put on the free list by
  * z_mem_manage_init().
  */
 struct z_page_frame z_page_frames[Z_NUM_PAGE_FRAMES];

 #if __ASSERT_ON
 /* Indicator that z_page_frames has been initialized, many of these APIs do
  * not work before POST_KERNEL.
  *
  * Set to true at the end of z_mem_manage_init(). The variable only exists
  * when assertions are compiled in, so it must only be referenced from
  * within __ASSERT() expressions.
  */
 static bool page_frames_initialized;
 #endif

 /* Add colors to page frame dumps to indicate the state of each page frame.
  * Set to 0 to get plain, uncolored output from z_page_frames_dump().
  */
 #define COLOR_PAGE_FRAMES	1

 #if COLOR_PAGE_FRAMES
 /* ANSI escape sequences; the suffix after ANSI_ is the name given to COLOR() */
 #define ANSI_DEFAULT "\x1B" "[0m"
 #define ANSI_RED     "\x1B" "[1;31m"
 #define ANSI_GREEN   "\x1B" "[1;32m"
 #define ANSI_YELLOW  "\x1B" "[1;33m"
 #define ANSI_BLUE    "\x1B" "[1;34m"
 #define ANSI_MAGENTA "\x1B" "[1;35m"
 #define ANSI_CYAN    "\x1B" "[1;36m"
 #define ANSI_GREY    "\x1B" "[1;90m"

 /* Emit the escape sequence for color x, e.g. COLOR(RED) prints ANSI_RED */
 #define COLOR(x)	printk(_CONCAT(ANSI_, x))
 #else
 /* Coloring disabled: COLOR() expands to an empty statement */
 #define COLOR(x)	do { } while (false)
 #endif

 /* LCOV_EXCL_START */
 /* Print a single (optionally colored) character describing the state of
  * one page frame. The states are tested in priority order and the first
  * match wins:
  *
  *   R  reserved     B  busy     P  pinned
  *   .  available    M  mapped   ?  none of the above
  */
 static void page_frame_dump(struct z_page_frame *pf)
 {
 	if (z_page_frame_is_reserved(pf)) {
 		COLOR(CYAN);
 		printk("R");
 		return;
 	}

 	if (z_page_frame_is_busy(pf)) {
 		COLOR(MAGENTA);
 		printk("B");
 		return;
 	}

 	if (z_page_frame_is_pinned(pf)) {
 		COLOR(YELLOW);
 		printk("P");
 		return;
 	}

 	if (z_page_frame_is_available(pf)) {
 		COLOR(GREY);
 		printk(".");
 		return;
 	}

 	if (z_page_frame_is_mapped(pf)) {
 		COLOR(DEFAULT);
 		printk("M");
 		return;
 	}

 	/* Flags do not match any known state */
 	COLOR(RED);
 	printk("?");
 }

 void z_page_frames_dump(void)
 {
 	int column = 0;

 	__ASSERT(page_frames_initialized, "%s called too early", __func__);
 	printk("Physical memory from 0x%lx to 0x%lx\n",
 	       Z_PHYS_RAM_START, Z_PHYS_RAM_END);

 	for (int i = 0; i < Z_NUM_PAGE_FRAMES; i++) {
 		struct z_page_frame *pf = &z_page_frames[i];

 		page_frame_dump(pf);

 		column++;
 		if (column == 64) {
 			column = 0;
 			printk("\n");
 		}
 	}

 	COLOR(DEFAULT);
 	if (column != 0) {
 		printk("\n");
 	}
 }
 /* LCOV_EXCL_STOP */

 /* Iterate over a virtual memory region one MMU page at a time.
  *
  * _base is the start of the region (any object pointer), _size its length
  * in bytes and _pos a uint8_t * lvalue that receives the address of each
  * page in turn. Every argument is parenthesized so that arbitrary
  * expressions may be passed; note that _base and _size are re-evaluated on
  * each iteration and therefore should be free of side effects.
  */
 #define VIRT_FOREACH(_base, _size, _pos) \
 	for ((_pos) = (_base); \
 	     (_pos) < ((uint8_t *)(_base) + (_size)); \
 	     (_pos) += CONFIG_MMU_PAGE_SIZE)

 /* Same as VIRT_FOREACH, but for a physical region: _base is a uintptr_t
  * physical address and _pos a uintptr_t lvalue.
  */
 #define PHYS_FOREACH(_base, _size, _pos) \
 	for ((_pos) = (_base); \
 	     (_pos) < ((uintptr_t)(_base) + (_size)); \
 	     (_pos) += CONFIG_MMU_PAGE_SIZE)


 /*
  * Virtual address space management
  *
  * Call all of these functions with z_mm_lock held.
  *
  * Overall virtual memory map: When the kernel starts, it resides in
  * virtual memory in the region Z_KERNEL_VIRT_START to
  * Z_KERNEL_VIRT_END. Unused virtual memory past this, up to the limit
  * noted by CONFIG_KERNEL_VM_SIZE may be used for runtime memory mappings.
  *
  * If CONFIG_ARCH_MAPS_ALL_RAM is set, we do not just map the kernel image,
  * but have a mapping for all RAM in place. This is for special architectural
  * purposes and does not otherwise affect page frame accounting or flags;
  * the only guarantee is that such RAM mapping outside of the Zephyr image
  * won't be disturbed by subsequent memory mapping calls.
  *
  * +--------------+ <- Z_VIRT_RAM_START
  * | Undefined VM | <- May contain ancillary regions like x86_64's locore
  * +--------------+ <- Z_KERNEL_VIRT_START (often == Z_VIRT_RAM_START)
  * | Mapping for  |
  * | main kernel  |
  * | image        |
  * |		  |
  * |		  |
  * +--------------+ <- Z_FREE_VM_START
  * |              |
  * | Unused,      |
  * | Available VM |
  * |              |
  * |..............| <- mapping_pos (grows downward as more mappings are made)
  * | Mapping      |
  * +--------------+
  * | Mapping      |
  * +--------------+
  * | ...          |
  * +--------------+
  * | Mapping      |
  * +--------------+ <- mappings start here
  * | Reserved     | <- special purpose virtual page(s) of size Z_VM_RESERVED
  * +--------------+ <- Z_VIRT_RAM_END
  */

 /* Bitmap of virtual addresses where one bit corresponds to one page.
  * This is being used for virt_region_alloc() to figure out which
  * region of virtual addresses can be used for memory mapping.
  *
  * Note that bit #0 is the highest address so that allocation is
  * done in reverse from highest address. The helpers
  * virt_from_bitmap_offset() and virt_to_bitmap_offset() convert
  * between bit offsets and virtual addresses accordingly.
  */
 SYS_BITARRAY_DEFINE_STATIC(virt_region_bitmap,
 			   CONFIG_KERNEL_VM_SIZE / CONFIG_MMU_PAGE_SIZE);

 /* Set by virt_region_init(); the bitmap is initialized lazily on the first
  * call to virt_region_alloc() or virt_region_free().
  */
 static bool virt_region_inited;

 /* Bounds of the virtual address range available for runtime mappings:
  * from the end of the kernel image mapping up to (but excluding) the
  * reserved pages at the very end of the virtual address space.
  */
 #define Z_VIRT_REGION_START_ADDR	Z_FREE_VM_START
 #define Z_VIRT_REGION_END_ADDR		(Z_VIRT_RAM_END - Z_VM_RESERVED)

 /* Translate a bitmap offset into the lowest virtual address of a region.
  *
  * Bit #0 stands for the page just below Z_VIRT_RAM_END, so a region of
  * size bytes whose first bit is at offset starts
  * (offset pages + size bytes) below Z_VIRT_RAM_END.
  */
 static inline uintptr_t virt_from_bitmap_offset(size_t offset, size_t size)
 {
 	uintptr_t vm_top = POINTER_TO_UINT(Z_VIRT_RAM_END);
 	size_t skipped_bytes = offset * CONFIG_MMU_PAGE_SIZE;

 	return vm_top - skipped_bytes - size;
 }

 /* Translate a virtual region into the bitmap offset of its first bit.
  *
  * Inverse of virt_from_bitmap_offset(): the offset is the distance, in
  * pages, between the end of the region (vaddr + size) and Z_VIRT_RAM_END.
  */
 static inline size_t virt_to_bitmap_offset(void *vaddr, size_t size)
 {
 	uintptr_t vm_top = POINTER_TO_UINT(Z_VIRT_RAM_END);
 	uintptr_t bytes_above = vm_top - POINTER_TO_UINT(vaddr) - size;

 	return bytes_above / CONFIG_MMU_PAGE_SIZE;
 }

 /* One-time setup of the virtual region bitmap.
  *
  * Marks as allocated every part of the address space that must never be
  * handed out by k_mem_map() or z_phys_map():
  *  - the Z_VM_RESERVED bytes at the very end of the address space, and
  *  - everything from Z_VIRT_RAM_START up to Z_FREE_VM_START.
  */
 static void virt_region_init(void)
 {
 	size_t low_span, low_offset;

 	if (Z_VM_RESERVED > 0) {
 		/* The reserved pages sit at the top of the address space,
 		 * which is where the bitmap begins (bit #0).
 		 */
 		(void)sys_bitarray_set_region(&virt_region_bitmap,
 					      Z_VM_RESERVED / CONFIG_MMU_PAGE_SIZE,
 					      0);
 	}

 	/* Size in bytes of the range below Z_FREE_VM_START */
 	low_span = POINTER_TO_UINT(Z_FREE_VM_START)
 		   - POINTER_TO_UINT(Z_VIRT_RAM_START);
 	low_offset = virt_to_bitmap_offset(Z_VIRT_RAM_START, low_span);

 	(void)sys_bitarray_set_region(&virt_region_bitmap,
 				      low_span / CONFIG_MMU_PAGE_SIZE,
 				      low_offset);

 	virt_region_inited = true;
 }

 /* Return a region of virtual address space to the bitmap.
  *
  * @param vaddr Lowest virtual address of the region
  * @param size  Size of the region in bytes; the number of bits released is
  *              size / CONFIG_MMU_PAGE_SIZE
  *
  * Errors from sys_bitarray_free() are deliberately ignored.
  */
 static void virt_region_free(void *vaddr, size_t size)
 {
 	size_t offset, num_bits;
 	uint8_t *vaddr_u8 = (uint8_t *)vaddr;

 	/* The bitmap is set up lazily on first use */
 	if (unlikely(!virt_region_inited)) {
 		virt_region_init();
 	}

 #ifndef CONFIG_KERNEL_DIRECT_MAP
 	/* Without the need to support K_DIRECT_MAP, the region must be
 	 * able to be represented in the bitmap. So this case is
 	 * simple.
 	 */

 	__ASSERT((vaddr_u8 >= Z_VIRT_REGION_START_ADDR)
 		 && ((vaddr_u8 + size - 1) < Z_VIRT_REGION_END_ADDR),
 		 "invalid virtual address region %p (%zu)", vaddr_u8, size);
 	/* Same check again so that builds without assertions silently
 	 * ignore an out-of-range region instead of corrupting the bitmap.
 	 */
 	if (!((vaddr_u8 >= Z_VIRT_REGION_START_ADDR)
 	      && ((vaddr_u8 + size - 1) < Z_VIRT_REGION_END_ADDR))) {
 		return;
 	}

 	offset = virt_to_bitmap_offset(vaddr, size);
 	num_bits = size / CONFIG_MMU_PAGE_SIZE;
 	(void)sys_bitarray_free(&virt_region_bitmap, num_bits, offset);
 #else /* !CONFIG_KERNEL_DIRECT_MAP */
 	/* With K_DIRECT_MAP, the region can be outside of the virtual
 	 * memory space, wholly within it, or overlap partially.
 	 * So additional processing is needed to make sure we only
 	 * mark the pages within the bitmap.
 	 *
 	 * NOTE(review): the test below only fires when the first or the
 	 * last byte of the region lies inside the managed range; a region
 	 * that starts below it and ends above it releases nothing.
 	 */
 	if (((vaddr_u8 >= Z_VIRT_REGION_START_ADDR) &&
 	     (vaddr_u8 < Z_VIRT_REGION_END_ADDR)) ||
 	    (((vaddr_u8 + size - 1) >= Z_VIRT_REGION_START_ADDR) &&
 	     ((vaddr_u8 + size - 1) < Z_VIRT_REGION_END_ADDR))) {
 		/* Clamp the region to the part covered by the bitmap */
 		uint8_t *adjusted_start = MAX(vaddr_u8, Z_VIRT_REGION_START_ADDR);
 		uint8_t *adjusted_end = MIN(vaddr_u8 + size,
 					    Z_VIRT_REGION_END_ADDR);
 		size_t adjusted_sz = adjusted_end - adjusted_start;

 		offset = virt_to_bitmap_offset(adjusted_start, adjusted_sz);
 		num_bits = adjusted_sz / CONFIG_MMU_PAGE_SIZE;
 		(void)sys_bitarray_free(&virt_region_bitmap, num_bits, offset);
 	}
 #endif /* !CONFIG_KERNEL_DIRECT_MAP */
 }

 /* Allocate a region of virtual address space from the bitmap.
  *
  * @param size  Size of the region in bytes; expected to be a multiple of
  *              CONFIG_MMU_PAGE_SIZE
  * @param align Required alignment of the returned address; expected to be
  *              a multiple of CONFIG_MMU_PAGE_SIZE
  *
  * @return Lowest virtual address of the allocated region, or NULL if no
  *         suitable region is available.
  *
  * When align is larger than a page, extra pages are requested so that an
  * aligned sub-region of the requested size is guaranteed to exist; the
  * surplus before and after it is handed back to the bitmap.
  */
 static void *virt_region_alloc(size_t size, size_t align)
 {
 	uintptr_t dest_addr;
 	size_t alloc_size;
 	size_t offset;
 	size_t num_bits;
 	int ret;

 	/* The bitmap is set up lazily on first use */
 	if (unlikely(!virt_region_inited)) {
 		virt_region_init();
 	}

 	/* Possibly request more pages to ensure we can get an aligned virtual address */
 	num_bits = (size + align - CONFIG_MMU_PAGE_SIZE) / CONFIG_MMU_PAGE_SIZE;
 	alloc_size = num_bits * CONFIG_MMU_PAGE_SIZE;
 	ret = sys_bitarray_alloc(&virt_region_bitmap, num_bits, &offset);
 	if (ret != 0) {
 		LOG_ERR("insufficient virtual address space (requested %zu)",
 			size);
 		return NULL;
 	}

 	/* Remember that bit #0 in bitmap corresponds to the highest
 	 * virtual address. So here we need to go downwards (backwards?)
 	 * to get the starting address of the allocated region.
 	 */
 	dest_addr = virt_from_bitmap_offset(offset, alloc_size);

 	if (alloc_size > size) {
 		uintptr_t aligned_dest_addr = ROUND_UP(dest_addr, align);

 		/* Here is the memory organization when trying to get an aligned
 		 * virtual address:
 		 *
 		 * +--------------+ <- Z_VIRT_RAM_START
 		 * | Undefined VM |
 		 * +--------------+ <- Z_KERNEL_VIRT_START (often == Z_VIRT_RAM_START)
 		 * | Mapping for  |
 		 * | main kernel  |
 		 * | image        |
 		 * |		  |
 		 * |		  |
 		 * +--------------+ <- Z_FREE_VM_START
 		 * | ...          |
 		 * +==============+ <- dest_addr
 		 * | Unused       |
 		 * |..............| <- aligned_dest_addr
 		 * |              |
 		 * | Aligned      |
 		 * | Mapping      |
 		 * |              |
 		 * |..............| <- aligned_dest_addr + size
 		 * | Unused       |
 		 * +==============+ <- offset from Z_VIRT_RAM_END == dest_addr + alloc_size
 		 * | ...          |
 		 * +--------------+
 		 * | Mapping      |
 		 * +--------------+
 		 * | Reserved     |
 		 * +--------------+ <- Z_VIRT_RAM_END
 		 */

 		/* Free the two unused regions */
 		virt_region_free(UINT_TO_POINTER(dest_addr),
 				 aligned_dest_addr - dest_addr);
 		if (((dest_addr + alloc_size) - (aligned_dest_addr + size)) > 0) {
 			virt_region_free(UINT_TO_POINTER(aligned_dest_addr + size),
 					 (dest_addr + alloc_size) - (aligned_dest_addr + size));
 		}

 		dest_addr = aligned_dest_addr;
 	}

 	/* Need to make sure this does not step into kernel memory */
 	if (dest_addr < POINTER_TO_UINT(Z_VIRT_REGION_START_ADDR)) {
 		/* At this point exactly [dest_addr, dest_addr + size) is still
 		 * marked as allocated (any alignment surplus was released
 		 * above). sys_bitarray_free() takes a count of bits, i.e.
 		 * pages, and the bit offset of that region, not a byte count
 		 * and the offset of the original, larger allocation.
 		 */
 		(void)sys_bitarray_free(&virt_region_bitmap,
 					size / CONFIG_MMU_PAGE_SIZE,
 					virt_to_bitmap_offset(UINT_TO_POINTER(dest_addr),
 							      size));
 		return NULL;
 	}

 	return UINT_TO_POINTER(dest_addr);
 }

 /*
  * Free page frames management
  *
  * Call all of these functions with z_mm_lock held.
  */

 /* Linked list of unused and available page frames.
  *
  * TODO: This is very simple and treats all free page frames as being equal.
  * However, there are use-cases to consolidate free pages such that entire
  * SRAM banks can be switched off to save power, and so obtaining free pages
  * may require a more complex ontology which prefers page frames in RAM banks
  * which are still active.
  *
  * This implies in the future there may be multiple slists managing physical
  * pages. Each page frame will still just have one snode link.
  */
 /* Head of the free page frame list; only manipulated through
  * free_page_frame_list_get(), free_page_frame_list_put() and
  * free_page_frame_list_init().
  */
 static sys_slist_t free_page_frame_list;

 /* Number of unused and available free page frames. Kept in step with the
  * list: incremented on every put, decremented on every successful get.
  */
 size_t z_free_page_count;

 /* __ASSERT() wrapper which prefixes the failure message with the physical
  * address of the page frame pf that the assertion is about.
  */
 #define PF_ASSERT(pf, expr, fmt, ...) \
 	__ASSERT(expr, "page frame 0x%lx: " fmt, z_page_frame_to_phys(pf), \
 		 ##__VA_ARGS__)

 /* Take one page frame off the free list.
  *
  * No particular frame is preferred. Returns NULL, leaving the free count
  * untouched, when the list is empty.
  */
 static struct z_page_frame *free_page_frame_list_get(void)
 {
 	struct z_page_frame *pf;
 	sys_snode_t *head = sys_slist_get(&free_page_frame_list);

 	if (head == NULL) {
 		return NULL;
 	}

 	z_free_page_count--;

 	/* The list links frames through their embedded node member */
 	pf = CONTAINER_OF(head, struct z_page_frame, node);
 	PF_ASSERT(pf, z_page_frame_is_available(pf),
 		 "unavailable but somehow on free list");

 	return pf;
 }

 /* Release a page frame back into the list of free pages.
  *
  * The frame must already be in the "available" state; the free page
  * counter is incremented.
  */
 static void free_page_frame_list_put(struct z_page_frame *pf)
 {
 	PF_ASSERT(pf, z_page_frame_is_available(pf),
 		 "unavailable page put on free list");
 	/* The structure is packed, which ensures that this is true */
 	/* NOTE(review): the frame pointer itself is used as the list node,
 	 * which presumably relies on the node member that
 	 * free_page_frame_list_get() passes to CONTAINER_OF() being located
 	 * at the very start of struct z_page_frame - confirm against the
 	 * struct definition.
 	 */
 	void *node = pf;

 	sys_slist_append(&free_page_frame_list, node);
 	z_free_page_count++;
 }

 /* Reset the free page frame list to empty. Does not touch
  * z_free_page_count.
  */
 static void free_page_frame_list_init(void)
 {
 	sys_slist_init(&free_page_frame_list);
 }

 /* Mark a page frame as unused and put it on the free list.
  *
  * All flags are cleared first, which is what makes the frame pass the
  * "available" assertion in free_page_frame_list_put(). As the name
  * suggests, call with z_mm_lock held.
  */
 static void page_frame_free_locked(struct z_page_frame *pf)
 {
 	pf->flags = 0;
 	free_page_frame_list_put(pf);
 }

 /*
  * Memory Mapping
  */

 /* Called after the frame is mapped in the arch layer, to update our
  * local ontology (and do some assertions while we're at it)
  *
  * @param pf   Page frame that has just been mapped
  * @param addr Virtual address the frame is now mapped at; recorded in the
  *             frame so that the mapping can later be looked up by address
  */
 static void frame_mapped_set(struct z_page_frame *pf, void *addr)
 {
 	PF_ASSERT(pf, !z_page_frame_is_reserved(pf),
 		  "attempted to map a reserved page frame");

 	/* We do allow multiple mappings for pinned page frames
 	 * since we will never need to reverse map them.
 	 * This is uncommon, use-cases are for things like the
 	 * Zephyr equivalent of vDSOs
 	 */
 	PF_ASSERT(pf, !z_page_frame_is_mapped(pf) || z_page_frame_is_pinned(pf),
 		 "non-pinned and already mapped to %p", pf->addr);

 	/* For a frame that is mapped more than once, only the most recent
 	 * virtual address is remembered.
 	 */
 	pf->flags |= Z_PAGE_FRAME_MAPPED;
 	pf->addr = addr;
 }

 /* LCOV_EXCL_START */
 /* Look up the physical address backing a virtual address by scanning the
  * page frame database for a mapped frame that records this exact virtual
  * address.
  *
  * @param[in]  virt Virtual address to look up
  * @param[out] phys If not NULL, receives the physical address of the page
  *                  frame mapped at the given virtual address. Left
  *                  untouched when no mapping is found.
  *
  * @retval 0 if mapping is found and valid
  * @retval -EFAULT if virtual address is not mapped
  */
 static int virt_to_page_frame(void *virt, uintptr_t *phys)
 {
 	uintptr_t frame_paddr;
 	struct z_page_frame *frame;

 	Z_PAGE_FRAME_FOREACH(frame_paddr, frame) {
 		if (!z_page_frame_is_mapped(frame) || (virt != frame->addr)) {
 			continue;
 		}

 		/* First match wins */
 		if (phys != NULL) {
 			*phys = z_page_frame_to_phys(frame);
 		}
 		return 0;
 	}

 	return -EFAULT;
 }
 /* LCOV_EXCL_STOP */

 __weak FUNC_ALIAS(virt_to_page_frame, arch_page_phys_get, int);

 #ifdef CONFIG_DEMAND_PAGING
 /* Forward declarations of demand paging helpers defined later in this file
  * but already needed by map_anon_page(). Parameter names match the
  * definitions.
  */
 static int page_frame_prepare_locked(struct z_page_frame *pf, bool *dirty_ptr,
 				     bool page_fault, uintptr_t *location_ptr);

 static inline void do_backing_store_page_in(uintptr_t location);
 static inline void do_backing_store_page_out(uintptr_t location);
 #endif /* CONFIG_DEMAND_PAGING */

 /* Allocate a free page frame, and map it to a specified virtual address
  *
  * @param addr  Page-sized virtual address to back with a page frame
  * @param flags K_MEM_* mapping flags. K_MEM_MAP_LOCK pins the page frame,
  *              K_MEM_MAP_UNINIT skips zeroing the page. K_MEM_CACHE_WB is
  *              always added to what is passed to arch_mem_map().
  *
  * @retval 0 on success
  * @retval -ENOMEM if there is no free page frame and either demand paging
  *         is disabled or preparing the eviction victim failed
  *
  * TODO: Add optional support for copy-on-write mappings to a zero page instead
  * of allocating, in which case page frames will be allocated lazily as
  * the mappings to the zero page get touched. This will avoid expensive
  * page-ins as memory is mapped and physical RAM or backing store storage will
  * not be used if the mapped memory is unused. The cost is an empty physical
  * page of zeroes.
  */
 static int map_anon_page(void *addr, uint32_t flags)
 {
 	struct z_page_frame *pf;
 	uintptr_t phys;
 	bool lock = (flags & K_MEM_MAP_LOCK) != 0U;
 	bool uninit = (flags & K_MEM_MAP_UNINIT) != 0U;

 	pf = free_page_frame_list_get();
 	if (pf == NULL) {
 #ifdef CONFIG_DEMAND_PAGING
 		/* No free frame: evict a page to make room */
 		uintptr_t location;
 		bool dirty;
 		int ret;

 		pf = k_mem_paging_eviction_select(&dirty);
 		__ASSERT(pf != NULL, "failed to get a page frame");
 		LOG_DBG("evicting %p at 0x%lx", pf->addr,
 			z_page_frame_to_phys(pf));
 		ret = page_frame_prepare_locked(pf, &dirty, false, &location);
 		if (ret != 0) {
 			return -ENOMEM;
 		}
 		/* Only write the victim out if its contents would be lost */
 		if (dirty) {
 			do_backing_store_page_out(location);
 		}
 		/* The frame is being reused: drop all state of the old page */
 		pf->flags = 0;
 #else
 		return -ENOMEM;
 #endif /* CONFIG_DEMAND_PAGING */
 	}

 	phys = z_page_frame_to_phys(pf);
 	arch_mem_map(addr, phys, CONFIG_MMU_PAGE_SIZE, flags | K_MEM_CACHE_WB);

 	/* Pin before frame_mapped_set() so that its assertions see the
 	 * final flags.
 	 */
 	if (lock) {
 		pf->flags |= Z_PAGE_FRAME_PINNED;
 	}
 	frame_mapped_set(pf, addr);

 	LOG_DBG("memory mapping anon page %p -> 0x%lx", addr, phys);

 	if (!uninit) {
 		/* If we later implement mappings to a copy-on-write
 		 * zero page, won't need this step
 		 */
 		memset(addr, 0, CONFIG_MMU_PAGE_SIZE);
 	}

 	return 0;
 }

 /* Map anonymous memory into the kernel's virtual address space.
  *
  * A region of size bytes is backed page by page with free page frames and
  * surrounded by two unmapped guard pages, one immediately before and one
  * immediately after, so that running off either end faults.
  *
  * @param size  Size of the mapping in bytes; must be a non-zero multiple
  *              of CONFIG_MMU_PAGE_SIZE
  * @param flags K_MEM_PERM_*, K_MEM_MAP_* flags. Cache attributes must not
  *              be specified, and K_MEM_PERM_USER may not be combined with
  *              K_MEM_MAP_UNINIT.
  *
  * @return Address of the first mapped byte (just after the leading guard
  *         page), or NULL if the request is too large, no virtual region
  *         is free, or page frames ran out. On failure everything that
  *         had been set up for this request is released again.
  */
 void *k_mem_map(size_t size, uint32_t flags)
 {
 	uint8_t *dst;
 	uint8_t *region;
 	size_t total_size;
 	int ret;
 	k_spinlock_key_t key;
 	uint8_t *pos;

 	__ASSERT(!(((flags & K_MEM_PERM_USER) != 0U) &&
 		   ((flags & K_MEM_MAP_UNINIT) != 0U)),
 		 "user access to anonymous uninitialized pages is forbidden");
 	__ASSERT(size % CONFIG_MMU_PAGE_SIZE == 0U,
 		 "unaligned size %zu passed to %s", size, __func__);
 	__ASSERT(size != 0, "zero sized memory mapping");
 	__ASSERT(page_frames_initialized, "%s called too early", __func__);
 	__ASSERT((flags & K_MEM_CACHE_MASK) == 0U,
 		 "%s does not support explicit cache settings", __func__);

 	/* Need extra for the guard pages (before and after) which we
 	 * won't map. Refuse sizes for which adding them would wrap around,
 	 * as the region allocated would then be smaller than what gets
 	 * mapped below.
 	 */
 	if (size > (SIZE_MAX - ((size_t)CONFIG_MMU_PAGE_SIZE * 2))) {
 		LOG_ERR("too large size %zu passed to %s", size, __func__);
 		return NULL;
 	}
 	total_size = size + CONFIG_MMU_PAGE_SIZE * 2;

 	key = k_spin_lock(&z_mm_lock);

 	region = virt_region_alloc(total_size, CONFIG_MMU_PAGE_SIZE);
 	if (region == NULL) {
 		/* Address space has no free region */
 		dst = NULL;
 		goto out;
 	}

 	/* Unmap both guard pages to make sure accessing them
 	 * will generate fault.
 	 */
 	arch_mem_unmap(region, CONFIG_MMU_PAGE_SIZE);
 	arch_mem_unmap(region + CONFIG_MMU_PAGE_SIZE + size,
 		       CONFIG_MMU_PAGE_SIZE);

 	/* Skip over the "before" guard page in returned address. */
 	dst = region + CONFIG_MMU_PAGE_SIZE;

 	VIRT_FOREACH(dst, size, pos) {
 		ret = map_anon_page(pos, flags);

 		if (ret != 0) {
 			uint8_t *undo;

 			/* Roll back: unmap the pages mapped so far and give
 			 * their page frames back. The lock is already held,
 			 * so this is done here rather than by calling
 			 * k_mem_unmap().
 			 *
 			 * NOTE(review): with demand paging, a page mapped
 			 * earlier in this loop may have been evicted again
 			 * already; such pages have no page frame to return
 			 * and are skipped.
 			 */
 			for (undo = dst; undo < pos;
 			     undo += CONFIG_MMU_PAGE_SIZE) {
 				uintptr_t phys;

 				if ((arch_page_phys_get(undo, &phys) != 0) ||
 				    !z_is_page_frame(phys)) {
 					continue;
 				}

 				arch_mem_unmap(undo, CONFIG_MMU_PAGE_SIZE);
 				page_frame_free_locked(z_phys_to_page_frame(phys));
 			}

 			/* Release the whole region, guard pages included */
 			virt_region_free(region, total_size);

 			dst = NULL;
 			goto out;
 		}
 	}
 out:
 	k_spin_unlock(&z_mm_lock, key);
 	return dst;
 }

 /* Unmap a region previously obtained from k_mem_map().
  *
  * The page frames backing the region are returned to the free list and
  * the virtual region, including its two guard pages, is released.
  *
  * @param addr Address returned by k_mem_map()
  * @param size Size that was passed to k_mem_map()
  *
  * The region is validated first: it must be surrounded by two unmapped
  * guard pages and every page in it must be mapped to a known page frame.
  * Violations trigger an assertion; in builds without assertions the
  * function stops at the first problem it finds, leaving any remaining
  * pages and the virtual region as they are.
  */
 void k_mem_unmap(void *addr, size_t size)
 {
 	uintptr_t phys;
 	uint8_t *pos;
 	struct z_page_frame *pf;
 	k_spinlock_key_t key;
 	size_t total_size;
 	int ret;

 	/* Need space for the "before" guard page */
 	__ASSERT_NO_MSG(POINTER_TO_UINT(addr) >= CONFIG_MMU_PAGE_SIZE);

 	/* Make sure address range is still valid after accounting
 	 * for two guard pages.
 	 */
 	pos = (uint8_t *)addr - CONFIG_MMU_PAGE_SIZE;
 	z_mem_assert_virtual_region(pos, size + (CONFIG_MMU_PAGE_SIZE * 2));

 	key = k_spin_lock(&z_mm_lock);

 	/* Check if both guard pages are unmapped.
 	 * Bail if not, as this is probably a region not mapped
 	 * using k_mem_map().
 	 *
 	 * A guard page is intact when the lookup fails (ret != 0), so
 	 * that is the condition to assert on. The assertion has to come
 	 * before the bail-out check, otherwise it can never fire.
 	 */
 	pos = addr;
 	ret = arch_page_phys_get(pos - CONFIG_MMU_PAGE_SIZE, NULL);
 	__ASSERT(ret != 0,
 		 "%s: cannot find preceding guard page for (%p, %zu)",
 		 __func__, addr, size);
 	if (ret == 0) {
 		goto out;
 	}

 	ret = arch_page_phys_get(pos + size, NULL);
 	__ASSERT(ret != 0,
 		 "%s: cannot find succeeding guard page for (%p, %zu)",
 		 __func__, addr, size);
 	if (ret == 0) {
 		goto out;
 	}

 	VIRT_FOREACH(addr, size, pos) {
 		ret = arch_page_phys_get(pos, &phys);

 		__ASSERT(ret == 0,
 			 "%s: cannot unmap an unmapped address %p",
 			 __func__, pos);
 		if (ret != 0) {
 			/* Found an address not mapped. Do not continue. */
 			goto out;
 		}

 		__ASSERT(z_is_page_frame(phys),
 			 "%s: 0x%lx is not a page frame", __func__, phys);
 		if (!z_is_page_frame(phys)) {
 			/* Physical address has no corresponding page frame
 			 * description in the page frame array.
 			 * This should not happen. Do not continue.
 			 */
 			goto out;
 		}

 		/* Grab the corresponding page frame from physical address */
 		pf = z_phys_to_page_frame(phys);

 		__ASSERT(z_page_frame_is_mapped(pf),
 			 "%s: 0x%lx is not a mapped page frame", __func__, phys);
 		if (!z_page_frame_is_mapped(pf)) {
 			/* Page frame is not marked mapped.
 			 * This should not happen. Do not continue.
 			 */
 			goto out;
 		}

 		arch_mem_unmap(pos, CONFIG_MMU_PAGE_SIZE);

 		/* Put the page frame back into free list */
 		page_frame_free_locked(pf);
 	}

 	/* There are guard pages just before and after the mapped
 	 * region. So we also need to free them from the bitmap.
 	 */
 	pos = (uint8_t *)addr - CONFIG_MMU_PAGE_SIZE;
 	total_size = size + CONFIG_MMU_PAGE_SIZE * 2;
 	virt_region_free(pos, total_size);

 out:
 	k_spin_unlock(&z_mm_lock, key);
 }

 /* Report the amount of physical memory, in bytes, currently available for
  * anonymous mappings.
  *
  * This is the number of free page frames multiplied by the page size. With
  * demand paging enabled, the CONFIG_DEMAND_PAGING_PAGE_FRAMES_RESERVE
  * frames are not counted; zero is reported if no more than that many
  * frames are free.
  */
 size_t k_mem_free_get(void)
 {
 	size_t usable_frames;
 	k_spinlock_key_t key;

 	__ASSERT(page_frames_initialized, "%s called too early", __func__);

 	/* Take a consistent snapshot of the counter */
 	key = k_spin_lock(&z_mm_lock);
 	usable_frames = z_free_page_count;
 	k_spin_unlock(&z_mm_lock, key);

 #ifdef CONFIG_DEMAND_PAGING
 	usable_frames =
 		(usable_frames > CONFIG_DEMAND_PAGING_PAGE_FRAMES_RESERVE)
 		? (usable_frames - CONFIG_DEMAND_PAGING_PAGE_FRAMES_RESERVE)
 		: 0;
 #endif

 	return usable_frames * (size_t)CONFIG_MMU_PAGE_SIZE;
 }

 /* Get the default virtual region alignment, here the default MMU page size
  *
  * Both parameters are ignored by this default implementation. It is
  * exported below as a weak alias named arch_virt_region_align(), which is
  * what z_phys_map() calls, so that a different definition can take its
  * place.
  *
  * @param[in] phys Physical address of region to be mapped, aligned to MMU_PAGE_SIZE
  * @param[in] size Size of region to be mapped, aligned to MMU_PAGE_SIZE
  *
  * @retval alignment to apply on the virtual address of this region
  */
 static size_t virt_region_align(uintptr_t phys, size_t size)
 {
 	ARG_UNUSED(phys);
 	ARG_UNUSED(size);

 	return CONFIG_MMU_PAGE_SIZE;
 }

 __weak FUNC_ALIAS(virt_region_align, arch_virt_region_align, size_t);

 /* Map a physical memory region into the virtual address space.
  *
  * The physical region is widened to page boundaries, a virtual region of
  * the same size is obtained (or, for K_MEM_DIRECT_MAP, the physical
  * address is used as the virtual address) and the arch layer is asked to
  * establish the mapping.
  *
  * This may be called from arch early boot code before z_cstart() is invoked.
  * Data will be copied and BSS zeroed, but this must not rely on any
  * initialization functions being called prior to work correctly.
  *
  * @param[out] virt_ptr Receives the virtual address corresponding to phys,
  *                      i.e. including the offset of phys within its page
  * @param[in]  phys     Physical address of the region, need not be aligned
  * @param[in]  size     Size of the region in bytes, need not be aligned
  * @param[in]  flags    K_MEM_* caching and permission flags, passed on to
  *                      arch_mem_map()
  *
  * Does not return on failure: running out of virtual address space, or a
  * direct mapping that collides with an already used virtual region, ends
  * in k_panic().
  */
 void z_phys_map(uint8_t **virt_ptr, uintptr_t phys, size_t size, uint32_t flags)
 {
 	uintptr_t aligned_phys, addr_offset;
 	size_t aligned_size, align_boundary;
 	k_spinlock_key_t key;
 	uint8_t *dest_addr;
 	size_t num_bits;
 	size_t offset;

 #ifndef CONFIG_KERNEL_DIRECT_MAP
 	__ASSERT(!(flags & K_MEM_DIRECT_MAP), "The direct-map is not enabled");
 #endif
 	addr_offset = k_mem_region_align(&aligned_phys, &aligned_size,
 					 phys, size,
 					 CONFIG_MMU_PAGE_SIZE);
 	__ASSERT(aligned_size != 0U, "0-length mapping at 0x%lx", aligned_phys);
 	__ASSERT(aligned_phys < (aligned_phys + (aligned_size - 1)),
 		 "wraparound for physical address 0x%lx (size %zu)",
 		 aligned_phys, aligned_size);

 	align_boundary = arch_virt_region_align(aligned_phys, aligned_size);

 	key = k_spin_lock(&z_mm_lock);

 	if (IS_ENABLED(CONFIG_KERNEL_DIRECT_MAP) &&
 	    (flags & K_MEM_DIRECT_MAP)) {
 		dest_addr = (uint8_t *)aligned_phys;

 		/* Mark the region of virtual memory bitmap as used
 		 * if the region overlaps the virtual memory space.
 		 *
 		 * Basically if either end of region is within
 		 * virtual memory space, we need to mark the bits.
 		 */

 		if (IN_RANGE(aligned_phys,
 			      (uintptr_t)Z_VIRT_RAM_START,
 			      (uintptr_t)(Z_VIRT_RAM_END - 1)) ||
 		    IN_RANGE(aligned_phys + aligned_size - 1,
 			      (uintptr_t)Z_VIRT_RAM_START,
 			      (uintptr_t)(Z_VIRT_RAM_END - 1))) {
 			/* Clamp to the part covered by the bitmap */
 			uint8_t *adjusted_start = MAX(dest_addr, Z_VIRT_RAM_START);
 			uint8_t *adjusted_end = MIN(dest_addr + aligned_size,
 						    Z_VIRT_RAM_END);
 			size_t adjusted_sz = adjusted_end - adjusted_start;

 			num_bits = adjusted_sz / CONFIG_MMU_PAGE_SIZE;
 			offset = virt_to_bitmap_offset(adjusted_start, adjusted_sz);
 			/* Non-zero result: the region could not be claimed */
 			if (sys_bitarray_test_and_set_region(
 			    &virt_region_bitmap, num_bits, offset, true)) {
 				goto fail;
 			}
 		}
 	} else {
 		/* Obtain an appropriately sized chunk of virtual memory */
 		dest_addr = virt_region_alloc(aligned_size, align_boundary);
 		if (!dest_addr) {
 			goto fail;
 		}
 	}

 	/* If this fails there's something amiss with virt_region_alloc.
 	 * The check covers the page-aligned size, which is what actually
 	 * gets mapped; checking the caller's size would wrongly trip for
 	 * a one byte request.
 	 */
 	__ASSERT((uintptr_t)dest_addr <
 		 ((uintptr_t)dest_addr + (aligned_size - 1)),
 		 "wraparound for virtual address %p (size %zu)",
 		 dest_addr, aligned_size);

 	LOG_DBG("arch_mem_map(%p, 0x%lx, %zu, %x) offset %lu", dest_addr,
 		aligned_phys, aligned_size, flags, addr_offset);

 	arch_mem_map(dest_addr, aligned_phys, aligned_size, flags);
 	k_spin_unlock(&z_mm_lock, key);

 	*virt_ptr = dest_addr + addr_offset;
 	return;
 fail:
 	/* Both jumps to this label are taken with z_mm_lock held. Drop it
 	 * before panicking so that the lock is not left held behind the
 	 * fatal error.
 	 */
 	k_spin_unlock(&z_mm_lock, key);

 	/* May re-visit this in the future, but for now running out of
 	 * virtual address space or failing the arch_mem_map() call is
 	 * an unrecoverable situation.
 	 *
 	 * Other problems not related to resource exhaustion we leave as
 	 * assertions since they are clearly programming mistakes.
 	 */
 	LOG_ERR("memory mapping 0x%lx (size %zu, flags 0x%x) failed",
 		phys, size, flags);
 	k_panic();
 }

 /* Undo a mapping established with z_phys_map().
  *
  * The region is widened to page boundaries in the same way as when it was
  * mapped, removed from the page tables by the arch layer and its virtual
  * address range is released.
  *
  * @param virt Virtual address of the region, need not be aligned
  * @param size Size of the region in bytes, need not be aligned
  */
 void z_phys_unmap(uint8_t *virt, size_t size)
 {
 	uintptr_t aligned_virt, addr_offset;
 	size_t aligned_size;
 	k_spinlock_key_t key;
 	void *region;

 	addr_offset = k_mem_region_align(&aligned_virt, &aligned_size,
 					 POINTER_TO_UINT(virt), size,
 					 CONFIG_MMU_PAGE_SIZE);
 	__ASSERT(aligned_size != 0U, "0-length mapping at 0x%lx", aligned_virt);
 	__ASSERT(aligned_virt < (aligned_virt + (aligned_size - 1)),
 		 "wraparound for virtual address 0x%lx (size %zu)",
 		 aligned_virt, aligned_size);

 	region = UINT_TO_POINTER(aligned_virt);

 	key = k_spin_lock(&z_mm_lock);

 	LOG_DBG("arch_mem_unmap(0x%lx, %zu) offset %lu",
 		aligned_virt, aligned_size, addr_offset);

 	/* Page tables first, then the virtual address bookkeeping */
 	arch_mem_unmap(region, aligned_size);
 	virt_region_free(region, aligned_size);

 	k_spin_unlock(&z_mm_lock, key);
 }

 /*
  * Miscellaneous
  */

 /* Widen a memory region so that it starts and ends on an alignment
  * boundary.
  *
  * @param[out] aligned_addr Receives addr rounded down to align
  * @param[out] aligned_size Receives the size, rounded up to align, needed
  *                          to still cover the original region from
  *                          aligned_addr
  * @param[in]  addr         Start address of the region
  * @param[in]  size         Size of the region in bytes
  * @param[in]  align        Alignment to apply to address and size
  *
  * @return Offset of addr from the aligned start address
  */
 size_t k_mem_region_align(uintptr_t *aligned_addr, size_t *aligned_size,
 			  uintptr_t addr, size_t size, size_t align)
 {
 	const uintptr_t base = ROUND_DOWN(addr, align);
 	const size_t lead = addr - base;

 	*aligned_addr = base;

 	/* The bytes skipped by rounding down have to be covered as well */
 	*aligned_size = ROUND_UP(size + lead, align);

 	return lead;
 }

 #if defined(CONFIG_LINKER_USE_BOOT_SECTION) || defined(CONFIG_LINKER_USE_PINNED_SECTION)
 /* Pin or unpin the page frames backing a linker section.
  *
  * @param start_addr Virtual start address of the section
  * @param end_addr   Virtual end address of the section
  * @param pin        true to set, false to clear Z_PAGE_FRAME_PINNED
  *
  * The range is widened to whole pages. Every frame in it is also recorded
  * as mapped at its boot-time virtual address.
  */
 static void mark_linker_section_pinned(void *start_addr, void *end_addr,
 				       bool pin)
 {
 	struct z_page_frame *pf;
 	uint8_t *addr;

 	/* Widen the section to page boundaries */
 	uintptr_t pinned_start = ROUND_DOWN(POINTER_TO_UINT(start_addr),
 					    CONFIG_MMU_PAGE_SIZE);
 	uintptr_t pinned_end = ROUND_UP(POINTER_TO_UINT(end_addr),
 					CONFIG_MMU_PAGE_SIZE);
 	size_t pinned_size = pinned_end - pinned_start;

 	VIRT_FOREACH(UINT_TO_POINTER(pinned_start), pinned_size, addr)
 	{
 		/* Frames are found through the boot-time virtual to
 		 * physical translation.
 		 */
 		pf = z_phys_to_page_frame(Z_BOOT_VIRT_TO_PHYS(addr));
 		frame_mapped_set(pf, addr);

 		if (pin) {
 			pf->flags |= Z_PAGE_FRAME_PINNED;
 		} else {
 			pf->flags &= ~Z_PAGE_FRAME_PINNED;
 		}
 	}
 }
 #endif /* CONFIG_LINKER_USE_BOOT_SECTION) || CONFIG_LINKER_USE_PINNED_SECTION */

 /* Initialize the memory management subsystem.
  *
  * Classifies every page frame (reserved, mapped and pinned as part of the
  * kernel image, or free), builds the free page frame list and, if demand
  * paging is enabled, initializes the backing store and the eviction
  * algorithm. After this has run the page frame APIs in this file may be
  * used.
  */
 void z_mem_manage_init(void)
 {
 	uintptr_t phys;
 	uint8_t *addr;
 	struct z_page_frame *pf;
 	k_spinlock_key_t key = k_spin_lock(&z_mm_lock);

 	free_page_frame_list_init();

 	/* addr is only used in some of the configurations below */
 	ARG_UNUSED(addr);

 #ifdef CONFIG_ARCH_HAS_RESERVED_PAGE_FRAMES
 	/* If some page frames are unavailable for use as memory, arch
 	 * code will mark Z_PAGE_FRAME_RESERVED in their flags
 	 */
 	arch_reserved_pages_update();
 #endif /* CONFIG_ARCH_HAS_RESERVED_PAGE_FRAMES */

 #ifdef CONFIG_LINKER_GENERIC_SECTIONS_PRESENT_AT_BOOT
 	/* All pages composing the Zephyr image are mapped at boot in a
 	 * predictable way. This can change at runtime.
 	 */
 	VIRT_FOREACH(Z_KERNEL_VIRT_START, Z_KERNEL_VIRT_SIZE, addr)
 	{
 		pf = z_phys_to_page_frame(Z_BOOT_VIRT_TO_PHYS(addr));
 		frame_mapped_set(pf, addr);

 		/* TODO: for now we pin the whole Zephyr image. Demand paging
 		 * currently tested with anonymously-mapped pages which are not
 		 * pinned.
 		 *
 		 * We will need to setup linker regions for a subset of kernel
 		 * code/data pages which are pinned in memory and
 		 * may not be evicted. This will contain critical CPU data
 		 * structures, and any code used to perform page fault
 		 * handling, page-ins, etc.
 		 */
 		pf->flags |= Z_PAGE_FRAME_PINNED;
 	}
 #endif /* CONFIG_LINKER_GENERIC_SECTIONS_PRESENT_AT_BOOT */

 #ifdef CONFIG_LINKER_USE_BOOT_SECTION
 	/* Pin the boot section to prevent it from being swapped out during
 	 * boot process. Will be un-pinned once boot process completes.
 	 */
 	mark_linker_section_pinned(lnkr_boot_start, lnkr_boot_end, true);
 #endif

 #ifdef CONFIG_LINKER_USE_PINNED_SECTION
 	/* Pin the page frames corresponding to the pinned symbols */
 	mark_linker_section_pinned(lnkr_pinned_start, lnkr_pinned_end, true);
 #endif

 	/* Any remaining pages that aren't mapped, reserved, or pinned get
 	 * added to the free pages list
 	 */
 	Z_PAGE_FRAME_FOREACH(phys, pf) {
 		if (z_page_frame_is_available(pf)) {
 			free_page_frame_list_put(pf);
 		}
 	}
 	LOG_DBG("free page frames: %zu", z_free_page_count);

 #ifdef CONFIG_DEMAND_PAGING
 #ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
 	z_paging_histogram_init();
 #endif
 	k_mem_paging_backing_store_init();
 	k_mem_paging_eviction_init();
 #endif
 #if __ASSERT_ON
 	/* From here on the "called too early" assertions pass */
 	page_frames_initialized = true;
 #endif
 	k_spin_unlock(&z_mm_lock, key);

 #ifndef CONFIG_LINKER_GENERIC_SECTIONS_PRESENT_AT_BOOT
 	/* If BSS section is not present in memory at boot,
 	 * it would not have been cleared. This needs to be
 	 * done now since paging mechanism has been initialized
 	 * and the BSS pages can be brought into physical
 	 * memory to be cleared.
 	 */
 	z_bss_zero();
 #endif
 }

 /* Memory management housekeeping to be done once the boot process has
  * completed. Does nothing unless CONFIG_LINKER_USE_BOOT_SECTION is
  * enabled.
  */
 void z_mem_manage_boot_finish(void)
 {
 #ifdef CONFIG_LINKER_USE_BOOT_SECTION
 	/* At the end of boot process, unpin the boot sections
 	 * as they don't need to be in memory all the time anymore.
 	 */
 	mark_linker_section_pinned(lnkr_boot_start, lnkr_boot_end, false);
 #endif
 }

 #ifdef CONFIG_DEMAND_PAGING

 #ifdef CONFIG_DEMAND_PAGING_STATS
 /* Demand paging statistics storage.
  * NOTE(review): presumably system-wide counters; the code updating them is
  * not part of this section of the file.
  */
 struct k_mem_paging_stats_t paging_stats;

 /* Timing histograms, defined outside of this file. The two backing store
  * histograms are fed by do_backing_store_page_in() and
  * do_backing_store_page_out() below.
  */
 extern struct k_mem_paging_histogram_t z_paging_histogram_eviction;
 extern struct k_mem_paging_histogram_t z_paging_histogram_backing_store_page_in;
 extern struct k_mem_paging_histogram_t z_paging_histogram_backing_store_page_out;
 #endif

 /* Page in from the backing store, optionally timing the operation.
  *
  * Thin wrapper around k_mem_paging_backing_store_page_in(). With
  * CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM enabled, the time the call took is
  * recorded in z_paging_histogram_backing_store_page_in.
  *
  * @param location Backing store location, passed through unchanged
  */
 static inline void do_backing_store_page_in(uintptr_t location)
 {
 #ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
 	uint32_t time_diff;

 	/* The time source is either the timing API or the cycle counter */
 #ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS
 	timing_t time_start, time_end;

 	time_start = timing_counter_get();
 #else
 	uint32_t time_start;

 	time_start = k_cycle_get_32();
 #endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */
 #endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */

 	k_mem_paging_backing_store_page_in(location);

 #ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
 #ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS
 	time_end = timing_counter_get();
 	time_diff = (uint32_t)timing_cycles_get(&time_start, &time_end);
 #else
 	/* Unsigned subtraction, so a single wrap of the counter is harmless */
 	time_diff = k_cycle_get_32() - time_start;
 #endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */

 	z_paging_histogram_inc(&z_paging_histogram_backing_store_page_in,
 			       time_diff);
 #endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */
 }

 /* Page out to the backing store, optionally timing the operation.
  *
  * Thin wrapper around k_mem_paging_backing_store_page_out(). With
  * CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM enabled, the time the call took is
  * recorded in z_paging_histogram_backing_store_page_out.
  *
  * @param location Backing store location, passed through unchanged
  */
 static inline void do_backing_store_page_out(uintptr_t location)
 {
 #ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
 	uint32_t time_diff;

 	/* The time source is either the timing API or the cycle counter */
 #ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS
 	timing_t time_start, time_end;

 	time_start = timing_counter_get();
 #else
 	uint32_t time_start;

 	time_start = k_cycle_get_32();
 #endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */
 #endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */

 	k_mem_paging_backing_store_page_out(location);

 #ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
 #ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS
 	time_end = timing_counter_get();
 	time_diff = (uint32_t)timing_cycles_get(&time_start, &time_end);
 #else
 	/* Unsigned subtraction, so a single wrap of the counter is harmless */
 	time_diff = k_cycle_get_32() - time_start;
 #endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */

 	z_paging_histogram_inc(&z_paging_histogram_backing_store_page_out,
 			       time_diff);
 #endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */
 }

 /* Current implementation relies on interrupt locking to prevent any page table
  * access, which falls over if other CPUs are active. Addressing this is not
  * as simple as using spinlocks as regular memory reads/writes constitute
  * "access" in this sense.
  *
  * Current needs for demand paging are on uniprocessor systems.
  */
 BUILD_ASSERT(!IS_ENABLED(CONFIG_SMP));

 /*
  * Call @p func once for every CONFIG_MMU_PAGE_SIZE step of the region that
  * starts at @p addr and spans @p size bytes, passing the address of that
  * step. The region is first checked with z_mem_assert_virtual_region().
  */
 static void virt_region_foreach(void *addr, size_t size,
 				void (*func)(void *))
 {
 	uint8_t *const base = addr;
 	size_t offset = 0;

 	z_mem_assert_virtual_region(addr, size);

 	while (offset < size) {
 		func(base + offset);
 		offset += CONFIG_MMU_PAGE_SIZE;
 	}
 }

 /*
  * Get a page frame ready to be handed over to new contents. If the frame
  * holds a data page, the caller must write it out right afterwards with
  * k_mem_paging_backing_store_page_out() whenever *dirty_ptr is true on
  * return.
  *
  * Steps, in order:
  * - A mapped frame that has no copy in the backing store is treated as
  *   dirty, even if the hardware reports it clean.
  * - The frame is mapped to the scratch area when it is dirty or when we are
  *   servicing a page fault.
  * - For a mapped frame, a backing store location is obtained and stored in
  *   *location_ptr, and the page tables are updated with that location.
  *   *location_ptr is left untouched for an un-mapped frame.
  * - With CONFIG_DEMAND_PAGING_ALLOW_IRQ the frame is flagged busy.
  * - *dirty_ptr is updated with the final dirty state.
  *
  * Returns 0 on success, or -ENOMEM if no backing store location could be
  * obtained; in that case *dirty_ptr is not updated.
  */
 static int page_frame_prepare_locked(struct z_page_frame *pf, bool *dirty_ptr,
 				     bool page_fault, uintptr_t *location_ptr)
 {
 	const uintptr_t phys = z_page_frame_to_phys(pf);
 	bool dirty = *dirty_ptr;

 	__ASSERT(!z_page_frame_is_pinned(pf), "page frame 0x%lx is pinned",
 		 phys);

 	/* A mapped page without a backing store copy must be written out
 	 * regardless of its hardware dirty state. That is the case when:
 	 * 1) it was never swapped out and the backing store was not
 	 *    pre-populated with it,
 	 * 2) its backing store copy was not preserved when it was swapped
 	 *    back in, or
 	 * 3) its preserved copy was later dropped from the backing store to
 	 *    make room for other evicted pages.
 	 */
 	if (z_page_frame_is_mapped(pf) && !dirty &&
 	    !z_page_frame_is_backed(pf)) {
 		dirty = true;
 	}

 	if (page_fault || dirty) {
 		arch_mem_scratch(phys);
 	}

 	if (!z_page_frame_is_mapped(pf)) {
 		/* Only reachable with a dirty flag if the caller mis-used us */
 		__ASSERT(!dirty, "un-mapped page determined to be dirty");
 	} else if (k_mem_paging_backing_store_location_get(pf, location_ptr,
 							   page_fault) != 0) {
 		LOG_ERR("out of backing store memory");
 		return -ENOMEM;
 	} else {
 		arch_mem_page_out(pf->addr, *location_ptr);
 	}

 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
 	/* Flag as busy so that z_page_frame_is_evictable() returns false */
 	__ASSERT(!z_page_frame_is_busy(pf), "page frame 0x%lx is already busy",
 		 phys);
 	pf->flags |= Z_PAGE_FRAME_BUSY;
 #endif

 	/* Report the possibly upgraded dirty state back to the caller */
 	*dirty_ptr = dirty;

 	return 0;
 }

 /*
  * Evict the data page at virtual address @p addr from its page frame.
  *
  * Nothing is done if the page is not currently loaded. Otherwise the page
  * frame is prepared with page_frame_prepare_locked(), written to the
  * backing store if it is dirty, and then freed.
  *
  * Interrupts are locked for the whole operation, except that with
  * CONFIG_DEMAND_PAGING_ALLOW_IRQ they are unlocked around the backing store
  * write while the scheduler stays locked.
  *
  * Returns 0 on success (including when there was nothing to evict), or the
  * error reported by page_frame_prepare_locked().
  */
 static int do_mem_evict(void *addr)
 {
 	bool dirty;
 	struct z_page_frame *pf;
 	uintptr_t location;
 	/* irq_lock() hands back an unsigned key; keep it unsigned */
 	unsigned int key;
 	int ret;
 	uintptr_t flags, phys;

 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
 	__ASSERT(!k_is_in_isr(),
 		 "%s is unavailable in ISRs with CONFIG_DEMAND_PAGING_ALLOW_IRQ",
 		 __func__);
 	k_sched_lock();
 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
 	key = irq_lock();
 	flags = arch_page_info_get(addr, &phys, false);
 	__ASSERT((flags & ARCH_DATA_PAGE_NOT_MAPPED) == 0,
 		 "address %p isn't mapped", addr);
 	if ((flags & ARCH_DATA_PAGE_LOADED) == 0) {
 		/* Un-mapped or already evicted. Nothing to do */
 		ret = 0;
 		goto out;
 	}

 	dirty = (flags & ARCH_DATA_PAGE_DIRTY) != 0;
 	pf = z_phys_to_page_frame(phys);
 	__ASSERT(pf->addr == addr, "page frame address mismatch");
 	ret = page_frame_prepare_locked(pf, &dirty, false, &location);
 	if (ret != 0) {
 		/* Propagate the failure; the page stays resident */
 		goto out;
 	}

 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
 	irq_unlock(key);
 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
 	if (dirty) {
 		do_backing_store_page_out(location);
 	}
 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
 	key = irq_lock();
 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
 	page_frame_free_locked(pf);
 out:
 	irq_unlock(key);
 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
 	k_sched_unlock();
 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
 	return ret;
 }

 /*
  * Evict every page of the region starting at @p addr and spanning @p size
  * bytes by calling do_mem_evict() on each CONFIG_MMU_PAGE_SIZE step.
  *
  * Stops at the first page that fails to evict and returns that error;
  * returns 0 if every page was processed successfully.
  */
 int k_mem_page_out(void *addr, size_t size)
 {
 	uint8_t *const base = addr;
 	int ret = 0;

 	__ASSERT(page_frames_initialized, "%s called on %p too early", __func__,
 		 addr);
 	z_mem_assert_virtual_region(addr, size);

 	/* The loop condition bails out as soon as an eviction reports failure */
 	for (size_t offset = 0; (ret == 0) && (offset < size);
 	     offset += CONFIG_MMU_PAGE_SIZE) {
 		ret = do_mem_evict(base + offset);
 	}

 	return ret;
 }

 /*
  * Evict whatever data page occupies the page frame at physical address
  * @p phys.
  *
  * If the page frame holds no mapped data page nothing is done. Otherwise
  * the frame is prepared with page_frame_prepare_locked(), written to the
  * backing store if it is dirty, and then freed.
  *
  * Interrupts are locked for the whole operation, except that with
  * CONFIG_DEMAND_PAGING_ALLOW_IRQ they are unlocked around the backing store
  * write while the scheduler stays locked.
  *
  * Returns 0 on success (including when there was nothing to evict), or the
  * error reported by page_frame_prepare_locked().
  */
 int z_page_frame_evict(uintptr_t phys)
 {
 	/* irq_lock() hands back an unsigned key; keep it unsigned */
 	unsigned int key;
 	int ret;
 	struct z_page_frame *pf;
 	bool dirty;
 	uintptr_t flags;
 	uintptr_t location;

 	__ASSERT(page_frames_initialized, "%s called on 0x%lx too early",
 		 __func__, phys);

 	/* Implementation is similar to do_page_fault() except there is no
 	 * data page to page-in, see comments in that function.
 	 */

 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
 	__ASSERT(!k_is_in_isr(),
 		 "%s is unavailable in ISRs with CONFIG_DEMAND_PAGING_ALLOW_IRQ",
 		 __func__);
 	k_sched_lock();
 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
 	key = irq_lock();
 	pf = z_phys_to_page_frame(phys);
 	if (!z_page_frame_is_mapped(pf)) {
 		/* Nothing to do, free page */
 		ret = 0;
 		goto out;
 	}
 	flags = arch_page_info_get(pf->addr, NULL, false);
 	/* Shouldn't ever happen */
 	__ASSERT((flags & ARCH_DATA_PAGE_LOADED) != 0, "data page not loaded");
 	dirty = (flags & ARCH_DATA_PAGE_DIRTY) != 0;
 	ret = page_frame_prepare_locked(pf, &dirty, false, &location);
 	if (ret != 0) {
 		/* Propagate the failure; the page stays resident */
 		goto out;
 	}

 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
 	irq_unlock(key);
 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
 	if (dirty) {
 		do_backing_store_page_out(location);
 	}
 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
 	key = irq_lock();
 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
 	page_frame_free_locked(pf);
 out:
 	irq_unlock(key);
 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
 	k_sched_unlock();
 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
 	return ret;
 }

 /*
  * Account for one page fault in the paging statistics.
  *
  * Does nothing unless CONFIG_DEMAND_PAGING_STATS is enabled. Updates the
  * system-wide counters and, with CONFIG_DEMAND_PAGING_THREAD_STATS, the
  * counters of @p faulting_thread as well.
  *
  * @param faulting_thread Thread whose per-thread counters are updated
  * @param key             Interrupt lock key obtained from irq_lock(); it is
  *                        passed to arch_irq_unlocked() to decide whether the
  *                        fault is counted as irq_unlocked or irq_locked
  */
 static inline void paging_stats_faults_inc(struct k_thread *faulting_thread,
 					   unsigned int key)
 {
 #ifdef CONFIG_DEMAND_PAGING_STATS
 	bool is_irq_unlocked = arch_irq_unlocked(key);

 	paging_stats.pagefaults.cnt++;

 	if (is_irq_unlocked) {
 		paging_stats.pagefaults.irq_unlocked++;
 	} else {
 		paging_stats.pagefaults.irq_locked++;
 	}

 #ifdef CONFIG_DEMAND_PAGING_THREAD_STATS
 	faulting_thread->paging_stats.pagefaults.cnt++;

 	if (is_irq_unlocked) {
 		faulting_thread->paging_stats.pagefaults.irq_unlocked++;
 	} else {
 		faulting_thread->paging_stats.pagefaults.irq_locked++;
 	}
 #else
 	ARG_UNUSED(faulting_thread);
 #endif

 #ifndef CONFIG_DEMAND_PAGING_ALLOW_IRQ
 	/* Faults taken in ISRs are only counted in this configuration */
 	if (k_is_in_isr()) {
 		paging_stats.pagefaults.in_isr++;

 #ifdef CONFIG_DEMAND_PAGING_THREAD_STATS
 		faulting_thread->paging_stats.pagefaults.in_isr++;
 #endif
 	}
 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
 #endif /* CONFIG_DEMAND_PAGING_STATS */
 }

 /*
  * Account for one eviction in the paging statistics.
  *
  * Does nothing unless CONFIG_DEMAND_PAGING_STATS is enabled. The eviction is
  * counted as dirty or clean according to @p dirty, in the system-wide
  * counters and, with CONFIG_DEMAND_PAGING_THREAD_STATS, in the counters of
  * @p faulting_thread.
  */
 static inline void paging_stats_eviction_inc(struct k_thread *faulting_thread,
 					     bool dirty)
 {
 #ifdef CONFIG_DEMAND_PAGING_STATS
 #ifndef CONFIG_DEMAND_PAGING_THREAD_STATS
 	ARG_UNUSED(faulting_thread);
 #endif /* CONFIG_DEMAND_PAGING_THREAD_STATS */

 	if (!dirty) {
 		paging_stats.eviction.clean++;
 #ifdef CONFIG_DEMAND_PAGING_THREAD_STATS
 		faulting_thread->paging_stats.eviction.clean++;
 #endif /* CONFIG_DEMAND_PAGING_THREAD_STATS */
 	} else {
 		paging_stats.eviction.dirty++;
 #ifdef CONFIG_DEMAND_PAGING_THREAD_STATS
 		faulting_thread->paging_stats.eviction.dirty++;
 #endif /* CONFIG_DEMAND_PAGING_THREAD_STATS */
 	}
 #endif /* CONFIG_DEMAND_PAGING_STATS */
 }

 /*
  * Ask the eviction algorithm for a page frame to evict by calling
  * k_mem_paging_eviction_select(), which also reports through @p dirty
  * whether the chosen page is dirty.
  *
  * With CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM the duration of the selection
  * is measured and added to z_paging_histogram_eviction.
  *
  * Returns whatever k_mem_paging_eviction_select() returned.
  */
 static inline struct z_page_frame *do_eviction_select(bool *dirty)
 {
 #ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
 #ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS
 	timing_t begin = timing_counter_get();
 #else
 	uint32_t begin = k_cycle_get_32();
 #endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */
 #endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */

 	struct z_page_frame *victim = k_mem_paging_eviction_select(dirty);

 #ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
 #ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS
 	timing_t finish = timing_counter_get();
 	uint32_t elapsed = (uint32_t)timing_cycles_get(&begin, &finish);
 #else
 	uint32_t elapsed = k_cycle_get_32() - begin;
 #endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */

 	/* Record how long the selection took */
 	z_paging_histogram_inc(&z_paging_histogram_eviction, elapsed);
 #endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */

 	return victim;
 }

 /*
  * Common handler for page faults and for explicit page-in / pin requests.
  *
  * The location of the data page containing @p addr is looked up with
  * arch_page_location_get():
  * - ARCH_PAGE_LOCATION_BAD: nothing is done and false is returned.
  * - ARCH_PAGE_LOCATION_PAGED_IN: the page frame is flagged pinned if
  *   @p pin is set; nothing else is done.
  * - ARCH_PAGE_LOCATION_PAGED_OUT: a page frame is taken from the free list
  *   or, if none is available, chosen by the eviction algorithm. The frame
  *   is prepared with page_frame_prepare_locked(), its old contents are
  *   written to the backing store if dirty, the requested page is read in
  *   and the frame is mapped at the page-aligned address.
  *
  * @param addr Virtual address whose data page must be made resident
  * @param pin  If true, set Z_PAGE_FRAME_PINNED on the page frame
  *
  * @return true if the request was handled, false if @p addr has no valid
  *         location or the page frame could not be prepared
  */
 static bool do_page_fault(void *addr, bool pin)
 {
 	struct z_page_frame *pf;
 	/* irq_lock() hands back an unsigned key; keep it unsigned */
 	unsigned int key;
 	int ret;
 	uintptr_t page_in_location, page_out_location;
 	enum arch_page_location status;
 	bool result;
 	bool dirty = false;
 	struct k_thread *faulting_thread = _current_cpu->current;

 	__ASSERT(page_frames_initialized, "page fault at %p happened too early",
 		 addr);

 	LOG_DBG("page fault at %p", addr);

 	/*
 	 * TODO: Add performance accounting:
 	 * - k_mem_paging_eviction_select() metrics
 	 *   * periodic timer execution time histogram (if implemented)
 	 */

 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
 	/* We lock the scheduler so that other threads are never scheduled
 	 * during the page-in/out operation.
 	 *
 	 * We do however re-enable interrupts during the page-in/page-out
 	 * operation iff interrupts were enabled when the exception was taken;
 	 * in this configuration page faults in an ISR are a bug; all their
 	 * code/data must be pinned.
 	 *
 	 * If interrupts were disabled when the exception was taken, the
 	 * arch code is responsible for keeping them that way when entering
 	 * this function.
 	 *
 	 * If this is not enabled, then interrupts are always locked for the
 	 * entire operation. This is far worse for system interrupt latency
 	 * but requires less pinned pages and ISRs may also take page faults.
 	 *
 	 * Support for allowing k_mem_paging_backing_store_page_out() and
 	 * k_mem_paging_backing_store_page_in() to also sleep and allow
 	 * other threads to run (such as in the case where the transfer is
 	 * async DMA) is not implemented. Even if limited to thread context,
 	 * arbitrary memory access triggering exceptions that put a thread to
 	 * sleep on a contended page fault operation will break scheduling
 	 * assumptions of cooperative threads or threads that implement
 	 * critical sections with spinlocks or disabling IRQs.
 	 */
 	k_sched_lock();
 	__ASSERT(!k_is_in_isr(), "ISR page faults are forbidden");
 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */

 	key = irq_lock();
 	status = arch_page_location_get(addr, &page_in_location);
 	if (status == ARCH_PAGE_LOCATION_BAD) {
 		/* Return false to treat as a fatal error */
 		result = false;
 		goto out;
 	}
 	result = true;

 	if (status == ARCH_PAGE_LOCATION_PAGED_IN) {
 		if (pin) {
 			/* It's a physical memory address */
 			uintptr_t phys = page_in_location;

 			pf = z_phys_to_page_frame(phys);
 			pf->flags |= Z_PAGE_FRAME_PINNED;
 		}

 		/* This if-block is to pin the page if it is
 		 * already present in physical memory. There is
 		 * no need to go through the following code to
 		 * pull in the data pages. So skip to the end.
 		 */
 		goto out;
 	}
 	__ASSERT(status == ARCH_PAGE_LOCATION_PAGED_OUT,
 		 "unexpected status value %d", status);

 	paging_stats_faults_inc(faulting_thread, key);

 	pf = free_page_frame_list_get();
 	if (pf == NULL) {
 		/* Need to evict a page frame */
 		pf = do_eviction_select(&dirty);
 		__ASSERT(pf != NULL, "failed to get a page frame");
 		LOG_DBG("evicting %p at 0x%lx", pf->addr,
 			z_page_frame_to_phys(pf));

 		paging_stats_eviction_inc(faulting_thread, dirty);
 	}
 	ret = page_frame_prepare_locked(pf, &dirty, true, &page_out_location);
 	__ASSERT(ret == 0, "failed to prepare page frame");
 	if (ret != 0) {
 		/* With assertions compiled out we must not carry on: no
 		 * backing store location was obtained, so page_out_location
 		 * is not valid. Report the fault as unhandled instead.
 		 */
 		result = false;
 		goto out;
 	}

 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
 	irq_unlock(key);
 	/* Interrupts are now unlocked if they were not locked when we entered
 	 * this function, and we may service ISRs. The scheduler is still
 	 * locked.
 	 */
 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
 	if (dirty) {
 		do_backing_store_page_out(page_out_location);
 	}
 	do_backing_store_page_in(page_in_location);

 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
 	key = irq_lock();
 	pf->flags &= ~Z_PAGE_FRAME_BUSY;
 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
 	if (pin) {
 		pf->flags |= Z_PAGE_FRAME_PINNED;
 	}
 	pf->flags |= Z_PAGE_FRAME_MAPPED;
 	/* Page frames are tracked by the page-aligned virtual address */
 	pf->addr = UINT_TO_POINTER(POINTER_TO_UINT(addr)
 				   & ~(CONFIG_MMU_PAGE_SIZE - 1));

 	arch_mem_page_in(addr, z_page_frame_to_phys(pf));
 	k_mem_paging_backing_store_page_finalize(pf, page_in_location);
 out:
 	irq_unlock(key);
 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
 	k_sched_unlock();
 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */

 	return result;
 }

 /*
  * Per-page worker for k_mem_page_in(): runs the page fault path on @p addr
  * without pinning. A failure is only caught by the assertion; otherwise the
  * result is discarded.
  */
 static void do_page_in(void *addr)
 {
 	bool ret = do_page_fault(addr, false);

 	__ASSERT(ret, "unmapped memory address %p", addr);
 	ARG_UNUSED(ret);
 }

 /*
  * Run do_page_in() (the page fault path, without pinning) on every
  * CONFIG_MMU_PAGE_SIZE step of the region starting at @p addr and spanning
  * @p size bytes.
  *
  * Asserts that it is not called from an ISR when
  * CONFIG_DEMAND_PAGING_ALLOW_IRQ is enabled.
  */
 void k_mem_page_in(void *addr, size_t size)
 {
 	__ASSERT(!IS_ENABLED(CONFIG_DEMAND_PAGING_ALLOW_IRQ) || !k_is_in_isr(),
 		 "%s may not be called in ISRs if CONFIG_DEMAND_PAGING_ALLOW_IRQ is enabled",
 		 __func__);
 	virt_region_foreach(addr, size, do_page_in);
 }

 /*
  * Per-page worker for k_mem_pin(): runs the page fault path on @p addr with
  * pinning requested. A failure is only caught by the assertion; otherwise
  * the result is discarded.
  */
 static void do_mem_pin(void *addr)
 {
 	bool ret = do_page_fault(addr, true);

 	__ASSERT(ret, "unmapped memory address %p", addr);
 	ARG_UNUSED(ret);
 }

 /*
  * Run do_mem_pin() (the page fault path, with pinning requested) on every
  * CONFIG_MMU_PAGE_SIZE step of the region starting at @p addr and spanning
  * @p size bytes.
  *
  * Asserts that it is not called from an ISR when
  * CONFIG_DEMAND_PAGING_ALLOW_IRQ is enabled.
  */
 void k_mem_pin(void *addr, size_t size)
 {
 	__ASSERT(!IS_ENABLED(CONFIG_DEMAND_PAGING_ALLOW_IRQ) || !k_is_in_isr(),
 		 "%s may not be called in ISRs if CONFIG_DEMAND_PAGING_ALLOW_IRQ is enabled",
 		 __func__);
 	virt_region_foreach(addr, size, do_mem_pin);
 }

 /*
  * Handle a page fault at @p addr by running do_page_fault() without
  * pinning. Returns its result: false means the address had no valid
  * location and the fault should be treated as a fatal error.
  */
 bool z_page_fault(void *addr)
 {
 	return do_page_fault(addr, false);
 }

 static void do_mem_unpin(void *addr)
 {
 	struct z_page_frame *pf;
 	unsigned int key;
 	uintptr_t flags, phys;

 	key = irq_lock();
 	flags = arch_page_info_get(addr, &phys, false);
 	__ASSERT((flags & ARCH_DATA_PAGE_NOT_MAPPED) == 0,
 		 "invalid data page at %p", addr);
 	if ((flags & ARCH_DATA_PAGE_LOADED) != 0) {
 		pf = z_phys_to_page_frame(phys);
 		pf->flags &= ~Z_PAGE_FRAME_PINNED;
 	}
 	irq_unlock(key);
 }

 /*
  * Run do_mem_unpin() on every CONFIG_MMU_PAGE_SIZE step of the region
  * starting at @p addr and spanning @p size bytes, clearing the pinned flag
  * of each page frame that currently holds one of those pages.
  *
  * Asserts that the page frame database has been initialized.
  */
 void k_mem_unpin(void *addr, size_t size)
 {
 	__ASSERT(page_frames_initialized, "%s called on %p too early", __func__,
 		 addr);
 	virt_region_foreach(addr, size, do_mem_unpin);
 }

 #endif /* CONFIG_DEMAND_PAGING */