kernel: add page frame management

Initialize the page frame ontology at boot and update it
when we do memory mappings.

Signed-off-by: Andrew Boie <andrew.p.boie@intel.com>
diff --git a/include/arch/x86/intel64/linker.ld b/include/arch/x86/intel64/linker.ld
index ffa7949..ef091a4 100644
--- a/include/arch/x86/intel64/linker.ld
+++ b/include/arch/x86/intel64/linker.ld
@@ -9,14 +9,16 @@
 #define ROMABLE_REGION RAM
 #define RAMABLE_REGION RAM
 
+#define MMU_PAGE_ALIGN		. = ALIGN(CONFIG_MMU_PAGE_SIZE);
+
 /* Used to align areas with separate memory permission characteristics
  * so that the page permissions can be set in the MMU. Without this,
  * the kernel is just one blob with the same RWX permissions on all RAM
  */
 #ifdef CONFIG_SRAM_REGION_PERMISSIONS
-	#define MMU_PAGE_ALIGN	. = ALIGN(CONFIG_MMU_PAGE_SIZE);
+	#define MMU_PAGE_ALIGN_PERM	MMU_PAGE_ALIGN
 #else
-	#define MMU_PAGE_ALIGN
+	#define MMU_PAGE_ALIGN_PERM
 #endif
 
 ENTRY(CONFIG_KERNEL_ENTRY)
@@ -34,12 +36,12 @@
 	_locore_start = .;
 	*(.locore)
 	*(.locore.*)
-	MMU_PAGE_ALIGN
+	MMU_PAGE_ALIGN_PERM
 	_locore_end = .;
 
 	_lorodata_start = .;
 	*(.lorodata)
-	MMU_PAGE_ALIGN
+	MMU_PAGE_ALIGN_PERM
 	_lodata_start = .;
 
 	*(.lodata)
@@ -54,7 +56,7 @@
 	 * On x86-64 the IDT is in rodata and doesn't need to be in the
 	 * trampoline page.
 	 */
-	MMU_PAGE_ALIGN
+	MMU_PAGE_ALIGN_PERM
 	z_shared_kernel_page_start = .;
 #endif /* CONFIG_X86_KPTI */
 
@@ -63,7 +65,7 @@
 
 #ifdef CONFIG_X86_KPTI
 	*(.trampolines)
-	MMU_PAGE_ALIGN
+	MMU_PAGE_ALIGN_PERM
 	z_shared_kernel_page_end = .;
 
 	ASSERT(z_shared_kernel_page_end - z_shared_kernel_page_start == 4096,
@@ -93,7 +95,7 @@
 
 	#include <linker/kobject-text.ld>
 
-	MMU_PAGE_ALIGN
+	MMU_PAGE_ALIGN_PERM
 	} GROUP_LINK_IN(ROMABLE_REGION)
 
 	_image_text_end = .;
@@ -123,15 +125,15 @@
 
 #include <linker/cplusplus-rom.ld>
 
-	MMU_PAGE_ALIGN
+	MMU_PAGE_ALIGN_PERM
 	_image_rodata_end = .;
 	_image_rodata_size = _image_rodata_end - _image_rodata_start;
 	_image_rom_end = .;
 
 #ifdef CONFIG_USERSPACE
 	/* APP SHARED MEMORY REGION */
-#define SMEM_PARTITION_ALIGN(size) MMU_PAGE_ALIGN
-#define APP_SHARED_ALIGN  MMU_PAGE_ALIGN
+#define SMEM_PARTITION_ALIGN(size) MMU_PAGE_ALIGN_PERM
+#define APP_SHARED_ALIGN  MMU_PAGE_ALIGN_PERM
 
 #include <app_smem.ld>
 
@@ -148,7 +150,7 @@
 
 	SECTION_PROLOGUE(_BSS_SECTION_NAME, (NOLOAD), ALIGN(16))
 	{
-	MMU_PAGE_ALIGN
+	MMU_PAGE_ALIGN_PERM
 #ifndef CONFIG_USERSPACE
 	_image_ram_start = .;
 #endif
@@ -180,7 +182,7 @@
 
 /* Must be last in RAM */
 #include <linker/kobject.ld>
-	. = ALIGN(CONFIG_MMU_PAGE_SIZE);
+	MMU_PAGE_ALIGN
 	_image_ram_end = .;
 	z_mapped_end = .;
 	_end = .;
diff --git a/include/sys/mem_manage.h b/include/sys/mem_manage.h
index faa903b..fdc2793 100644
--- a/include/sys/mem_manage.h
+++ b/include/sys/mem_manage.h
@@ -51,9 +51,10 @@
 /**
  * Map a physical memory region into the kernel's virtual address space
  *
- * Given a physical address and a size, return a linear address
- * representing the base of where the physical region is mapped in
- * the virtual address space for the Zephyr kernel.
+ * This function is intended for mapping memory-mapped I/O regions into
+ * the virtual address space. Given a physical address and a size, return a
+ * linear address representing the base of where the physical region is mapped
+ * in the virtual address space for the Zephyr kernel.
  *
  * This function alters the active page tables in the area reserved
  * for the kernel. This function will choose the virtual address
@@ -70,12 +71,18 @@
  * with user access and code execution forbidden. This policy is changed
  * by passing K_MEM_CACHE_* and K_MEM_PERM_* macros into the 'flags' parameter.
  *
- * If there is insufficient virtual address space for the mapping, or
- * bad flags are passed in, or if additional memory is needed to update
- * page tables that is not available, this will generate a kernel panic.
+ * If there is insufficient virtual address space for the mapping, this
+ * will generate a kernel panic.
  *
  * This API is only available if CONFIG_MMU is enabled.
  *
+ * Using this function to map system RAM page frames is highly discouraged;
+ * it may conflict with anonymous memory mappings and demand paging and
+ * produce undefined behavior. Do not use this for RAM unless you know
+ * exactly what you are doing. If you need a chunk of memory, use k_mem_map().
+ * If you need a contiguous buffer of physical memory, statically declare it
+ * and pin it at build time; it will be mapped when the system boots.
+ *
  * This API is part of infrastructure still under development and may
  * change.
  *
diff --git a/kernel/include/kernel_internal.h b/kernel/include/kernel_internal.h
index a05925d..3e0c4f2 100644
--- a/kernel/include/kernel_internal.h
+++ b/kernel/include/kernel_internal.h
@@ -192,6 +192,11 @@
 
 #endif /* CONFIG_INSTRUMENT_THREAD_SWITCHING */
 
+/* Init hook for page frame management, invoked immediately upon entry to the
+ * main thread, before POST_KERNEL tasks run
+ */
+void z_mem_manage_init(void);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/kernel/include/mmu.h b/kernel/include/mmu.h
new file mode 100644
index 0000000..712e719
--- /dev/null
+++ b/kernel/include/mmu.h
@@ -0,0 +1,198 @@
+/*
+ * Copyright (c) 2020 Intel Corporation.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+#ifndef KERNEL_INCLUDE_MMU_H
+#define KERNEL_INCLUDE_MMU_H
+
+#ifdef CONFIG_MMU
+
+#include <stdint.h>
+#include <sys/slist.h>
+#include <sys/__assert.h>
+#include <sys/util.h>
+#include <sys/mem_manage.h>
+#include <linker/linker-defs.h>
+
+/*
+ * At present, page frame management is only done for main system RAM,
+ * and we generate paging structures based on CONFIG_SRAM_BASE_ADDRESS
+ * and CONFIG_SRAM_SIZE.
+ *
+ * If we have other RAM regions (DCCM, etc) these typically have special
+ * properties and shouldn't be used generically for demand paging or
+ * anonymous mappings. We don't currently maintain an ontology of these in the
+ * core kernel.
+ */
+#define Z_PHYS_RAM_START	((uintptr_t)CONFIG_SRAM_BASE_ADDRESS)
+#define Z_PHYS_RAM_SIZE		((size_t)KB(CONFIG_SRAM_SIZE))
+#define Z_PHYS_RAM_END		(Z_PHYS_RAM_START + Z_PHYS_RAM_SIZE)
+#define Z_NUM_PAGE_FRAMES	(Z_PHYS_RAM_SIZE / CONFIG_MMU_PAGE_SIZE)
+
+/** Bounds and size of the virtual address space */
+#define Z_VIRT_RAM_START	((uint8_t *)CONFIG_KERNEL_VM_BASE)
+#define Z_VIRT_RAM_SIZE		((size_t)CONFIG_KERNEL_VM_SIZE)
+#define Z_VIRT_RAM_END		(Z_VIRT_RAM_START + Z_VIRT_RAM_SIZE)
+
+/* Boot-time virtual location of the kernel image. */
+#define Z_KERNEL_VIRT_START	((uint8_t *)(&z_mapped_start))
+#define Z_KERNEL_VIRT_END	((uint8_t *)(&z_mapped_end))
+#define Z_KERNEL_VIRT_SIZE	((size_t)(&z_mapped_size))
+
+/*
+ * Macros and data structures for physical page frame accounting,
+ * APIs for use by eviction and backing store algorithms. This code
+ * is otherwise not application-facing.
+ */
+
+/*
+ * z_page_frame flags bits
+ */
+
+/** This page contains critical kernel data and will never be swapped */
+#define Z_PAGE_FRAME_PINNED		BIT(0)
+
+/** This physical page is reserved by hardware; we will never use it */
+#define Z_PAGE_FRAME_RESERVED		BIT(1)
+
+/**
+ * This physical page is mapped to some virtual memory address
+ *
+ * Currently, we just support one mapping per page frame. If a page frame
+ * is mapped to multiple virtual pages then it must be pinned.
+ */
+#define Z_PAGE_FRAME_MAPPED		BIT(2)
+
+/**
+ * This page frame is currently involved in a page-in/out operation
+ */
+#define Z_PAGE_FRAME_BUSY		BIT(3)
+
+/**
+ * Data structure for physical page frames
+ *
+ * An array of these is instantiated, one element per physical RAM page.
+ * Hence it's necessary to constrain its size as much as possible.
+ */
+struct z_page_frame {
+	union {
+		/* If mapped, virtual address this page is mapped to */
+		void *addr;
+
+		/* If unmapped and available, free pages list membership. */
+		sys_snode_t node;
+	};
+
+	/* Z_PAGE_FRAME_* flags */
+	uint8_t flags;
+
+	/* TODO: Backing store and eviction algorithms may both need to
+	 * introduce custom members for accounting purposes. Come up with
+	 * a layer of abstraction for this. They may also want additional
+	 * flags bits which shouldn't clobber each other. At all costs
+	 * the total size of struct z_page_frame must be minimized.
+	 */
+} __packed;
+
+static inline bool z_page_frame_is_pinned(struct z_page_frame *pf)
+{
+	return (pf->flags & Z_PAGE_FRAME_PINNED) != 0;
+}
+
+static inline bool z_page_frame_is_reserved(struct z_page_frame *pf)
+{
+	return (pf->flags & Z_PAGE_FRAME_RESERVED) != 0;
+}
+
+static inline bool z_page_frame_is_mapped(struct z_page_frame *pf)
+{
+	return (pf->flags & Z_PAGE_FRAME_MAPPED) != 0;
+}
+
+static inline bool z_page_frame_is_busy(struct z_page_frame *pf)
+{
+	return (pf->flags & Z_PAGE_FRAME_BUSY) != 0;
+}
+
+static inline bool z_page_frame_is_evictable(struct z_page_frame *pf)
+{
+	return (!z_page_frame_is_reserved(pf) && z_page_frame_is_mapped(pf) &&
+		!z_page_frame_is_pinned(pf) && !z_page_frame_is_busy(pf));
+}
+
+/* If true, page frame is unused: not reserved, not mapped, not busy, a member
+ * of some free pages list, and available to be mapped
+ */
+static inline bool z_page_frame_is_available(struct z_page_frame *page)
+{
+	return page->flags == 0;
+}
+
+static inline void z_assert_phys_aligned(uintptr_t phys)
+{
+	__ASSERT(phys % CONFIG_MMU_PAGE_SIZE == 0,
+		 "physical address 0x%lx is not page-aligned", phys);
+	(void)phys;
+}
+
+/* Reserved pages */
+#define Z_VM_RESERVED	0
+
+extern struct z_page_frame z_page_frames[Z_NUM_PAGE_FRAMES];
+
+static inline uintptr_t z_page_frame_to_phys(struct z_page_frame *pf)
+{
+	return (uintptr_t)((pf - z_page_frames) * CONFIG_MMU_PAGE_SIZE) +
+			Z_PHYS_RAM_START;
+}
+
+/* Presumes there is but one mapping in the virtual address space */
+static inline void *z_page_frame_to_virt(struct z_page_frame *pf)
+{
+	return pf->addr;
+}
+
+static inline bool z_is_page_frame(uintptr_t phys)
+{
+	z_assert_phys_aligned(phys);
+	return (phys >= Z_PHYS_RAM_START) && (phys < Z_PHYS_RAM_END);
+}
+
+static inline struct z_page_frame *z_phys_to_page_frame(uintptr_t phys)
+{
+	__ASSERT(z_is_page_frame(phys),
+		 "0x%lx not an SRAM physical address", phys);
+
+	return &z_page_frames[(phys - Z_PHYS_RAM_START) /
+			      CONFIG_MMU_PAGE_SIZE];
+}
+
+static inline void z_mem_assert_virtual_region(uint8_t *addr, size_t size)
+{
+	__ASSERT((uintptr_t)addr % CONFIG_MMU_PAGE_SIZE == 0,
+		 "unaligned addr %p", addr);
+	__ASSERT(size % CONFIG_MMU_PAGE_SIZE == 0,
+		 "unaligned size %zu", size);
+	__ASSERT(addr + size > addr,
+		 "region %p size %zu zero or wraps around", addr, size);
+	__ASSERT(addr >= Z_VIRT_RAM_START && addr + size < Z_VIRT_RAM_END,
+		 "invalid virtual address region %p (%zu)", addr, size);
+}
+
+/* Debug function, pretty-print page frame information for all frames
+ * concisely to printk.
+ */
+void z_page_frames_dump(void);
+
+/* Number of free page frames. This information may go stale immediately */
+extern size_t z_free_page_count;
+
+/* Convenience macro for iterating over all page frames */
+#define Z_PAGE_FRAME_FOREACH(_phys, _pageframe) \
+	for (_phys = Z_PHYS_RAM_START, _pageframe = z_page_frames; \
+	     _phys < Z_PHYS_RAM_END; \
+	     _phys += CONFIG_MMU_PAGE_SIZE, _pageframe++)
+
+#endif /* CONFIG_MMU */
+#endif /* KERNEL_INCLUDE_MMU_H */
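
Note: a sketch of how eviction or backing store code might consume the helpers
above. This function is illustrative only (its name is made up) and is not
added by this patch; real callers would also need to hold the MM lock, since
page frame data can change at any time:

    #include <mmu.h>          /* kernel/include/mmu.h, kernel-internal */
    #include <sys/printk.h>

    /* Walk the page frame array and count frames eligible for eviction */
    size_t count_evictable_frames(void)
    {
        uintptr_t phys;
        struct z_page_frame *pf;
        size_t count = 0;

        Z_PAGE_FRAME_FOREACH(phys, pf) {
            if (z_page_frame_is_evictable(pf)) {
                printk("evictable frame at 0x%lx\n", phys);
                count++;
            }
        }

        return count;
    }
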
diff --git a/kernel/init.c b/kernel/init.c
index 9e2956a..7d1ce4a 100644
--- a/kernel/init.c
+++ b/kernel/init.c
@@ -136,6 +136,14 @@
 	ARG_UNUSED(unused2);
 	ARG_UNUSED(unused3);
 
+#ifdef CONFIG_MMU
+	/* Invoked here such that backing store or eviction algorithms may
+	 * initialize kernel objects, and that all POST_KERNEL and later tasks
+	 * may perform memory management tasks (except for z_phys_map() which
+	 * is allowed at any time)
+	 */
+	z_mem_manage_init();
+#endif /* CONFIG_MMU */
 	z_sys_post_kernel = true;
 
 	z_sys_init_run_level(_SYS_INIT_LEVEL_POST_KERNEL);
diff --git a/kernel/mmu.c b/kernel/mmu.c
index b618061..200ae42 100644
--- a/kernel/mmu.c
+++ b/kernel/mmu.c
@@ -9,28 +9,144 @@
 #include <stdint.h>
 #include <kernel_arch_interface.h>
 #include <spinlock.h>
+#include <mmu.h>
+#include <init.h>
+#include <kernel_internal.h>
+#include <linker/linker-defs.h>
 #include <logging/log.h>
 LOG_MODULE_DECLARE(os, CONFIG_KERNEL_LOG_LEVEL);
 
+/*
+ * General terminology:
+ * - A page frame is a page-sized physical memory region in RAM. It is a
+ *   container where a data page may be placed. It is always referred to by
+ *   physical address. We have a convention of using uintptr_t for physical
+ *   addresses. We instantiate a struct z_page_frame to store metadata for
+ *   every page frame.
+ *
+ * - A data page is a page-sized region of data. It may exist in a page frame,
+ *   or be paged out to some backing store. Its location can always be looked
+ *   up in the CPU's page tables (or equivalent) by virtual address.
+ *   The data type will always be void * or in some cases uint8_t * when we
+ *   want to do pointer arithmetic.
+ */
+
 /* Spinlock to protect any globals in this file and serialize page table
  * updates in arch code
  */
-static struct k_spinlock mm_lock;
+struct k_spinlock z_mm_lock;
 
 /*
- * Overall virtual memory map. When the kernel starts, it is expected that all
- * memory regions are mapped into one large virtual region at the beginning of
- * CONFIG_KERNEL_VM_BASE. Unused virtual memory up to the limit noted by
- * CONFIG_KERNEL_VM_SIZE may be used for runtime memory mappings.
+ * General page frame management
+ */
+
+/* Database of all RAM page frames */
+struct z_page_frame z_page_frames[Z_NUM_PAGE_FRAMES];
+
+#if __ASSERT_ON
+/* Indicator that z_page_frames has been initialized; many of these APIs do
+ * not work before POST_KERNEL
+ */
+static bool page_frames_initialized;
+#endif
+
+/* Add colors to page frame dumps to indicate mapping type */
+#define COLOR_PAGE_FRAMES	1
+
+#if COLOR_PAGE_FRAMES
+#define ANSI_DEFAULT "\x1B[0m"
+#define ANSI_RED     "\x1B[1;31m"
+#define ANSI_GREEN   "\x1B[1;32m"
+#define ANSI_YELLOW  "\x1B[1;33m"
+#define ANSI_BLUE    "\x1B[1;34m"
+#define ANSI_MAGENTA "\x1B[1;35m"
+#define ANSI_CYAN    "\x1B[1;36m"
+#define ANSI_GREY    "\x1B[1;90m"
+
+#define COLOR(x)	printk(_CONCAT(ANSI_, x))
+#else
+#define COLOR(x)	do { } while (0)
+#endif
+
+static void page_frame_dump(struct z_page_frame *pf)
+{
+	if (z_page_frame_is_reserved(pf)) {
+		COLOR(CYAN);
+		printk("R");
+	} else if (z_page_frame_is_busy(pf)) {
+		COLOR(MAGENTA);
+		printk("B");
+	} else if (z_page_frame_is_pinned(pf)) {
+		COLOR(YELLOW);
+		printk("P");
+	} else if (z_page_frame_is_available(pf)) {
+		COLOR(GREY);
+		printk(".");
+	} else if (z_page_frame_is_mapped(pf)) {
+		COLOR(DEFAULT);
+		printk("M");
+	} else {
+		COLOR(RED);
+		printk("?");
+	}
+}
+
+void z_page_frames_dump(void)
+{
+	int column = 0;
+
+	__ASSERT(page_frames_initialized, "%s called too early", __func__);
+	printk("Physical memory from 0x%lx to 0x%lx\n",
+	       Z_PHYS_RAM_START, Z_PHYS_RAM_END);
+
+	for (int i = 0; i < Z_NUM_PAGE_FRAMES; i++) {
+		struct z_page_frame *pf = &z_page_frames[i];
+
+		page_frame_dump(pf);
+
+		column++;
+		if (column == 64) {
+			column = 0;
+			printk("\n");
+		}
+	}
+
+	COLOR(DEFAULT);
+	if (column != 0) {
+		printk("\n");
+	}
+}
+
+#define VIRT_FOREACH(_base, _size, _pos) \
+	for (_pos = _base; \
+	     _pos < ((uint8_t *)_base + _size); _pos += CONFIG_MMU_PAGE_SIZE)
+
+#define PHYS_FOREACH(_base, _size, _pos) \
+	for (_pos = _base; \
+	     _pos < ((uintptr_t)_base + _size); _pos += CONFIG_MMU_PAGE_SIZE)
+
+/*
+ * Virtual address space management
  *
- * +--------------+ <- CONFIG_KERNEL_VM_BASE
+ * Call all of these functions with z_mm_lock held.
+ *
+ * Overall virtual memory map: When the kernel starts, it resides in
+ * virtual memory in the region Z_KERNEL_VIRT_START to
+ * Z_KERNEL_VIRT_END. Unused virtual memory past this, up to the limit
+ * noted by CONFIG_KERNEL_VM_SIZE, may be used for runtime memory mappings.
+ *
+ * +--------------+ <- Z_VIRT_RAM_START
+ * | Undefined VM | <- May contain ancillary regions like x86_64's locore
+ * +--------------+ <- Z_KERNEL_VIRT_START (often == Z_VIRT_RAM_START)
  * | Mapping for  |
- * | all RAM      |
+ * | main kernel  |
+ * | image        |
+ * |              |
+ * |              |
+ * +--------------+ <- Z_KERNEL_VIRT_END
  * |              |
- * |              |
- * +--------------+ <- CONFIG_KERNEL_VM_BASE + CONFIG_KERNEL_RAM_SIZE
- * | Available    |    also the mapping limit as mappings grown downward
- * | virtual mem  |
+ * | Unused,      |
+ * | Available VM |
  * |              |
  * |..............| <- mapping_pos (grows downward as more mappings are made)
  * | Mapping      |
@@ -40,31 +156,183 @@
  * | ...          |
  * +--------------+
  * | Mapping      |
- * +--------------+ <- CONFIG_KERNEL_VM_BASE + CONFIG_KERNEL_VM_SIZE
+ * +--------------+ <- mappings start here
+ * | Reserved     | <- special purpose virtual page(s) of size Z_VM_RESERVED
+ * +--------------+ <- Z_VIRT_RAM_END
  *
- * At the moment we just have one area for mappings and they are permanent.
- * This is under heavy development and may change.
+ * At the moment we just have one downward-growing area for mappings.
+ * There is currently no support for un-mapping memory, see #28900.
+ */
+static uint8_t *mapping_pos = Z_VIRT_RAM_END - Z_VM_RESERVED;
+
+/* Get a chunk of virtual memory and mark it as being in-use.
+ *
+ * This may be called from arch early boot code before z_cstart() is invoked.
+ * Data will be copied and BSS zeroed, but this must not rely on any
+ * initialization functions having been called beforehand to work correctly.
+ */
+static void *virt_region_get(size_t size)
+{
+	uint8_t *dest_addr;
+
+	if ((mapping_pos - size) < Z_KERNEL_VIRT_END) {
+		LOG_ERR("insufficient virtual address space (requested %zu)",
+			size);
+		return NULL;
+	}
+
+	mapping_pos -= size;
+	dest_addr = mapping_pos;
+
+	return dest_addr;
+}
+
+/*
+ * Free page frames management
+ *
+ * Call all of these functions with z_mm_lock held.
  */
 
- /* Current position for memory mappings in kernel memory.
-  * At the moment, all kernel memory mappings are permanent.
-  * Memory mappings start at the end of the address space, and grow
-  * downward.
-  *
-  * All of this is under heavy development and is subject to change.
-  */
-static uint8_t *mapping_pos =
-		(uint8_t *)((uintptr_t)CONFIG_KERNEL_VM_BASE +
-			    (uintptr_t)CONFIG_KERNEL_VM_SIZE);
-
-/* Lower-limit of virtual address mapping. Immediately below this is the
- * permanent identity mapping for all SRAM.
+/* Linked list of unused and available page frames.
+ *
+ * TODO: This is very simple and treats all free page frames as being equal.
+ * However, there are use-cases to consolidate free pages such that entire
+ * SRAM banks can be switched off to save power, and so obtaining free pages
+ * may require a more complex ontology which prefers page frames in RAM banks
+ * which are still active.
+ *
+ * This implies in the future there may be multiple slists managing physical
+ * pages. Each page frame will still just have one snode link.
  */
-static uint8_t *mapping_limit =
-	(uint8_t *)((uintptr_t)CONFIG_KERNEL_VM_BASE +
-		    (size_t)CONFIG_KERNEL_RAM_SIZE);
+static sys_slist_t free_page_frame_list;
 
-size_t k_mem_region_align(uintptr_t *aligned_addr, size_t *aligned_size,
+/* Number of unused and available free page frames */
+size_t z_free_page_count;
+
+#define PF_ASSERT(pf, expr, fmt, ...) \
+	__ASSERT(expr, "page frame 0x%lx: " fmt, z_page_frame_to_phys(pf), \
+		 ##__VA_ARGS__)
+
+/* Get an unused page frame (any one will do), or NULL if there are none */
+static struct z_page_frame *free_page_frame_list_get(void)
+{
+	sys_snode_t *node;
+	struct z_page_frame *pf = NULL;
+
+	node = sys_slist_get(&free_page_frame_list);
+	if (node != NULL) {
+		z_free_page_count--;
+		pf = CONTAINER_OF(node, struct z_page_frame, node);
+		PF_ASSERT(pf, z_page_frame_is_available(pf),
+			 "unavailable but somehow on free list");
+	}
+
+	return pf;
+}
+
+/* Release a page frame back into the list of free pages */
+static void free_page_frame_list_put(struct z_page_frame *pf)
+{
+	PF_ASSERT(pf, z_page_frame_is_available(pf),
+		 "unavailable page put on free list");
+	sys_slist_append(&free_page_frame_list, &pf->node);
+	z_free_page_count++;
+}
+
+static void free_page_frame_list_init(void)
+{
+	sys_slist_init(&free_page_frame_list);
+}
+
+/*
+ * Memory Mapping
+ */
+
+/* Called after the frame is mapped in the arch layer, to update our
+ * local ontology (and do some assertions while we're at it)
+ */
+static void frame_mapped_set(struct z_page_frame *pf, void *addr)
+{
+	PF_ASSERT(pf, !z_page_frame_is_reserved(pf),
+		  "attempted to map a reserved page frame");
+
+	/* We do allow multiple mappings for pinned page frames
+	 * since we will never need to reverse map them.
+	 * This is uncommon; use-cases are for things like the
+	 * Zephyr equivalent of vDSOs
+	 */
+	PF_ASSERT(pf, !z_page_frame_is_mapped(pf) || z_page_frame_is_pinned(pf),
+		 "non-pinned and already mapped to %p", pf->addr);
+
+	pf->flags |= Z_PAGE_FRAME_MAPPED;
+	pf->addr = addr;
+}
+
+/* This may be called from arch early boot code before z_cstart() is invoked.
+ * Data will be copied and BSS zeroed, but this must not rely on any
+ * initialization functions having been called beforehand to work correctly.
+ */
+void z_phys_map(uint8_t **virt_ptr, uintptr_t phys, size_t size, uint32_t flags)
+{
+	uintptr_t aligned_phys, addr_offset;
+	size_t aligned_size;
+	int ret;
+	k_spinlock_key_t key;
+	uint8_t *dest_addr;
+
+	addr_offset = k_mem_region_align(&aligned_phys, &aligned_size,
+					 phys, size,
+					 CONFIG_MMU_PAGE_SIZE);
+	__ASSERT(aligned_size != 0, "0-length mapping at 0x%lx", aligned_phys);
+	__ASSERT(aligned_phys < (aligned_phys + (aligned_size - 1)),
+		 "wraparound for physical address 0x%lx (size %zu)",
+		 aligned_phys, aligned_size);
+
+	key = k_spin_lock(&z_mm_lock);
+	/* Obtain an appropriately sized chunk of virtual memory */
+	dest_addr = virt_region_get(aligned_size);
+	if (dest_addr == NULL) {
+		goto fail;
+	}
+
+	/* If this fails there's something amiss with virt_region_get */
+	__ASSERT((uintptr_t)dest_addr <
+		 ((uintptr_t)dest_addr + (size - 1)),
+		 "wraparound for virtual address %p (size %zu)",
+		 dest_addr, size);
+
+	LOG_DBG("arch_mem_map(%p, 0x%lx, %zu, %x) offset %lu", dest_addr,
+		aligned_phys, aligned_size, flags, addr_offset);
+
+	ret = arch_mem_map(dest_addr, aligned_phys, aligned_size, flags);
+	if (ret != 0) {
+		LOG_ERR("arch_mem_map() failed with %d", ret);
+		goto fail;
+	}
+	k_spin_unlock(&z_mm_lock, key);
+
+	*virt_ptr = dest_addr + addr_offset;
+	return;
+fail:
+	/* May re-visit this in the future, but for now running out of
+	 * virtual address space or failing the arch_mem_map() call is
+	 * an unrecoverable situation.
+	 *
+	 * Other problems not related to resource exhaustion we leave as
+	 * assertions since they are clearly programming mistakes.
+	 */
+	LOG_ERR("memory mapping 0x%lx (size %zu, flags 0x%x) failed",
+		phys, size, flags);
+	k_panic();
+}
+
+/*
+ * Miscellaneous
+ */
+
+size_t k_mem_region_align(uintptr_t *aligned_phys, size_t *aligned_size,
 			  uintptr_t phys_addr, size_t size, size_t align)
 {
 	size_t addr_offset;
@@ -72,66 +340,58 @@
 	/* The actual mapped region must be page-aligned. Round down the
 	 * physical address and pad the region size appropriately
 	 */
-	*aligned_addr = ROUND_DOWN(phys_addr, align);
-	addr_offset = phys_addr - *aligned_addr;
+	*aligned_phys = ROUND_DOWN(phys_addr, align);
+	addr_offset = phys_addr - *aligned_phys;
 	*aligned_size = ROUND_UP(size + addr_offset, align);
 
 	return addr_offset;
 }
 
-void z_phys_map(uint8_t **virt_ptr, uintptr_t phys, size_t size, uint32_t flags)
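+/* Difference between where the kernel image is mapped in virtual memory at
+ * boot (CONFIG_KERNEL_VM_BASE + CONFIG_KERNEL_VM_OFFSET) and where it is
+ * loaded in physical RAM (CONFIG_SRAM_BASE_ADDRESS).
+ */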
+#define VM_OFFSET	 ((CONFIG_KERNEL_VM_BASE + CONFIG_KERNEL_VM_OFFSET) - \
+			  CONFIG_SRAM_BASE_ADDRESS)
+
+/* Only applies to boot RAM mappings within the Zephyr image that have never
+ * been remapped or paged out. Never use this unless you know exactly what you
+ * are doing.
+ */
+#define BOOT_VIRT_TO_PHYS(virt) ((uintptr_t)(((uint8_t *)virt) - VM_OFFSET))
+
+void z_mem_manage_init(void)
 {
-	uintptr_t aligned_addr, addr_offset;
-	size_t aligned_size;
-	int ret;
-	k_spinlock_key_t key;
-	uint8_t *dest_virt;
+	uintptr_t phys;
+	uint8_t *addr;
+	struct z_page_frame *pf;
+	k_spinlock_key_t key = k_spin_lock(&z_mm_lock);
 
-	addr_offset = k_mem_region_align(&aligned_addr, &aligned_size,
-					 phys, size,
-					 CONFIG_MMU_PAGE_SIZE);
+	free_page_frame_list_init();
 
-	key = k_spin_lock(&mm_lock);
-
-	/* Carve out some unused virtual memory from the top of the
-	 * address space
+#ifdef CONFIG_ARCH_HAS_RESERVED_PAGE_FRAMES
+	/* If some page frames are unavailable for use as memory, arch
+	 * code will mark Z_PAGE_FRAME_RESERVED in their flags
 	 */
-	if ((mapping_pos - aligned_size) < mapping_limit) {
-		LOG_ERR("insufficient kernel virtual address space");
-		goto fail;
+	arch_reserved_pages_update();
+#endif /* CONFIG_ARCH_HAS_RESERVED_PAGE_FRAMES */
+
+	/* All pages composing the Zephyr image are mapped at boot in a
+	 * predictable way. This can change at runtime.
+	 */
+	VIRT_FOREACH(Z_KERNEL_VIRT_START, Z_KERNEL_VIRT_SIZE, addr)
+	{
+		frame_mapped_set(z_phys_to_page_frame(BOOT_VIRT_TO_PHYS(addr)),
+				 addr);
 	}
-	mapping_pos -= aligned_size;
-	dest_virt = mapping_pos;
 
-	LOG_DBG("arch_mem_map(%p, 0x%lx, %zu, %x) offset %lu\n", dest_virt,
-		aligned_addr, aligned_size, flags, addr_offset);
-	__ASSERT(dest_virt != NULL, "NULL page memory mapping");
-	__ASSERT(aligned_size != 0, "0-length mapping at 0x%lx", aligned_addr);
-	__ASSERT((uintptr_t)dest_virt <
-		 ((uintptr_t)dest_virt + (aligned_size - 1)),
-		 "wraparound for virtual address %p (size %zu)",
-		 dest_virt, size);
-	__ASSERT(aligned_addr < (aligned_addr + (size - 1)),
-		 "wraparound for physical address 0x%lx (size %zu)",
-		 aligned_addr, size);
-
-	ret = arch_mem_map(dest_virt, aligned_addr, aligned_size, flags);
-	k_spin_unlock(&mm_lock, key);
-
-	if (ret == 0) {
-		*virt_ptr = dest_virt + addr_offset;
-	} else {
-		/* This happens if there is an insurmountable problem
-		 * with the selected cache modes or access flags
-		 * with no safe fallback
-		 */
-
-		LOG_ERR("arch_mem_map() to %p returned %d", dest_virt, ret);
-		goto fail;
+	/* Any remaining pages that aren't mapped, reserved, or pinned get
+	 * added to the free pages list
+	 */
+	Z_PAGE_FRAME_FOREACH(phys, pf) {
+		if (z_page_frame_is_available(pf)) {
+			free_page_frame_list_put(pf);
+		}
 	}
-	return;
-fail:
-	LOG_ERR("memory mapping 0x%lx (size %zu, flags 0x%x) failed",
-		phys, size, flags);
-	k_panic();
+	LOG_DBG("free page frames: %zu", z_free_page_count);
+#if __ASSERT_ON
+	page_frames_initialized = true;
+#endif
+	k_spin_unlock(&z_mm_lock, key);
 }
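
Note: for reference, the alignment behavior of k_mem_region_align() as used by
z_phys_map(), shown with illustrative values and a 4 KB page size (the
function name align_example() is made up for this sketch):

    #include <sys/mem_manage.h>

    void align_example(void)
    {
        uintptr_t aligned_phys;
        size_t aligned_size;
        size_t offset;

        /* A 0x100-byte request at physical address 0xFE000123 */
        offset = k_mem_region_align(&aligned_phys, &aligned_size,
                                    0xFE000123UL, 0x100, 4096);

        /* Results: aligned_phys == 0xFE000000 (rounded down to a page
         * boundary), offset == 0x123 (distance into that page), and
         * aligned_size == 0x1000 (request plus offset, rounded up).
         * z_phys_map() maps the aligned region and hands back
         * dest_addr + offset to its caller.
         */
    }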