x86: 32-bit: restore virtual linking capability
This reverts commit 7d32e9f9a588fd3ed17277fb88210d453f2c9864.
We now allow the kernel to be linked virtually. This patch:
- Properly converts between virtual/physical addresses (the offset
  arithmetic is sketched below, after the sign-offs)
- Handles early boot instruction pointer transition
- Double-maps SRAM to both its virtual and physical locations
  in the boot page tables to facilitate the instruction pointer
  transition, with logic to clean this up once the transition
  is complete.
Signed-off-by: Andrew Boie <andrew.p.boie@intel.com>
Signed-off-by: Daniel Leung <daniel.leung@intel.com>
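
The virtual/physical conversions above all reduce to a constant offset
between the kernel's physical load address and its virtual link address.
Below is a minimal sketch of that arithmetic with made-up config values; it
mirrors how Z_MEM_PHYS_ADDR()/z_mem_phys_addr() are used in this patch, but
it is not the actual sys/mem_manage.h definitions.

/* Sketch only: hypothetical offset-based conversions, not Zephyr's headers. */
#include <stdint.h>

#define SRAM_BASE_ADDRESS  0x00100000UL  /* hypothetical physical load address */
#define KERNEL_VM_BASE     0x80000000UL  /* hypothetical virtual link address  */
#define MEM_VM_OFFSET      (KERNEL_VM_BASE - SRAM_BASE_ADDRESS)

/* virtual -> physical: needed while the CPU is still executing at a physical
 * address (the crt0.S, locore.S and userspace.S hunks below)
 */
static inline uintptr_t mem_phys_addr(uintptr_t virt)
{
	return virt - MEM_VM_OFFSET;
}

/* physical -> virtual: the inverse, once the kernel runs at its link address */
static inline uintptr_t mem_virt_addr(uintptr_t phys)
{
	return phys + MEM_VM_OFFSET;
}

Wrapping a link-time symbol such as z_x86_kernel_ptables in the
virtual-to-physical conversion is what allows %cr3 to be loaded before the
jump into the virtual mapping.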
diff --git a/arch/x86/core/ia32/crt0.S b/arch/x86/core/ia32/crt0.S
index 0b7717f..976821c 100644
--- a/arch/x86/core/ia32/crt0.S
+++ b/arch/x86/core/ia32/crt0.S
@@ -21,6 +21,7 @@
#include <arch/cpu.h>
#include <arch/x86/multiboot.h>
#include <x86_mmu.h>
+#include <sys/mem_manage.h>
/* exports (private APIs) */
@@ -43,6 +44,51 @@
GDATA(_sse_mxcsr_default_value)
#endif
+.macro install_page_tables
+#ifdef CONFIG_X86_MMU
+ /* Enable paging. If virtual memory is enabled, the instruction pointer
+ * is currently at a physical address. There is an identity mapping
+ * for all RAM, plus a virtual mapping of RAM starting at
+ * CONFIG_KERNEL_VM_BASE using the same paging structures.
+ *
+ * Until we enable these page tables, only physical memory addresses
+ * work.
+ */
+ movl $Z_MEM_PHYS_ADDR(z_x86_kernel_ptables), %eax
+ movl %eax, %cr3
+
+#ifdef CONFIG_X86_PAE
+ /* Enable PAE */
+ movl %cr4, %eax
+ orl $CR4_PAE, %eax
+ movl %eax, %cr4
+
+ /* IA32_EFER NXE bit set */
+ movl $0xC0000080, %ecx
+ rdmsr
+ orl $0x800, %eax
+ wrmsr
+#endif /* CONFIG_X86_PAE */
+
+ /* Enable paging (CR0.PG, bit 31) / write protect (CR0.WP, bit 16) */
+ movl %cr0, %eax
+ orl $(CR0_PG | CR0_WP), %eax
+ movl %eax, %cr0
+
+#ifdef Z_VM_KERNEL
+ /* Jump to a virtual address, which works because the identity and
+ * virtual mappings both are to the same physical address.
+ */
+ ljmp $CODE_SEG, $vm_enter
+vm_enter:
+ /* We are now executing in virtual memory. We'll unmap the identity
+ * mappings later once we are in the C domain.
+ */
+#endif /* Z_VM_KERNEL */
+
+#endif /* CONFIG_X86_MMU */
+.endm
+
SECTION_FUNC(TEXT_START, __start)
#include "../common.S"
@@ -158,9 +204,6 @@
addl $CONFIG_ISR_STACK_SIZE, %esp
#endif
- /* Clear BSS */
- call z_bss_zero
-
#ifdef CONFIG_XIP
/* Copy data from flash to RAM.
*
@@ -179,53 +222,18 @@
mov $MAIN_TSS, %ax
ltr %ax
#endif
- /* load 32-bit operand size IDT */
- lidt Z_MEM_PHYS_ADDR(z_x86_idt)
-#ifdef CONFIG_X86_MMU
- /* Install page tables */
- movl $Z_MEM_PHYS_ADDR(z_x86_kernel_ptables), %eax
- movl %eax, %cr3
-
-#ifdef CONFIG_X86_PAE
- /* Enable PAE */
- movl %cr4, %eax
- orl $CR4_PAE, %eax
- movl %eax, %cr4
-
- /* IA32_EFER NXE bit set */
- movl $0xC0000080, %ecx
- rdmsr
- orl $0x800, %eax
- wrmsr
-#endif /* CONFIG_X86_PAE */
-
- /* Enable paging (CR0.PG, bit 31) / write protect (CR0.WP, bit 16) */
- movl %cr0, %eax
- orl $(CR0_PG | CR0_WP), %eax
- movl %eax, %cr0
-#endif /* CONFIG_X86_MMU */
-
-#if (Z_MEM_PHYS_ADDR(0x1000) != 0x1000)
- /*
- * Page table loaded so we can start executing in
- * virtual address space.
- *
- * Note that __prep_c and z_x86_prep_c() must be
- * mapped in virtual address space in gen_mmu.py.
- *
- * This jump must be done due to relative
- * addressing code emitted by the toolchain
- * (think EIP + offset). If we are not already in
- * virtual address space, everything afterwards
- * would still be referenced via physical addresses
- * and will crash if we have a kernel bigger than
- * physical memory with demand paging (for example).
+ /* Note that page tables must be installed after z_data_copy(),
+ * as the page tables themselves are copied into RAM there.
*/
- ljmp $CODE_SEG, $__prep_c
+ install_page_tables
-__prep_c:
-#endif
+ /* Clear BSS */
+ call z_bss_zero
+
+ /* load 32-bit operand size IDT */
+ lidt z_x86_idt
pushl %ebx /* pointer to multiboot info, or NULL */
call z_x86_prep_c /* enter kernel; never returns */
@@ -250,8 +258,8 @@
* setup by the BIOS (or GRUB?).
*/
- /* physical start address */
- .long Z_MEM_PHYS_ADDR(_idt_base_address)
+ /* IDT table start address */
+ .long _idt_base_address
#ifdef CONFIG_SET_GDT
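
The boot page tables installed above are generated by gen_mmu.py; the double
mapping described in the commit message amounts to pointing two top-level
slots at the same lower-level table. The sketch below illustrates that idea
under non-PAE 32-bit assumptions (4 MiB per top-level entry); the names and
flag values are illustrative, not the generator's actual output.

/* Illustrative only: one leaf table backing both an identity slot and a
 * virtual slot in the top-level directory, so the same physical frames are
 * reachable at two addresses during early boot.
 */
#include <stdint.h>

#define NUM_PDE    1024U
#define PDE_SCOPE  0x400000U   /* 4 MiB covered by one top-level entry */
#define PDE_P_RW   0x3U        /* present | writable */

static uint32_t page_directory[NUM_PDE] __attribute__((aligned(4096)));
static uint32_t sram_table[1024] __attribute__((aligned(4096)));

static void double_map_sram(uintptr_t phys_base, uintptr_t virt_base)
{
	uint32_t entry = (uint32_t)(uintptr_t)sram_table | PDE_P_RW;

	page_directory[phys_base / PDE_SCOPE] = entry;  /* identity mapping */
	page_directory[virt_base / PDE_SCOPE] = entry;  /* virtual mapping  */
}

Removing the identity half later is then just a matter of zeroing the first
of those two slots for every top-level entry spanned by SRAM, which is what
identity_map_remove() in x86_mmu.c does below.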
diff --git a/arch/x86/core/intel64/locore.S b/arch/x86/core/intel64/locore.S
index 0088622..5a56205 100644
--- a/arch/x86/core/intel64/locore.S
+++ b/arch/x86/core/intel64/locore.S
@@ -42,7 +42,7 @@
/* Page tables created at build time by gen_mmu.py
* NOTE: Presumes phys=virt
*/
- movl $z_x86_kernel_ptables, %eax
+ movl $Z_MEM_PHYS_ADDR(z_x86_kernel_ptables), %eax
movl %eax, %cr3
set_efer
@@ -64,7 +64,7 @@
clts
/* NOTE: Presumes phys=virt */
- movq $z_x86_kernel_ptables, %rax
+ movq $Z_MEM_PHYS_ADDR(z_x86_kernel_ptables), %rax
movq %rax, %cr3
set_efer
diff --git a/arch/x86/core/intel64/userspace.S b/arch/x86/core/intel64/userspace.S
index 06d1469..b6353ef 100644
--- a/arch/x86/core/intel64/userspace.S
+++ b/arch/x86/core/intel64/userspace.S
@@ -87,7 +87,7 @@
pushq %rax
/* NOTE: Presumes phys=virt */
- movq $z_x86_kernel_ptables, %rax
+ movq $Z_MEM_PHYS_ADDR(z_x86_kernel_ptables), %rax
movq %rax, %cr3
popq %rax
movq $0, -8(%rsp) /* Delete stashed RAX data */
diff --git a/arch/x86/core/prep_c.c b/arch/x86/core/prep_c.c
index 62b1420..cdecdec 100644
--- a/arch/x86/core/prep_c.c
+++ b/arch/x86/core/prep_c.c
@@ -40,6 +40,10 @@
ARG_UNUSED(info);
#endif
+#ifdef CONFIG_MMU
+ z_x86_mmu_init();
+#endif
+
#if CONFIG_X86_STACK_PROTECTION
for (int i = 0; i < CONFIG_MP_NUM_CPUS; i++) {
z_x86_set_stack_guard(z_interrupt_stacks[i]);
diff --git a/arch/x86/core/x86_mmu.c b/arch/x86/core/x86_mmu.c
index 17461043..8d85ee0 100644
--- a/arch/x86/core/x86_mmu.c
+++ b/arch/x86/core/x86_mmu.c
@@ -517,9 +517,12 @@
if (phys == virt) {
/* Identity mappings */
COLOR(YELLOW);
- } else {
- /* Other mappings */
+ } else if (phys + Z_MEM_VM_OFFSET == virt) {
+ /* Permanent RAM mappings */
COLOR(GREEN);
+ } else {
+ /* General mapped pages */
+ COLOR(CYAN);
}
} else {
/* Intermediate entry */
@@ -580,7 +583,8 @@
}
#endif
- printk("%s at %p: ", info->name, table);
+ printk("%s at %p (0x%" PRIxPTR "): ", info->name, table,
+ z_mem_phys_addr(table));
if (level == 0) {
printk("entire address space\n");
} else {
@@ -1111,6 +1115,42 @@
MASK_ALL, 0);
}
+static void identity_map_remove(void)
+{
+#ifdef Z_VM_KERNEL
+ size_t size, scope = get_entry_scope(0);
+ uint8_t *pos;
+
+ k_mem_region_align((uintptr_t *)&pos, &size,
+ (uintptr_t)CONFIG_SRAM_BASE_ADDRESS,
+ (size_t)CONFIG_SRAM_SIZE * 1024U, scope);
+
+ /* We booted with RAM mapped both to its identity and virtual
+ * mapping starting at CONFIG_KERNEL_VM_BASE. This was done by
+ * double-linking the relevant tables in the top-level table.
+ * At this point we don't need the identity mapping(s) any more, so
+ * zero the top-level table entries corresponding to the
+ * physical mapping.
+ */
+ while (size) {
+ pentry_t *entry = get_entry_ptr(z_x86_kernel_ptables, pos, 0);
+
+ /* set_pte */
+ *entry = 0;
+ pos += scope;
+ size -= scope;
+ }
+#endif
+}
+
+/* Invoked to remove the identity mappings in the page tables;
+ * they were only needed to transition the instruction pointer at early boot.
+ */
+void z_x86_mmu_init(void)
+{
+ identity_map_remove();
+}
+
#if CONFIG_X86_STACK_PROTECTION
void z_x86_set_stack_guard(k_thread_stack_t *stack)
{
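
identity_map_remove() first widens the SRAM range out to top-level entry
boundaries before walking it. The helper below is a stand-in sketch of that
alignment step, assuming k_mem_region_align() rounds the start down and the
end up to the given boundary (here, get_entry_scope(0)); it is not the real
implementation.

#include <stddef.h>
#include <stdint.h>

/* Hypothetical stand-in: widen [addr, addr + size) so both ends fall on
 * 'align' boundaries; align must be a power of two.
 */
static void region_align(uintptr_t *aligned_addr, size_t *aligned_size,
			 uintptr_t addr, size_t size, size_t align)
{
	*aligned_addr = addr & ~(align - 1);                      /* round start down */
	*aligned_size = ((addr + size + align - 1) & ~(align - 1))
			- *aligned_addr;                          /* round end up */
}

Each iteration of the while loop in identity_map_remove() then clears exactly
one top-level entry, stepping by the entry scope until the aligned size is
consumed.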
diff --git a/arch/x86/include/x86_mmu.h b/arch/x86/include/x86_mmu.h
index 06d8ba4..baa109c 100644
--- a/arch/x86/include/x86_mmu.h
+++ b/arch/x86/include/x86_mmu.h
@@ -229,5 +229,8 @@
#ifdef CONFIG_X86_COMMON_PAGE_TABLE
void z_x86_swap_update_common_page_table(struct k_thread *incoming);
#endif
+
+/* Early-boot paging setup tasks, called from prep_c */
+void z_x86_mmu_init(void);
#endif /* _ASMLANGUAGE */
#endif /* ZEPHYR_ARCH_X86_INCLUDE_X86_MMU_H */