x86: 32-bit: restore virtual linking capability

This reverts commit 7d32e9f9a588fd3ed17277fb88210d453f2c9864.

We now allow the kernel to be linked virtually. This patch:

- Properly converts between virtual/physical addresses (see the
  first sketch below)
- Handles early boot instruction pointer transition
- Double-maps SRAM to both its virtual and physical locations
  in the boot page tables to facilitate the instruction pointer
  transition, with logic to clean this up once the transition
  is complete (see the second sketch below)
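
First sketch: the virtual/physical conversion is a fixed offset. This
is a simplified illustration only; the real Z_MEM_PHYS_ADDR() and
Z_MEM_VIRT_ADDR() macros live in include/sys/mem_manage.h and also
fold in the configured SRAM/VM offsets and runtime assertions, and
VM_OFFSET below is an assumed name, not the exact expression:

    /* Distance between where the kernel is linked (virtual) and
     * where it is loaded in RAM (physical)
     */
    #define VM_OFFSET (CONFIG_KERNEL_VM_BASE - CONFIG_SRAM_BASE_ADDRESS)

    /* Link-time (virtual) address -> location in physical RAM */
    #define Z_MEM_PHYS_ADDR(virt) ((virt) - VM_OFFSET)

    /* Physical RAM address -> where the kernel sees it with paging on */
    #define Z_MEM_VIRT_ADDR(phys) ((phys) + VM_OFFSET)

This is why crt0.S loads %cr3 with Z_MEM_PHYS_ADDR(z_x86_kernel_ptables):
CR3 must hold the page tables' physical address, while the symbol itself
resolves to a link-time (virtual) address.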

Signed-off-by: Andrew Boie <andrew.p.boie@intel.com>
Signed-off-by: Daniel Leung <daniel.leung@intel.com>
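
Second sketch: the boot-time double mapping amounts to aliasing the
top-level paging entries that cover RAM, so the same lower-level
tables are reachable at both the identity (physical) and the virtual
addresses; identity_map_remove() then zeroes the identity-side entries
once execution is in C. The real tables are generated at build time by
arch/x86/gen_mmu.py, so the helper below is purely conceptual and its
names (double_map_ram, entry_span) are assumed, with get_entry_scope()
and pentry_t borrowed from x86_mmu.c:

    /* Conceptual sketch only. Assumes the virt-phys offset is a
     * multiple of entry_span, which is what makes aliasing at the
     * top level sufficient.
     */
    static void double_map_ram(pentry_t *top_level)
    {
        size_t entry_span = get_entry_scope(0); /* bytes per top-level entry */
        uintptr_t phys = CONFIG_SRAM_BASE_ADDRESS;
        uintptr_t virt = CONFIG_KERNEL_VM_BASE;

        for (size_t off = 0; off < (size_t)CONFIG_SRAM_SIZE * 1024U;
             off += entry_span) {
            /* The identity slot reuses the lower-level table already
             * installed for the virtual mapping of the same RAM.
             */
            top_level[(phys + off) / entry_span] =
                top_level[(virt + off) / entry_span];
        }
    }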
diff --git a/arch/x86/core/ia32/crt0.S b/arch/x86/core/ia32/crt0.S
index 0b7717f..976821c 100644
--- a/arch/x86/core/ia32/crt0.S
+++ b/arch/x86/core/ia32/crt0.S
@@ -21,6 +21,7 @@
 #include <arch/cpu.h>
 #include <arch/x86/multiboot.h>
 #include <x86_mmu.h>
+#include <sys/mem_manage.h>
 
 	/* exports (private APIs) */
 
@@ -43,6 +44,51 @@
 	GDATA(_sse_mxcsr_default_value)
 #endif
 
+.macro install_page_tables
+#ifdef CONFIG_X86_MMU
+	/* Enable paging. If virtual memory is enabled, the instruction pointer
+	 * is currently at a physical address. There is an identity mapping
+	 * for all RAM, plus a virtual mapping of RAM starting at
+	 * CONFIG_KERNEL_VM_BASE using the same paging structures.
+	 *
+	 * Until we enable these page tables, only physical memory addresses
+	 * work.
+	 */
+	movl	$Z_MEM_PHYS_ADDR(z_x86_kernel_ptables), %eax
+	movl	%eax, %cr3
+
+#ifdef CONFIG_X86_PAE
+	/* Enable PAE */
+	movl	%cr4, %eax
+	orl	$CR4_PAE, %eax
+	movl	%eax, %cr4
+
+	/* IA32_EFER NXE bit set */
+	movl	$0xC0000080, %ecx
+	rdmsr
+	orl	$0x800, %eax
+	wrmsr
+#endif /* CONFIG_X86_PAE */
+
+	/* Enable paging (CR0.PG, bit 31) / write protect (CR0.WP, bit 16) */
+	movl	%cr0, %eax
+	orl	$(CR0_PG | CR0_WP), %eax
+	movl	%eax, %cr0
+
+#ifdef Z_VM_KERNEL
+	/* Jump to a virtual address. This works because the identity and
+	 * virtual mappings both point to the same physical memory.
+	 */
+	ljmp    $CODE_SEG, $vm_enter
+vm_enter:
+	/* We are now executing in virtual memory. The identity mappings
+	 * are removed later, once we reach C code.
+	 */
+#endif /* Z_VM_KERNEL */
+
+#endif /* CONFIG_X86_MMU */
+.endm
+
 SECTION_FUNC(TEXT_START, __start)
 
 #include "../common.S"
@@ -158,9 +204,6 @@
 	addl	$CONFIG_ISR_STACK_SIZE, %esp
 #endif
 
-	/* Clear BSS */
-	call	z_bss_zero
-
 #ifdef CONFIG_XIP
 	/* Copy data from flash to RAM.
 	 *
@@ -179,53 +222,18 @@
 	mov $MAIN_TSS, %ax
 	ltr %ax
 #endif
-	/* load 32-bit operand size IDT */
-	lidt	Z_MEM_PHYS_ADDR(z_x86_idt)
 
-#ifdef CONFIG_X86_MMU
-	/* Install page tables */
-	movl $Z_MEM_PHYS_ADDR(z_x86_kernel_ptables), %eax
-	movl %eax, %cr3
-
-#ifdef CONFIG_X86_PAE
-	/* Enable PAE */
-	movl %cr4, %eax
-	orl $CR4_PAE, %eax
-	movl %eax, %cr4
-
-	/* IA32_EFER NXE bit set */
-	movl $0xC0000080, %ecx
-	rdmsr
-	orl $0x800, %eax
-	wrmsr
-#endif /* CONFIG_X86_PAE */
-
-	/* Enable paging (CR0.PG, bit 31) / write protect (CR0.WP, bit 16) */
-	movl %cr0, %eax
-	orl $(CR0_PG | CR0_WP), %eax
-	movl %eax, %cr0
-#endif /* CONFIG_X86_MMU */
-
-#if (Z_MEM_PHYS_ADDR(0x1000) != 0x1000)
-	/*
-	 * Page table loaded so we can start executing in
-	 * virtual address space.
-	 *
-	 * Note that __prep_c and z_x86_prep_c() must be
-	 * mapped in virtual address space in gen_mmu.py.
-	 *
-	 * This jump must be done due to relative
-	 * addressing code emitted by the toolchain
-	 * (think EIP + offset). If we are not already in
-	 * virtual address space, everything afterwards
-	 * would still be referenced via physical addresses
-	 * and will crash if we have a kernel bigger than
-	 * physical memory with demand paging (for example).
+	/* Note that the page tables must be installed after z_data_copy(),
+	 * since the page tables themselves are copied into RAM by that
+	 * call.
 	 */
-	ljmp	$CODE_SEG, $__prep_c
+	install_page_tables
 
-__prep_c:
-#endif
+	/* Clear BSS */
+	call	z_bss_zero
+
+	/* load 32-bit operand size IDT */
+	lidt	z_x86_idt
 
 	pushl	%ebx		/* pointer to multiboot info, or NULL */
 	call	z_x86_prep_c	/* enter kernel; never returns */
@@ -250,8 +258,8 @@
 	 * setup by the BIOS (or GRUB?).
 	 */
 
-	/* physical start address */
-	.long	Z_MEM_PHYS_ADDR(_idt_base_address)
+	/* IDT table start address */
+	.long	_idt_base_address
 
 
 #ifdef CONFIG_SET_GDT
diff --git a/arch/x86/core/intel64/locore.S b/arch/x86/core/intel64/locore.S
index 0088622..5a56205 100644
--- a/arch/x86/core/intel64/locore.S
+++ b/arch/x86/core/intel64/locore.S
@@ -42,7 +42,7 @@
 	/* Page tables created at build time by gen_mmu.py
 	 * NOTE: Presumes phys=virt
 	 */
-	movl $z_x86_kernel_ptables, %eax
+	movl $Z_MEM_PHYS_ADDR(z_x86_kernel_ptables), %eax
 	movl %eax, %cr3
 
 	set_efer
@@ -64,7 +64,7 @@
 	clts
 
 	/* NOTE: Presumes phys=virt */
-	movq $z_x86_kernel_ptables, %rax
+	movq $Z_MEM_PHYS_ADDR(z_x86_kernel_ptables), %rax
 	movq %rax, %cr3
 
 	set_efer
diff --git a/arch/x86/core/intel64/userspace.S b/arch/x86/core/intel64/userspace.S
index 06d1469..b6353ef 100644
--- a/arch/x86/core/intel64/userspace.S
+++ b/arch/x86/core/intel64/userspace.S
@@ -87,7 +87,7 @@
 	pushq	%rax
 
 	/* NOTE: Presumes phys=virt */
-	movq	$z_x86_kernel_ptables, %rax
+	movq	$Z_MEM_PHYS_ADDR(z_x86_kernel_ptables), %rax
 	movq	%rax, %cr3
 	popq	%rax
 	movq	$0, -8(%rsp)	/* Delete stashed RAX data */
diff --git a/arch/x86/core/prep_c.c b/arch/x86/core/prep_c.c
index 62b1420..cdecdec 100644
--- a/arch/x86/core/prep_c.c
+++ b/arch/x86/core/prep_c.c
@@ -40,6 +40,10 @@
 	ARG_UNUSED(info);
 #endif
 
+#ifdef CONFIG_MMU
+	z_x86_mmu_init();
+#endif
+
 #if CONFIG_X86_STACK_PROTECTION
 	for (int i = 0; i < CONFIG_MP_NUM_CPUS; i++) {
 		z_x86_set_stack_guard(z_interrupt_stacks[i]);
diff --git a/arch/x86/core/x86_mmu.c b/arch/x86/core/x86_mmu.c
index 17461043..8d85ee0 100644
--- a/arch/x86/core/x86_mmu.c
+++ b/arch/x86/core/x86_mmu.c
@@ -517,9 +517,12 @@
 				if (phys == virt) {
 					/* Identity mappings */
 					COLOR(YELLOW);
-				} else {
-					/* Other mappings */
+				} else if (phys + Z_MEM_VM_OFFSET == virt) {
+					/* Permanent RAM mappings */
 					COLOR(GREEN);
+				} else {
+					/* General mapped pages */
+					COLOR(CYAN);
 				}
 			} else {
 				/* Intermediate entry */
@@ -580,7 +583,8 @@
 	}
 #endif
 
-	printk("%s at %p: ", info->name, table);
+	printk("%s at %p (0x%" PRIxPTR "): ", info->name, table,
+	       z_mem_phys_addr(table));
 	if (level == 0) {
 		printk("entire address space\n");
 	} else {
@@ -1111,6 +1115,42 @@
 			   MASK_ALL, 0);
 }
 
+static void identity_map_remove(void)
+{
+#ifdef Z_VM_KERNEL
+	size_t size, scope = get_entry_scope(0);
+	uint8_t *pos;
+
+	k_mem_region_align((uintptr_t *)&pos, &size,
+			   (uintptr_t)CONFIG_SRAM_BASE_ADDRESS,
+			   (size_t)CONFIG_SRAM_SIZE * 1024U, scope);
+
+	/* We booted with RAM mapped both at its identity (physical)
+	 * location and at its virtual location starting at
+	 * CONFIG_KERNEL_VM_BASE; this was done by aliasing the relevant
+	 * lower-level tables in the top-level table. Now that the
+	 * identity mapping(s) are no longer needed, zero the top-level
+	 * entries corresponding to the physical mapping.
+	 */
+	while (size) {
+		pentry_t *entry = get_entry_ptr(z_x86_kernel_ptables, pos, 0);
+
+		/* Clear the top-level entry, dropping this identity mapping */
+		*entry = 0;
+		pos += scope;
+		size -= scope;
+	}
+#endif
+}
+
+/* Invoked to remove the identity mappings in the page tables;
+ * they were only needed to transition the instruction pointer at early boot.
+ */
+void z_x86_mmu_init(void)
+{
+	identity_map_remove();
+}
+
 #if CONFIG_X86_STACK_PROTECTION
 void z_x86_set_stack_guard(k_thread_stack_t *stack)
 {
diff --git a/arch/x86/include/x86_mmu.h b/arch/x86/include/x86_mmu.h
index 06d8ba4..baa109c 100644
--- a/arch/x86/include/x86_mmu.h
+++ b/arch/x86/include/x86_mmu.h
@@ -229,5 +229,8 @@
 #ifdef CONFIG_X86_COMMON_PAGE_TABLE
 void z_x86_swap_update_common_page_table(struct k_thread *incoming);
 #endif
+
+/* Early-boot paging setup tasks, called from prep_c */
+void z_x86_mmu_init(void);
 #endif /* _ASMLANGUAGE */
 #endif /* ZEPHYR_ARCH_X86_INCLUDE_X86_MMU_H */