blob: d60b8ef42c53e12de6e688848060ca4fd0ea1b53 [file] [log] [blame]
/*
* Copyright (c) 2018 Intel Corporation
*
* SPDX-License-Identifier: Apache-2.0
*/
#include "xuk-config.h"
#include "shared-page.h"
#include "x86_64-hw.h"
#ifdef CONFIG_XUK_DEBUG
#include "printf.h"
#include "vgacon.h"
#include "serial.h"
#else
/* Debug output is compiled out in this configuration: provide a
 * printf() that ignores its arguments and reports zero characters
 * written, so the unconditional printf() calls in this file still
 * build.
 */
int printf(const char *fmt, ...)
{
	(void)fmt;

	return 0;
}
#endif
/* This i386 code stub is designed to link internally (i.e. it shares
* nothing with the 64 bit world) and be loaded into RAM in high
* memory (generally at 0x100000) in a single (R/W/X) block with its
* .text, .rodata, .data and .bss included. Its stack lives in the
* fifth page of memory at 0x04000-0x4fff. After finishing 64 bit
* initialization, it will JMP to the 16-byte-aligned address that
* immediately follows this block in memory (exposed by the linker as
* _start64), which should then be able to run in an environment where
* all of physical RAM is mapped, except for the bottom 16kb.
*
* Memory layout on exit:
*
* + Pages 0-3 are an unmapped NULL guard
* + Page 4: contains stack and bss for the setup code, and a GDT.
* After 64 bit setup, it's likely this will be reused.
* + Pages 5-11: are the bootstrap page table
*
* Note that the initial page table makes no attempt to identify
* memory regions. Everything in the first 4G is mapped as cacheable
* RAM. MMIO drivers will need to remap their memory based on PCI BAR
* regions or whatever.
*/
/* Cute trick to turn a preprocessor macro containing a number literal
 * into a string immediate in gcc basic asm context.
 *
 * Two levels are required: the argument of ASM_IMM() is fully
 * macro-expanded before it is handed to _ASM_IMM(), whose '#'
 * operator then stringifies the expansion rather than the macro's
 * name. The "$" prefix makes the result an AT&T-syntax immediate,
 * and adjacent string literals are concatenated by the compiler.
 */
#define _ASM_IMM(s) #s
#define ASM_IMM(s) "$" _ASM_IMM(s)
/* Entry point, to be linked at the very start of the image. Set a
* known-good stack (either the top of the shared page for the boot
* CPU, or one provided by stub16 on others), push the multiboot
* arguments in EAX, EBX and call into C code.
*/
/* 32 bit entry stub, emitted into the .start32 section. On entry EAX
 * holds the boot magic and EBX its companion argument; both are
 * forwarded to cstart(magic, arg).
 */
__asm__(".pushsection .start32\n"
/* Default stack pointer: 0x5000 */
" mov $0x5000, %esp\n"
/* Zero EDX so that 0x4000(%edx) below addresses absolute 0x4000 */
" xor %edx, %edx\n"
/* If EAX carries the stub16 magic, replace ESP with the 32 bit word
 * stored at 0x4000. NOTE(review): presumably this is
 * _shared.smpinit_stack, which cstart() later reads as the SMP
 * stack -- confirm against shared-page.h.
 */
" cmp " ASM_IMM(BOOT_MAGIC_STUB16) ", %eax\n"
" cmove 0x4000(%edx), %esp\n"
/* Arguments are pushed last-first: EBX becomes cstart()'s second
 * parameter (arg), EAX its first (magic).
 */
" pushl %ebx\n"
" pushl %eax\n"
/* Nothing follows the call; cstart() ends with a far jump. */
" call cstart\n"
".popsection\n");
/* The multiboot header must lie entirely within the first 8k of the
* file and be 32-bit aligned. This stub doesn't get that big, so we
* don't bother with special linkage.
*/
/* Multiboot header: three 32 bit words (magic, flags, checksum) where
 * the checksum is chosen so that all three sum to zero.
 */
#define MULTIBOOT_MAGIC 0x1badb002
#define MULTIBOOT_FLAGS (1 << 1) /* 2nd bit is "want memory map" */

const int multiboot_header[] = {
	[0] = MULTIBOOT_MAGIC,
	[1] = MULTIBOOT_FLAGS,
	[2] = 0 - MULTIBOOT_MAGIC - MULTIBOOT_FLAGS, /* csum */
};
/* Creates and returns a generic/sane page table for 64 bit startup
* (64 bit mode requires paging enabled). All of the bottom 4G
* (whether backing memory is present or not) gets a mapping with 2M
* pages, except that the bottom 2M are mapped with 4k pages and leave
* the first four pages unmapped as a NULL guard.
*
* Makes no attempt to identify non-RAM/MMIO regions, it just maps
* everything. We rely on the firmware to have set up MTRRs for us
* where needed, otherwise that will all be cacheable memory.
*/
/* Builds the bootstrap page tables needed to enable paging for long
 * mode and returns the address of the top-level table (the caller
 * loads it into CR3).
 *
 * Layout: entry 0 of the top-level table points at a second-level
 * table whose entries 0-3 each point at a page directory covering
 * 1G with 512 identity-mapped 2M pages. The single exception is
 * the first directory entry of the first gigabyte, which points at
 * a table of 4k pages so that pages 0-3 can be left not-present as
 * a 16k NULL guard while pages 4-511 stay identity mapped.
 *
 * No attempt is made to tell RAM from MMIO: everything in the low 4G
 * is mapped writable, relying on the firmware's MTRR setup for
 * regions that must not be cached.
 *
 * NOTE(review): every entry has .addr stored before its flag bits
 * are set, and the directory/table pages come from alloc_page(0).
 * This presumably relies on .addr overlaying the whole 64 bit entry
 * (so the store also wipes stale bits of a page that was not
 * cleared) -- confirm against struct pte64 before reordering.
 */
void *init_page_tables(void)
{
	/* Top two levels; presumably alloc_page(1) hands back a cleared
	 * page, since only a few of their entries are written here
	 */
	struct pte64 *l4 = alloc_page(1);
	struct pte64 *l3 = alloc_page(1);

	l4[0].addr = (unsigned long)l3;
	l4[0].present = 1;
	l4[0].writable = 1;

	for (unsigned int gb = 0; gb < 4; gb++) {
		struct pte64 *l2 = alloc_page(0);
		int first_2m = 0;

		l3[gb].addr = (unsigned long)l2;
		l3[gb].present = 1;
		l3[gb].writable = 1;

		if (gb == 0U) {
			/* The bottom 2M go through a table of 4k pages
			 * instead of a single large page.
			 */
			struct pte64 *l1 = alloc_page(0);

			l2[0].addr = (unsigned long)l1;
			l2[0].present = 1;
			l2[0].writable = 1;

			/* Pages 0-3: NULL guard, left not-present */
			for (int pg = 0; pg < 4; pg++) {
				l1[pg].addr = 0;
			}

			/* Pages 4-511: identity mapped */
			for (int pg = 4; pg < 512; pg++) {
				l1[pg].addr = pg << 12;
				l1[pg].present = 1;
				l1[pg].writable = 1;
			}

			/* Directory slot 0 is taken; start at slot 1 */
			first_2m = 1;
		}

		/* Remaining directory slots: 2M supervisor pages */
		for (int i = first_2m; i < 512; i++) {
			l2[i].addr = (gb << 30) | (i << 21);
			l2[i].present = 1;
			l2[i].writable = 1;
			l2[i].pagesize_pat = 1;
		}
	}

	/* Flush caches out of paranoia. In theory, x86 page walking
	 * happens downstream of the system-coherent dcache and this
	 * isn't needed.
	 */
	__asm__ volatile("wbinvd");

	return l4;
}
#ifdef CONFIG_XUK_DEBUG
/* Debug character sink: forwards c to serial_putc() and then to
 * vgacon_putc(). cstart() installs it as z_putchar on the boot CPU.
 */
void putchar(int c)
{
serial_putc(c);
vgacon_putc(c);
}
#endif
/* C entry point of the 32 bit stub, called from the .start32 assembly
 * with the two values it pushed.
 *
 * magic: BOOT_MAGIC_STUB16 when entered on a secondary CPU via
 *        stub16. Any other value is handled as the boot CPU;
 *        BOOT_MAGIC_MULTIBOOT additionally enables a debug dump of
 *        the multiboot info.
 * arg:   on multiboot entry, dereferenced as the address of the
 *        multiboot info structure; otherwise unused.
 *
 * Sets up CR3/CR4/EFER/CR0 to enter long mode, loads a temporary GDT
 * and finishes with a far jump to _start64. Does not return.
 */
void cstart(unsigned int magic, unsigned int arg)
{
	if (magic == BOOT_MAGIC_STUB16) {
		/* %x consumes an unsigned int, so convert the address
		 * explicitly instead of passing a pointer
		 */
		printf("SMP CPU up in 32 bit protected mode. Stack ~%xh\n",
		       (unsigned int)&magic);
	} else {
		shared_init();
#ifdef CONFIG_XUK_DEBUG
		serial_init();
		z_putchar = putchar;
#endif
		printf("Entering stub32 on boot cpu, magic %xh stack ~%xh\n",
		       magic, (unsigned int)&magic);
	}

	/* The multiboot memory map turns out not to be very useful.
	 * The basic numbers logged here are only a subset of the true
	 * memory map if it has holes or >4G memory, and the full map
	 * passed in the second argument tends to live in low memory
	 * and get easily clobbered by our own muckery. If we care
	 * about reading memory maps at runtime we probably want to be
	 * using BIOS e820 like Linux does.
	 */
	if (magic == BOOT_MAGIC_MULTIBOOT) {
		printf("Magic: %p MBI Addr: %p\n", (void *)magic, (void *)arg);

		/* Fixed byte offsets into the multiboot info structure */
		int mem_lower = *(int *)(arg + 4);
		int mem_upper = *(int *)(arg + 8);
		int mmap_length = *(int *)(arg + 44);
		void *mmap_addr = *(void **)(arg + 48);

		printf("mem lower %d upper %d mmap_len %d mmap_addr %p\n",
		       mem_lower, mem_upper, mmap_length, mmap_addr);
	}

	/* Choose a stack pointer and CPU ID for the 64 bit code to
	 * use. Then if we're not the boot CPU, release the spinlock
	 * (taken in stub16) so the other CPUs can continue.
	 */
	int cpu_id = 0;
	unsigned int init_stack = 0x5000;

	if (magic == BOOT_MAGIC_STUB16) {
		cpu_id = _shared.num_active_cpus++;
		init_stack = _shared.smpinit_stack;
		_shared.smpinit_stack = 0U;

		/* The lock word is written, so it is an output operand,
		 * and it is used directly as the store destination so
		 * the template assembles whatever addressing mode the
		 * compiler picks. The "memory" clobber keeps the
		 * stores above from being moved past the release.
		 */
		__asm__ volatile("movl $0, %0"
				 : "=m"(_shared.smpinit_lock)
				 :
				 : "memory");
	}

	/* Page table goes in CR3. This is a noop until paging is
	 * enabled later. Only the boot CPU builds the tables; other
	 * CPUs reuse the address it stored in the shared page.
	 */
	if (magic != BOOT_MAGIC_STUB16) {
		_shared.base_cr3 = (unsigned int)init_page_tables();
	}
	SET_CR("cr3", _shared.base_cr3);

	/* Enable PAE bit (5) in CR4, required because in long mode
	 * we'll be using the 64 bit page entry format. Likewise a
	 * noop until the CPU starts loading pages.
	 */
	SET_CR_BIT("cr4", 5);

	/* Set LME (long mode enable) in IA32_EFER. Still not a mode
	 * transition, simply tells the CPU that, once paging is
	 * enabled, we should enter long mode. At that point the LMA
	 * bit (10) will be set to indicate that it's active.
	 *
	 * The MSR index does not fit in a signed int, so keep it
	 * unsigned.
	 */
	const unsigned int MSR_IA32_EFER = 0xc0000080U;

	set_msr_bit(MSR_IA32_EFER, 8);

	/* NOW we transition by turning paging on. The CPU will start
	 * page translation (which has been carefully
	 * identity-mapped!) and enter the 32 bit compatibility
	 * submode of long mode. So we're reading 64 bit page tables
	 * but still executing 32 bit instructions.
	 */
	SET_CR_BIT("cr0", 31);

	printf("Hello memory mapped world!\n");

	/* Now we can enter true 64 bit long mode via a far jump to a
	 * code segment with the 64 bit flag set. Allocate a 2-entry
	 * GDT (entry 0 is always a "null segment" architecturally and
	 * can't be used) here on the stack and throw it away after
	 * the jump. The 64 bit OS code will need to set the
	 * descriptors up for itself anyway
	 */
	struct gdt64 cs[] = {
		{ },
		{
			.readable = 1,
			.codeseg = 1,
			.notsystem = 1,
			.present = 1,
			.long64 = 1,
		},
	};

	/* The limit comes first, but is 16 bits. The dummy is there
	 * for alignment, though docs aren't clear on whether it's
	 * required or not. The limit is the offset of the last valid
	 * byte of the table, i.e. its size minus one.
	 */
	struct {
		unsigned short dummy;
		unsigned short limit;
		unsigned int addr;
	} gdtp = { .limit = sizeof(cs) - 1, .addr = (unsigned int)&cs[0], };

	printf("CS descriptor 0x%x 0x%x\n", cs[1].dwords[1], cs[1].dwords[0]);

	/* lgdt reads the 6 bytes starting at the limit field */
	__asm__ volatile("lgdt %0" : : "m"(gdtp.limit) : "memory");

	/* Finally, make a far jump into the 64 bit world. The entry
	 * point is a 16-byte-aligned address that immediately follows
	 * our stub, and is exposed by our linkage as "_start64".
	 *
	 * Indirect far jumps have a similar crazy setup to descriptor
	 * tables, but here the segment selector comes last so no
	 * alignment worries.
	 *
	 * The 64 bit entry runs on the stack chosen above
	 * (init_stack), and takes the cpu_id in its first argument
	 * (EDI).
	 */
	extern int _start64;
	unsigned int jmpaddr = (unsigned int) &_start64;
	struct {
		unsigned int addr;
		unsigned short segment;
	} farjmp = { .segment = GDT_SELECTOR(1), .addr = jmpaddr };

	printf("Making far jump to 64 bit mode @%xh...\n", jmpaddr);

	/* The jump target is addressed through a register: a plain
	 * "m" operand could be expressed relative to ESP, which is
	 * overwritten by the first instruction of the template. The
	 * "memory" clobber guarantees farjmp has been stored.
	 */
	__asm__ volatile("mov %0, %%esp; ljmp *(%1)" ::
			 "r"(init_stack), "r"(&farjmp), "D"(cpu_id)
			 : "memory");
}