/*
* Copyright (c) 2018 Intel Corporation
*
* SPDX-License-Identifier: Apache-2.0
*/
#include "xuk-config.h"
#include "x86_64-hw.h"
#include "xuk.h"
#include "serial.h"
#ifdef CONFIG_XUK_DEBUG
#include "vgacon.h"
#include "printf.h"
#else
#define printf(...)
#endif
#define ARRAY_SIZE(a) (sizeof(a)/sizeof((a)[0]))
/* Defined at the linker level in xuk-stubs.c */
extern char _xuk_stub16_start, _xuk_stub16_end;
/* 64 bit entry point. Lives immediately after the 32 bit stub.
* Expects to have its stack already set up.
*/
__asm__(".pushsection .xuk_start64\n"
".align 16\n"
" jmp _cstart64\n"
".popsection\n");
/* Interrupt/exception entry points stored in the IDT.
 *
 * FIXME: the assembly below uses XCHG with memory operands because
 * I'm lazy and it was SO much easier than hand coding the musical
 * chairs required to emulate it.  But that instruction is
 * outrageously slow (20+ cycle latency on most CPUs, as it implies a
 * locked bus cycle), and this is interrupt entry.  Replace it once we
 * have a test available to detect bad register contents.
 */
extern char _isr_entry_err, _isr_entry_noerr;
__asm__(/* Exceptions that push an error code arrive here.  Rotate
* the error code into RDX (spilling the old RDX to the stack) and
* recycle the error code slot to hold the stub return address, so
* the stack layout matches the no-error path below.
*/
".align 16\n"
"_isr_entry_err:\n"
" xchg %rdx, (%rsp)\n" /* RDX <- return addr, slot <- old RDX */
" xchg %rdx, 8(%rsp)\n" /* RDX <- error code, slot <- return addr */
" jmp _isr_entry2\n"
/* IRQs with no error code land here, then fall through */
".align 16\n"
"_isr_entry_noerr:\n"
" push %rdx\n"
/* Arrive here with RDX already pushed to the stack below the
* interrupt frame and (if needed) populated with the error
* code from the exception. It will become the third argument
* to the C handler. Stuff the return address from the call
* in the stub table into RDI (the first argument).
*/
"_isr_entry2:\n"
" xchg %rdi, 8(%rsp)\n"
" push %rax\n"
" push %rcx\n"
" push %rsi\n"
" push %r8\n"
" push %r9\n"
" push %r10\n"
" push %r11\n"
" mov %rsp, %rsi\n" /* stack in second arg */
" call _isr_c_top\n"
/* We have pushed only the caller-save registers at this
* point. Check return value to see if we are returning back
* into the same context or if we need to do a full dump and
* restore.
*/
" test %rax, %rax\n"
" jnz _switch_bottom\n"
" pop %r11\n"
" pop %r10\n"
" pop %r9\n"
" pop %r8\n"
" pop %rsi\n"
" pop %rcx\n"
" pop %rax\n"
" pop %rdx\n"
" pop %rdi\n"
" iretq\n");
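/* For reference, the frame handed to _isr_c_top() in its second
 * argument (i.e. the struct xuk_entry_frame it casts to), from low
 * to high addresses, per the pushes above:
 *
 *   R11 R10 R9 R8 RSI RCX RAX RDX RDI | RIP CS RFLAGS RSP SS
 *
 * The bar marks the start of the CPU-pushed exception frame.
 */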
/* Top half of a context switch. Arrive here with the "CPU pushed"
* part of the exception frame (SS, RSP, RFLAGS, CS, RIP) already on
* the stack, the context pointer to which to switch stored in RAX and
* a pointer into which to store the current context in RDX (NOTE:
* this will be a pointer to a 32 bit memory location if we are in x32
* mode!). It will push the first half of the register set (the same
* caller-save registers pushed by an ISR) and then continue on to
* _switch_bottom to finish up.
*/
__asm__(".align 16\n"
".global _switch_top\n"
"_switch_top:\n"
" push %rdi\n"
" push %rdx\n"
" push %rax\n"
" push %rcx\n"
" push %rsi\n"
" push %r8\n"
" push %r9\n"
" push %r10\n"
" push %r11\n"
" mov %rsp, %r8\n"
" sub $48, %r8\n"
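	/* 48 == 6 * 8: leave room for the six callee-saved
	 * registers (RBX, RBP, R12-R15) that _switch_bottom will
	 * push below this frame, so the stored handle points at
	 * the base of the fully saved context.
	 */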
#ifdef XUK_64_BIT_ABI
" movq %r8, (%rdx)\n"
#else
" movl %r8d, (%rdx)\n"
#endif
/* Fall through... */
/* Bottom half of a switch, used by both ISR return and
* context switching. Arrive here with the exception frame
* and caller-saved registers already on the stack and the
* stack pointer to use for the restore in RAX. It will push
* the remaining registers and then restore.
*/
".align 16\n"
"_switch_bottom:\n"
" push %rbx\n"
" push %rbp\n"
" push %r12\n"
" push %r13\n"
" push %r14\n"
" push %r15\n"
" mov %rax, %rsp\n"
" pop %r15\n"
" pop %r14\n"
" pop %r13\n"
" pop %r12\n"
" pop %rbp\n"
" pop %rbx\n"
" pop %r11\n"
" pop %r10\n"
" pop %r9\n"
" pop %r8\n"
" pop %rsi\n"
" pop %rcx\n"
" pop %rax\n"
" pop %rdx\n"
" pop %rdi\n"
" iretq\n");
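/* The full save format produced by _switch_top + _switch_bottom and
 * restored from the pointer in RAX, low to high addresses:
 *
 *   R15 R14 R13 R12 RBP RBX R11 R10 R9 R8 RSI RCX RAX RDX RDI
 *   RIP CS RFLAGS RSP SS
 *
 * which is also the layout xuk_setup_stack() below synthesizes for a
 * brand new context.
 */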
static unsigned int isr_stub_base;
struct vhandler {
void (*fn)(void*, int);
void *arg;
};
static struct vhandler *vector_handlers;
static void putchar(int c)
{
serial_putc(c);
#ifdef CONFIG_XUK_DEBUG
vgacon_putc(c);
#endif
}
long _isr_c_top(unsigned long vecret, unsigned long rsp,
unsigned long err)
{
/* The vector stubs are 8-byte-aligned, so to get the vector
* index from the return address we just shift off the bottom
* bits
*/
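/* E.g. the stub for vector 3 is the 5-byte CALL at
 * isr_stub_base + 24; the return address it pushes is
 * isr_stub_base + 29, and 29 >> 3 == 3.
 */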
int vector = (vecret - isr_stub_base) >> 3;
struct vhandler *h = &vector_handlers[vector];
struct xuk_entry_frame *frame = (void *)rsp;
z_isr_entry();
/* Set current priority in CR8 to the currently-serviced IRQ
* and re-enable interrupts
*/
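/* E.g. while servicing vector 0x45 we run with CR8 == 4, so the
 * APIC holds back further interrupts in priority classes 1-4
 * (vectors up to 0x4f) but still delivers classes 5-15.
 */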
unsigned long long cr8, cr8new = vector >> 4;
__asm__ volatile("movq %%cr8, %0;"
"movq %1, %%cr8;"
"sti"
: "=r"(cr8) : "r"(cr8new));
if (h->fn) {
h->fn(h->arg, err);
} else {
z_unhandled_vector(vector, err, frame);
}
/* Mask interrupts to finish processing (they'll get restored
* in the upcoming IRET) and restore CR8
*/
__asm__ volatile("cli; movq %0, %%cr8" : : "r"(cr8));
/* Signal EOI if it's an APIC-managed interrupt */
if (vector > 0x1f) {
_apic.EOI = 0U;
}
/* Subtle: for the "interrupted context pointer", we pass in
* the value our stack pointer WILL have once we finish
* spilling registers after this function returns. If this
* hook doesn't want to switch, it will return null and never
* save the value of the pointer.
*/
return (long)z_isr_exit_restore_stack((void *)(rsp - 48));
}
static long choose_isr_entry(int vector)
{
/* Constructed with 1's in the vector indexes architecturally
* defined to push an error code: 8 (#DF), 10-14 (#TS, #NP, #SS,
* #GP, #PF) and 17 (#AC).  Couldn't find a clean way to make the
* compiler generate this at build time.
*/
const int mask = 0x27d00; /* 0b00100111110100000000 */
if (vector < 32 && ((1 << vector) & mask)) {
return (long)&_isr_entry_err;
} else {
return (long)&_isr_entry_noerr;
}
}
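/* E.g. vector 13 (#GP) has bit 13 set in the mask above, so its
 * stub calls _isr_entry_err; vector 32, the lowest vector past the
 * exception range, takes _isr_entry_noerr.
 */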
void xuk_set_isr(int interrupt, int priority,
void (*handler)(void *, int), void *arg)
{
int v = interrupt - 0x100;
/* Need to choose a vector number? Try all vectors at the
* specified priority. Clobber one if we have to.
*/
if (interrupt < 0x100 || interrupt > 0x1ff) {
for (int pi = 0; pi <= 0xf; pi++) {
v = (priority << 4) | pi;
if (!vector_handlers[v].fn) {
break;
}
}
}
/* Need to set up IO-APIC? Set it up to deliver to all CPUs
* here (another API later will probably allow for IRQ
* affinity). Do a read/write cycle to avoid clobbering
* settings like edge triggering & polarity that might have
* been set up by other platform layers. We only want to muck
* with routing.
*/
if (interrupt < 0x100) {
struct ioapic_red red;
int regidx = 0x10 + 2 * interrupt;
red.regvals[0] = ioapic_read(regidx);
red.regvals[1] = ioapic_read(regidx + 1);
red.vector = v;
red.logical = 0U;
red.destination = 0xffU;
red.masked = 1U;
ioapic_write(regidx, red.regvals[0]);
ioapic_write(regidx + 1, red.regvals[1]);
}
/* Is it a special interrupt? */
if (interrupt == INT_APIC_LVT_TIMER) {
struct apic_lvt lvt = {
.vector = v,
.mode = ONESHOT,
};
_apic.LVT_TIMER = lvt;
}
printf("set_isr v %d\n", v);
vector_handlers[v].fn = handler;
vector_handlers[v].arg = arg;
}
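/* Usage sketch (handler and device names hypothetical): route
 * IOAPIC input 4 at priority 10, then unmask it:
 *
 *     static void my_uart_isr(void *arg, int err) { ... }
 *
 *     xuk_set_isr(4, 10, my_uart_isr, &my_uart);
 *     xuk_set_isr_mask(4, 0);
 *
 * Passing an interrupt in 0x100-0x1ff instead requests that exact
 * APIC vector (interrupt - 0x100) with no IOAPIC routing.
 */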
/* Note: "raw vector" interrupt numbers cannot be masked, as the APIC
* doesn't have a per-vector mask bit. Only specific LVT interrupts
* (we handle timer below) and IOAPIC-generated interrupts can be
* masked on x86. In practice, this isn't a problem as that API is a
* special-purpose kind of thing. Real devices will always go through
* the supported channel.
*/
void xuk_set_isr_mask(int interrupt, int masked)
{
if (interrupt == INT_APIC_LVT_TIMER) {
struct apic_lvt lvt = _apic.LVT_TIMER;
lvt.masked = masked;
_apic.LVT_TIMER = lvt;
} else if (interrupt < 0x100) {
struct ioapic_red red;
int regidx = 0x10 + 2 * interrupt;
red.regvals[0] = ioapic_read(regidx);
red.regvals[1] = ioapic_read(regidx + 1);
red.masked = masked;
ioapic_write(regidx, red.regvals[0]);
ioapic_write(regidx + 1, red.regvals[1]);
}
}
/* Note: these base pointers live together in a big block. Eventually
* we will probably want one of them for userspace TLS, which means it
* will need to be retargeted to point somewhere within the
* application memory. But this is fine for now.
*/
static void setup_fg_segs(int cpu)
{
int fi = 3 + 2 * cpu, gi = 3 + 2 * cpu + 1;
struct gdt64 *fs = (struct gdt64 *) &_shared.gdt[fi];
struct gdt64 *gs = (struct gdt64 *) &_shared.gdt[gi];
gdt64_set_base(fs, (long)&_shared.fs_ptrs[cpu]);
gdt64_set_base(gs, (long)&_shared.gs_ptrs[cpu]);
int fsel = GDT_SELECTOR(fi), gsel = GDT_SELECTOR(gi);
__asm__("mov %0, %%fs; mov %1, %%gs" : : "r"(fsel), "r"(gsel));
}
static void init_gdt(void)
{
printf("Initializing 64 bit IDT\n");
/* Need a GDT for ourselves, not whatever the previous layer
* set up. The scheme is that segment zero is the null
* segment (required and enforced architecturally), segment
* one (selector 8) is the code segment, two (16) is a
* data/stack segment (ignored by code at runtime, but
* required to be present in the L/GDT when executing an
* IRET), and remaining segments come in pairs to provide
* FS/GS segment bases for each CPU.
*/
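/* Concretely, assuming the conventional index<<3 selector encoding
 * in GDT_SELECTOR():
 *
 *   gdt[0]      null           selector 0x00
 *   gdt[1]      code           selector 0x08
 *   gdt[2]      data/stack     selector 0x10
 *   gdt[3+2n]   FS for CPU n   selector 0x18 + n*0x10
 *   gdt[4+2n]   GS for CPU n   selector 0x20 + n*0x10
 */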
_shared.gdt[0] = (struct gdt64) {};
_shared.gdt[1] = (struct gdt64) {
.readable = 1,
.codeseg = 1,
.notsystem = 1,
.present = 1,
.long64 = 1,
};
_shared.gdt[2] = (struct gdt64) {
.readable = 1,
.codeseg = 0,
.notsystem = 1,
.present = 1,
.long64 = 1,
};
for (int i = 3; i < ARRAY_SIZE(_shared.gdt); i++) {
_shared.gdt[i] = (struct gdt64) {
.readable = 1,
.codeseg = 0,
.notsystem = 1,
.present = 1,
.long64 = 1,
};
}
}
static void init_idt(void)
{
printf("Initializing 64 bit IDT\n");
/* Make an IDT in the next unused page and fill in all 256
* entries
*/
struct idt64 *idt = alloc_page(0);
_shared.idt_addr = (unsigned int)(long)idt;
for (int i = 0; i < 256; i++) {
idt[i] = (struct idt64) {
.segment = GDT_SELECTOR(1),
.type = 14, /* == 64 bit interrupt gate */
.present = 1,
};
}
/* Hand-encode stubs for each vector that are a simple 5-byte
* CALL instruction to the single handler entry point. That's
* an opcode of 0xe8 followed by a 4-byte offset from the start
* of the next (!) instruction. The call is used to push a
* return address on the stack that points into the stub,
* allowing us to extract the vector index by what stub it
* points into.
*/
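/* Each stub thus occupies one 8-byte slot:
 *
 *   offset 0:  0xe8             CALLQ opcode
 *   offset 1:  <rel32>          entry point - (stub address + 5)
 *   offset 5:  3 unused bytes   padding to 8 bytes
 *
 * preserving the 8-byte stride that the vector recovery in
 * _isr_c_top() relies on.
 */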
struct istub {
unsigned char opcode; /* 0xe8 == CALLQ */
int off;
unsigned char _unused[3];
} __attribute__((packed)) *stubtab = alloc_page(0);
isr_stub_base = (long)stubtab;
/* FIXME: on x32, the entries in this handlers table are half
* the size as a native 64 bit build, and could be packed into
* the same page as the stubs above, saving the page of low
* memory.
*/
vector_handlers = alloc_page(1);
for (int i = 0; i < 256; i++) {
struct istub *st = &stubtab[i];
st->opcode = 0xe8;
st->off = choose_isr_entry(i) - (long)st - 5;
idt64_set_isr(&idt[i], st);
}
}
static void smp_init(void)
{
/* Generate a GDT for the 16 bit stub to use when
* transitioning to 32 bit protected mode (so the poor thing
* doesn't have to do it itself). It can live right here on
* our stack.
*/
struct gdt64 gdt16[] = {
{},
{
.codeseg = 1,
.default_size = 1,
.readable = 1,
.notsystem = 1,
.present = 1,
.limit_lo16 = 0xffff,
.limit_hi4 = 0xf,
.page_granularity = 1,
},
{
.readable = 1,
.default_size = 1,
.notsystem = 1,
.present = 1,
.limit_lo16 = 0xffff,
.limit_hi4 = 0xf,
.page_granularity = 1,
},
};
struct {
short dummy;
short limit;
unsigned int addr;
} gdtp16 = { .limit = sizeof(gdt16), .addr = (long)&gdt16[0] };
_shared.gdt16_addr = (long)&gdtp16.limit;
/* FIXME: this is only used at startup, and only for a ~150
* byte chunk of code. Find a way to return it, or maybe put
* it in the low memory null guard instead?
*/
char *sipi_page = alloc_page(1);
int s16bytes = &_xuk_stub16_end - &_xuk_stub16_start;
printf("Copying %d bytes of 16 bit code into page %p\n",
s16bytes, (int)(long)sipi_page);
for (int i = 0; i < s16bytes; i++) {
sipi_page[i] = ((char *)&_xuk_stub16_start)[i];
}
/* First send an INIT IPI to all CPUs.  This resets them
* regardless of what they were doing, and they enter a
* "wait for SIPI" state.
*/
printf("Sending INIT IPI\n");
_apic.ICR_LO = (struct apic_icr_lo) {
.delivery_mode = INIT,
.shorthand = NOTSELF,
};
while (_apic.ICR_LO.send_pending) {
}
/* Now send the startup IPI (SIPI) to all CPUs. They will
* begin executing in real mode with IP=0 and CS pointing to
* the page we allocated.
*/
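/* The SIPI vector is the page number of the entry point: vector V
 * starts the AP at CS = V << 8, IP = 0, i.e. physical address
 * V << 12.  E.g. if sipi_page is 0x8000, the vector is 8 and the
 * AP begins execution at 0x8000.
 */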
_shared.smpinit_lock = 0;
_shared.smpinit_stack = 0U;
_shared.num_active_cpus = 1;
printf("Sending SIPI IPI\n");
_apic.ICR_LO = (struct apic_icr_lo) {
.delivery_mode = STARTUP,
.shorthand = NOTSELF,
.vector = ((long)sipi_page) >> 12,
};
while (_apic.ICR_LO.send_pending) {
}
}
void xuk_start_cpu(int cpu, unsigned int stack)
{
int act = _shared.num_active_cpus;
printf("Granting stack @ %xh to CPU %d\n", stack, cpu);
_shared.smpinit_stack = stack;
while (_shared.num_active_cpus == act) {
__asm__ volatile("pause");
}
}
void _cstart64(int cpu_id)
{
if (cpu_id == 0) {
extern char __bss_start, __bss_end;
__builtin_memset(&__bss_start, 0, &__bss_end - &__bss_start);
}
#ifdef CONFIG_XUK_DEBUG
z_putchar = putchar;
#endif
printf("\n==\nHello from 64 bit C code on CPU%d (stack ~%xh)\n",
cpu_id, (int)(long)&cpu_id);
printf("sizeof(int) = %d, sizeof(long) = %d, sizeof(void*) = %d\n",
sizeof(int), sizeof(long), sizeof(void *));
if (cpu_id == 0) {
init_gdt();
}
struct {
unsigned short dummy[3];
unsigned short limit;
unsigned long long addr;
} gdtp = { .limit = sizeof(_shared.gdt), .addr = (long)_shared.gdt };
printf("Loading 64 bit GDT\n");
__asm__ volatile("lgdt %0" : : "m"(gdtp.limit));
/* Need to actually set the data & stack segments with those
* indexes. Whatever we have in those hidden registers works
* for data access *now*, but the next interrupt will push
* whatever the selector index was, and we need to know that
* our table contains the same layout!
*/
int selector = GDT_SELECTOR(2);
__asm__ volatile("mov %0, %%ds; mov %0, %%ss" : : "r"(selector));
if (cpu_id == 0) {
init_idt();
}
struct {
unsigned short dummy[3];
unsigned short limit;
unsigned long long addr;
} idtp = { .limit = 4095 /* 256 gates * 16 bytes - 1 */, .addr = _shared.idt_addr };
printf("Loading IDT lim %d addr %xh\n", idtp.limit, idtp.addr);
__asm__ volatile("lidt %0" : : "m"(idtp.limit));
/* Classic PC architecture gotcha: disable the 8259 PICs before
* they fire a timer interrupt into our exception table.
* Write 1's into the interrupt masks.
*/
if (cpu_id == 0) {
printf("Disabling 8259 PICs\n");
ioport_out8(0xa1, 0xff); /* slave */
ioport_out8(0x21, 0xff); /* master */
}
/* Enable APIC. Set both the MSR bit and the "software
* enable" bit in the spurious interrupt vector register.
*/
const unsigned int IA32_APIC_BASE = 0x1b;
printf("Enabling APIC id %xh ver %xh\n", _apic.ID, _apic.VER);
set_msr_bit(IA32_APIC_BASE, 11);
_apic.SPURIOUS |= 1<<8;
_apic.LDR = cpu_id << 24;
_apic.DIVIDE_CONF = APIC_DIVISOR(CONFIG_XUK_APIC_TSC_SHIFT);
printf("Initializing FS/GS segments for local CPU%d\n", cpu_id);
setup_fg_segs(cpu_id);
if (cpu_id == 0) {
printf("Brining up auxiliary CPUs...\n");
smp_init();
}
printf("Calling z_cpu_start on CPU %d\n", cpu_id);
z_cpu_start(cpu_id);
}
long xuk_setup_stack(long sp, void *fn, unsigned int eflags,
long *args, int nargs)
{
struct xuk_stack_frame *f;
/* Align downward and make room for one frame */
f = &((struct xuk_stack_frame *)(sp & ~7))[-1];
*f = (struct xuk_stack_frame) {
.entry.ss = GDT_SELECTOR(2),
.entry.rsp = sp,
.entry.rflags = eflags,
.entry.cs = GDT_SELECTOR(1),
.entry.rip = (long)fn,
.entry.rdi = nargs >= 1 ? args[0] : 0,
.entry.rsi = nargs >= 2 ? args[1] : 0,
.entry.rdx = nargs >= 3 ? args[2] : 0,
.entry.rcx = nargs >= 4 ? args[3] : 0,
.entry.r8 = nargs >= 5 ? args[4] : 0,
.entry.r9 = nargs >= 6 ? args[5] : 0,
};
return (long)f;
}
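/* Usage sketch (names hypothetical): build a runnable context for a
 * two-argument entry point on a fresh stack, with IF (0x200) set in
 * RFLAGS so the context starts with interrupts enabled:
 *
 *     long args[] = { (long)dev, 0 };
 *     long sp = xuk_setup_stack((long)&stack_mem[STACK_SIZE],
 *                               thread_fn, 0x200, args, 2);
 *
 * The returned pointer is in the format _switch_bottom restores from
 * RAX (and that _switch_top stores through RDX).
 */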
int z_arch_printk_char_out(int c)
{
putchar(c);
return 0;
}