/*
* Copyright (c) 2017 Intel Corporation
*
* SPDX-License-Identifier: Apache-2.0
*/
#include <zephyr/toolchain.h>
#include <zephyr/arch/cpu.h>
#include <offsets_short.h>
#include <zephyr/syscall.h>
#include <zephyr/sys/mem_manage.h>
#ifdef CONFIG_X86_KPTI
/* Copy interrupt return stack context to the trampoline stack, switch back
* to the user page table, and only then 'iret'. We jump to this instead
* of calling 'iret' if KPTI is turned on. This must be invoked with interrupts
* locked.
*
* Stack layout is expected to be what 'iretq' expects, which is as follows:
*
* 32 SS
* 24 RSP
* 16 RFLAGS
* 8 CS
* 0 RIP
*/
.global z_x86_trampoline_to_user
z_x86_trampoline_to_user:
/* Stash RDI, we need a free register */
pushq %rdi
/* Store old stack pointer and switch to trampoline stack */
movq %rsp, %rdi
movq %gs:__x86_tss64_t_ist2_OFFSET, %rsp
/* Copy context */
pushq 40(%rdi) /* SS */
pushq 32(%rdi) /* RSP */
pushq 24(%rdi) /* RFLAGS */
pushq 16(%rdi) /* CS */
pushq 8(%rdi) /* RIP */
xchgq %rdi, (%rdi) /* Exchange old RDI to restore it, leaving the old
stack pointer in its former storage slot */
/* Switch to thread's page table */
pushq %rax
movq %gs:__x86_tss64_t_cpu_OFFSET, %rax
movq ___cpu_t_current_OFFSET(%rax), %rax
movq _thread_offset_to_ptables(%rax), %rax
movq %rax, %cr3
popq %rax
movq $0, -8(%rsp) /* Delete stashed RAX data */
/* Trampoline stack should have nothing sensitive in it at this point */
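/* Swap back to the user GS base before 'iretq' to user mode */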
swapgs
iretq
#endif /* CONFIG_X86_KPTI */
/* Landing site for 'syscall' instruction
*
* Call id is in RAX
* Arguments are in RDI, RSI, RDX, R10, R8, R9
* Return address stored by CPU in RCX
* User RFLAGS stored by CPU in R11
* Current RFLAGS has been masked with ~X86_FMASK_MSR
*/
.global z_x86_syscall_entry_stub
z_x86_syscall_entry_stub:
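/* Swap in the kernel GS base so the %gs: accesses below reach this
* CPU's TSS area
*/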
swapgs
/* Save the original user-mode stack pointer in memory; at this
* point we have no free registers or stack to save it to. It is
* pushed onto the privilege stack before interrupts are re-enabled,
* as this storage is per-CPU and not per-thread.
*/
movq %rsp, %gs:__x86_tss64_t_usp_OFFSET
#ifdef CONFIG_X86_KPTI
/* We need to switch to the trampoline stack so that we can
* switch to the kernel's page table
*/
movq %gs:__x86_tss64_t_ist2_OFFSET, %rsp
/* Load kernel's page table */
pushq %rax
/* NOTE: Presumes phys=virt */
movq $Z_MEM_PHYS_ADDR(z_x86_kernel_ptables), %rax
movq %rax, %cr3
popq %rax
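/* Scrub the copy that the pop left just below the stack pointer so no
* kernel register value lingers on the trampoline stack
*/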
movq $0, -8(%rsp) /* Delete stashed RAX data */
#endif /* CONFIG_X86_KPTI */
/* Switch to the privilege mode stack pointer stored in
* x86_tss64.psp
*/
movq %gs:__x86_tss64_t_psp_OFFSET, %rsp
/* We're now on the privilege mode stack; push the old user stack
* pointer onto it
*/
pushq %gs:__x86_tss64_t_usp_OFFSET
#ifdef CONFIG_X86_KPTI
movq $0, %gs:__x86_tss64_t_usp_OFFSET
#endif
sti /* re-enable interrupts */
/* The call ID is in RAX. Bounds-check it; it must be less than
* K_SYSCALL_LIMIT.
*/
cmp $K_SYSCALL_LIMIT, %rax
jae _bad_syscall
_id_ok:
#ifdef CONFIG_X86_BOUNDS_CHECK_BYPASS_MITIGATION
/* Prevent speculation with bogus system call IDs */
lfence
#endif
/* The remaining registers not involved in the syscall operation are
* RBX, RBP, R12-R15, plus floating point / SIMD registers.
*
* We save the caller-saved registers so we can restore them to their
* original values before we 'sysretq' at the end.
*/
pushq %rdi
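/* Also save the x87/SSE context with 'fxsave'; it is restored before
* 'sysretq' so the syscall cannot clobber the caller's FP/SIMD state
* or leave kernel values in it
*/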
subq $X86_FXSAVE_SIZE, %rsp
fxsave (%rsp)
pushq %rsi
pushq %rdx
pushq %r8
pushq %r9
pushq %r10
pushq %r11 /* RFLAGS */
pushq %rcx /* Return address stored by 'syscall' */
pushq %rsp /* SSF parameter */
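/* The privilege stack now holds, starting at RSP: the SSF pointer
* (which points at the saved RCX), RCX (user return address), R11
* (user RFLAGS), R10, R9, R8, RDX, RSI, the FXSAVE area, RDI, and
* finally the saved user stack pointer
*/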
/* All other args are already in the right registers, except arg4,
* which had to go in R10 instead of RCX because 'syscall' clobbers RCX
*/
movq %r10, %rcx
/* from the call ID in RAX, load R10 with the actual function pointer
* to call by looking it up in the system call dispatch table
*/
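/* R11 (the user RFLAGS image) was already saved on the stack above, so
* it is safe to clobber here; zero it to serve as the base register of
* the indexed load below
*/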
xorq %r11, %r11
movq _k_syscall_table(%r11, %rax, 8), %r10
/* Run the marshal function, which is some entry in _k_syscall_table */
call *%r10
/* RAX now contains the return value.
*
* Callee-saved registers are untouched, per the C calling convention,
* but sensitive data may lurk in the caller-saved regs RDI, RSI, RDX,
* R8, R9, R10 and XMM* after we have serviced the system call. We
* saved them earlier; restore them to the values they held when the
* syscall was made. This also preserves these registers if they were
* not used as arguments.
*
* We also can't have RCX and R11 clobbered, as we need their original
* values to successfully 'sysretq'.
*/
addq $8, %rsp /* Discard ssf */
popq %rcx /* Restore return address for 'sysretq' */
popq %r11 /* Restore RFLAGS for 'sysretq' */
popq %r10
popq %r9
popq %r8
popq %rdx
popq %rsi
fxrstor (%rsp)
addq $X86_FXSAVE_SIZE, %rsp
popq %rdi
#ifdef CONFIG_X86_KPTI
/* Lock IRQs as we are using per-cpu memory areas and the
* trampoline stack
*/
cli
/* Stash user stack pointer and switch to trampoline stack */
popq %gs:__x86_tss64_t_usp_OFFSET
movq %gs:__x86_tss64_t_ist2_OFFSET, %rsp
/* Switch to thread's page table */
pushq %rax
movq %gs:__x86_tss64_t_cpu_OFFSET, %rax
movq ___cpu_t_current_OFFSET(%rax), %rax
movq _thread_offset_to_ptables(%rax), %rax
movq %rax, %cr3
popq %rax
movq $0, -8(%rsp) /* Delete stashed RAX data */
/* Restore saved user stack pointer */
movq %gs:__x86_tss64_t_usp_OFFSET, %rsp
movq $0, %gs:__x86_tss64_t_usp_OFFSET
#else
/* Restore user stack pointer */
popq %rsp
/* Return to user mode, locking interrupts as the normal interrupt
* handling path will get very confused if an interrupt occurs between
* 'swapgs' and 'sysretq'
*/
cli
#endif /* CONFIG_X86_KPTI */
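/* Swap back to the user GS base; 'sysretq' then returns to the address
* in RCX with the RFLAGS image taken from R11
*/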
swapgs
sysretq
_bad_syscall:
/* RAX had a bogus syscall value in it, replace with the bad syscall
* handler's ID, and put the bad ID as its first argument.
*
* TODO: On this and all other arches, simply immediately return
* with -ENOSYS, once all syscalls have a return value
*/
movq %rax, %rdi
movq $K_SYSCALL_BAD, %rax
jmp _id_ok
/*
* size_t arch_user_string_nlen(const char *s, size_t maxsize, int *err_arg)
* ^ RDI ^ RSI ^ RDX
*/
.global arch_user_string_nlen
arch_user_string_nlen:
/* Initial error value, strlen_done adjusts this if we succeed */
movl $-1, %r8d
/* use RAX as our length count (this function's return value) */
xor %rax, %rax
/* This code might page fault */
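/* Should the access between the fault_start/fault_end labels page
* fault, the page fault handler resumes execution at
* z_x86_user_string_nlen_fixup with the -1 error value still in R8D
*/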
strlen_loop:
.global z_x86_user_string_nlen_fault_start
z_x86_user_string_nlen_fault_start:
cmpb $0x0, (%rdi, %rax, 1) /* *(RDI + RAX) == 0? Could fault. */
.global z_x86_user_string_nlen_fault_end
z_x86_user_string_nlen_fault_end:
je strlen_done
cmp %rsi, %rax /* Max length reached? */
je strlen_done
inc %rax /* RAX++ and loop again */
jmp strlen_loop
strlen_done:
/* Set error value to 0 since we succeeded */
xorl %r8d, %r8d
.global z_x86_user_string_nlen_fixup
z_x86_user_string_nlen_fixup:
/* Write error value to 32-bit integer err pointer parameter */
movl %r8d, (%rdx)
retq
/*
* Trampoline function to put the p3 parameter in the register expected
* by the calling convention; we couldn't pass it in RCX at 'sysretq'
* time because the CPU uses RCX for the return address
*/
z_x86_userspace_landing_site:
/* Place argument 4 in the correct position */
movq %r10, %rcx
call z_thread_entry
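/* z_thread_entry() never returns */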
/* FUNC_NORETURN void z_x86_userspace_enter(
* k_thread_entry_t user_entry, <- RDI
* void *p1, void *p2, void *p3, <- RSI, RDX, RCX
* uintptr_t stack_end, <- R8
* uintptr_t stack_start) <- R9
*
* A one-way trip to userspace.
*/
.global z_x86_userspace_enter
z_x86_userspace_enter:
/* RCX is consumed by 'sysretq' as the return address, so pass p3
* along in R10; z_x86_userspace_landing_site moves it back into RCX
*/
movq %rcx, %r10
/* Switch to the privilege mode stack so we can erase the thread stack
* buffer; the privilege stack is the page immediately before the
* thread stack
*/
movq %r9, %rsp
/* Save the argument registers we still need (they are caller-saved
* and the C call below may clobber them), then go back into C code to
* erase the stack buffer and set the US bit in its page tables
*/
pushq %rdx
pushq %rsi
pushq %rdi
pushq %r8
pushq %r10
callq z_x86_current_stack_perms
popq %r10
popq %r8
popq %rdi
popq %rsi
popq %rdx
/* Point RSP at the very top (stack_end) of the user stack buffer */
movq %r8, %rsp
/* set sysret entry point */
movq $z_x86_userspace_landing_site, %rcx
/* Copy RFLAGS into r11, required by sysret */
pushfq
movq (%rsp), %r11
movq $0, (%rsp) /* Now a debugger-friendly return address */
/* Cleanse the other registers so no stale kernel values leak to user mode */
xorq %rbx, %rbx
xorq %rbp, %rbp
xorq %r12, %r12
xorq %r13, %r13
xorq %r14, %r14
xorq %r15, %r15
cli
#ifdef CONFIG_X86_KPTI
/* Switch to thread's page table. We have free registers so no need
* to involve the trampoline stack.
*/
movq %gs:__x86_tss64_t_cpu_OFFSET, %rax
movq ___cpu_t_current_OFFSET(%rax), %rax
movq _thread_offset_to_ptables(%rax), %rax
movq %rax, %cr3
#endif
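/* Swap to the user GS base; 'sysretq' enters user mode at
* z_x86_userspace_landing_site (in RCX) with the RFLAGS image from R11
*/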
swapgs
sysretq