Print this page
11859 need swapgs mitigation
Reviewed by: Robert Mustacchi <rm@fingolfin.org>
Reviewed by: Dan McDonald <danmcd@joyent.com>
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Robert Mustacchi <rm@fingolfin.org>
*** 7,17 ****
* A full copy of the text of the CDDL should have accompanied this
* source. A copy of the CDDL is also available via the Internet at
* http://www.illumos.org/license/CDDL.
*/
/*
! * Copyright 2018 Joyent, Inc.
*/
/*
* This file contains the trampolines that are used by KPTI in order to be
* able to take interrupts/trap/etc while on the "user" page table.
--- 7,17 ----
* A full copy of the text of the CDDL should have accompanied this
* source. A copy of the CDDL is also available via the Internet at
* http://www.illumos.org/license/CDDL.
*/
/*
! * Copyright 2019 Joyent, Inc.
*/
/*
* This file contains the trampolines that are used by KPTI in order to be
* able to take interrupts/trap/etc while on the "user" page table.
*** 86,96 ****
* it survives whatever corruption brings down the rest of the kernel!
*
* Syscalls are different to interrupts (at least in the SYSENTER/SYSCALL64
* cases) in that they do not push an interrupt frame (and also have some other
* effects). In the syscall trampolines, we assume that we can only be taking
! * the call from userland and use SWAPGS and an unconditional overwrite of %cr3.
* We do not do any stack pivoting for syscalls (and we leave SYSENTER's
* existing %rsp pivot untouched) -- instead we spill registers into
* %gs:CPU_KPTI_* as we need to.
*
* Note that the normal %cr3 values do not cause invalidations with PCIDE - see
--- 86,96 ----
* it survives whatever corruption brings down the rest of the kernel!
*
* Syscalls are different to interrupts (at least in the SYSENTER/SYSCALL64
* cases) in that they do not push an interrupt frame (and also have some other
* effects). In the syscall trampolines, we assume that we can only be taking
! * the call from userland and use swapgs and an unconditional overwrite of %cr3.
* We do not do any stack pivoting for syscalls (and we leave SYSENTER's
* existing %rsp pivot untouched) -- instead we spill registers into
* %gs:CPU_KPTI_* as we need to.
*
* Note that the normal %cr3 values do not cause invalidations with PCIDE - see
*** 501,511 ****
pushq %gs:CPU_KPTI_RSP; \
pushq %gs:CPU_KPTI_RFLAGS; \
pushq %gs:CPU_KPTI_CS; \
pushq %gs:CPU_KPTI_RIP; \
mov %gs:CPU_KPTI_R13, %r13; \
! SWAPGS; \
jmp isr; \
SET_SIZE(tr_/**/isr)
MK_SYSCALL_INT_TRAMPOLINE(brand_sys_syscall_int)
MK_SYSCALL_INT_TRAMPOLINE(sys_syscall_int)
--- 501,511 ----
pushq %gs:CPU_KPTI_RSP; \
pushq %gs:CPU_KPTI_RFLAGS; \
pushq %gs:CPU_KPTI_CS; \
pushq %gs:CPU_KPTI_RIP; \
mov %gs:CPU_KPTI_R13, %r13; \
! swapgs; \
jmp isr; \
SET_SIZE(tr_/**/isr)
MK_SYSCALL_INT_TRAMPOLINE(brand_sys_syscall_int)
MK_SYSCALL_INT_TRAMPOLINE(sys_syscall_int)
*** 534,547 ****
SET_SIZE(tr_iret_kernel)
ENTRY_NP(tr_iret_user)
#if DEBUG
/*
! * Ensure that we return to user land with CR0.TS clear. We do this
! * before we trampoline back and pivot the stack and %cr3. This way
! * we're still on the kernel stack and kernel %cr3, though we are on the
! * user GSBASE.
*/
pushq %rax
mov %cr0, %rax
testq $CR0_TS, %rax
jz 1f
--- 534,546 ----
SET_SIZE(tr_iret_kernel)
ENTRY_NP(tr_iret_user)
#if DEBUG
/*
! * Panic if we find CR0.TS set. We're still on the kernel stack and
! * %cr3, but we do need to swap back to the kernel gs. (We don't worry
! * about swapgs speculation here.)
*/
pushq %rax
mov %cr0, %rax
testq $CR0_TS, %rax
jz 1f
*** 557,574 ****
#endif
cmpq $1, kpti_enable
jne 1f
swapgs
mov %r13, %gs:CPU_KPTI_R13
PIVOT_KPTI_STK(%r13)
SET_USER_CR3(%r13)
mov %gs:CPU_KPTI_R13, %r13
! /* Zero these to make sure they didn't leak from a kernel trap */
movq $0, %gs:CPU_KPTI_R13
movq $0, %gs:CPU_KPTI_R14
swapgs
1:
iretq
SET_SIZE(tr_iret_user)
--- 556,583 ----
#endif
cmpq $1, kpti_enable
jne 1f
+ /*
+ * KPTI enabled: we're on the user gsbase at this point, so we
+ * need to swap back so we can pivot stacks.
+ *
+ * The swapgs lfence mitigation is probably not needed here
+ * since a mis-speculation of the above branch would imply KPTI
+ * is disabled, but we'll apply it anyway.
+ */
swapgs
+ lfence
mov %r13, %gs:CPU_KPTI_R13
PIVOT_KPTI_STK(%r13)
SET_USER_CR3(%r13)
mov %gs:CPU_KPTI_R13, %r13
! /* Zero these to make sure they didn't leak from a kernel trap. */
movq $0, %gs:CPU_KPTI_R13
movq $0, %gs:CPU_KPTI_R14
+ /* And back to user gsbase again. */
swapgs
1:
iretq
SET_SIZE(tr_iret_user)