Print this page
8956 Implement KPTI
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
*** 18,28 ****
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
! * Copyright 2015 Joyent, Inc.
* Copyright (c) 2016 by Delphix. All rights reserved.
*/
#include <sys/asm_linkage.h>
#include <sys/asm_misc.h>
--- 18,28 ----
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
! * Copyright 2018 Joyent, Inc.
* Copyright (c) 2016 by Delphix. All rights reserved.
*/
#include <sys/asm_linkage.h>
#include <sys/asm_misc.h>
*** 489,498 ****
--- 489,512 ----
movq %rbx, REGOFF_FS(%rsp)
movw %gs, %bx
movq %rbx, REGOFF_GS(%rsp)
/*
+ * If we're trying to use TRAPTRACE though, I take that back: we're
+ * probably debugging some problem in the SWAPGS logic and want to know
+ * what the incoming gsbase was.
+ *
+ * Since we already did SWAPGS, record the KGSBASE.
+ */
+ #if defined(DEBUG) && defined(TRAPTRACE) && !defined(__xpv)
+ movl $MSR_AMD_KGSBASE, %ecx
+ rdmsr
+ movl %eax, REGOFF_GSBASE(%rsp)
+ movl %edx, REGOFF_GSBASE+4(%rsp)
+ #endif
+
+ /*
* Machine state saved in the regs structure on the stack
* First six args in %rdi, %rsi, %rdx, %rcx, %r8, %r9
* %eax is the syscall number
* %rsp is the thread's stack, %r15 is curthread
* REG_RSP(%rsp) is the user's stack
*** 669,680 ****
*/
ASSERT_UPCALL_MASK_IS_SET
SYSRETQ
#else
ALTENTRY(nopop_sys_syscall_swapgs_sysretq)
! SWAPGS /* user gsbase */
! SYSRETQ
#endif
/*NOTREACHED*/
SET_SIZE(nopop_sys_syscall_swapgs_sysretq)
_syscall_pre:
--- 683,693 ----
*/
ASSERT_UPCALL_MASK_IS_SET
SYSRETQ
#else
ALTENTRY(nopop_sys_syscall_swapgs_sysretq)
! jmp tr_sysretq
#endif
/*NOTREACHED*/
SET_SIZE(nopop_sys_syscall_swapgs_sysretq)
_syscall_pre:
*** 771,780 ****
--- 784,807 ----
movq %rbx, REGOFF_FS(%rsp)
movw %gs, %bx
movq %rbx, REGOFF_GS(%rsp)
/*
+ * If we're trying to use TRAPTRACE though, I take that back: we're
+ * probably debugging some problem in the SWAPGS logic and want to know
+ * what the incoming gsbase was.
+ *
+ * Since we already did SWAPGS, record the KGSBASE.
+ */
+ #if defined(DEBUG) && defined(TRAPTRACE) && !defined(__xpv)
+ movl $MSR_AMD_KGSBASE, %ecx
+ rdmsr
+ movl %eax, REGOFF_GSBASE(%rsp)
+ movl %edx, REGOFF_GSBASE+4(%rsp)
+ #endif
+
+ /*
* Application state saved in the regs structure on the stack
* %eax is the syscall number
* %rsp is the thread's stack, %r15 is curthread
* REG_RSP(%rsp) is the user's stack
*/
*** 887,898 ****
movl REGOFF_RIP(%rsp), %ecx /* %ecx -> %eip */
movl REGOFF_RSP(%rsp), %esp
ASSERT_UPCALL_MASK_IS_SET
ALTENTRY(nopop_sys_syscall32_swapgs_sysretl)
! SWAPGS /* user gsbase */
! SYSRETL
SET_SIZE(nopop_sys_syscall32_swapgs_sysretl)
/*NOTREACHED*/
_full_syscall_postsys32:
STI
--- 914,924 ----
movl REGOFF_RIP(%rsp), %ecx /* %ecx -> %eip */
movl REGOFF_RSP(%rsp), %esp
ASSERT_UPCALL_MASK_IS_SET
ALTENTRY(nopop_sys_syscall32_swapgs_sysretl)
! jmp tr_sysretl
SET_SIZE(nopop_sys_syscall32_swapgs_sysretl)
/*NOTREACHED*/
_full_syscall_postsys32:
STI
*** 933,959 ****
*
* Note that we are unable to return both "rvals" to userland with
* this call, as %edx is used by the sysexit instruction.
*
* One final complication in this routine is its interaction with
! * single-stepping in a debugger. For most of the system call mechanisms,
! * the CPU automatically clears the single-step flag before we enter the
! * kernel. The sysenter mechanism does not clear the flag, so a user
! * single-stepping through a libc routine may suddenly find themself
! * single-stepping through the kernel. To detect this, kmdb compares the
! * trap %pc to the [brand_]sys_enter addresses on each single-step trap.
! * If it finds that we have single-stepped to a sysenter entry point, it
! * explicitly clears the flag and executes the sys_sysenter routine.
*
! * One final complication in this final complication is the fact that we
! * have two different entry points for sysenter: brand_sys_sysenter and
! * sys_sysenter. If we enter at brand_sys_sysenter and start single-stepping
! * through the kernel with kmdb, we will eventually hit the instruction at
! * sys_sysenter. kmdb cannot distinguish between that valid single-step
! * and the undesirable one mentioned above. To avoid this situation, we
! * simply add a jump over the instruction at sys_sysenter to make it
! * impossible to single-step to it.
*/
#if defined(__lint)
void
sys_sysenter()
--- 959,984 ----
*
* Note that we are unable to return both "rvals" to userland with
* this call, as %edx is used by the sysexit instruction.
*
* One final complication in this routine is its interaction with
! * single-stepping in a debugger. For most of the system call mechanisms, the
! * CPU automatically clears the single-step flag before we enter the kernel.
! * The sysenter mechanism does not clear the flag, so a user single-stepping
! * through a libc routine may suddenly find themself single-stepping through the
! * kernel. To detect this, kmdb and trap() both compare the trap %pc to the
! * [brand_]sys_sysenter addresses on each single-step trap. If either finds
! * that we have single-stepped to a sysenter entry point, it explicitly clears
! * the flag and executes the sys_sysenter routine.
*
! * One final complication in this final complication is the fact that we have
! * two different entry points for sysenter: brand_sys_sysenter and sys_sysenter.
! * If we enter at brand_sys_sysenter and start single-stepping through the
! * kernel with kmdb, we will eventually hit the instruction at sys_sysenter.
! * kmdb cannot distinguish between that valid single-step and the undesirable
! * one mentioned above. To avoid this situation, we simply add a jump over the
! * instruction at sys_sysenter to make it impossible to single-step to it.
*/
#if defined(__lint)
void
sys_sysenter()
*** 962,987 ****
#else /* __lint */
ENTRY_NP(brand_sys_sysenter)
SWAPGS /* kernel gsbase */
ALTENTRY(_brand_sys_sysenter_post_swapgs)
BRAND_CALLBACK(BRAND_CB_SYSENTER, BRAND_URET_FROM_REG(%rdx))
/*
* Jump over sys_sysenter to allow single-stepping as described
* above.
*/
jmp _sys_sysenter_post_swapgs
ALTENTRY(sys_sysenter)
SWAPGS /* kernel gsbase */
-
ALTENTRY(_sys_sysenter_post_swapgs)
movq %gs:CPU_THREAD, %r15
movl $U32CS_SEL, REGOFF_CS(%rsp)
movl %ecx, REGOFF_RSP(%rsp) /* wrapper: %esp -> %ecx */
movl %edx, REGOFF_RIP(%rsp) /* wrapper: %eip -> %edx */
pushfq
popq %r10
movl $UDS_SEL, REGOFF_SS(%rsp)
/*
--- 987,1017 ----
#else /* __lint */
ENTRY_NP(brand_sys_sysenter)
SWAPGS /* kernel gsbase */
ALTENTRY(_brand_sys_sysenter_post_swapgs)
+
BRAND_CALLBACK(BRAND_CB_SYSENTER, BRAND_URET_FROM_REG(%rdx))
/*
* Jump over sys_sysenter to allow single-stepping as described
* above.
*/
jmp _sys_sysenter_post_swapgs
ALTENTRY(sys_sysenter)
SWAPGS /* kernel gsbase */
ALTENTRY(_sys_sysenter_post_swapgs)
+
movq %gs:CPU_THREAD, %r15
movl $U32CS_SEL, REGOFF_CS(%rsp)
movl %ecx, REGOFF_RSP(%rsp) /* wrapper: %esp -> %ecx */
movl %edx, REGOFF_RIP(%rsp) /* wrapper: %eip -> %edx */
+ /*
+ * NOTE: none of the instructions that run before we get here should
+ * clobber bits in (R)FLAGS! This includes the kpti trampoline.
+ */
pushfq
popq %r10
movl $UDS_SEL, REGOFF_SS(%rsp)
/*
*** 1019,1028 ****
--- 1049,1072 ----
movq %rbx, REGOFF_FS(%rsp)
movw %gs, %bx
movq %rbx, REGOFF_GS(%rsp)
/*
+ * If we're trying to use TRAPTRACE though, I take that back: we're
+ * probably debugging some problem in the SWAPGS logic and want to know
+ * what the incoming gsbase was.
+ *
+ * Since we already did SWAPGS, record the KGSBASE.
+ */
+ #if defined(DEBUG) && defined(TRAPTRACE) && !defined(__xpv)
+ movl $MSR_AMD_KGSBASE, %ecx
+ rdmsr
+ movl %eax, REGOFF_GSBASE(%rsp)
+ movl %edx, REGOFF_GSBASE+4(%rsp)
+ #endif
+
+ /*
* Application state saved in the regs structure on the stack
* %eax is the syscall number
* %rsp is the thread's stack, %r15 is curthread
* REG_RSP(%rsp) is the user's stack
*/
*** 1116,1125 ****
--- 1160,1171 ----
/*
* We must protect ourselves from being descheduled here;
* if we were, and we ended up on another cpu, or another
* lwp got in ahead of us, it could change the segment
* registers without us noticing before we return to userland.
+ *
+ * This cli is undone in the tr_sysexit trampoline code.
*/
cli
CHECK_POSTSYS_NE(%r15, %r14, %ebx)
jne _full_syscall_postsys32
SIMPLE_SYSCALL_POSTSYS(%r15, %r14, %bx)
*** 1149,1161 ****
movl REGOFF_RIP(%rsp), %edx /* sysexit: %edx -> %eip */
pushq REGOFF_RFL(%rsp)
popfq
movl REGOFF_RSP(%rsp), %ecx /* sysexit: %ecx -> %esp */
ALTENTRY(sys_sysenter_swapgs_sysexit)
! swapgs
! sti
! sysexit
SET_SIZE(sys_sysenter_swapgs_sysexit)
SET_SIZE(sys_sysenter)
SET_SIZE(_sys_sysenter_post_swapgs)
SET_SIZE(brand_sys_sysenter)
--- 1195,1205 ----
movl REGOFF_RIP(%rsp), %edx /* sysexit: %edx -> %eip */
pushq REGOFF_RFL(%rsp)
popfq
movl REGOFF_RSP(%rsp), %ecx /* sysexit: %ecx -> %esp */
ALTENTRY(sys_sysenter_swapgs_sysexit)
! jmp tr_sysexit
SET_SIZE(sys_sysenter_swapgs_sysexit)
SET_SIZE(sys_sysenter)
SET_SIZE(_sys_sysenter_post_swapgs)
SET_SIZE(brand_sys_sysenter)
*** 1202,1215 ****
/*
* There should be no instructions between this label and SWAPGS/IRET
* or we could end up breaking branded zone support. See the usage of
* this label in lx_brand_int80_callback and sn1_brand_int91_callback
* for examples.
*/
ALTENTRY(sys_sysint_swapgs_iret)
! SWAPGS /* user gsbase */
! IRET
/*NOTREACHED*/
SET_SIZE(sys_sysint_swapgs_iret)
SET_SIZE(sys_syscall_int)
SET_SIZE(brand_sys_syscall_int)
--- 1246,1262 ----
/*
* There should be no instructions between this label and SWAPGS/IRET
* or we could end up breaking branded zone support. See the usage of
* this label in lx_brand_int80_callback and sn1_brand_int91_callback
* for examples.
+ *
+ * We want to swapgs to maintain the invariant that all entries into
+ * tr_iret_user are done on the user gsbase.
*/
ALTENTRY(sys_sysint_swapgs_iret)
! SWAPGS
! jmp tr_iret_user
/*NOTREACHED*/
SET_SIZE(sys_sysint_swapgs_iret)
SET_SIZE(sys_syscall_int)
SET_SIZE(brand_sys_syscall_int)