Print this page
8956 Implement KPTI
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
@@ -18,11 +18,11 @@
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2015 Joyent, Inc.
+ * Copyright 2018 Joyent, Inc.
* Copyright (c) 2016 by Delphix. All rights reserved.
*/
#include <sys/asm_linkage.h>
#include <sys/asm_misc.h>
@@ -489,10 +489,24 @@
movq %rbx, REGOFF_FS(%rsp)
movw %gs, %bx
movq %rbx, REGOFF_GS(%rsp)
/*
+ * If we're trying to use TRAPTRACE though, I take that back: we're
+ * probably debugging some problem in the SWAPGS logic and want to know
+ * what the incoming gsbase was.
+ *
+ * Since we already did SWAPGS, the incoming gsbase is now held in the
+ * KGSBASE MSR, so that is what we record.
+ */
+#if defined(DEBUG) && defined(TRAPTRACE) && !defined(__xpv)
+ movl $MSR_AMD_KGSBASE, %ecx
+ rdmsr
+ movl %eax, REGOFF_GSBASE(%rsp)
+ movl %edx, REGOFF_GSBASE+4(%rsp)
+#endif
+
+ /*
* Machine state saved in the regs structure on the stack
* First six args in %rdi, %rsi, %rdx, %rcx, %r8, %r9
* %eax is the syscall number
* %rsp is the thread's stack, %r15 is curthread
* REG_RSP(%rsp) is the user's stack
@@ -669,12 +683,11 @@
*/
ASSERT_UPCALL_MASK_IS_SET
SYSRETQ
#else
ALTENTRY(nopop_sys_syscall_swapgs_sysretq)
- SWAPGS /* user gsbase */
- SYSRETQ
+ jmp tr_sysretq
#endif
/*NOTREACHED*/
SET_SIZE(nopop_sys_syscall_swapgs_sysretq)
_syscall_pre:
@@ -771,10 +784,24 @@
movq %rbx, REGOFF_FS(%rsp)
movw %gs, %bx
movq %rbx, REGOFF_GS(%rsp)
/*
+ * If we're trying to use TRAPTRACE though, I take that back: we're
+ * probably debugging some problem in the SWAPGS logic and want to know
+ * what the incoming gsbase was.
+ *
+ * Since we already did SWAPGS, the incoming gsbase is now held in the
+ * KGSBASE MSR, so that is what we record.
+ */
+#if defined(DEBUG) && defined(TRAPTRACE) && !defined(__xpv)
+ movl $MSR_AMD_KGSBASE, %ecx
+ rdmsr
+ movl %eax, REGOFF_GSBASE(%rsp)
+ movl %edx, REGOFF_GSBASE+4(%rsp)
+#endif
+
+ /*
* Application state saved in the regs structure on the stack
* %eax is the syscall number
* %rsp is the thread's stack, %r15 is curthread
* REG_RSP(%rsp) is the user's stack
*/
@@ -887,12 +914,11 @@
movl REGOFF_RIP(%rsp), %ecx /* %ecx -> %eip */
movl REGOFF_RSP(%rsp), %esp
ASSERT_UPCALL_MASK_IS_SET
ALTENTRY(nopop_sys_syscall32_swapgs_sysretl)
- SWAPGS /* user gsbase */
- SYSRETL
+ jmp tr_sysretl
SET_SIZE(nopop_sys_syscall32_swapgs_sysretl)
/*NOTREACHED*/
_full_syscall_postsys32:
STI
@@ -933,27 +959,26 @@
*
* Note that we are unable to return both "rvals" to userland with
* this call, as %edx is used by the sysexit instruction.
*
* One final complication in this routine is its interaction with
- * single-stepping in a debugger. For most of the system call mechanisms,
- * the CPU automatically clears the single-step flag before we enter the
- * kernel. The sysenter mechanism does not clear the flag, so a user
- * single-stepping through a libc routine may suddenly find themself
- * single-stepping through the kernel. To detect this, kmdb compares the
- * trap %pc to the [brand_]sys_enter addresses on each single-step trap.
- * If it finds that we have single-stepped to a sysenter entry point, it
- * explicitly clears the flag and executes the sys_sysenter routine.
+ * single-stepping in a debugger. For most of the system call mechanisms, the
+ * CPU automatically clears the single-step flag before we enter the kernel.
+ * The sysenter mechanism does not clear the flag, so a user single-stepping
+ * through a libc routine may suddenly find themselves single-stepping through
+ * the kernel. To detect this, kmdb and trap() both compare the trap %pc to the
+ * [brand_]sys_sysenter addresses on each single-step trap. If either finds
+ * that we have single-stepped to a sysenter entry point, it explicitly clears
+ * the flag and executes the sys_sysenter routine.
*
- * One final complication in this final complication is the fact that we
- * have two different entry points for sysenter: brand_sys_sysenter and
- * sys_sysenter. If we enter at brand_sys_sysenter and start single-stepping
- * through the kernel with kmdb, we will eventually hit the instruction at
- * sys_sysenter. kmdb cannot distinguish between that valid single-step
- * and the undesirable one mentioned above. To avoid this situation, we
- * simply add a jump over the instruction at sys_sysenter to make it
- * impossible to single-step to it.
+ * One final complication in this final complication is the fact that we have
+ * two different entry points for sysenter: brand_sys_sysenter and sys_sysenter.
+ * If we enter at brand_sys_sysenter and start single-stepping through the
+ * kernel with kmdb, we will eventually hit the instruction at sys_sysenter.
+ * kmdb cannot distinguish between that valid single-step and the undesirable
+ * one mentioned above. To avoid this situation, we simply add a jump over the
+ * instruction at sys_sysenter to make it impossible to single-step to it.
*/
#if defined(__lint)
void
sys_sysenter()
@@ -962,26 +987,31 @@
#else /* __lint */
ENTRY_NP(brand_sys_sysenter)
SWAPGS /* kernel gsbase */
ALTENTRY(_brand_sys_sysenter_post_swapgs)
+
BRAND_CALLBACK(BRAND_CB_SYSENTER, BRAND_URET_FROM_REG(%rdx))
/*
* Jump over sys_sysenter to allow single-stepping as described
* above.
*/
jmp _sys_sysenter_post_swapgs
ALTENTRY(sys_sysenter)
SWAPGS /* kernel gsbase */
-
ALTENTRY(_sys_sysenter_post_swapgs)
+
movq %gs:CPU_THREAD, %r15
movl $U32CS_SEL, REGOFF_CS(%rsp)
movl %ecx, REGOFF_RSP(%rsp) /* wrapper: %esp -> %ecx */
movl %edx, REGOFF_RIP(%rsp) /* wrapper: %eip -> %edx */
+ /*
+ * NOTE: none of the instructions that run before we get here should
+ * clobber bits in (R)FLAGS! This includes the kpti trampoline.
+ */
pushfq
popq %r10
movl $UDS_SEL, REGOFF_SS(%rsp)
/*
@@ -1019,10 +1049,24 @@
movq %rbx, REGOFF_FS(%rsp)
movw %gs, %bx
movq %rbx, REGOFF_GS(%rsp)
/*
+ * If we're trying to use TRAPTRACE though, I take that back: we're
+ * probably debugging some problem in the SWAPGS logic and want to know
+ * what the incoming gsbase was.
+ *
+ * Since we already did SWAPGS, the incoming gsbase is now held in the
+ * KGSBASE MSR, so that is what we record.
+ */
+#if defined(DEBUG) && defined(TRAPTRACE) && !defined(__xpv)
+ movl $MSR_AMD_KGSBASE, %ecx
+ rdmsr
+ movl %eax, REGOFF_GSBASE(%rsp)
+ movl %edx, REGOFF_GSBASE+4(%rsp)
+#endif
+
+ /*
* Application state saved in the regs structure on the stack
* %eax is the syscall number
* %rsp is the thread's stack, %r15 is curthread
* REG_RSP(%rsp) is the user's stack
*/
@@ -1116,10 +1160,12 @@
/*
* We must protect ourselves from being descheduled here;
* if we were, and we ended up on another cpu, or another
* lwp got in ahead of us, it could change the segment
* registers without us noticing before we return to userland.
+ *
+ * This cli is undone in the tr_sysexit trampoline code.
*/
cli
CHECK_POSTSYS_NE(%r15, %r14, %ebx)
jne _full_syscall_postsys32
SIMPLE_SYSCALL_POSTSYS(%r15, %r14, %bx)
@@ -1149,13 +1195,11 @@
movl REGOFF_RIP(%rsp), %edx /* sysexit: %edx -> %eip */
pushq REGOFF_RFL(%rsp)
popfq
movl REGOFF_RSP(%rsp), %ecx /* sysexit: %ecx -> %esp */
ALTENTRY(sys_sysenter_swapgs_sysexit)
- swapgs
- sti
- sysexit
+ jmp tr_sysexit
SET_SIZE(sys_sysenter_swapgs_sysexit)
SET_SIZE(sys_sysenter)
SET_SIZE(_sys_sysenter_post_swapgs)
SET_SIZE(brand_sys_sysenter)
@@ -1202,14 +1246,17 @@
/*
* There should be no instructions between this label and SWAPGS/IRET
* or we could end up breaking branded zone support. See the usage of
* this label in lx_brand_int80_callback and sn1_brand_int91_callback
* for examples.
+ *
+ * We want to swapgs to maintain the invariant that all entries into
+ * tr_iret_user are done on the user gsbase.
*/
ALTENTRY(sys_sysint_swapgs_iret)
- SWAPGS /* user gsbase */
- IRET
+ SWAPGS
+ jmp tr_iret_user
/*NOTREACHED*/
SET_SIZE(sys_sysint_swapgs_iret)
SET_SIZE(sys_syscall_int)
SET_SIZE(brand_sys_syscall_int)