Print this page
8956 Implement KPTI
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>

*** 18,28 **** * * CDDL HEADER END */ /* * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. ! * Copyright 2015 Joyent, Inc. * Copyright (c) 2016 by Delphix. All rights reserved. */ #include <sys/asm_linkage.h> #include <sys/asm_misc.h> --- 18,28 ---- * * CDDL HEADER END */ /* * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. ! * Copyright 2018 Joyent, Inc. * Copyright (c) 2016 by Delphix. All rights reserved. */ #include <sys/asm_linkage.h> #include <sys/asm_misc.h>
*** 489,498 **** --- 489,512 ---- movq %rbx, REGOFF_FS(%rsp) movw %gs, %bx movq %rbx, REGOFF_GS(%rsp) /* + * If we're trying to use TRAPTRACE though, I take that back: we're + * probably debugging some problem in the SWAPGS logic and want to know + * what the incoming gsbase was. + * + * Since we already did SWAPGS, record the KGSBASE. + */ + #if defined(DEBUG) && defined(TRAPTRACE) && !defined(__xpv) + movl $MSR_AMD_KGSBASE, %ecx + rdmsr + movl %eax, REGOFF_GSBASE(%rsp) + movl %edx, REGOFF_GSBASE+4(%rsp) + #endif + + /* * Machine state saved in the regs structure on the stack * First six args in %rdi, %rsi, %rdx, %rcx, %r8, %r9 * %eax is the syscall number * %rsp is the thread's stack, %r15 is curthread * REG_RSP(%rsp) is the user's stack
*** 669,680 **** */ ASSERT_UPCALL_MASK_IS_SET SYSRETQ #else ALTENTRY(nopop_sys_syscall_swapgs_sysretq) ! SWAPGS /* user gsbase */ ! SYSRETQ #endif /*NOTREACHED*/ SET_SIZE(nopop_sys_syscall_swapgs_sysretq) _syscall_pre: --- 683,693 ---- */ ASSERT_UPCALL_MASK_IS_SET SYSRETQ #else ALTENTRY(nopop_sys_syscall_swapgs_sysretq) ! jmp tr_sysretq #endif /*NOTREACHED*/ SET_SIZE(nopop_sys_syscall_swapgs_sysretq) _syscall_pre:
*** 771,780 **** --- 784,807 ---- movq %rbx, REGOFF_FS(%rsp) movw %gs, %bx movq %rbx, REGOFF_GS(%rsp) /* + * If we're trying to use TRAPTRACE though, I take that back: we're + * probably debugging some problem in the SWAPGS logic and want to know + * what the incoming gsbase was. + * + * Since we already did SWAPGS, record the KGSBASE. + */ + #if defined(DEBUG) && defined(TRAPTRACE) && !defined(__xpv) + movl $MSR_AMD_KGSBASE, %ecx + rdmsr + movl %eax, REGOFF_GSBASE(%rsp) + movl %edx, REGOFF_GSBASE+4(%rsp) + #endif + + /* * Application state saved in the regs structure on the stack * %eax is the syscall number * %rsp is the thread's stack, %r15 is curthread * REG_RSP(%rsp) is the user's stack */
*** 887,898 **** movl REGOFF_RIP(%rsp), %ecx /* %ecx -> %eip */ movl REGOFF_RSP(%rsp), %esp ASSERT_UPCALL_MASK_IS_SET ALTENTRY(nopop_sys_syscall32_swapgs_sysretl) ! SWAPGS /* user gsbase */ ! SYSRETL SET_SIZE(nopop_sys_syscall32_swapgs_sysretl) /*NOTREACHED*/ _full_syscall_postsys32: STI --- 914,924 ---- movl REGOFF_RIP(%rsp), %ecx /* %ecx -> %eip */ movl REGOFF_RSP(%rsp), %esp ASSERT_UPCALL_MASK_IS_SET ALTENTRY(nopop_sys_syscall32_swapgs_sysretl) ! jmp tr_sysretl SET_SIZE(nopop_sys_syscall32_swapgs_sysretl) /*NOTREACHED*/ _full_syscall_postsys32: STI
*** 933,959 **** * * Note that we are unable to return both "rvals" to userland with * this call, as %edx is used by the sysexit instruction. * * One final complication in this routine is its interaction with ! * single-stepping in a debugger. For most of the system call mechanisms, ! * the CPU automatically clears the single-step flag before we enter the ! * kernel. The sysenter mechanism does not clear the flag, so a user ! * single-stepping through a libc routine may suddenly find themself ! * single-stepping through the kernel. To detect this, kmdb compares the ! * trap %pc to the [brand_]sys_enter addresses on each single-step trap. ! * If it finds that we have single-stepped to a sysenter entry point, it ! * explicitly clears the flag and executes the sys_sysenter routine. * ! * One final complication in this final complication is the fact that we ! * have two different entry points for sysenter: brand_sys_sysenter and ! * sys_sysenter. If we enter at brand_sys_sysenter and start single-stepping ! * through the kernel with kmdb, we will eventually hit the instruction at ! * sys_sysenter. kmdb cannot distinguish between that valid single-step ! * and the undesirable one mentioned above. To avoid this situation, we ! * simply add a jump over the instruction at sys_sysenter to make it ! * impossible to single-step to it. */ #if defined(__lint) void sys_sysenter() --- 959,984 ---- * * Note that we are unable to return both "rvals" to userland with * this call, as %edx is used by the sysexit instruction. * * One final complication in this routine is its interaction with ! * single-stepping in a debugger. For most of the system call mechanisms, the ! * CPU automatically clears the single-step flag before we enter the kernel. ! * The sysenter mechanism does not clear the flag, so a user single-stepping ! * through a libc routine may suddenly find themself single-stepping through the ! * kernel. To detect this, kmdb and trap() both compare the trap %pc to the ! 
* [brand_]sys_enter addresses on each single-step trap. If it finds that we ! * have single-stepped to a sysenter entry point, it explicitly clears the flag ! * and executes the sys_sysenter routine. * ! * One final complication in this final complication is the fact that we have ! * two different entry points for sysenter: brand_sys_sysenter and sys_sysenter. ! * If we enter at brand_sys_sysenter and start single-stepping through the ! * kernel with kmdb, we will eventually hit the instruction at sys_sysenter. ! * kmdb cannot distinguish between that valid single-step and the undesirable ! * one mentioned above. To avoid this situation, we simply add a jump over the ! * instruction at sys_sysenter to make it impossible to single-step to it. */ #if defined(__lint) void sys_sysenter()
*** 962,987 **** #else /* __lint */ ENTRY_NP(brand_sys_sysenter) SWAPGS /* kernel gsbase */ ALTENTRY(_brand_sys_sysenter_post_swapgs) BRAND_CALLBACK(BRAND_CB_SYSENTER, BRAND_URET_FROM_REG(%rdx)) /* * Jump over sys_sysenter to allow single-stepping as described * above. */ jmp _sys_sysenter_post_swapgs ALTENTRY(sys_sysenter) SWAPGS /* kernel gsbase */ - ALTENTRY(_sys_sysenter_post_swapgs) movq %gs:CPU_THREAD, %r15 movl $U32CS_SEL, REGOFF_CS(%rsp) movl %ecx, REGOFF_RSP(%rsp) /* wrapper: %esp -> %ecx */ movl %edx, REGOFF_RIP(%rsp) /* wrapper: %eip -> %edx */ pushfq popq %r10 movl $UDS_SEL, REGOFF_SS(%rsp) /* --- 987,1017 ---- #else /* __lint */ ENTRY_NP(brand_sys_sysenter) SWAPGS /* kernel gsbase */ ALTENTRY(_brand_sys_sysenter_post_swapgs) + BRAND_CALLBACK(BRAND_CB_SYSENTER, BRAND_URET_FROM_REG(%rdx)) /* * Jump over sys_sysenter to allow single-stepping as described * above. */ jmp _sys_sysenter_post_swapgs ALTENTRY(sys_sysenter) SWAPGS /* kernel gsbase */ ALTENTRY(_sys_sysenter_post_swapgs) + movq %gs:CPU_THREAD, %r15 movl $U32CS_SEL, REGOFF_CS(%rsp) movl %ecx, REGOFF_RSP(%rsp) /* wrapper: %esp -> %ecx */ movl %edx, REGOFF_RIP(%rsp) /* wrapper: %eip -> %edx */ + /* + * NOTE: none of the instructions that run before we get here should + * clobber bits in (R)FLAGS! This includes the kpti trampoline. + */ pushfq popq %r10 movl $UDS_SEL, REGOFF_SS(%rsp) /*
*** 1019,1028 **** --- 1049,1072 ---- movq %rbx, REGOFF_FS(%rsp) movw %gs, %bx movq %rbx, REGOFF_GS(%rsp) /* + * If we're trying to use TRAPTRACE though, I take that back: we're + * probably debugging some problem in the SWAPGS logic and want to know + * what the incoming gsbase was. + * + * Since we already did SWAPGS, record the KGSBASE. + */ + #if defined(DEBUG) && defined(TRAPTRACE) && !defined(__xpv) + movl $MSR_AMD_KGSBASE, %ecx + rdmsr + movl %eax, REGOFF_GSBASE(%rsp) + movl %edx, REGOFF_GSBASE+4(%rsp) + #endif + + /* * Application state saved in the regs structure on the stack * %eax is the syscall number * %rsp is the thread's stack, %r15 is curthread * REG_RSP(%rsp) is the user's stack */
*** 1116,1125 **** --- 1160,1171 ---- /* * We must protect ourselves from being descheduled here; * If we were, and we ended up on another cpu, or another * lwp got in ahead of us, it could change the segment * registers without us noticing before we return to userland. + * + * This cli is undone in the tr_sysexit trampoline code. */ cli CHECK_POSTSYS_NE(%r15, %r14, %ebx) jne _full_syscall_postsys32 SIMPLE_SYSCALL_POSTSYS(%r15, %r14, %bx)
*** 1149,1161 **** movl REGOFF_RIP(%rsp), %edx /* sysexit: %edx -> %eip */ pushq REGOFF_RFL(%rsp) popfq movl REGOFF_RSP(%rsp), %ecx /* sysexit: %ecx -> %esp */ ALTENTRY(sys_sysenter_swapgs_sysexit) ! swapgs ! sti ! sysexit SET_SIZE(sys_sysenter_swapgs_sysexit) SET_SIZE(sys_sysenter) SET_SIZE(_sys_sysenter_post_swapgs) SET_SIZE(brand_sys_sysenter) --- 1195,1205 ---- movl REGOFF_RIP(%rsp), %edx /* sysexit: %edx -> %eip */ pushq REGOFF_RFL(%rsp) popfq movl REGOFF_RSP(%rsp), %ecx /* sysexit: %ecx -> %esp */ ALTENTRY(sys_sysenter_swapgs_sysexit) ! jmp tr_sysexit SET_SIZE(sys_sysenter_swapgs_sysexit) SET_SIZE(sys_sysenter) SET_SIZE(_sys_sysenter_post_swapgs) SET_SIZE(brand_sys_sysenter)
*** 1202,1215 **** /* * There should be no instructions between this label and SWAPGS/IRET * or we could end up breaking branded zone support. See the usage of * this label in lx_brand_int80_callback and sn1_brand_int91_callback * for examples. */ ALTENTRY(sys_sysint_swapgs_iret) ! SWAPGS /* user gsbase */ ! IRET /*NOTREACHED*/ SET_SIZE(sys_sysint_swapgs_iret) SET_SIZE(sys_syscall_int) SET_SIZE(brand_sys_syscall_int) --- 1246,1262 ---- /* * There should be no instructions between this label and SWAPGS/IRET * or we could end up breaking branded zone support. See the usage of * this label in lx_brand_int80_callback and sn1_brand_int91_callback * for examples. + * + * We want to swapgs to maintain the invariant that all entries into + * tr_iret_user are done on the user gsbase. */ ALTENTRY(sys_sysint_swapgs_iret) ! SWAPGS ! jmp tr_iret_user /*NOTREACHED*/ SET_SIZE(sys_sysint_swapgs_iret) SET_SIZE(sys_syscall_int) SET_SIZE(brand_sys_syscall_int)