9441 kmdb should stash %cr3 in kdiregs
Reviewed by: John Levon <john.levon@joyent.com>
8956 Implement KPTI
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>

*** 20,40 ****
  */
  
  /*
   * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
   * Use is subject to license terms.
   */
  
- #pragma ident	"%Z%%M%	%I%	%E% SMI"
- 
  /*
!  * Debugger entry for both master and slave CPUs
   */
  
  #if defined(__lint)
  #include <sys/types.h>
! #endif
  
  #include <sys/segments.h>
  #include <sys/asm_linkage.h>
  #include <sys/controlregs.h>
  #include <sys/x86_archext.h>
--- 20,41 ----
  */
  
  /*
   * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
   * Use is subject to license terms.
+  *
+  * Copyright 2018 Joyent, Inc.
   */
  
  /*
!  * Debugger entry and exit for both master and slave CPUs. kdi_idthdl.s contains
!  * the IDT stubs that drop into here (mainly via kdi_cmnint).
   */
  
  #if defined(__lint)
  #include <sys/types.h>
! #else
  
  #include <sys/segments.h>
  #include <sys/asm_linkage.h>
  #include <sys/controlregs.h>
  #include <sys/x86_archext.h>
*** 44,56 ****
  #include <sys/psw.h>
  #include <sys/uadmin.h>
  #ifdef __xpv
  #include <sys/hypervisor.h>
  #endif
- 
- #ifdef _ASM
- 
  #include <kdi_assym.h>
  #include <assym.h>
  
  /* clobbers %rdx, %rcx, returns addr in %rax, CPU ID in %rbx */
  #define	GET_CPUSAVE_ADDR \
--- 45,54 ----
*** 78,87 ****
--- 76,88 ----
  	movq	%r11, KRS_GDT(%rax);		\
  1:
  
  #ifdef __xpv
+ 	/*
+ 	 * Already on kernel gsbase via the hypervisor.
+ 	 */
  #define	SAVE_GSBASE(reg)	/* nothing */
  #define	RESTORE_GSBASE(reg)	/* nothing */
  
  #else
*** 88,99 ****
  #define	SAVE_GSBASE(base)				\
  	movl	$MSR_AMD_GSBASE, %ecx;			\
  	rdmsr;						\
  	shlq	$32, %rdx;				\
  	orq	%rax, %rdx;				\
! 	movq	%rdx, REG_OFF(KDIREG_GSBASE)(base)
  
  #define	RESTORE_GSBASE(base)				\
  	movq	REG_OFF(KDIREG_GSBASE)(base), %rdx;	\
  	movq	%rdx, %rax;				\
  	shrq	$32, %rdx;				\
  	movl	$MSR_AMD_GSBASE, %ecx;			\
--- 89,108 ----
  #define	SAVE_GSBASE(base)				\
  	movl	$MSR_AMD_GSBASE, %ecx;			\
  	rdmsr;						\
  	shlq	$32, %rdx;				\
  	orq	%rax, %rdx;				\
! 	movq	%rdx, REG_OFF(KDIREG_GSBASE)(base);	\
! 	movl	$MSR_AMD_KGSBASE, %ecx;			\
! 	rdmsr;						\
! 	shlq	$32, %rdx;				\
! 	orq	%rax, %rdx;				\
! 	movq	%rdx, REG_OFF(KDIREG_KGSBASE)(base)
  
+ /*
+  * We shouldn't have stomped on KGSBASE, so don't try to restore it.
+  */
  #define	RESTORE_GSBASE(base)				\
  	movq	REG_OFF(KDIREG_GSBASE)(base), %rdx;	\
  	movq	%rdx, %rax;				\
  	shrq	$32, %rdx;				\
  	movl	$MSR_AMD_GSBASE, %ecx;			\
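An aside on the rdmsr/wrmsr convention SAVE_GSBASE and RESTORE_GSBASE rely on: the MSR number goes in %ecx, and the 64-bit value travels split across %edx (high half) and %eax (low half), which is why the macros shift and OR the halves together. A minimal C sketch of the same reassembly, with a hypothetical rdmsr_raw() standing in for the instruction itself:

    #include <stdint.h>

    /* Hypothetical stand-in for rdmsr: yields the two 32-bit halves. */
    extern void rdmsr_raw(uint32_t msr, uint32_t *lo, uint32_t *hi);

    static uint64_t
    read_msr64(uint32_t msr)
    {
    	uint32_t lo, hi;

    	rdmsr_raw(msr, &lo, &hi);
    	/* Same hi:lo reassembly as the shlq $32 / orq pair above. */
    	return (((uint64_t)hi << 32) | lo);
    }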
*** 100,112 ****
  	wrmsr
  #endif /* __xpv */
  
  /*
!  * %ss, %rsp, %rflags, %cs, %rip, %err, %trapno are already on the stack. Note
!  * that on the hypervisor, we skip the save/restore of GSBASE: it's slow, and
!  * unnecessary.
   */
  #define	KDI_SAVE_REGS(base) \
  	movq	%rdi, REG_OFF(KDIREG_RDI)(base);	\
  	movq	%rsi, REG_OFF(KDIREG_RSI)(base);	\
  	movq	%rdx, REG_OFF(KDIREG_RDX)(base);	\
--- 109,119 ----
  	wrmsr
  #endif /* __xpv */
  
  /*
!  * %ss, %rsp, %rflags, %cs, %rip, %err, %trapno are already on the stack.
   */
  #define	KDI_SAVE_REGS(base) \
  	movq	%rdi, REG_OFF(KDIREG_RDI)(base);	\
  	movq	%rsi, REG_OFF(KDIREG_RSI)(base);	\
  	movq	%rdx, REG_OFF(KDIREG_RDX)(base);	\
*** 123,132 ****
--- 130,141 ----
  	movq	%r14, REG_OFF(KDIREG_R14)(base);	\
  	movq	%r15, REG_OFF(KDIREG_R15)(base);	\
  	movq	%rbp, REG_OFF(KDIREG_SAVFP)(base);	\
  	movq	REG_OFF(KDIREG_RIP)(base), %rax;	\
  	movq	%rax, REG_OFF(KDIREG_SAVPC)(base);	\
+ 	movq	%cr2, %rax;				\
+ 	movq	%rax, REG_OFF(KDIREG_CR2)(base);	\
  	clrq	%rax;					\
  	movw	%ds, %ax;				\
  	movq	%rax, REG_OFF(KDIREG_DS)(base);		\
  	movw	%es, %ax;				\
  	movq	%rax, REG_OFF(KDIREG_ES)(base);		\
*** 141,150 ****
--- 150,161 ----
  	RESTORE_GSBASE(%rdi);				\
  	movq	REG_OFF(KDIREG_ES)(%rdi), %rax;		\
  	movw	%ax, %es;				\
  	movq	REG_OFF(KDIREG_DS)(%rdi), %rax;		\
  	movw	%ax, %ds;				\
+ 	movq	REG_OFF(KDIREG_CR2)(base), %rax;	\
+ 	movq	%rax, %cr2;				\
  	movq	REG_OFF(KDIREG_R15)(%rdi), %r15;	\
  	movq	REG_OFF(KDIREG_R14)(%rdi), %r14;	\
  	movq	REG_OFF(KDIREG_R13)(%rdi), %r13;	\
  	movq	REG_OFF(KDIREG_R12)(%rdi), %r12;	\
  	movq	REG_OFF(KDIREG_R11)(%rdi), %r11;	\
*** 160,177 ****
  	movq	REG_OFF(KDIREG_RDI)(%rdi), %rdi
  
  /*
   * Given the address of the current CPU's cpusave area in %rax, the following
   * macro restores the debugging state to said CPU.  Restored state includes
!  * the debug registers from the global %dr variables, and debugging MSRs from
!  * the CPU save area.  This code would be in a separate routine, but for the
!  * fact that some of the MSRs are jump-sensitive.  As such, we need to minimize
!  * the number of jumps taken subsequent to the update of said MSRs.  We can
!  * remove one jump (the ret) by using a macro instead of a function for the
!  * debugging state restoration code.
   *
!  * Takes the cpusave area in %rdi as a parameter, clobbers %rax-%rdx
   */
  #define	KDI_RESTORE_DEBUGGING_STATE \
  	pushq	%rdi;						\
  	leaq	kdi_drreg(%rip), %r15;				\
  	movl	$7, %edi;					\
--- 171,183 ----
  	movq	REG_OFF(KDIREG_RDI)(%rdi), %rdi
  
  /*
   * Given the address of the current CPU's cpusave area in %rax, the following
   * macro restores the debugging state to said CPU.  Restored state includes
!  * the debug registers from the global %dr variables.
   *
!  * Takes the cpusave area in %rdi as a parameter.
   */
  #define	KDI_RESTORE_DEBUGGING_STATE \
  	pushq	%rdi;						\
  	leaq	kdi_drreg(%rip), %r15;				\
  	movl	$7, %edi;					\
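Stripped of the old MSR machinery, KDI_RESTORE_DEBUGGING_STATE now just pushes the global %dr images into the hardware via kdi_dreg_set. A rough C rendering of what remains, assuming an illustrative kdi_drreg layout (the real one lives in the kdi headers):

    /* Illustrative layout: a %dr7 image plus four address slots. */
    typedef struct {
    	unsigned long dr_ctl;		/* %dr7 image */
    	unsigned long dr_addr[4];	/* %dr0..%dr3 images */
    } kdi_drreg_sketch_t;

    extern kdi_drreg_sketch_t kdi_drreg;
    extern void kdi_dreg_set(int dreg, unsigned long value);

    static void
    restore_debug_regs(void)
    {
    	int i;

    	kdi_dreg_set(7, kdi_drreg.dr_ctl);	/* enables/lengths first */
    	for (i = 0; i < 4; i++)
    		kdi_dreg_set(i, kdi_drreg.dr_addr[i]);
    }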
*** 192,245 ****
  	movq	DRADDR_OFF(2)(%r15), %rsi;			\
  	call	kdi_dreg_set;					\
  	movl	$3, %edi;					\
  	movq	DRADDR_OFF(3)(%r15), %rsi;			\
  	call	kdi_dreg_set;					\
! 	popq	%rdi;						\
! 								\
! 	/*							\
! 	 * Write any requested MSRs.				\
! 	 */							\
! 	movq	KRS_MSR(%rdi), %rbx;				\
! 	cmpq	$0, %rbx;					\
! 	je	3f;						\
! 1:								\
! 	movl	MSR_NUM(%rbx), %ecx;				\
! 	cmpl	$0, %ecx;					\
! 	je	3f;						\
! 								\
! 	movl	MSR_TYPE(%rbx), %edx;				\
! 	cmpl	$KDI_MSR_WRITE, %edx;				\
! 	jne	2f;						\
! 								\
! 	movq	MSR_VALP(%rbx), %rdx;				\
! 	movl	0(%rdx), %eax;					\
! 	movl	4(%rdx), %edx;					\
! 	wrmsr;							\
! 2:								\
! 	addq	$MSR_SIZE, %rbx;				\
! 	jmp	1b;						\
! 3:								\
! 	/*							\
! 	 * We must not branch after re-enabling LBR.  If	\
! 	 * kdi_wsr_wrexit_msr is set, it contains the number	\
! 	 * of the MSR that controls LBR.  kdi_wsr_wrexit_valp	\
! 	 * contains the value that is to be written to enable	\
! 	 * LBR.							\
! 	 */							\
! 	leaq	kdi_msr_wrexit_msr(%rip), %rcx;			\
! 	movl	(%rcx), %ecx;					\
! 	cmpl	$0, %ecx;					\
! 	je	1f;						\
! 								\
! 	leaq	kdi_msr_wrexit_valp(%rip), %rdx;		\
! 	movq	(%rdx), %rdx;					\
! 	movl	0(%rdx), %eax;					\
! 	movl	4(%rdx), %edx;					\
! 								\
! 	wrmsr;							\
! 1:
  
  /*
   * Each cpusave buffer has an area set aside for a ring buffer of breadcrumbs.
   * The following macros manage the buffer.
   */
--- 198,208 ----
  	movq	DRADDR_OFF(2)(%r15), %rsi;			\
  	call	kdi_dreg_set;					\
  	movl	$3, %edi;					\
  	movq	DRADDR_OFF(3)(%r15), %rsi;			\
  	call	kdi_dreg_set;					\
! 	popq	%rdi;
  
  /*
   * Each cpusave buffer has an area set aside for a ring buffer of breadcrumbs.
   * The following macros manage the buffer.
   */
*** 268,286 ****
  /* Set a value in the current breadcrumb buffer */
  #define	ADD_CRUMB(cpusave, offset, value, tmp) \
  	movq	KRS_CURCRUMB(cpusave), tmp;	\
  	movq	value, offset(tmp)
  
- #endif	/* _ASM */
- 
- #if defined(__lint)
- void
- kdi_cmnint(void)
- {
- }
- #else	/* __lint */
- 
  	/* XXX implement me */
  	ENTRY_NP(kdi_nmiint)
  	clrq	%rcx
  	movq	(%rcx), %rcx
  	SET_SIZE(kdi_nmiint)
--- 231,240 ----
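ADD_CRUMB and its companions amount to bumping a pointer around a small ring and storing into the current slot. The idea in C, with an illustrative crumb shape (the real kdi_crumb_t and ring size are defined elsewhere in kdi):

    /* Illustrative crumb: what the debugger was doing on this CPU. */
    typedef struct crumb {
    	unsigned long krm_pc;
    	unsigned long krm_sp;
    	unsigned long krm_trapno;
    } crumb_t;

    #define	NCRUMBS	5	/* illustrative ring size */

    typedef struct cpusave_sketch {
    	crumb_t *krs_curcrumb;		/* current slot in the ring */
    	crumb_t krs_crumbs[NCRUMBS];
    } cpusave_sketch_t;

    static void
    advance_crumb(cpusave_sketch_t *cs)
    {
    	/* Wrap back to slot 0 after the last one. */
    	if (++cs->krs_curcrumb == &cs->krs_crumbs[NCRUMBS])
    		cs->krs_curcrumb = cs->krs_crumbs;
    }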
*** 326,335 ****
--- 280,313 ----
  	movq	%rax, %rdx
  	shrq	$32, %rdx
  	movl	$MSR_AMD_GSBASE, %ecx
  	wrmsr
+ 
+ 	/*
+ 	 * In the trampoline we stashed the incoming %cr3. Copy this into
+ 	 * the kdiregs for restoration and later use.
+ 	 */
+ 	mov	%gs:(CPU_KPTI_DBG+KPTI_TR_CR3), %rdx
+ 	mov	%rdx, REG_OFF(KDIREG_CR3)(%rsp)
+ 	/*
+ 	 * Switch to the kernel's %cr3. From the early interrupt handler
+ 	 * until now we've been running on the "paranoid" %cr3 (that of kas
+ 	 * from early in boot).
+ 	 *
+ 	 * If we took the interrupt from somewhere already on the kas/paranoid
+ 	 * %cr3 though, don't change it (this could happen if kcr3 is corrupt
+ 	 * and we took a gptrap earlier from this very code).
+ 	 */
+ 	cmpq	%rdx, kpti_safe_cr3
+ 	je	.no_kcr3
+ 	mov	%gs:CPU_KPTI_KCR3, %rdx
+ 	cmpq	$0, %rdx
+ 	je	.no_kcr3
+ 	mov	%rdx, %cr3
+ .no_kcr3:
+ 
  #endif	/* __xpv */
  
  	GET_CPUSAVE_ADDR	/* %rax = cpusave, %rbx = CPU ID */
  
  	ADVANCE_CRUMB_POINTER(%rax, %rcx, %rdx)
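The new block above is a small decision procedure: record the %cr3 we arrived on, then move to the kernel's kcr3 unless we were already on the safe kas %cr3 or kcr3 looks corrupt. The same logic in C, with illustrative struct shapes and a hypothetical write_cr3():

    #include <stdint.h>

    typedef struct { uint64_t kd_tr_cr3, kd_kcr3; } kpti_dbg_sketch_t;
    typedef struct { uint64_t kr_cr3; } kdiregs_sketch_t;

    extern uint64_t kpti_safe_cr3;		/* kas %cr3 from early boot */
    extern void write_cr3(uint64_t cr3);	/* hypothetical %cr3 setter */

    static void
    switch_to_kcr3(kdiregs_sketch_t *regs, const kpti_dbg_sketch_t *dbg)
    {
    	uint64_t incoming = dbg->kd_tr_cr3; /* stashed by the trampoline */

    	regs->kr_cr3 = incoming;	/* keep for restoration and later use */

    	/* Already on the kas/paranoid %cr3? Don't change it. */
    	if (incoming == kpti_safe_cr3)
    		return;

    	/* A zero kcr3 would be corrupt; don't load it. */
    	if (dbg->kd_kcr3 != 0)
    		write_cr3(dbg->kd_kcr3);
    }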
*** 349,365 ****
  	 * Were we in the debugger when we took the trap (i.e. was %esp in one
  	 * of the debugger's memory ranges)?
  	 */
  	leaq	kdi_memranges, %rcx
  	movl	kdi_nmemranges, %edx
! 1:	cmpq	MR_BASE(%rcx), %rsp
  	jl	2f		/* below this range -- try the next one */
  	cmpq	MR_LIM(%rcx), %rsp
  	jg	2f		/* above this range -- try the next one */
  	jmp	3f		/* matched within this range */
! 2:	decl	%edx
  	jz	kdi_save_common_state	/* %rsp not within debugger memory */
  	addq	$MR_SIZE, %rcx
  	jmp	1b
  
  3:	/*
--- 327,345 ----
  	 * Were we in the debugger when we took the trap (i.e. was %esp in one
  	 * of the debugger's memory ranges)?
  	 */
  	leaq	kdi_memranges, %rcx
  	movl	kdi_nmemranges, %edx
! 1:
! 	cmpq	MR_BASE(%rcx), %rsp
  	jl	2f		/* below this range -- try the next one */
  	cmpq	MR_LIM(%rcx), %rsp
  	jg	2f		/* above this range -- try the next one */
  	jmp	3f		/* matched within this range */
! 2:
! 	decl	%edx
  	jz	kdi_save_common_state	/* %rsp not within debugger memory */
  	addq	$MR_SIZE, %rcx
  	jmp	1b
  
  3:	/*
--- 327,345 ----
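The numbered-label loop above is a linear scan over kdi_memranges; moving the labels onto their own lines is purely cosmetic. As plain C (structure names illustrative; the assembly's signed jl/jg compares are glossed over here):

    #include <stdint.h>

    typedef struct { uintptr_t mr_base, mr_lim; } memrange_sketch_t;

    extern memrange_sketch_t kdi_memranges[];
    extern int kdi_nmemranges;

    /* Was %rsp within one of the debugger's memory ranges? */
    static int
    rsp_in_debugger(uintptr_t rsp)
    {
    	int i;

    	for (i = 0; i < kdi_nmemranges; i++) {
    		if (rsp >= kdi_memranges[i].mr_base &&
    		    rsp <= kdi_memranges[i].mr_lim)
    			return (1);
    	}
    	return (0);
    }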
*** 385,396 ****
  	jmp	kdi_save_common_state
  	SET_SIZE(kdi_master_entry)
  	SET_SIZE(kdi_cmnint)
  
- #endif	/* __lint */
- 
  /*
   * The cross-call handler for slave CPUs.
   *
   * The debugger is single-threaded, so only one CPU, called the master, may be
   * running it at any given time.  The other CPUs, known as slaves, spin in a
--- 365,374 ----
*** 397,422 ****
   * busy loop until there's something for them to do. This is the entry point
   * for the slaves - they'll be sent here in response to a cross-call sent by the
   * master.
   */
  
- #if defined(__lint)
- char kdi_slave_entry_patch;
- 
- void
- kdi_slave_entry(void)
- {
- }
- #else /* __lint */
- 	.globl	kdi_slave_entry_patch;
- 
  	ENTRY_NP(kdi_slave_entry)
  
- 	/* kdi_msr_add_clrentry knows where this is */
- kdi_slave_entry_patch:
- 	KDI_MSR_PATCH;
- 
  	/*
  	 * Cross calls are implemented as function calls, so our stack currently
  	 * looks like one you'd get from a zero-argument function call. That
  	 * is, there's the return %rip at %rsp, and that's about it. We need
  	 * to make it look like an interrupt stack. When we first save, we'll
--- 375,386 ----
*** 436,445 ****
--- 400,412 ----
  	pushq	$-1		/* phony trap number */
  	subq	$REG_OFF(KDIREG_TRAPNO), %rsp
  	KDI_SAVE_REGS(%rsp)
  
+ 	movq	%cr3, %rax
+ 	movq	%rax, REG_OFF(KDIREG_CR3)(%rsp)
+ 
  	movq	REG_OFF(KDIREG_SS)(%rsp), %rax
  	xchgq	REG_OFF(KDIREG_RIP)(%rsp), %rax
  	movq	%rax, REG_OFF(KDIREG_SS)(%rsp)
  	movq	REG_OFF(KDIREG_RSP)(%rsp), %rax
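Because the hand-built frame leaves the return %rip and %ss values transposed relative to a genuine interrupt frame, the xchgq sequence above swaps the two slots (the slave's %cr3 now rides along in its kdiregs slot as well). The fixup, reduced to C with an illustrative frame shape:

    #include <stdint.h>

    typedef struct { uint64_t kr_rip, kr_ss; } frame_tail_sketch_t;

    static void
    fix_slave_frame(frame_tail_sketch_t *f)
    {
    	/* Swap the %rip and %ss slots into interrupt-frame order. */
    	uint64_t tmp = f->kr_ss;

    	f->kr_ss = f->kr_rip;
    	f->kr_rip = tmp;
    }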
*** 463,474 ****
  	pushq	%rax
  	jmp	kdi_save_common_state
  	SET_SIZE(kdi_slave_entry)
  
- #endif	/* __lint */
- 
  /*
   * The state of the world:
   *
   * The stack has a complete set of saved registers and segment
   * selectors, arranged in the kdi_regs.h order.  It also has a pointer
--- 430,439 ----
*** 478,489 ****
   * registers.  First we check whether we should jump straight back to
   * the kernel.  If not, we save a few more registers, ready the
   * machine for debugger entry, and enter the debugger.
   */
  
- #if !defined(__lint)
- 
  	ENTRY_NP(kdi_save_common_state)
  
  	popq	%rdi			/* the cpusave area */
  	movq	%rsp, KRS_GREGS(%rdi)	/* save ptr to current saved regs */
--- 443,452 ----
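For orientation, the tail of that saved-register block (the part the hardware pushes, plus trapno/err) looks roughly like this in kdi_regs.h order — an illustrative excerpt in C, not the full layout:

    #include <stdint.h>

    /* Highest-address end of the frame kdi_save_common_state sees. */
    typedef struct {
    	uint64_t kr_trapno;
    	uint64_t kr_err;
    	uint64_t kr_rip;
    	uint64_t kr_cs;
    	uint64_t kr_rflags;
    	uint64_t kr_rsp;
    	uint64_t kr_ss;
    } kdiregs_tail_sketch_t;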
*** 535,575 ****
  	call	kdi_dreg_get
  	movq	%rax, KRS_DROFF(3)(%r15)
  
  	movq	%r15, %rax	/* restore cpu save area to rax */
  
- 	/*
- 	 * Save any requested MSRs.
- 	 */
- 	movq	KRS_MSR(%rax), %rcx
- 	cmpq	$0, %rcx
- 	je	no_msr
- 
- 	pushq	%rax		/* rdmsr clobbers %eax */
- 	movq	%rcx, %rbx
- 
- 1:
- 	movl	MSR_NUM(%rbx), %ecx
- 	cmpl	$0, %ecx
- 	je	msr_done
- 
- 	movl	MSR_TYPE(%rbx), %edx
- 	cmpl	$KDI_MSR_READ, %edx
- 	jne	msr_next
- 
- 	rdmsr			/* addr in %ecx, value into %edx:%eax */
- 	movl	%eax, MSR_VAL(%rbx)
- 	movl	%edx, _CONST(MSR_VAL + 4)(%rbx)
- 
- msr_next:
- 	addq	$MSR_SIZE, %rbx
- 	jmp	1b
- 
- msr_done:
- 	popq	%rax
- 
- no_msr:
  	clrq	%rbp		/* stack traces should end here */
  
  	pushq	%rax
  	movq	%rax, %rdi	/* cpusave */
--- 498,507 ----
*** 580,602 ****
  	jmp	kdi_resume
  	SET_SIZE(kdi_save_common_state)
  
- #endif	/* !__lint */
- 
  /*
   * Resume the world.  The code that calls kdi_resume has already
   * decided whether or not to restore the IDT.
   */
  
- #if defined(__lint)
- void
- kdi_resume(void)
- {
- }
- #else /* __lint */
- 
  	/* cpusave in %rdi */
  	ENTRY_NP(kdi_resume)
  
  	/*
  	 * Send this CPU back into the world
--- 512,525 ----
*** 607,626 ****
  #endif
  
  	KDI_RESTORE_DEBUGGING_STATE
  
  	movq	KRS_GREGS(%rdi), %rsp
  	KDI_RESTORE_REGS(%rsp)
  	addq	$REG_OFF(KDIREG_RIP), %rsp	/* Discard state, trapno, err */
  	IRET
  	/*NOTREACHED*/
  	SET_SIZE(kdi_resume)
  
- #endif	/* __lint */
- 
- #if !defined(__lint)
- 
  	ENTRY_NP(kdi_pass_to_kernel)
  
  	popq	%rdi /* cpusave */
  
  	movq	$KDI_CPU_STATE_NONE, KRS_CPU_STATE(%rdi)
--- 530,576 ----
  #endif
  
  	KDI_RESTORE_DEBUGGING_STATE
  
  	movq	KRS_GREGS(%rdi), %rsp
+ 
+ #if !defined(__xpv)
+ 	/*
+ 	 * If we're going back via tr_iret_kdi, then we want to copy the
+ 	 * final %cr3 we're going to back into the kpti_dbg area now.
+ 	 *
+ 	 * Since the trampoline needs to find the kpti_dbg too, we enter it
+ 	 * with %r13 set to point at that. The real %r13 (to restore before
+ 	 * the iret) we stash in the kpti_dbg itself.
+ 	 */
+ 	movq	%gs:CPU_SELF, %r13	/* can't leaq %gs:*, use self-ptr */
+ 	addq	$CPU_KPTI_DBG, %r13
+ 
+ 	movq	REG_OFF(KDIREG_R13)(%rsp), %rdx
+ 	movq	%rdx, KPTI_R13(%r13)
+ 
+ 	movq	REG_OFF(KDIREG_CR3)(%rsp), %rdx
+ 	movq	%rdx, KPTI_TR_CR3(%r13)
+ 
+ 	/* The trampoline will undo this later. */
+ 	movq	%r13, REG_OFF(KDIREG_R13)(%rsp)
+ #endif
+ 
  	KDI_RESTORE_REGS(%rsp)
  	addq	$REG_OFF(KDIREG_RIP), %rsp	/* Discard state, trapno, err */
+ 	/*
+ 	 * The common trampoline code will restore %cr3 to the right value
+ 	 * for either kernel or userland.
+ 	 */
+ #if !defined(__xpv)
+ 	jmp	tr_iret_kdi
+ #else
  	IRET
+ #endif
  	/*NOTREACHED*/
  	SET_SIZE(kdi_resume)
  
  	ENTRY_NP(kdi_pass_to_kernel)
  
  	popq	%rdi /* cpusave */
  
  	movq	$KDI_CPU_STATE_NONE, KRS_CPU_STATE(%rdi)
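Before jumping to tr_iret_kdi, the new code parks two values in the per-CPU kpti_dbg area: the real %r13 (restored by the trampoline just before the iret) and the final %cr3. In C terms, with illustrative struct shapes:

    #include <stdint.h>

    typedef struct {
    	uint64_t kd_r13;	/* real %r13, restored before iret */
    	uint64_t kd_tr_cr3;	/* final %cr3 for the trampoline */
    } kpti_dbg_sketch_t;

    typedef struct { uint64_t kr_r13, kr_cr3; } kdiregs_sketch_t;

    static void
    stage_trampoline_exit(kdiregs_sketch_t *regs, kpti_dbg_sketch_t *dbg)
    {
    	dbg->kd_r13 = regs->kr_r13;
    	dbg->kd_tr_cr3 = regs->kr_cr3;

    	/*
    	 * Enter the trampoline with %r13 pointing at kpti_dbg;
    	 * tr_iret_kdi undoes this before the iret.
    	 */
    	regs->kr_r13 = (uint64_t)(uintptr_t)dbg;
    }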
*** 687,706 ****
  #endif
  	/*NOTREACHED*/
  	SET_SIZE(kdi_reboot)
  
- #endif	/* !__lint */
- 
- #if defined(__lint)
- /*ARGSUSED*/
- void
- kdi_cpu_debug_init(kdi_cpusave_t *save)
- {
- }
- #else /* __lint */
- 
  	ENTRY_NP(kdi_cpu_debug_init)
  	pushq	%rbp
  	movq	%rsp, %rbp
  
  	pushq	%rbx		/* macro will clobber %rbx */
--- 637,646 ----
*** 707,715 ****
  	KDI_RESTORE_DEBUGGING_STATE
  
  	popq	%rbx
  	leave
  	ret
- 
  	SET_SIZE(kdi_cpu_debug_init)
- 
- #endif /* !__lint */
--- 647,680 ----
  	KDI_RESTORE_DEBUGGING_STATE
  
  	popq	%rbx
  	leave
  	ret
  	SET_SIZE(kdi_cpu_debug_init)
  
+ #define	GETDREG(name, r)	\
+ 	ENTRY_NP(name);		\
+ 	movq	r, %rax;	\
+ 	ret;			\
+ 	SET_SIZE(name)
+ 
+ #define	SETDREG(name, r)	\
+ 	ENTRY_NP(name);		\
+ 	movq	%rdi, r;	\
+ 	ret;			\
+ 	SET_SIZE(name)
+ 
+ 	GETDREG(kdi_getdr0, %dr0)
+ 	GETDREG(kdi_getdr1, %dr1)
+ 	GETDREG(kdi_getdr2, %dr2)
+ 	GETDREG(kdi_getdr3, %dr3)
+ 	GETDREG(kdi_getdr6, %dr6)
+ 	GETDREG(kdi_getdr7, %dr7)
+ 
+ 	SETDREG(kdi_setdr0, %dr0)
+ 	SETDREG(kdi_setdr1, %dr1)
+ 	SETDREG(kdi_setdr2, %dr2)
+ 	SETDREG(kdi_setdr3, %dr3)
+ 	SETDREG(kdi_setdr6, %dr6)
+ 	SETDREG(kdi_setdr7, %dr7)
+ 
+ #endif /* !__lint */
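The GETDREG/SETDREG expansions give C callers direct debug-register accessors. Their implied prototypes would look like the following sketch (the authoritative declarations live in the kdi headers):

    extern unsigned long kdi_getdr6(void);
    extern unsigned long kdi_getdr7(void);
    extern void kdi_setdr6(unsigned long value);
    extern void kdi_setdr7(unsigned long value);

    /* e.g., read the %dr6 status bits and clear them before resuming: */
    static unsigned long
    consume_dr6(void)
    {
    	unsigned long status = kdi_getdr6();

    	kdi_setdr6(0);
    	return (status);
    }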