9441 kmdb should stash %cr3 in kdiregs
Reviewed by: John Levon <john.levon@joyent.com>
8956 Implement KPTI
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
*** 20,40 ****
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
- #pragma ident "%Z%%M% %I% %E% SMI"
-
/*
! * Debugger entry for both master and slave CPUs
*/
#if defined(__lint)
#include <sys/types.h>
! #endif
#include <sys/segments.h>
#include <sys/asm_linkage.h>
#include <sys/controlregs.h>
#include <sys/x86_archext.h>
--- 20,41 ----
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ *
+ * Copyright 2018 Joyent, Inc.
*/
/*
! * Debugger entry and exit for both master and slave CPUs. kdi_idthdl.s contains
! * the IDT stubs that drop into here (mainly via kdi_cmnint).
*/
#if defined(__lint)
#include <sys/types.h>
! #else
#include <sys/segments.h>
#include <sys/asm_linkage.h>
#include <sys/controlregs.h>
#include <sys/x86_archext.h>
*** 44,56 ****
#include <sys/psw.h>
#include <sys/uadmin.h>
#ifdef __xpv
#include <sys/hypervisor.h>
#endif
-
- #ifdef _ASM
-
#include <kdi_assym.h>
#include <assym.h>
/* clobbers %rdx, %rcx, returns addr in %rax, CPU ID in %rbx */
#define GET_CPUSAVE_ADDR \
--- 45,54 ----
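
For reference, GET_CPUSAVE_ADDR amounts to indexing a per-CPU array of save
areas by CPU ID; a minimal C sketch, assuming kdi_cpusave is the flat array
that the real macro scales by KRS_SIZE:

    /*
     * Sketch of GET_CPUSAVE_ADDR. The assembly leaves the CPU ID in %rbx
     * and the computed address in %rax.
     */
    static kdi_cpusave_t *
    get_cpusave_addr(uint_t cpuid)
    {
            return (&kdi_cpusave[cpuid]);   /* base + cpuid * KRS_SIZE */
    }
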
*** 78,87 ****
--- 76,88 ----
movq %r11, KRS_GDT(%rax); \
1:
#ifdef __xpv
+ /*
+ * Already on kernel gsbase via the hypervisor.
+ */
#define SAVE_GSBASE(reg) /* nothing */
#define RESTORE_GSBASE(reg) /* nothing */
#else
*** 88,99 ****
#define SAVE_GSBASE(base) \
movl $MSR_AMD_GSBASE, %ecx; \
rdmsr; \
shlq $32, %rdx; \
orq %rax, %rdx; \
! movq %rdx, REG_OFF(KDIREG_GSBASE)(base)
#define RESTORE_GSBASE(base) \
movq REG_OFF(KDIREG_GSBASE)(base), %rdx; \
movq %rdx, %rax; \
shrq $32, %rdx; \
movl $MSR_AMD_GSBASE, %ecx; \
--- 89,108 ----
#define SAVE_GSBASE(base) \
movl $MSR_AMD_GSBASE, %ecx; \
rdmsr; \
shlq $32, %rdx; \
orq %rax, %rdx; \
! movq %rdx, REG_OFF(KDIREG_GSBASE)(base); \
! movl $MSR_AMD_KGSBASE, %ecx; \
! rdmsr; \
! shlq $32, %rdx; \
! orq %rax, %rdx; \
! movq %rdx, REG_OFF(KDIREG_KGSBASE)(base)
+ /*
+ * We shouldn't have stomped on KGSBASE, so don't try to restore it.
+ */
#define RESTORE_GSBASE(base) \
movq REG_OFF(KDIREG_GSBASE)(base), %rdx; \
movq %rdx, %rax; \
shrq $32, %rdx; \
movl $MSR_AMD_GSBASE, %ecx; \
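
The shlq/orq pairs above exist because rdmsr returns the 64-bit MSR value
split across %edx:%eax. A sketch of SAVE_GSBASE in C, assuming a hypothetical
rdmsr32() helper and with kdr_* members standing in for the KDIREG_* slots:

    /* Sketch: reassemble the two 32-bit rdmsr halves before storing. */
    static void
    save_gsbase(kdi_regs_t *regs)
    {
            uint32_t lo, hi;

            rdmsr32(MSR_AMD_GSBASE, &lo, &hi);
            regs->kdr_gsbase = ((uint64_t)hi << 32) | lo;

            rdmsr32(MSR_AMD_KGSBASE, &lo, &hi);
            regs->kdr_kgsbase = ((uint64_t)hi << 32) | lo; /* saved, never restored */
    }
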
*** 100,112 ****
wrmsr
#endif /* __xpv */
/*
! * %ss, %rsp, %rflags, %cs, %rip, %err, %trapno are already on the stack. Note
! * that on the hypervisor, we skip the save/restore of GSBASE: it's slow, and
! * unnecessary.
*/
#define KDI_SAVE_REGS(base) \
movq %rdi, REG_OFF(KDIREG_RDI)(base); \
movq %rsi, REG_OFF(KDIREG_RSI)(base); \
movq %rdx, REG_OFF(KDIREG_RDX)(base); \
--- 109,119 ----
wrmsr
#endif /* __xpv */
/*
! * %ss, %rsp, %rflags, %cs, %rip, %err, %trapno are already on the stack.
*/
#define KDI_SAVE_REGS(base) \
movq %rdi, REG_OFF(KDIREG_RDI)(base); \
movq %rsi, REG_OFF(KDIREG_RSI)(base); \
movq %rdx, REG_OFF(KDIREG_RDX)(base); \
*** 123,132 ****
--- 130,141 ----
movq %r14, REG_OFF(KDIREG_R14)(base); \
movq %r15, REG_OFF(KDIREG_R15)(base); \
movq %rbp, REG_OFF(KDIREG_SAVFP)(base); \
movq REG_OFF(KDIREG_RIP)(base), %rax; \
movq %rax, REG_OFF(KDIREG_SAVPC)(base); \
+ movq %cr2, %rax; \
+ movq %rax, REG_OFF(KDIREG_CR2)(base); \
clrq %rax; \
movw %ds, %ax; \
movq %rax, REG_OFF(KDIREG_DS)(base); \
movw %es, %ax; \
movq %rax, REG_OFF(KDIREG_ES)(base); \
*** 141,150 ****
--- 150,161 ----
RESTORE_GSBASE(%rdi); \
movq REG_OFF(KDIREG_ES)(%rdi), %rax; \
movw %ax, %es; \
movq REG_OFF(KDIREG_DS)(%rdi), %rax; \
movw %ax, %ds; \
+ movq REG_OFF(KDIREG_CR2)(base), %rax; \
+ movq %rax, %cr2; \
movq REG_OFF(KDIREG_R15)(%rdi), %r15; \
movq REG_OFF(KDIREG_R14)(%rdi), %r14; \
movq REG_OFF(KDIREG_R13)(%rdi), %r13; \
movq REG_OFF(KDIREG_R12)(%rdi), %r12; \
movq REG_OFF(KDIREG_R11)(%rdi), %r11; \
*** 160,177 ****
movq REG_OFF(KDIREG_RDI)(%rdi), %rdi
/*
* Given the address of the current CPU's cpusave area in %rax, the following
* macro restores the debugging state to said CPU. Restored state includes
! * the debug registers from the global %dr variables, and debugging MSRs from
! * the CPU save area. This code would be in a separate routine, but for the
! * fact that some of the MSRs are jump-sensitive. As such, we need to minimize
! * the number of jumps taken subsequent to the update of said MSRs. We can
! * remove one jump (the ret) by using a macro instead of a function for the
! * debugging state restoration code.
*
! * Takes the cpusave area in %rdi as a parameter, clobbers %rax-%rdx
*/
#define KDI_RESTORE_DEBUGGING_STATE \
pushq %rdi; \
leaq kdi_drreg(%rip), %r15; \
movl $7, %edi; \
--- 171,183 ----
movq REG_OFF(KDIREG_RDI)(%rdi), %rdi
/*
* Given the address of the current CPU's cpusave area in %rax, the following
* macro restores the debugging state to said CPU. Restored state includes
! * the debug registers from the global %dr variables.
*
! * Takes the cpusave area in %rdi as a parameter.
*/
#define KDI_RESTORE_DEBUGGING_STATE \
pushq %rdi; \
leaq kdi_drreg(%rip), %r15; \
movl $7, %edi; \
*** 192,245 ****
movq DRADDR_OFF(2)(%r15), %rsi; \
call kdi_dreg_set; \
movl $3, %edi; \
movq DRADDR_OFF(3)(%r15), %rsi; \
call kdi_dreg_set; \
! popq %rdi; \
! \
! /* \
! * Write any requested MSRs. \
! */ \
! movq KRS_MSR(%rdi), %rbx; \
! cmpq $0, %rbx; \
! je 3f; \
! 1: \
! movl MSR_NUM(%rbx), %ecx; \
! cmpl $0, %ecx; \
! je 3f; \
! \
! movl MSR_TYPE(%rbx), %edx; \
! cmpl $KDI_MSR_WRITE, %edx; \
! jne 2f; \
! \
! movq MSR_VALP(%rbx), %rdx; \
! movl 0(%rdx), %eax; \
! movl 4(%rdx), %edx; \
! wrmsr; \
! 2: \
! addq $MSR_SIZE, %rbx; \
! jmp 1b; \
! 3: \
! /* \
! * We must not branch after re-enabling LBR. If \
! * kdi_wsr_wrexit_msr is set, it contains the number \
! * of the MSR that controls LBR. kdi_wsr_wrexit_valp \
! * contains the value that is to be written to enable \
! * LBR. \
! */ \
! leaq kdi_msr_wrexit_msr(%rip), %rcx; \
! movl (%rcx), %ecx; \
! cmpl $0, %ecx; \
! je 1f; \
! \
! leaq kdi_msr_wrexit_valp(%rip), %rdx; \
! movq (%rdx), %rdx; \
! movl 0(%rdx), %eax; \
! movl 4(%rdx), %edx; \
! \
! wrmsr; \
! 1:
/*
* Each cpusave buffer has an area set aside for a ring buffer of breadcrumbs.
* The following macros manage the buffer.
*/
--- 198,208 ----
movq DRADDR_OFF(2)(%r15), %rsi; \
call kdi_dreg_set; \
movl $3, %edi; \
movq DRADDR_OFF(3)(%r15), %rsi; \
call kdi_dreg_set; \
! popq %rdi;
/*
* Each cpusave buffer has an area set aside for a ring buffer of breadcrumbs.
* The following macros manage the buffer.
*/
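
With the MSR machinery removed, KDI_RESTORE_DEBUGGING_STATE reduces to
replaying the global kdi_drreg state into the hardware through kdi_dreg_set;
roughly, in C (the dr_ctl/dr_addr member names are illustrative):

    /* Sketch: program %dr7 and the breakpoint address registers. */
    static void
    restore_debug_regs(void)
    {
            kdi_dreg_set(7, kdi_drreg.dr_ctl);      /* %dr7 control word */
            for (int i = 0; i < 4; i++)
                    kdi_dreg_set(i, kdi_drreg.dr_addr[i]);  /* %dr0..%dr3 */
    }
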
*** 268,286 ****
/* Set a value in the current breadcrumb buffer */
#define ADD_CRUMB(cpusave, offset, value, tmp) \
movq KRS_CURCRUMB(cpusave), tmp; \
movq value, offset(tmp)
- #endif /* _ASM */
-
- #if defined(__lint)
- void
- kdi_cmnint(void)
- {
- }
- #else /* __lint */
-
/* XXX implement me */
ENTRY_NP(kdi_nmiint)
clrq %rcx
movq (%rcx), %rcx
SET_SIZE(kdi_nmiint)
--- 231,240 ----
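
The breadcrumb macros implement a small per-CPU ring buffer used to
reconstruct debugger-entry history post mortem. In outline, with member
names that are illustrative stands-in for the KRS_CURCRUMB layout:

    /* Sketch: claim the next crumb slot, wrapping after KDI_NCRUMBS. */
    static void
    advance_crumb_pointer(kdi_cpusave_t *save)
    {
            kdi_crumb_t *c = save->krs_curcrumb + 1;

            if (c >= &save->krs_crumbs[KDI_NCRUMBS])
                    c = &save->krs_crumbs[0];       /* wrap to the start */
            bzero(c, sizeof (*c));                  /* fresh slot for this entry */
            save->krs_curcrumb = c;
    }
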
*** 326,335 ****
--- 280,313 ----
movq %rax, %rdx
shrq $32, %rdx
movl $MSR_AMD_GSBASE, %ecx
wrmsr
+
+ /*
+ * In the trampoline we stashed the incoming %cr3. Copy this into
+ * the kdiregs for restoration and later use.
+ */
+ mov %gs:(CPU_KPTI_DBG+KPTI_TR_CR3), %rdx
+ mov %rdx, REG_OFF(KDIREG_CR3)(%rsp)
+ /*
+ * Switch to the kernel's %cr3. From the early interrupt handler
+ * until now we've been running on the "paranoid" %cr3 (that of kas
+ * from early in boot).
+ *
+ * If we took the interrupt from somewhere already on the kas/paranoid
+ * %cr3 though, don't change it (this could happen if kcr3 is corrupt
+ * and we took a gptrap earlier from this very code).
+ */
+ cmpq %rdx, kpti_safe_cr3
+ je .no_kcr3
+ mov %gs:CPU_KPTI_KCR3, %rdx
+ cmpq $0, %rdx
+ je .no_kcr3
+ mov %rdx, %cr3
+ .no_kcr3:
+
#endif /* __xpv */
GET_CPUSAVE_ADDR /* %rax = cpusave, %rbx = CPU ID */
ADVANCE_CRUMB_POINTER(%rax, %rcx, %rdx)
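
The %cr3 handling above, restated as a C sketch: stash the trampoline-saved
%cr3 into the register set, then move off the paranoid (kas) page tables onto
the kernel's KPTI page tables unless doing so looks unsafe. kpti_safe_cr3 and
setcr3() are real; the kd_* member names stand in for the KPTI_TR_CR3 and
CPU_KPTI_KCR3 assym offsets:

    /* Sketch of the KPTI %cr3 logic on master entry. */
    static void
    kpti_enter_cr3(kdi_regs_t *regs, struct kpti_frame *kd)
    {
            regs->kdr_cr3 = kd->kd_tr_cr3;  /* stashed by the trampoline */

            if (regs->kdr_cr3 != kpti_safe_cr3 &&   /* not already on kas %cr3 */
                kd->kd_kcr3 != 0)                   /* and kcr3 isn't obviously bad */
                    setcr3(kd->kd_kcr3);            /* onto kernel page tables */
    }
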
*** 349,365 ****
* Were we in the debugger when we took the trap (i.e. was %rsp in one
* of the debugger's memory ranges)?
*/
leaq kdi_memranges, %rcx
movl kdi_nmemranges, %edx
! 1: cmpq MR_BASE(%rcx), %rsp
jl 2f /* below this range -- try the next one */
cmpq MR_LIM(%rcx), %rsp
jg 2f /* above this range -- try the next one */
jmp 3f /* matched within this range */
! 2: decl %edx
jz kdi_save_common_state /* %rsp not within debugger memory */
addq $MR_SIZE, %rcx
jmp 1b
3: /*
--- 327,345 ----
* Were we in the debugger when we took the trap (i.e. was %rsp in one
* of the debugger's memory ranges)?
*/
leaq kdi_memranges, %rcx
movl kdi_nmemranges, %edx
! 1:
! cmpq MR_BASE(%rcx), %rsp
jl 2f /* below this range -- try the next one */
cmpq MR_LIM(%rcx), %rsp
jg 2f /* above this range -- try the next one */
jmp 3f /* matched within this range */
! 2:
! decl %edx
jz kdi_save_common_state /* %rsp not within debugger memory */
addq $MR_SIZE, %rcx
jmp 1b
3: /*
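
The loop in the hunk above is a linear scan of kdi_memranges; in C it amounts
to the following, with mr_base/mr_lim mirroring the MR_BASE/MR_LIM offsets:

    /* Sketch: was %rsp inside one of the debugger's memory ranges? */
    static boolean_t
    rsp_in_debugger(uintptr_t rsp)
    {
            kdi_memrange_t *mr = kdi_memranges;

            for (int i = kdi_nmemranges; i > 0; i--, mr++) {
                    if (rsp >= mr->mr_base && rsp <= mr->mr_lim)
                            return (B_TRUE);  /* trapped while in the debugger */
            }
            return (B_FALSE);  /* falls into kdi_save_common_state */
    }
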
*** 385,396 ****
jmp kdi_save_common_state
SET_SIZE(kdi_master_entry)
SET_SIZE(kdi_cmnint)
- #endif /* __lint */
-
/*
* The cross-call handler for slave CPUs.
*
* The debugger is single-threaded, so only one CPU, called the master, may be
* running it at any given time. The other CPUs, known as slaves, spin in a
--- 365,374 ----
*** 397,422 ****
* busy loop until there's something for them to do. This is the entry point
* for the slaves - they'll be sent here in response to a cross-call sent by the
* master.
*/
- #if defined(__lint)
- char kdi_slave_entry_patch;
-
- void
- kdi_slave_entry(void)
- {
- }
- #else /* __lint */
- .globl kdi_slave_entry_patch;
-
ENTRY_NP(kdi_slave_entry)
- /* kdi_msr_add_clrentry knows where this is */
- kdi_slave_entry_patch:
- KDI_MSR_PATCH;
-
/*
* Cross calls are implemented as function calls, so our stack currently
* looks like one you'd get from a zero-argument function call. That
* is, there's the return %rip at %rsp, and that's about it. We need
* to make it look like an interrupt stack. When we first save, we'll
--- 375,386 ----
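
The fixup described above ends with the same hardware-style trap frame that
kdi_cmnint receives. As a sketch, members ordered from the top of the stack
down, mirroring the "%ss, %rsp, %rflags, %cs, %rip, %err, %trapno" comment
earlier (the struct itself is illustrative, not a real kdi_regs.h type):

    /* Sketch: the interrupt-style frame the slave fabricates by hand. */
    struct kdi_iret_frame {
            uint64_t trapno;        /* phony: -1, pushed explicitly */
            uint64_t err;           /* phony error code */
            uint64_t rip;           /* the cross-call return address */
            uint64_t cs;
            uint64_t rflags;
            uint64_t rsp;
            uint64_t ss;
    };
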
*** 436,445 ****
--- 400,412 ----
pushq $-1 /* phony trap number */
subq $REG_OFF(KDIREG_TRAPNO), %rsp
KDI_SAVE_REGS(%rsp)
+ movq %cr3, %rax
+ movq %rax, REG_OFF(KDIREG_CR3)(%rsp)
+
movq REG_OFF(KDIREG_SS)(%rsp), %rax
xchgq REG_OFF(KDIREG_RIP)(%rsp), %rax
movq %rax, REG_OFF(KDIREG_SS)(%rsp)
movq REG_OFF(KDIREG_RSP)(%rsp), %rax
*** 463,474 ****
pushq %rax
jmp kdi_save_common_state
SET_SIZE(kdi_slave_entry)
- #endif /* __lint */
-
/*
* The state of the world:
*
* The stack has a complete set of saved registers and segment
* selectors, arranged in the kdi_regs.h order. It also has a pointer
--- 430,439 ----
*** 478,489 ****
* registers. First we check whether we should jump straight back to
* the kernel. If not, we save a few more registers, ready the
* machine for debugger entry, and enter the debugger.
*/
- #if !defined(__lint)
-
ENTRY_NP(kdi_save_common_state)
popq %rdi /* the cpusave area */
movq %rsp, KRS_GREGS(%rdi) /* save ptr to current saved regs */
--- 443,452 ----
*** 535,575 ****
call kdi_dreg_get
movq %rax, KRS_DROFF(3)(%r15)
movq %r15, %rax /* restore cpu save area to rax */
- /*
- * Save any requested MSRs.
- */
- movq KRS_MSR(%rax), %rcx
- cmpq $0, %rcx
- je no_msr
-
- pushq %rax /* rdmsr clobbers %eax */
- movq %rcx, %rbx
-
- 1:
- movl MSR_NUM(%rbx), %ecx
- cmpl $0, %ecx
- je msr_done
-
- movl MSR_TYPE(%rbx), %edx
- cmpl $KDI_MSR_READ, %edx
- jne msr_next
-
- rdmsr /* addr in %ecx, value into %edx:%eax */
- movl %eax, MSR_VAL(%rbx)
- movl %edx, _CONST(MSR_VAL + 4)(%rbx)
-
- msr_next:
- addq $MSR_SIZE, %rbx
- jmp 1b
-
- msr_done:
- popq %rax
-
- no_msr:
clrq %rbp /* stack traces should end here */
pushq %rax
movq %rax, %rdi /* cpusave */
--- 498,507 ----
*** 580,602 ****
jmp kdi_resume
SET_SIZE(kdi_save_common_state)
- #endif /* !__lint */
-
/*
* Resume the world. The code that calls kdi_resume has already
* decided whether or not to restore the IDT.
*/
- #if defined(__lint)
- void
- kdi_resume(void)
- {
- }
- #else /* __lint */
-
/* cpusave in %rdi */
ENTRY_NP(kdi_resume)
/*
* Send this CPU back into the world
--- 512,525 ----
*** 607,626 ****
#endif
KDI_RESTORE_DEBUGGING_STATE
movq KRS_GREGS(%rdi), %rsp
KDI_RESTORE_REGS(%rsp)
addq $REG_OFF(KDIREG_RIP), %rsp /* Discard state, trapno, err */
IRET
/*NOTREACHED*/
SET_SIZE(kdi_resume)
- #endif /* __lint */
-
- #if !defined(__lint)
-
ENTRY_NP(kdi_pass_to_kernel)
popq %rdi /* cpusave */
movq $KDI_CPU_STATE_NONE, KRS_CPU_STATE(%rdi)
--- 530,576 ----
#endif
KDI_RESTORE_DEBUGGING_STATE
movq KRS_GREGS(%rdi), %rsp
+
+ #if !defined(__xpv)
+ /*
+ * If we're going back via tr_iret_kdi, then we want to copy the
+ * final %cr3 we're going to back into the kpti_dbg area now.
+ *
+ * Since the trampoline needs to find the kpti_dbg too, we enter it
+ * with %r13 set to point at that. The real %r13 (to restore before
+ * the iret) we stash in the kpti_dbg itself.
+ */
+ movq %gs:CPU_SELF, %r13 /* can't leaq %gs:*, use self-ptr */
+ addq $CPU_KPTI_DBG, %r13
+
+ movq REG_OFF(KDIREG_R13)(%rsp), %rdx
+ movq %rdx, KPTI_R13(%r13)
+
+ movq REG_OFF(KDIREG_CR3)(%rsp), %rdx
+ movq %rdx, KPTI_TR_CR3(%r13)
+
+ /* The trampoline will undo this later. */
+ movq %r13, REG_OFF(KDIREG_R13)(%rsp)
+ #endif
+
KDI_RESTORE_REGS(%rsp)
addq $REG_OFF(KDIREG_RIP), %rsp /* Discard state, trapno, err */
+ /*
+ * The common trampoline code will restore %cr3 to the right value
+ * for either kernel or userland.
+ */
+ #if !defined(__xpv)
+ jmp tr_iret_kdi
+ #else
IRET
+ #endif
/*NOTREACHED*/
SET_SIZE(kdi_resume)
ENTRY_NP(kdi_pass_to_kernel)
popq %rdi /* cpusave */
movq $KDI_CPU_STATE_NONE, KRS_CPU_STATE(%rdi)
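
The non-xpv kdi_resume path above hands control to the KPTI trampoline rather
than issuing iretq itself. Its contract, sketched in C with kd_* members
standing in for the KPTI_R13/KPTI_TR_CR3 assym offsets:

    /* Sketch of the kdi_resume -> tr_iret_kdi handoff. */
    static void
    prep_tr_iret_kdi(kdi_regs_t *regs, struct kpti_frame *kd)
    {
            kd->kd_r13 = regs->kdr_r13;     /* real %r13, restored pre-iret */
            kd->kd_tr_cr3 = regs->kdr_cr3;  /* final %cr3 the trampoline loads */
            regs->kdr_r13 = (uintptr_t)kd;  /* trampoline finds kpti_dbg via %r13 */
            /* ... KDI_RESTORE_REGS(), then jmp tr_iret_kdi ... */
    }
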
*** 687,706 ****
#endif
/*NOTREACHED*/
SET_SIZE(kdi_reboot)
- #endif /* !__lint */
-
- #if defined(__lint)
- /*ARGSUSED*/
- void
- kdi_cpu_debug_init(kdi_cpusave_t *save)
- {
- }
- #else /* __lint */
-
ENTRY_NP(kdi_cpu_debug_init)
pushq %rbp
movq %rsp, %rbp
pushq %rbx /* macro will clobber %rbx */
--- 637,646 ----
*** 707,715 ****
KDI_RESTORE_DEBUGGING_STATE
popq %rbx
leave
ret
-
SET_SIZE(kdi_cpu_debug_init)
- #endif /* !__lint */
--- 647,680 ----
KDI_RESTORE_DEBUGGING_STATE
popq %rbx
leave
ret
SET_SIZE(kdi_cpu_debug_init)
+ #define GETDREG(name, r) \
+ ENTRY_NP(name); \
+ movq r, %rax; \
+ ret; \
+ SET_SIZE(name)
+
+ #define SETDREG(name, r) \
+ ENTRY_NP(name); \
+ movq %rdi, r; \
+ ret; \
+ SET_SIZE(name)
+
+ GETDREG(kdi_getdr0, %dr0)
+ GETDREG(kdi_getdr1, %dr1)
+ GETDREG(kdi_getdr2, %dr2)
+ GETDREG(kdi_getdr3, %dr3)
+ GETDREG(kdi_getdr6, %dr6)
+ GETDREG(kdi_getdr7, %dr7)
+
+ SETDREG(kdi_setdr0, %dr0)
+ SETDREG(kdi_setdr1, %dr1)
+ SETDREG(kdi_setdr2, %dr2)
+ SETDREG(kdi_setdr3, %dr3)
+ SETDREG(kdi_setdr6, %dr6)
+ SETDREG(kdi_setdr7, %dr7)
+
+ #endif /* !__lint */
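
The generated accessors give C callers direct debug-register access; per the
SysV ABI, the argument arrives in %rdi and the result returns in %rax, so
their effective prototypes are simply:

    /* Effective prototypes for the GETDREG/SETDREG-generated routines. */
    extern ulong_t kdi_getdr0(void);        /* movq %dr0, %rax; ret */
    extern void kdi_setdr0(ulong_t value);  /* movq %rdi, %dr0; ret */
    /* ... and likewise for %dr1-%dr3, %dr6, and %dr7 ... */
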