9441 kmdb should stash %cr3 in kdiregs
Reviewed by: John Levon <john.levon@joyent.com>
8956 Implement KPTI
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
@@ -20,21 +20,22 @@
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ *
+ * Copyright 2018 Joyent, Inc.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
/*
- * Debugger entry for both master and slave CPUs
+ * Debugger entry and exit for both master and slave CPUs. kdi_idthdl.s contains
+ * the IDT stubs that drop into here (mainly via kdi_cmnint).
*/
#if defined(__lint)
#include <sys/types.h>
-#endif
+#else
#include <sys/segments.h>
#include <sys/asm_linkage.h>
#include <sys/controlregs.h>
#include <sys/x86_archext.h>
@@ -44,13 +45,10 @@
#include <sys/psw.h>
#include <sys/uadmin.h>
#ifdef __xpv
#include <sys/hypervisor.h>
#endif
-
-#ifdef _ASM
-
#include <kdi_assym.h>
#include <assym.h>
/* clobbers %rdx, %rcx, returns addr in %rax, CPU ID in %rbx */
#define GET_CPUSAVE_ADDR \
@@ -78,10 +76,13 @@
movq %r11, KRS_GDT(%rax); \
1:
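In C terms, GET_CPUSAVE_ADDR just indexes the per-CPU save-area array by CPU ID. A minimal sketch, assuming a flat kdi_cpusave_t array (the struct body below is a stand-in; the real layout lives in kdi_state.h):

	#include <stdint.h>

	typedef struct kdi_cpusave {
		uint64_t krs_fields[64];	/* stand-in for the real body */
	} kdi_cpusave_t;

	extern kdi_cpusave_t kdi_cpusave[];	/* one entry per CPU */

	/* The macro leaves this address in %rax and the CPU ID in %rbx. */
	static kdi_cpusave_t *
	get_cpusave_addr(uint64_t cpuid)
	{
		return (&kdi_cpusave[cpuid]);
	}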
#ifdef __xpv
+/*
+ * Already on kernel gsbase via the hypervisor.
+ */
#define SAVE_GSBASE(reg) /* nothing */
#define RESTORE_GSBASE(reg) /* nothing */
#else
@@ -88,12 +89,20 @@
#define SAVE_GSBASE(base) \
movl $MSR_AMD_GSBASE, %ecx; \
rdmsr; \
shlq $32, %rdx; \
orq %rax, %rdx; \
- movq %rdx, REG_OFF(KDIREG_GSBASE)(base)
+ movq %rdx, REG_OFF(KDIREG_GSBASE)(base); \
+ movl $MSR_AMD_KGSBASE, %ecx; \
+ rdmsr; \
+ shlq $32, %rdx; \
+ orq %rax, %rdx; \
+ movq %rdx, REG_OFF(KDIREG_KGSBASE)(base)
+/*
+ * We shouldn't have stomped on KGSBASE, so don't try to restore it.
+ */
#define RESTORE_GSBASE(base) \
movq REG_OFF(KDIREG_GSBASE)(base), %rdx; \
movq %rdx, %rax; \
shrq $32, %rdx; \
movl $MSR_AMD_GSBASE, %ecx; \
@@ -100,13 +109,11 @@
wrmsr
#endif /* __xpv */
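The rdmsr/wrmsr convention puts the low 32 bits of an MSR in %eax and the high 32 in %edx; the shlq/orq pair above merges the halves, and the shrq in RESTORE_GSBASE splits them back apart. The same arithmetic in C (the function names are illustrative):

	#include <stdint.h>

	/* Combine rdmsr's %edx:%eax halves into one 64-bit value. */
	static uint64_t
	msr_combine(uint32_t eax, uint32_t edx)
	{
		return (((uint64_t)edx << 32) | eax);
	}

	/* Split a 64-bit value back into the halves wrmsr expects. */
	static void
	msr_split(uint64_t val, uint32_t *eax, uint32_t *edx)
	{
		*eax = (uint32_t)val;
		*edx = (uint32_t)(val >> 32);
	}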
/*
- * %ss, %rsp, %rflags, %cs, %rip, %err, %trapno are already on the stack. Note
- * that on the hypervisor, we skip the save/restore of GSBASE: it's slow, and
- * unnecessary.
+ * %ss, %rsp, %rflags, %cs, %rip, %err, %trapno are already on the stack.
*/
#define KDI_SAVE_REGS(base) \
movq %rdi, REG_OFF(KDIREG_RDI)(base); \
movq %rsi, REG_OFF(KDIREG_RSI)(base); \
movq %rdx, REG_OFF(KDIREG_RDX)(base); \
@@ -123,10 +130,12 @@
movq %r14, REG_OFF(KDIREG_R14)(base); \
movq %r15, REG_OFF(KDIREG_R15)(base); \
movq %rbp, REG_OFF(KDIREG_SAVFP)(base); \
movq REG_OFF(KDIREG_RIP)(base), %rax; \
movq %rax, REG_OFF(KDIREG_SAVPC)(base); \
+ movq %cr2, %rax; \
+ movq %rax, REG_OFF(KDIREG_CR2)(base); \
clrq %rax; \
movw %ds, %ax; \
movq %rax, REG_OFF(KDIREG_DS)(base); \
movw %es, %ax; \
movq %rax, REG_OFF(KDIREG_ES)(base); \
@@ -141,10 +150,12 @@
RESTORE_GSBASE(%rdi); \
movq REG_OFF(KDIREG_ES)(%rdi), %rax; \
movw %ax, %es; \
movq REG_OFF(KDIREG_DS)(%rdi), %rax; \
movw %ax, %ds; \
+ movq REG_OFF(KDIREG_CR2)(base), %rax; \
+ movq %rax, %cr2; \
movq REG_OFF(KDIREG_R15)(%rdi), %r15; \
movq REG_OFF(KDIREG_R14)(%rdi), %r14; \
movq REG_OFF(KDIREG_R13)(%rdi), %r13; \
movq REG_OFF(KDIREG_R12)(%rdi), %r12; \
movq REG_OFF(KDIREG_R11)(%rdi), %r11; \
@@ -160,18 +171,13 @@
movq REG_OFF(KDIREG_RDI)(%rdi), %rdi
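%cr2 rides along in the frame because any page fault taken inside the debugger overwrites it; the saved value is put back on exit so the interrupted kernel sees its own fault address. A sketch of the idea, assuming getcr2()/setcr2() style accessors:

	#include <stdint.h>

	extern uint64_t getcr2(void);	/* assumed accessor */
	extern void setcr2(uint64_t);	/* assumed accessor */

	static uint64_t saved_cr2;	/* lives in the KDIREG frame in the asm */

	static void
	kdi_entry_save_cr2(void)
	{
		saved_cr2 = getcr2();	/* KDI_SAVE_REGS */
	}

	static void
	kdi_exit_restore_cr2(void)
	{
		setcr2(saved_cr2);	/* KDI_RESTORE_REGS */
	}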
/*
* Given the address of the current CPU's cpusave area in %rdi, the following
* macro restores the debugging state to said CPU. Restored state includes
- * the debug registers from the global %dr variables, and debugging MSRs from
- * the CPU save area. This code would be in a separate routine, but for the
- * fact that some of the MSRs are jump-sensitive. As such, we need to minimize
- * the number of jumps taken subsequent to the update of said MSRs. We can
- * remove one jump (the ret) by using a macro instead of a function for the
- * debugging state restoration code.
+ * the debug registers from the global %dr variables.
*
- * Takes the cpusave area in %rdi as a parameter, clobbers %rax-%rdx
+ * Takes the cpusave area in %rdi as a parameter.
*/
#define KDI_RESTORE_DEBUGGING_STATE \
pushq %rdi; \
leaq kdi_drreg(%rip), %r15; \
movl $7, %edi; \
@@ -192,54 +198,11 @@
movq DRADDR_OFF(2)(%r15), %rsi; \
call kdi_dreg_set; \
movl $3, %edi; \
movq DRADDR_OFF(3)(%r15), %rsi; \
call kdi_dreg_set; \
- popq %rdi; \
- \
- /* \
- * Write any requested MSRs. \
- */ \
- movq KRS_MSR(%rdi), %rbx; \
- cmpq $0, %rbx; \
- je 3f; \
-1: \
- movl MSR_NUM(%rbx), %ecx; \
- cmpl $0, %ecx; \
- je 3f; \
- \
- movl MSR_TYPE(%rbx), %edx; \
- cmpl $KDI_MSR_WRITE, %edx; \
- jne 2f; \
- \
- movq MSR_VALP(%rbx), %rdx; \
- movl 0(%rdx), %eax; \
- movl 4(%rdx), %edx; \
- wrmsr; \
-2: \
- addq $MSR_SIZE, %rbx; \
- jmp 1b; \
-3: \
- /* \
- * We must not branch after re-enabling LBR. If \
- * kdi_wsr_wrexit_msr is set, it contains the number \
- * of the MSR that controls LBR. kdi_wsr_wrexit_valp \
- * contains the value that is to be written to enable \
- * LBR. \
- */ \
- leaq kdi_msr_wrexit_msr(%rip), %rcx; \
- movl (%rcx), %ecx; \
- cmpl $0, %ecx; \
- je 1f; \
- \
- leaq kdi_msr_wrexit_valp(%rip), %rdx; \
- movq (%rdx), %rdx; \
- movl 0(%rdx), %eax; \
- movl 4(%rdx), %edx; \
- \
- wrmsr; \
-1:
+ popq %rdi;
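Roughly, the macro above is this loop in C: the %dr7 control word first, then the four address registers, all sourced from the global kdi_drreg. The struct shape is inferred from the DR_CTL/DRADDR_OFF offsets, and the kdi_dreg_set signature is assumed:

	#include <stdint.h>

	extern void kdi_dreg_set(int dreg, uint64_t value);	/* assumed */

	/* Shape inferred from the DR_CTL/DRADDR_OFF offsets used above. */
	struct kdi_drreg_sketch {
		uint64_t dr_ctl;	/* %dr7 control word */
		uint64_t dr_addr[4];	/* %dr0..%dr3 breakpoint addresses */
	};
	extern struct kdi_drreg_sketch kdi_drreg;

	static void
	kdi_restore_debugging_state(void)
	{
		int i;

		kdi_dreg_set(7, kdi_drreg.dr_ctl);
		for (i = 0; i < 4; i++)
			kdi_dreg_set(i, kdi_drreg.dr_addr[i]);
	}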
/*
* Each cpusave buffer has an area set aside for a ring buffer of breadcrumbs.
* The following macros manage the buffer.
*/
@@ -268,19 +231,10 @@
/* Set a value in the current breadcrumb buffer */
#define ADD_CRUMB(cpusave, offset, value, tmp) \
movq KRS_CURCRUMB(cpusave), tmp; \
movq value, offset(tmp)
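The crumb machinery is a per-CPU ring: ADVANCE_CRUMB_POINTER steps KRS_CURCRUMB to the next slot (wrapping) and clears it, and ADD_CRUMB stores one field into the current slot. A C sketch with an illustrative crumb layout and ring size:

	#include <stdint.h>
	#include <string.h>

	#define	KDI_NCRUMBS	5	/* illustrative ring size */

	typedef struct kdi_crumb {
		uint64_t krm_pc;	/* field names are stand-ins */
		uint64_t krm_sp;
		uint64_t krm_trapno;
	} kdi_crumb_t;

	struct crumb_ring {
		kdi_crumb_t kcs_crumbs[KDI_NCRUMBS];	/* KRS_CRUMBS */
		kdi_crumb_t *kcs_cur;			/* KRS_CURCRUMB */
	};

	/* ADVANCE_CRUMB_POINTER: bump, wrap, and zero the new slot. */
	static void
	advance_crumb(struct crumb_ring *r)
	{
		if (++r->kcs_cur == &r->kcs_crumbs[KDI_NCRUMBS])
			r->kcs_cur = &r->kcs_crumbs[0];
		(void) memset(r->kcs_cur, 0, sizeof (*r->kcs_cur));
	}

	/* ADD_CRUMB: record one value in the current crumb. */
	static void
	add_crumb_pc(struct crumb_ring *r, uint64_t pc)
	{
		r->kcs_cur->krm_pc = pc;
	}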
-#endif /* _ASM */
-
-#if defined(__lint)
-void
-kdi_cmnint(void)
-{
-}
-#else /* __lint */
-
/* XXX implement me */
ENTRY_NP(kdi_nmiint)
clrq %rcx
movq (%rcx), %rcx
SET_SIZE(kdi_nmiint)
@@ -326,10 +280,34 @@
movq %rax, %rdx
shrq $32, %rdx
movl $MSR_AMD_GSBASE, %ecx
wrmsr
+
+ /*
+ * In the trampoline we stashed the incoming %cr3. Copy this into
+ * the kdiregs for restoration and later use.
+ */
+ mov %gs:(CPU_KPTI_DBG+KPTI_TR_CR3), %rdx
+ mov %rdx, REG_OFF(KDIREG_CR3)(%rsp)
+ /*
+ * Switch to the kernel's %cr3. From the early interrupt handler
+ * until now we've been running on the "paranoid" %cr3 (that of kas
+ * from early in boot).
+ *
+ * If we took the interrupt from somewhere already on the kas/paranoid
+ * %cr3 though, don't change it (this could happen if kcr3 is corrupt
+ * and we took a gptrap earlier from this very code).
+ */
+ cmpq %rdx, kpti_safe_cr3
+ je .no_kcr3
+ mov %gs:CPU_KPTI_KCR3, %rdx
+ cmpq $0, %rdx
+ je .no_kcr3
+ mov %rdx, %cr3
+.no_kcr3:
+
#endif /* __xpv */
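Stripped of the %gs loads, the decision above is simple; a C sketch against the kpti_safe_cr3 global named in the assembly (the helper itself is illustrative):

	#include <stdint.h>

	extern uint64_t kpti_safe_cr3;	/* the early-boot kas %cr3 */

	/*
	 * Decide which %cr3 to run the debugger on: move to the kernel's
	 * kcr3 unless we were already on the safe/paranoid %cr3, or kcr3
	 * looks unusable.
	 */
	static uint64_t
	choose_debugger_cr3(uint64_t incoming_cr3, uint64_t kcr3)
	{
		if (incoming_cr3 == kpti_safe_cr3 || kcr3 == 0)
			return (incoming_cr3);	/* stay put */
		return (kcr3);
	}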
GET_CPUSAVE_ADDR /* %rax = cpusave, %rbx = CPU ID */
ADVANCE_CRUMB_POINTER(%rax, %rcx, %rdx)
@@ -349,17 +327,19 @@
* Were we in the debugger when we took the trap (i.e. was %rsp in one
* of the debugger's memory ranges)?
*/
leaq kdi_memranges, %rcx
movl kdi_nmemranges, %edx
-1: cmpq MR_BASE(%rcx), %rsp
+1:
+ cmpq MR_BASE(%rcx), %rsp
jl 2f /* below this range -- try the next one */
cmpq MR_LIM(%rcx), %rsp
jg 2f /* above this range -- try the next one */
jmp 3f /* matched within this range */
-2: decl %edx
+2:
+ decl %edx
jz kdi_save_common_state /* %rsp not within debugger memory */
addq $MR_SIZE, %rcx
jmp 1b
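In C, that scan is a straightforward inclusive range check over the kdi_memranges table (struct fields mirror the MR_BASE/MR_LIM offsets; the names here are illustrative):

	#include <stdint.h>

	typedef struct kdi_memrange {
		uintptr_t mr_base;	/* MR_BASE */
		uintptr_t mr_lim;	/* MR_LIM */
	} kdi_memrange_t;

	extern kdi_memrange_t kdi_memranges[];
	extern int kdi_nmemranges;

	/* Was %rsp inside one of the debugger's own memory ranges? */
	static int
	rsp_in_debugger(uintptr_t rsp)
	{
		int i;

		for (i = 0; i < kdi_nmemranges; i++) {
			if (rsp >= kdi_memranges[i].mr_base &&
			    rsp <= kdi_memranges[i].mr_lim)
				return (1);
		}
		return (0);
	}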
3: /*
@@ -385,12 +365,10 @@
jmp kdi_save_common_state
SET_SIZE(kdi_master_entry)
SET_SIZE(kdi_cmnint)
-#endif /* __lint */
-
/*
* The cross-call handler for slave CPUs.
*
* The debugger is single-threaded, so only one CPU, called the master, may be
* running it at any given time. The other CPUs, known as slaves, spin in a
@@ -397,26 +375,12 @@
* busy loop until there's something for them to do. This is the entry point
* for the slaves - they'll be sent here in response to a cross-call sent by the
* master.
*/
-#if defined(__lint)
-char kdi_slave_entry_patch;
-
-void
-kdi_slave_entry(void)
-{
-}
-#else /* __lint */
- .globl kdi_slave_entry_patch;
-
ENTRY_NP(kdi_slave_entry)
- /* kdi_msr_add_clrentry knows where this is */
-kdi_slave_entry_patch:
- KDI_MSR_PATCH;
-
/*
* Cross calls are implemented as function calls, so our stack currently
* looks like one you'd get from a zero-argument function call. That
* is, there's the return %rip at %rsp, and that's about it. We need
* to make it look like an interrupt stack. When we first save, we'll
@@ -436,10 +400,13 @@
pushq $-1 /* phony trap number */
subq $REG_OFF(KDIREG_TRAPNO), %rsp
KDI_SAVE_REGS(%rsp)
+ movq %cr3, %rax
+ movq %rax, REG_OFF(KDIREG_CR3)(%rsp)
+
movq REG_OFF(KDIREG_SS)(%rsp), %rax
xchgq REG_OFF(KDIREG_RIP)(%rsp), %rax
movq %rax, REG_OFF(KDIREG_SS)(%rsp)
movq REG_OFF(KDIREG_RSP)(%rsp), %rax
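The xchgq dance is just a swap of the %rip and %ss slots, moving the cross-call return address into the place an interrupt frame keeps %rip. In C, with illustrative frame indices:

	#include <stdint.h>

	enum { KDIREG_RIP = 0, KDIREG_SS = 1 };	/* illustrative indices */

	/*
	 * A cross call leaves just a return %rip on the stack, but the
	 * saved frame must read like an interrupt frame: swap the slots.
	 */
	static void
	swap_rip_ss(uint64_t *frame)
	{
		uint64_t tmp = frame[KDIREG_SS];

		frame[KDIREG_SS] = frame[KDIREG_RIP];
		frame[KDIREG_RIP] = tmp;
	}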
@@ -463,12 +430,10 @@
pushq %rax
jmp kdi_save_common_state
SET_SIZE(kdi_slave_entry)
-#endif /* __lint */
-
/*
* The state of the world:
*
* The stack has a complete set of saved registers and segment
* selectors, arranged in the kdi_regs.h order. It also has a pointer
@@ -478,12 +443,10 @@
* registers. First we check whether we should jump straight back to
* the kernel. If not, we save a few more registers, ready the
* machine for debugger entry, and enter the debugger.
*/
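Pictured as C, the stack at this point is the KDIREG frame with the cpusave pointer pushed on top of it; kdi_save_common_state begins by popping that pointer into %rdi. The struct below is only a picture of the layout (the real order and count come from kdi_regs.h):

	#include <stdint.h>

	#define	KDIREG_NGREG	31	/* illustrative count only */

	struct kdi_cpusave;

	struct kdi_entry_stack {
		struct kdi_cpusave *cpusave;	/* popq %rdi consumes this */
		uint64_t gregs[KDIREG_NGREG];	/* saved register frame */
	};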
-#if !defined(__lint)
-
ENTRY_NP(kdi_save_common_state)
popq %rdi /* the cpusave area */
movq %rsp, KRS_GREGS(%rdi) /* save ptr to current saved regs */
@@ -535,41 +498,10 @@
call kdi_dreg_get
movq %rax, KRS_DROFF(3)(%r15)
movq %r15, %rax /* restore cpu save area to rax */
- /*
- * Save any requested MSRs.
- */
- movq KRS_MSR(%rax), %rcx
- cmpq $0, %rcx
- je no_msr
-
- pushq %rax /* rdmsr clobbers %eax */
- movq %rcx, %rbx
-
-1:
- movl MSR_NUM(%rbx), %ecx
- cmpl $0, %ecx
- je msr_done
-
- movl MSR_TYPE(%rbx), %edx
- cmpl $KDI_MSR_READ, %edx
- jne msr_next
-
- rdmsr /* addr in %ecx, value into %edx:%eax */
- movl %eax, MSR_VAL(%rbx)
- movl %edx, _CONST(MSR_VAL + 4)(%rbx)
-
-msr_next:
- addq $MSR_SIZE, %rbx
- jmp 1b
-
-msr_done:
- popq %rax
-
-no_msr:
clrq %rbp /* stack traces should end here */
pushq %rax
movq %rax, %rdi /* cpusave */
@@ -580,23 +512,14 @@
jmp kdi_resume
SET_SIZE(kdi_save_common_state)
-#endif /* !__lint */
-
/*
* Resume the world. The code that calls kdi_resume has already
* decided whether or not to restore the IDT.
*/
-#if defined(__lint)
-void
-kdi_resume(void)
-{
-}
-#else /* __lint */
-
/* cpusave in %rdi */
ENTRY_NP(kdi_resume)
/*
* Send this CPU back into the world
@@ -607,20 +530,47 @@
#endif
KDI_RESTORE_DEBUGGING_STATE
movq KRS_GREGS(%rdi), %rsp
+
+#if !defined(__xpv)
+ /*
+ * If we're going back via tr_iret_kdi, then we want to copy the
+ * final %cr3 we're going to use back into the kpti_dbg area now.
+ *
+ * Since the trampoline needs to find the kpti_dbg too, we enter it
+ * with %r13 set to point at that. The real %r13 (to restore before
+ * the iret) we stash in the kpti_dbg itself.
+ */
+ movq %gs:CPU_SELF, %r13 /* can't leaq %gs:*, use self-ptr */
+ addq $CPU_KPTI_DBG, %r13
+
+ movq REG_OFF(KDIREG_R13)(%rsp), %rdx
+ movq %rdx, KPTI_R13(%r13)
+
+ movq REG_OFF(KDIREG_CR3)(%rsp), %rdx
+ movq %rdx, KPTI_TR_CR3(%r13)
+
+ /* The trampoline will undo this later. */
+ movq %r13, REG_OFF(KDIREG_R13)(%rsp)
+#endif
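The %r13 handoff above amounts to the following; a sketch, with a struct whose fields mirror the KPTI_R13/KPTI_TR_CR3 offsets (the real layout is in the kpti_frame definitions):

	#include <stdint.h>

	struct kpti_dbg_sketch {
		uint64_t kf_r13;	/* KPTI_R13: real %r13 to restore */
		uint64_t kf_tr_cr3;	/* KPTI_TR_CR3: %cr3 for the iret */
	};

	/*
	 * Before jumping to tr_iret_kdi: stash the interrupted %r13 and
	 * the final %cr3 in the per-CPU kpti_dbg, then point the frame's
	 * %r13 slot at it so the trampoline can find it.
	 */
	static void
	stage_tr_iret_kdi(struct kpti_dbg_sketch *dbg, uint64_t *frame_r13,
	    uint64_t frame_cr3)
	{
		dbg->kf_r13 = *frame_r13;	/* restored before the iret */
		dbg->kf_tr_cr3 = frame_cr3;	/* loaded by the trampoline */
		*frame_r13 = (uint64_t)(uintptr_t)dbg;	/* handoff pointer */
	}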
+
KDI_RESTORE_REGS(%rsp)
addq $REG_OFF(KDIREG_RIP), %rsp /* Discard state, trapno, err */
+ /*
+ * The common trampoline code will restore %cr3 to the right value
+ * for either kernel or userland.
+ */
+#if !defined(__xpv)
+ jmp tr_iret_kdi
+#else
IRET
+#endif
/*NOTREACHED*/
SET_SIZE(kdi_resume)
-#endif /* __lint */
-
-#if !defined(__lint)
-
ENTRY_NP(kdi_pass_to_kernel)
popq %rdi /* cpusave */
movq $KDI_CPU_STATE_NONE, KRS_CPU_STATE(%rdi)
@@ -687,20 +637,10 @@
#endif
/*NOTREACHED*/
SET_SIZE(kdi_reboot)
-#endif /* !__lint */
-
-#if defined(__lint)
-/*ARGSUSED*/
-void
-kdi_cpu_debug_init(kdi_cpusave_t *save)
-{
-}
-#else /* __lint */
-
ENTRY_NP(kdi_cpu_debug_init)
pushq %rbp
movq %rsp, %rbp
pushq %rbx /* macro will clobber %rbx */
@@ -707,9 +647,34 @@
KDI_RESTORE_DEBUGGING_STATE
popq %rbx
leave
ret
-
SET_SIZE(kdi_cpu_debug_init)
-#endif /* !__lint */
+#define GETDREG(name, r) \
+ ENTRY_NP(name); \
+ movq r, %rax; \
+ ret; \
+ SET_SIZE(name)
+
+#define SETDREG(name, r) \
+ ENTRY_NP(name); \
+ movq %rdi, r; \
+ ret; \
+ SET_SIZE(name)
+
+ GETDREG(kdi_getdr0, %dr0)
+ GETDREG(kdi_getdr1, %dr1)
+ GETDREG(kdi_getdr2, %dr2)
+ GETDREG(kdi_getdr3, %dr3)
+ GETDREG(kdi_getdr6, %dr6)
+ GETDREG(kdi_getdr7, %dr7)
+
+ SETDREG(kdi_setdr0, %dr0)
+ SETDREG(kdi_setdr1, %dr1)
+ SETDREG(kdi_setdr2, %dr2)
+ SETDREG(kdi_setdr3, %dr3)
+ SETDREG(kdi_setdr6, %dr6)
+ SETDREG(kdi_setdr7, %dr7)
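Each GETDREG/SETDREG expansion is a two-instruction accessor; from C, the generated interface amounts to the prototypes below (the 64-bit types are assumed here), for example:

	#include <stdint.h>

	extern uint64_t kdi_getdr0(void);
	extern uint64_t kdi_getdr6(void);
	extern void kdi_setdr0(uint64_t);

	/* Example: plant a breakpoint address, then read %dr6 status. */
	static uint64_t
	arm_dr0(uint64_t addr)
	{
		kdi_setdr0(addr);
		return (kdi_getdr6());
	}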
+
+#endif /* !__lint */