Print this page
8956 Implement KPTI
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
@@ -30,11 +30,11 @@
/* Copyright (c) 1987, 1988 Microsoft Corporation */
/* All Rights Reserved */
/* */
/*
- * Copyright 2017 Joyent, Inc.
+ * Copyright 2018 Joyent, Inc.
*/
#include <sys/types.h>
#include <sys/sysmacros.h>
#include <sys/param.h>
@@ -478,11 +478,10 @@
int mstate;
int sicode = 0;
int watchcode;
int watchpage;
caddr_t vaddr;
- int singlestep_twiddle;
size_t sz;
int ta;
#ifdef __amd64
uchar_t instr;
#endif
@@ -1089,62 +1088,39 @@
(void) die(type, rp, addr, cpuid);
break;
case T_SGLSTP: /* single step/hw breakpoint exception */
- /* Now evaluate how we got here */
- if (lwp != NULL && (lwp->lwp_pcb.pcb_drstat & DR_SINGLESTEP)) {
+#if !defined(__xpv)
/*
- * i386 single-steps even through lcalls which
- * change the privilege level. So we take a trap at
- * the first instruction in privileged mode.
- *
- * Set a flag to indicate that upon completion of
- * the system call, deal with the single-step trap.
- *
- * The same thing happens for sysenter, too.
+ * We'd never normally get here, as kmdb handles its own single
+ * step traps. There is one nasty exception though, as
+ * described in more detail in sys_sysenter(). Note that
+ * checking for all four locations covers both the KPTI and the
+ * non-KPTI cases correctly: the former will never be found at
+ * (brand_)sys_sysenter, and vice versa.
*/
- singlestep_twiddle = 0;
- if (rp->r_pc == (uintptr_t)sys_sysenter ||
- rp->r_pc == (uintptr_t)brand_sys_sysenter) {
- singlestep_twiddle = 1;
-#if defined(__amd64)
- /*
- * Since we are already on the kernel's
- * %gs, on 64-bit systems the sysenter case
- * needs to adjust the pc to avoid
- * executing the swapgs instruction at the
- * top of the handler.
- */
- if (rp->r_pc == (uintptr_t)sys_sysenter)
- rp->r_pc = (uintptr_t)
- _sys_sysenter_post_swapgs;
- else
- rp->r_pc = (uintptr_t)
- _brand_sys_sysenter_post_swapgs;
-#endif
- }
-#if defined(__i386)
- else if (rp->r_pc == (uintptr_t)sys_call ||
- rp->r_pc == (uintptr_t)brand_sys_call) {
- singlestep_twiddle = 1;
- }
-#endif
- else {
- /* not on sysenter/syscall; uregs available */
- if (tudebug && tudebugbpt)
- showregs(type, rp, (caddr_t)0);
- }
- if (singlestep_twiddle) {
+ if (lwp != NULL && (lwp->lwp_pcb.pcb_drstat & DR_SINGLESTEP)) {
+ if (rp->r_pc == (greg_t)brand_sys_sysenter ||
+ rp->r_pc == (greg_t)sys_sysenter ||
+ rp->r_pc == (greg_t)tr_brand_sys_sysenter ||
+ rp->r_pc == (greg_t)tr_sys_sysenter) {
+
+ rp->r_pc += 0x3; /* sizeof (swapgs) */
+
rp->r_ps &= ~PS_T; /* turn off trace */
lwp->lwp_pcb.pcb_flags |= DEBUG_PENDING;
ct->t_post_sys = 1;
aston(curthread);
goto cleanup;
+ } else {
+ if (tudebug && tudebugbpt)
+ showregs(type, rp, (caddr_t)0);
}
}
- /* XXX - needs review on debugger interface? */
+#endif /* !__xpv */
+
if (boothowto & RB_DEBUG)
debug_enter((char *)NULL);
else
(void) die(type, rp, addr, cpuid);
break;
@@ -1736,13 +1712,13 @@
#else
printf("cr0: %b cr4: %b\n",
(uint_t)getcr0(), FMT_CR0, (uint_t)getcr4(), FMT_CR4);
#endif /* __lint */
- printf("cr2: %lx", getcr2());
+ printf("cr2: %lx ", getcr2());
#if !defined(__xpv)
- printf("cr3: %lx", getcr3());
+ printf("cr3: %lx ", getcr3());
#if defined(__amd64)
printf("cr8: %lx\n", getcr8());
#endif
#endif
printf("\n");
@@ -1844,11 +1820,12 @@
}
#endif /* __i386 */
/*
- * Test to see if the instruction is part of _sys_rtt.
+ * Test to see if the instruction is part of _sys_rtt (or the KPTI trampolines
+ * which are used by _sys_rtt).
*
* Again, on the hypervisor, if we try to IRET to user land with a bad code
* or stack selector we will get vectored through xen_failsafe_callback,
* in which case we assume we got here via _sys_rtt, since we only allow
* IRET to user land to take place in _sys_rtt.
@@ -1856,10 +1833,23 @@
static int
instr_is_sys_rtt(caddr_t pc)
{
extern void _sys_rtt(), _sys_rtt_end();
+#if !defined(__xpv)
+ extern void tr_sysc_ret_start(), tr_sysc_ret_end();
+ extern void tr_intr_ret_start(), tr_intr_ret_end();
+
+ if ((uintptr_t)pc >= (uintptr_t)tr_sysc_ret_start &&
+ (uintptr_t)pc <= (uintptr_t)tr_sysc_ret_end)
+ return (1);
+
+ if ((uintptr_t)pc >= (uintptr_t)tr_intr_ret_start &&
+ (uintptr_t)pc <= (uintptr_t)tr_intr_ret_end)
+ return (1);
+#endif
+
if ((uintptr_t)pc < (uintptr_t)_sys_rtt ||
(uintptr_t)pc > (uintptr_t)_sys_rtt_end)
return (0);
return (1);