Print this page
8956 Implement KPTI
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
        
*** 30,40 ****
  /*      Copyright (c) 1987, 1988 Microsoft Corporation          */
  /*              All Rights Reserved                             */
  /*                                                              */
  
  /*
!  * Copyright 2017 Joyent, Inc.
   */
  
  #include <sys/types.h>
  #include <sys/sysmacros.h>
  #include <sys/param.h>
--- 30,40 ----
  /*      Copyright (c) 1987, 1988 Microsoft Corporation          */
  /*              All Rights Reserved                             */
  /*                                                              */
  
  /*
!  * Copyright 2018 Joyent, Inc.
   */
  
  #include <sys/types.h>
  #include <sys/sysmacros.h>
  #include <sys/param.h>
*** 478,488 ****
          int mstate;
          int sicode = 0;
          int watchcode;
          int watchpage;
          caddr_t vaddr;
-         int singlestep_twiddle;
          size_t sz;
          int ta;
  #ifdef __amd64
          uchar_t instr;
  #endif
--- 478,487 ----
*** 1089,1150 ****
                  (void) die(type, rp, addr, cpuid);
                  break;
  
          case T_SGLSTP: /* single step/hw breakpoint exception */
  
!                 /* Now evaluate how we got here */
!                 if (lwp != NULL && (lwp->lwp_pcb.pcb_drstat & DR_SINGLESTEP)) {
                          /*
!                          * i386 single-steps even through lcalls which
!                          * change the privilege level. So we take a trap at
!                          * the first instruction in privileged mode.
!                          *
!                          * Set a flag to indicate that upon completion of
!                          * the system call, deal with the single-step trap.
!                          *
!                          * The same thing happens for sysenter, too.
                           */
!                         singlestep_twiddle = 0;
!                         if (rp->r_pc == (uintptr_t)sys_sysenter ||
!                             rp->r_pc == (uintptr_t)brand_sys_sysenter) {
!                                 singlestep_twiddle = 1;
! #if defined(__amd64)
!                                 /*
!                                  * Since we are already on the kernel's
!                                  * %gs, on 64-bit systems the sysenter case
!                                  * needs to adjust the pc to avoid
!                                  * executing the swapgs instruction at the
!                                  * top of the handler.
!                                  */
!                                 if (rp->r_pc == (uintptr_t)sys_sysenter)
!                                         rp->r_pc = (uintptr_t)
!                                             _sys_sysenter_post_swapgs;
!                                 else
!                                         rp->r_pc = (uintptr_t)
!                                             _brand_sys_sysenter_post_swapgs;
! #endif
!                         }
! #if defined(__i386)
!                         else if (rp->r_pc == (uintptr_t)sys_call ||
!                             rp->r_pc == (uintptr_t)brand_sys_call) {
!                                 singlestep_twiddle = 1;
!                         }
! #endif
!                         else {
!                                 /* not on sysenter/syscall; uregs available */
!                                 if (tudebug && tudebugbpt)
!                                         showregs(type, rp, (caddr_t)0);
!                         }
!                         if (singlestep_twiddle) {
                                  rp->r_ps &= ~PS_T; /* turn off trace */
                                  lwp->lwp_pcb.pcb_flags |= DEBUG_PENDING;
                                  ct->t_post_sys = 1;
                                  aston(curthread);
                                  goto cleanup;
                          }
                  }
!                 /* XXX - needs review on debugger interface? */
                  if (boothowto & RB_DEBUG)
                          debug_enter((char *)NULL);
                  else
                          (void) die(type, rp, addr, cpuid);
                  break;
--- 1088,1126 ----
                  (void) die(type, rp, addr, cpuid);
                  break;
  
          case T_SGLSTP: /* single step/hw breakpoint exception */
  
! #if !defined(__xpv)
                  /*
!                  * We'd never normally get here, as kmdb handles its own single
!                  * step traps.  There is one nasty exception though, as
!                  * described in more detail in sys_sysenter().  Note that
!                  * checking for all four locations covers both the KPTI and the
!                  * non-KPTI cases correctly: the former will never be found at
!                  * (brand_)sys_sysenter, and vice versa.
                   */
!                 if (lwp != NULL && (lwp->lwp_pcb.pcb_drstat & DR_SINGLESTEP)) {
!                         if (rp->r_pc == (greg_t)brand_sys_sysenter ||
!                             rp->r_pc == (greg_t)sys_sysenter ||
!                             rp->r_pc == (greg_t)tr_brand_sys_sysenter ||
!                             rp->r_pc == (greg_t)tr_sys_sysenter) {
! 
!                                 rp->r_pc += 0x3; /* sizeof (swapgs) */
! 
                                  rp->r_ps &= ~PS_T; /* turn off trace */
                                  lwp->lwp_pcb.pcb_flags |= DEBUG_PENDING;
                                  ct->t_post_sys = 1;
                                  aston(curthread);
                                  goto cleanup;
+                         } else {
+                                 if (tudebug && tudebugbpt)
+                                         showregs(type, rp, (caddr_t)0);
                          }
                  }
! #endif /* !__xpv */
! 
                  if (boothowto & RB_DEBUG)
                          debug_enter((char *)NULL);
                  else
                          (void) die(type, rp, addr, cpuid);
                  break;
*** 1736,1748 ****
  #else
          printf("cr0: %b cr4: %b\n",
              (uint_t)getcr0(), FMT_CR0, (uint_t)getcr4(), FMT_CR4);
  #endif  /* __lint */
  
!         printf("cr2: %lx", getcr2());
  #if !defined(__xpv)
!         printf("cr3: %lx", getcr3());
  #if defined(__amd64)
          printf("cr8: %lx\n", getcr8());
  #endif
  #endif
          printf("\n");
--- 1712,1724 ----
  #else
          printf("cr0: %b  cr4: %b\n",
              (uint_t)getcr0(), FMT_CR0, (uint_t)getcr4(), FMT_CR4);
  #endif  /* __lint */
  
!         printf("cr2: %lx  ", getcr2());
  #if !defined(__xpv)
!         printf("cr3: %lx  ", getcr3());
  #if defined(__amd64)
          printf("cr8: %lx\n", getcr8());
  #endif
  #endif
          printf("\n");
*** 1844,1854 ****
  }
  
  #endif  /* __i386 */
  
  /*
!  * Test to see if the instruction is part of _sys_rtt.
   *
   * Again on the hypervisor if we try to IRET to user land with a bad code
   * or stack selector we will get vectored through xen_failsafe_callback.
   * In which case we assume we got here via _sys_rtt since we only allow
   * IRET to user land to take place in _sys_rtt.
--- 1820,1831 ----
  }
  
  #endif  /* __i386 */
  
  /*
!  * Test to see if the instruction is part of _sys_rtt (or the KPTI trampolines
!  * which are used by _sys_rtt).
   *
   * Again on the hypervisor if we try to IRET to user land with a bad code
   * or stack selector we will get vectored through xen_failsafe_callback.
   * In which case we assume we got here via _sys_rtt since we only allow
   * IRET to user land to take place in _sys_rtt.
*** 1856,1865 ****
--- 1833,1855 ----
  static int
  instr_is_sys_rtt(caddr_t pc)
  {
          extern void _sys_rtt(), _sys_rtt_end();
  
+ #if !defined(__xpv)
+         extern void tr_sysc_ret_start(), tr_sysc_ret_end();
+         extern void tr_intr_ret_start(), tr_intr_ret_end();
+ 
+         if ((uintptr_t)pc >= (uintptr_t)tr_sysc_ret_start &&
+             (uintptr_t)pc <= (uintptr_t)tr_sysc_ret_end)
+                 return (1);
+ 
+         if ((uintptr_t)pc >= (uintptr_t)tr_intr_ret_start &&
+             (uintptr_t)pc <= (uintptr_t)tr_intr_ret_end)
+                 return (1);
+ #endif
+ 
          if ((uintptr_t)pc < (uintptr_t)_sys_rtt ||
              (uintptr_t)pc > (uintptr_t)_sys_rtt_end)
                  return (0);
  
          return (1);