Print this page
8956 Implement KPTI
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>

Split Close
Expand all
Collapse all
          --- old/usr/src/uts/i86pc/os/trap.c
          +++ new/usr/src/uts/i86pc/os/trap.c
↓ open down ↓ 24 lines elided ↑ open up ↑
  25   25  
  26   26  /*      Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */
  27   27  /*      Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T   */
  28   28  /*              All Rights Reserved                             */
  29   29  /*                                                              */
  30   30  /*      Copyright (c) 1987, 1988 Microsoft Corporation          */
  31   31  /*              All Rights Reserved                             */
  32   32  /*                                                              */
  33   33  
  34   34  /*
  35      - * Copyright 2017 Joyent, Inc.
       35 + * Copyright 2018 Joyent, Inc.
  36   36   */
  37   37  
  38   38  #include <sys/types.h>
  39   39  #include <sys/sysmacros.h>
  40   40  #include <sys/param.h>
  41   41  #include <sys/signal.h>
  42   42  #include <sys/systm.h>
  43   43  #include <sys/user.h>
  44   44  #include <sys/proc.h>
  45   45  #include <sys/disp.h>
↓ open down ↓ 427 lines elided ↑ open up ↑
 473  473          label_t *onfault;
 474  474          faultcode_t pagefault(), res, errcode;
 475  475          enum fault_type fault_type;
 476  476          k_siginfo_t siginfo;
 477  477          uint_t fault = 0;
 478  478          int mstate;
 479  479          int sicode = 0;
 480  480          int watchcode;
 481  481          int watchpage;
 482  482          caddr_t vaddr;
 483      -        int singlestep_twiddle;
 484  483          size_t sz;
 485  484          int ta;
 486  485  #ifdef __amd64
 487  486          uchar_t instr;
 488  487  #endif
 489  488  
 490  489          ASSERT_STACK_ALIGNED();
 491  490  
 492  491          type = rp->r_trapno;
 493  492          CPU_STATS_ADDQ(CPU, sys, trap, 1);
↓ open down ↓ 590 lines elided ↑ open up ↑
1084 1083                   * fatal.
1085 1084                   */
1086 1085                  if (tudebug && tudebugbpt)
1087 1086                          showregs(type, rp, (caddr_t)0);
1088 1087  
1089 1088                  (void) die(type, rp, addr, cpuid);
1090 1089                  break;
1091 1090  
1092 1091          case T_SGLSTP: /* single step/hw breakpoint exception */
1093 1092  
1094      -                /* Now evaluate how we got here */
     1093 +#if !defined(__xpv)
     1094 +                /*
     1095 +                 * We'd never normally get here, as kmdb handles its own single
     1096 +                 * step traps.  There is one nasty exception though, as
     1097 +                 * described in more detail in sys_sysenter().  Note that
     1098 +                 * checking for all four locations covers both the KPTI and the
     1099 +                 * non-KPTI cases correctly: the former will never be found at
     1100 +                 * (brand_)sys_sysenter, and vice versa.
     1101 +                 */
1095 1102                  if (lwp != NULL && (lwp->lwp_pcb.pcb_drstat & DR_SINGLESTEP)) {
1096      -                        /*
1097      -                         * i386 single-steps even through lcalls which
1098      -                         * change the privilege level. So we take a trap at
1099      -                         * the first instruction in privileged mode.
1100      -                         *
1101      -                         * Set a flag to indicate that upon completion of
1102      -                         * the system call, deal with the single-step trap.
1103      -                         *
1104      -                         * The same thing happens for sysenter, too.
1105      -                         */
1106      -                        singlestep_twiddle = 0;
1107      -                        if (rp->r_pc == (uintptr_t)sys_sysenter ||
1108      -                            rp->r_pc == (uintptr_t)brand_sys_sysenter) {
1109      -                                singlestep_twiddle = 1;
1110      -#if defined(__amd64)
1111      -                                /*
1112      -                                 * Since we are already on the kernel's
1113      -                                 * %gs, on 64-bit systems the sysenter case
1114      -                                 * needs to adjust the pc to avoid
1115      -                                 * executing the swapgs instruction at the
1116      -                                 * top of the handler.
1117      -                                 */
1118      -                                if (rp->r_pc == (uintptr_t)sys_sysenter)
1119      -                                        rp->r_pc = (uintptr_t)
1120      -                                            _sys_sysenter_post_swapgs;
1121      -                                else
1122      -                                        rp->r_pc = (uintptr_t)
1123      -                                            _brand_sys_sysenter_post_swapgs;
1124      -#endif
1125      -                        }
1126      -#if defined(__i386)
1127      -                        else if (rp->r_pc == (uintptr_t)sys_call ||
1128      -                            rp->r_pc == (uintptr_t)brand_sys_call) {
1129      -                                singlestep_twiddle = 1;
1130      -                        }
1131      -#endif
1132      -                        else {
1133      -                                /* not on sysenter/syscall; uregs available */
1134      -                                if (tudebug && tudebugbpt)
1135      -                                        showregs(type, rp, (caddr_t)0);
1136      -                        }
1137      -                        if (singlestep_twiddle) {
     1103 +                        if (rp->r_pc == (greg_t)brand_sys_sysenter ||
     1104 +                            rp->r_pc == (greg_t)sys_sysenter ||
     1105 +                            rp->r_pc == (greg_t)tr_brand_sys_sysenter ||
     1106 +                            rp->r_pc == (greg_t)tr_sys_sysenter) {
     1107 +
     1108 +                                rp->r_pc += 0x3; /* sizeof (swapgs) */
     1109 +
1138 1110                                  rp->r_ps &= ~PS_T; /* turn off trace */
1139 1111                                  lwp->lwp_pcb.pcb_flags |= DEBUG_PENDING;
1140 1112                                  ct->t_post_sys = 1;
1141 1113                                  aston(curthread);
1142 1114                                  goto cleanup;
     1115 +                        } else {
     1116 +                                if (tudebug && tudebugbpt)
     1117 +                                        showregs(type, rp, (caddr_t)0);
1143 1118                          }
1144 1119                  }
1145      -                /* XXX - needs review on debugger interface? */
     1120 +#endif /* !__xpv */
     1121 +
1146 1122                  if (boothowto & RB_DEBUG)
1147 1123                          debug_enter((char *)NULL);
1148 1124                  else
1149 1125                          (void) die(type, rp, addr, cpuid);
1150 1126                  break;
1151 1127  
1152 1128          case T_NMIFLT:  /* NMI interrupt */
1153 1129                  printf("Unexpected NMI in system mode\n");
1154 1130                  goto cleanup;
1155 1131  
↓ open down ↓ 568 lines elided ↑ open up ↑
1724 1700  
1725 1701          printf("pid=%d, pc=0x%lx, sp=0x%lx, eflags=0x%lx\n",
1726 1702              (ttoproc(curthread) && ttoproc(curthread)->p_pidp) ?
1727 1703              ttoproc(curthread)->p_pid : 0, rp->r_pc, rp->r_sp, rp->r_ps);
1728 1704  
1729 1705  #if defined(__lint)
1730 1706          /*
1731 1707           * this clause can be deleted when lint bug 4870403 is fixed
1732 1708           * (lint thinks that bit 32 is illegal in a %b format string)
1733 1709           */
1734      -        printf("cr0: %x cr4: %b\n",
     1710 +        printf("cr0: %x  cr4: %b\n",
1735 1711              (uint_t)getcr0(), (uint_t)getcr4(), FMT_CR4);
1736 1712  #else
1737      -        printf("cr0: %b cr4: %b\n",
     1713 +        printf("cr0: %b  cr4: %b\n",
1738 1714              (uint_t)getcr0(), FMT_CR0, (uint_t)getcr4(), FMT_CR4);
1739 1715  #endif  /* __lint */
1740 1716  
1741      -        printf("cr2: %lx", getcr2());
     1717 +        printf("cr2: %lx  ", getcr2());
1742 1718  #if !defined(__xpv)
1743      -        printf("cr3: %lx", getcr3());
     1719 +        printf("cr3: %lx  ", getcr3());
1744 1720  #if defined(__amd64)
1745 1721          printf("cr8: %lx\n", getcr8());
1746 1722  #endif
1747 1723  #endif
1748 1724          printf("\n");
1749 1725  
1750 1726          dumpregs(rp);
1751 1727          splx(s);
1752 1728  }
1753 1729  
↓ open down ↓ 85 lines elided ↑ open up ↑
1839 1815              bcmp(pc, movw_8_esp_es, sizeof (movw_8_esp_es)) == 0 ||
1840 1816              bcmp(pc, movw_c_esp_ds, sizeof (movw_c_esp_ds)) == 0)
1841 1817                  return (1);
1842 1818  
1843 1819          return (0);
1844 1820  }
1845 1821  
1846 1822  #endif  /* __i386 */
1847 1823  
1848 1824  /*
1849      - * Test to see if the instruction is part of _sys_rtt.
     1825 + * Test to see if the instruction is part of _sys_rtt (or the KPTI trampolines
     1826 + * which are used by _sys_rtt).
1850 1827   *
1851 1828   * Again, on the hypervisor, if we try to IRET to user land with a bad code
1852 1829   * or stack selector, we will get vectored through xen_failsafe_callback.
1853 1830   * In that case we assume we got here via _sys_rtt, since we only allow
1854 1831   * IRET to user land to take place in _sys_rtt.
1855 1832   */
1856 1833  static int
1857 1834  instr_is_sys_rtt(caddr_t pc)
1858 1835  {
1859 1836          extern void _sys_rtt(), _sys_rtt_end(); /* inclusive range bounds */
1860 1837  
     1838 +#if !defined(__xpv)
     1839 +        extern void tr_sysc_ret_start(), tr_sysc_ret_end();
     1840 +        extern void tr_intr_ret_start(), tr_intr_ret_end();
     1841 +
     1842 +        if ((uintptr_t)pc >= (uintptr_t)tr_sysc_ret_start &&
     1843 +            (uintptr_t)pc <= (uintptr_t)tr_sysc_ret_end)
     1844 +                return (1);     /* pc inside tr_sysc_ret_* range */
     1845 +
     1846 +        if ((uintptr_t)pc >= (uintptr_t)tr_intr_ret_start &&
     1847 +            (uintptr_t)pc <= (uintptr_t)tr_intr_ret_end)
     1848 +                return (1);     /* pc inside tr_intr_ret_* range */
     1849 +#endif  /* !__xpv */
     1850 +
1861 1851          if ((uintptr_t)pc < (uintptr_t)_sys_rtt ||
1862 1852              (uintptr_t)pc > (uintptr_t)_sys_rtt_end)
1863 1853                  return (0);     /* pc outside _sys_rtt.._sys_rtt_end */
1864 1854  
1865 1855          return (1);     /* pc inside _sys_rtt.._sys_rtt_end (inclusive) */
1866 1856  }
1867 1857  
1868 1858  /*
1869 1859   * Handle #gp faults in kernel mode.
1870 1860   *
↓ open down ↓ 458 lines elided ↑ open up ↑
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX