8956 Implement KPTI
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>

          --- old/usr/src/uts/intel/ia32/ml/exception.s
          +++ new/usr/src/uts/intel/ia32/ml/exception.s
   1    1  /*
   2    2   * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
   3    3   * Copyright (c) 2013, 2014 by Delphix. All rights reserved.
   4      - * Copyright (c) 2017 Joyent, Inc.
        4 + * Copyright (c) 2018 Joyent, Inc.
   5    5   */
   6    6  
   7    7  /*
   8    8   * Copyright (c) 1989, 1990 William F. Jolitz.
   9    9   * Copyright (c) 1990 The Regents of the University of California.
  10   10   * All rights reserved.
  11   11   *
  12   12   * Redistribution and use in source and binary forms, with or without
  13   13   * modification, are permitted provided that the following conditions
  14   14   * are met:
↓ open down ↓ 59 lines elided ↑ open up ↑
  74   74   * Note that for all exceptions for amd64
  75   75   * %r11 and %rcx are on the stack. Just pop
  76   76   * them back into their appropriate registers and let
  77   77   * it get saved as if running native.
  78   78   */
  79   79  
  80   80  #if defined(__xpv) && defined(__amd64)
  81   81  
  82   82  #define NPTRAP_NOERR(trapno)    \
  83   83          pushq   $0;             \
  84      -        pushq   $trapno 
       84 +        pushq   $trapno
  85   85  
  86   86  #define TRAP_NOERR(trapno)      \
  87   87          XPV_TRAP_POP;           \
  88   88          NPTRAP_NOERR(trapno)
  89   89  
  90   90  /*
  91   91   * error code already pushed by hw
  92   92   * onto stack.
  93   93   */
  94   94  #define TRAP_ERR(trapno)        \
  95   95          XPV_TRAP_POP;           \
  96      -        pushq   $trapno 
       96 +        pushq   $trapno
  97   97  
  98   98  #else /* __xpv && __amd64 */
  99   99  
 100  100  #define TRAP_NOERR(trapno)      \
 101  101          push    $0;             \
 102      -        push    $trapno 
      102 +        push    $trapno
 103  103  
 104  104  #define NPTRAP_NOERR(trapno) TRAP_NOERR(trapno)
 105  105  
 106  106  /*
 107  107   * error code already pushed by hw
 108  108   * onto stack.
 109  109   */
 110  110  #define TRAP_ERR(trapno)        \
 111      -        push    $trapno 
      111 +        push    $trapno
 112  112  
 113  113  #endif  /* __xpv && __amd64 */
 114  114  
      115 +        /*
      116 +         * These are the stacks used on cpu0 for taking double faults,
      117 +         * NMIs and MCEs (the latter two only on amd64 where we have IST).
      118 +         *
      119 +         * We define them here instead of in a C file so that we can page-align
      120 +         * them (gcc won't do that in a .c file).
      121 +         */
      122 +        .data
      123 +        DGDEF3(dblfault_stack0, DEFAULTSTKSZ, MMU_PAGESIZE)
      124 +        .fill   DEFAULTSTKSZ, 1, 0
      125 +        DGDEF3(nmi_stack0, DEFAULTSTKSZ, MMU_PAGESIZE)
      126 +        .fill   DEFAULTSTKSZ, 1, 0
      127 +        DGDEF3(mce_stack0, DEFAULTSTKSZ, MMU_PAGESIZE)
      128 +        .fill   DEFAULTSTKSZ, 1, 0
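
For reference, a minimal C sketch of how these page-aligned stacks could be referred to from C code elsewhere in the kernel. Only the symbol names come from the DGDEF3 definitions above; the helper, its parameters, and the IST usage note are illustrative assumptions, not something this change claims.

    #include <stddef.h>
    #include <stdint.h>

    /* Symbols emitted by the DGDEF3/.fill directives above. */
    extern char dblfault_stack0[];      /* DEFAULTSTKSZ bytes, page aligned */
    extern char nmi_stack0[];           /* DEFAULTSTKSZ bytes, page aligned */
    extern char mce_stack0[];           /* DEFAULTSTKSZ bytes, page aligned */

    /*
     * An IST slot (or any stack pointer loaded on trap entry) holds the
     * *top* of the stack, so a consumer computes base + size; "stksz"
     * stands in for DEFAULTSTKSZ to keep the sketch self-contained.
     */
    static uintptr_t
    example_stack_top(char *stkbase, size_t stksz)
    {
            return ((uintptr_t)stkbase + stksz);
    }
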
 115  129  
 116  130          /*
 117  131           * #DE
 118  132           */
 119  133          ENTRY_NP(div0trap)
 120  134          TRAP_NOERR(T_ZERODIV)   /* $0 */
 121  135          jmp     cmntrap
 122  136          SET_SIZE(div0trap)
 123  137  
 124  138          /*
↓ open down ↓ 31 lines elided ↑ open up ↑
 156  170           *                      r_rip           <-- %rsp + 24
 157  171           *                      r_err           <-- %rsp + 16
 158  172           *                      r_trapno        <-- %rsp + 8
 159  173           * (low address)        %r11            <-- %rsp
 160  174           */
 161  175          leaq    sys_sysenter(%rip), %r11
 162  176          cmpq    %r11, 24(%rsp)  /* Compare to saved r_rip on the stack */
 163  177          je      1f
 164  178          leaq    brand_sys_sysenter(%rip), %r11
 165  179          cmpq    %r11, 24(%rsp)  /* Compare to saved r_rip on the stack */
      180 +        je      1f
      181 +        leaq    tr_sys_sysenter(%rip), %r11
      182 +        cmpq    %r11, 24(%rsp)
      183 +        je      1f
      184 +        leaq    tr_brand_sys_sysenter(%rip), %r11
      185 +        cmpq    %r11, 24(%rsp)
 166  186          jne     2f
 167  187  1:      SWAPGS
 168  188  2:      popq    %r11
 169  189  #endif  /* !__xpv */
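
The comparisons added above extend an existing check: if the #DB trap landed exactly at a sysenter entry point, that entry code has not yet had a chance to SWAPGS, so the handler must do it here; with KPTI the trampoline variants (tr_sys_sysenter, tr_brand_sys_sysenter) are additional such entry points. A minimal C rendering of the decision follows; every name other than the four compared symbols is an illustrative assumption.

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdint.h>

    /*
     * Returns true when the saved r_rip (24(%rsp) in the assembly) matches
     * one of the sysenter entry points that have not yet done SWAPGS.
     * The entry points are passed in to keep the sketch self-contained;
     * in the assembly they are sys_sysenter, brand_sys_sysenter,
     * tr_sys_sysenter and tr_brand_sys_sysenter.
     */
    static bool
    interrupted_sysenter_before_swapgs(uintptr_t saved_rip,
        const uintptr_t entry_points[], size_t nentries)
    {
            for (size_t i = 0; i < nentries; i++) {
                    if (saved_rip == entry_points[i])
                            return (true);  /* take the SWAPGS path (label 1:) */
            }
            return (false);                 /* skip it (label 2:) */
    }
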
 170  190  
 171  191          INTR_PUSH
 172  192  #if defined(__xpv)
 173  193          movl    $6, %edi
 174  194          call    kdi_dreg_get
 175  195          movq    %rax, %r15              /* %db6 -> %r15 */
↓ open down ↓ 31 lines elided ↑ open up ↑
 207  227  #if defined(__amd64)
 208  228  #if !defined(__xpv)
 209  229  
 210  230  /*
 211  231   * Macro to set the gsbase or kgsbase to the address of the struct cpu
 212  232   * for this processor.  If we came from userland, set kgsbase else
 213  233   * set gsbase.  We find the proper cpu struct by looping through
 214  234   * the cpu structs for all processors till we find a match for the gdt
 215  235   * of the trapping processor.  The stack is expected to be pointing at
 216  236   * the standard regs pushed by hardware on a trap (plus error code and trapno).
      237 + *
      238 + * It's ok for us to clobber gsbase here (and possibly end up with both gsbase
      239 + * and kgsbase set to the same value) because we're not going back the normal
      240 + * way out of here (via IRET). Where we're going, we don't need no user %gs.
 217  241   */
 218  242  #define SET_CPU_GSBASE                                                  \
 219  243          subq    $REGOFF_TRAPNO, %rsp;   /* save regs */                 \
 220  244          movq    %rax, REGOFF_RAX(%rsp);                                 \
 221  245          movq    %rbx, REGOFF_RBX(%rsp);                                 \
 222  246          movq    %rcx, REGOFF_RCX(%rsp);                                 \
 223  247          movq    %rdx, REGOFF_RDX(%rsp);                                 \
 224  248          movq    %rbp, REGOFF_RBP(%rsp);                                 \
 225  249          movq    %rsp, %rbp;                                             \
 226  250          subq    $16, %rsp;              /* space for gdt */             \
↓ open down ↓ 60 lines elided ↑ open up ↑
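
The SET_CPU_GSBASE comment above describes a linear search: walk the per-CPU structures until one whose recorded GDT matches the trapping processor's GDT is found, then point gsbase (or kgsbase, if we came from userland) at it. A minimal C sketch of that lookup, assuming illustrative type and field names (the kernel's actual cpu_t layout is not shown in this diff):

    #include <stddef.h>
    #include <stdint.h>

    #define EX_NCPU 256                 /* illustrative bound, not the kernel's NCPU */

    struct ex_cpu {
            uintptr_t ec_gdt_base;      /* GDT base recorded for this CPU */
            /* ... other per-CPU state elided ... */
    };

    /*
     * Return the per-CPU structure whose recorded GDT base matches the
     * GDT of the trapping processor, or NULL if none matches.
     */
    static struct ex_cpu *
    example_find_cpu_by_gdt(struct ex_cpu *cpus[], uintptr_t trapping_gdt)
    {
            for (size_t i = 0; i < EX_NCPU; i++) {
                    if (cpus[i] != NULL && cpus[i]->ec_gdt_base == trapping_gdt)
                            return (cpus[i]);
            }
            return (NULL);
    }
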
 287  311          TRACE_PTR(%r12, %rax, %eax, %rdx, $TT_TRAP)
 288  312          TRACE_REGS(%r12, %rsp, %rax, %rbx)
 289  313          TRACE_STAMP(%r12)
 290  314  
 291  315          movq    %rsp, %rbp
 292  316  
 293  317          movq    %rbp, %rdi
 294  318          call    av_dispatch_nmivect
 295  319  
 296  320          INTR_POP
 297      -        IRET
      321 +        jmp     tr_iret_auto
 298  322          /*NOTREACHED*/
 299  323          SET_SIZE(nmiint)
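
With KPTI, an interrupt return that might land back in userland can no longer be a bare IRET from kernel text: the user page table has to be restored first, which is what the tr_iret_* trampolines are for. The sketch below captures only that general shape as an assumption about what a tr_iret_auto-style helper decides; it is not taken from this diff.

    #include <stdbool.h>

    /*
     * Illustrative decision an "auto" return path makes: a kernel-to-kernel
     * return can iret directly, while a return to user mode must first go
     * through the trampoline that switches %cr3 back to the user page table.
     */
    static void
    example_trampoline_return(bool returning_to_user)
    {
            if (returning_to_user) {
                    /* switch %cr3 to the user page table, then iretq */
                    /* from the shared trampoline page                */
            } else {
                    /* kernel-to-kernel: a plain iretq is sufficient  */
            }
    }
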
 300  324  
 301  325  #elif defined(__i386)
 302  326  
 303  327          /*
 304  328           * #NMI
 305  329           */
 306  330          ENTRY_NP(nmiint)
 307  331          TRAP_NOERR(T_NMIFLT)    /* $2 */
↓ open down ↓ 4 lines elided ↑ open up ↑
 312  336           */
 313  337          INTR_PUSH
 314  338          INTGATE_INIT_KERNEL_FLAGS
 315  339  
 316  340          TRACE_PTR(%edi, %ebx, %ebx, %ecx, $TT_TRAP)
 317  341          TRACE_REGS(%edi, %esp, %ebx, %ecx)
 318  342          TRACE_STAMP(%edi)
 319  343  
 320  344          movl    %esp, %ebp
 321  345  
 322      -        pushl   %ebp    
 323      -        call    av_dispatch_nmivect     
      346 +        pushl   %ebp
      347 +        call    av_dispatch_nmivect
 324  348          addl    $4, %esp
 325  349  
 326  350          INTR_POP_USER
 327  351          IRET
 328  352          SET_SIZE(nmiint)
 329  353  
 330  354  #endif  /* __i386 */
 331  355  
 332  356          /*
 333  357           * #BP
↓ open down ↓ 92 lines elided ↑ open up ↑
 426  450          movq    40(%rsp), %rax          /* load calling RFLAGS */
 427  451          movq    %rax, 24(%rsp)          /* store calling RFLAGS */
 428  452          movq    48(%rsp), %rax          /* load calling RSP */
 429  453          subq    $8, %rax                /* make room for %rbp */
 430  454          movq    %rax, 32(%rsp)          /* store calling RSP */
 431  455          movq    56(%rsp), %rax          /* load calling SS */
 432  456          movq    %rax, 40(%rsp)          /* store calling SS */
 433  457          movq    32(%rsp), %rax          /* reload calling RSP */
 434  458          movq    %rbp, (%rax)            /* store %rbp there */
 435  459          popq    %rax                    /* pop off temp */
 436      -        IRET                            /* return from interrupt */
      460 +        jmp     tr_iret_kernel          /* return from interrupt */
 437  461          /*NOTREACHED*/
 438  462  
 439  463  ud_leave:
 440  464          /*
 441  465           * We must emulate a "leave", which is the same as a "movq %rbp, %rsp"
 442  466           * followed by a "popq %rbp".  This is quite a bit simpler on amd64
 443  467           * than it is on i386 -- we can exploit the fact that the %rsp is
 444  468           * explicitly saved to effect the pop without having to reshuffle
 445  469           * the other data pushed for the trap.
 446  470           */
 447  471          INTR_POP
 448  472          pushq   %rax                    /* push temp */
 449  473          movq    8(%rsp), %rax           /* load calling RIP */
 450  474          addq    $1, %rax                /* increment over trapping instr */
 451  475          movq    %rax, 8(%rsp)           /* store calling RIP */
 452  476          movq    (%rbp), %rax            /* get new %rbp */
 453  477          addq    $8, %rbp                /* adjust new %rsp */
 454  478          movq    %rbp, 32(%rsp)          /* store new %rsp */
 455  479          movq    %rax, %rbp              /* set new %rbp */
 456  480          popq    %rax                    /* pop off temp */
 457      -        IRET                            /* return from interrupt */
      481 +        jmp     tr_iret_kernel          /* return from interrupt */
 458  482          /*NOTREACHED*/
 459  483  
 460  484  ud_nop:
 461  485          /*
 462  486           * We must emulate a "nop".  This is obviously not hard:  we need only
 463  487           * advance the %rip by one.
 464  488           */
 465  489          INTR_POP
 466  490          incq    (%rsp)
 467      -        IRET
      491 +        jmp     tr_iret_kernel
 468  492          /*NOTREACHED*/
 469  493  
 470  494  ud_ret:
 471  495          INTR_POP
 472  496          pushq   %rax                    /* push temp */
 473  497          movq    32(%rsp), %rax          /* load %rsp */
 474  498          movq    (%rax), %rax            /* load calling RIP */
 475  499          movq    %rax, 8(%rsp)           /* store calling RIP */
 476  500          addq    $8, 32(%rsp)            /* adjust new %rsp */
 477  501          popq    %rax                    /* pop off temp */
 478      -        IRET                            /* return from interrupt */
      502 +        jmp     tr_iret_kernel          /* return from interrupt */
 479  503          /*NOTREACHED*/
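
For reference, the emulation paths above (the push emulation preceding ud_leave, plus ud_leave, ud_nop and ud_ret) each patch up the saved register state so the effect of the replaced instruction is reproduced before returning through tr_iret_kernel. A minimal C sketch of the three simplest emulations, operating on an illustrative trap-frame type (the real regs layout is not shown here):

    #include <stdint.h>

    struct ex_regs {
            uint64_t r_rip;             /* saved instruction pointer */
            uint64_t r_rsp;             /* saved stack pointer */
            uint64_t r_rbp;             /* saved frame pointer */
    };

    /* leave == movq %rbp, %rsp; popq %rbp -- then step over the 1-byte trap */
    static void
    emulate_leave(struct ex_regs *r)
    {
            r->r_rsp = r->r_rbp;
            r->r_rbp = *(uint64_t *)(uintptr_t)r->r_rsp;
            r->r_rsp += 8;
            r->r_rip += 1;
    }

    /* nop -- just advance %rip past the trapping byte */
    static void
    emulate_nop(struct ex_regs *r)
    {
            r->r_rip += 1;
    }

    /* ret -- pop the return address from the interrupted stack into %rip */
    static void
    emulate_ret(struct ex_regs *r)
    {
            r->r_rip = *(uint64_t *)(uintptr_t)r->r_rsp;
            r->r_rsp += 8;
    }
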
 480  504  
 481  505  ud_trap:
 482  506          /*
 483  507           * We're going to let the kernel handle this as a normal #UD.  If,
 484  508           * however, we came through #BP and are spoofing #UD (in this case,
 485  509           * the stored error value will be non-zero), we need to de-spoof
 486  510           * the trap by incrementing %rip and pushing T_BPTFLT.
 487  511           */
 488  512          cmpq    $0, REGOFF_ERR(%rsp)
↓ open down ↓ 137 lines elided ↑ open up ↑
 626  650           */
 627  651  #if defined(__xpv)
 628  652  
 629  653          ENTRY_NP(ndptrap)
 630  654          /*
 631  655           * (On the hypervisor we must make a hypercall so we might as well
 632  656           * save everything and handle as in a normal trap.)
 633  657           */
 634  658          TRAP_NOERR(T_NOEXTFLT)  /* $7 */
 635  659          INTR_PUSH
 636      -        
      660 +
 637  661          /*
 638  662           * We want to do this quickly as every lwp using fp will take this
 639  663           * after a context switch -- we do the frequent path in ndptrap_frstor
 640  664           * below; for all other cases, we let the trap code handle it
 641  665           */
 642  666          LOADCPU(%rax)                   /* swapgs handled in hypervisor */
 643  667          cmpl    $0, fpu_exists(%rip)
 644  668          je      .handle_in_trap         /* let trap handle no fp case */
 645  669          movq    CPU_THREAD(%rax), %rbx  /* %rbx = curthread */
 646  670          movl    $FPU_EN, %eax
↓ open down ↓ 55 lines elided ↑ open up ↑
 702  726          pushq   %rax
 703  727          pushq   %rbx
 704  728          cmpw    $KCS_SEL, 24(%rsp)      /* did we come from kernel mode? */
 705  729          jne     1f
 706  730          LOADCPU(%rax)                   /* if yes, don't swapgs */
 707  731          jmp     2f
 708  732  1:
 709  733          SWAPGS                          /* if from user, need swapgs */
 710  734          LOADCPU(%rax)
 711  735          SWAPGS
 712      -2:      
      736 +2:
 713  737          /*
 714  738           * Xrstor needs to use edx as part of its flag.
 715  739           * NOTE: have to push rdx after "cmpw ...24(%rsp)", otherwise rsp+$24
 716  740           * will not point to CS.
 717  741           */
 718  742          pushq   %rdx
 719  743          cmpl    $0, fpu_exists(%rip)
 720  744          je      .handle_in_trap         /* let trap handle no fp case */
 721  745          movq    CPU_THREAD(%rax), %rbx  /* %rbx = curthread */
 722  746          movl    $FPU_EN, %eax
↓ open down ↓ 19 lines elided ↑ open up ↑
 742  766           * kernel due to user fault.
 743  767           */
 744  768          ALTENTRY(ndptrap_frstor)
 745  769          movq (%rbx), %rbx               /* fpu_regs.kfpu_u.kfpu_XX pointer */
 746  770          .globl  _patch_xrstorq_rbx
 747  771  _patch_xrstorq_rbx:
 748  772          fxrstorq (%rbx)
 749  773          popq    %rdx
 750  774          popq    %rbx
 751  775          popq    %rax
 752      -        IRET
      776 +        jmp     tr_iret_auto
 753  777          /*NOTREACHED*/
 754  778  
 755  779  .handle_in_trap:
 756  780          popq    %rdx
 757  781          popq    %rbx
 758  782          popq    %rax
 759  783          TRAP_NOERR(T_NOEXTFLT)  /* $7 */
 760  784          jmp     cmninttrap
 761  785          SET_SIZE(ndptrap_frstor)
 762  786          SET_SIZE(ndptrap)
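
The comment at the top of this handler explains the intent: #NM is the common case after a context switch for any lwp using the FPU, so the frequent path restores the FPU state directly and returns (now via tr_iret_auto), while anything unusual is pushed to the common trap code. A minimal C sketch of that shape; the predicate and helper names are illustrative assumptions.

    #include <stdbool.h>

    static void
    example_ndptrap(bool fpu_exists, bool thread_fpu_enabled)
    {
            if (fpu_exists && thread_fpu_enabled) {
                    /* fast path: restore the thread's FPU state via the    */
                    /* (patchable) restore at _patch_xrstorq_rbx and return */
                    /* straight to the interrupted context                  */
            } else {
                    /* slow path: push T_NOEXTFLT and let the common trap   */
                    /* code (cmninttrap) handle it                          */
            }
    }
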
↓ open down ↓ 97 lines elided ↑ open up ↑
 860  884          je      1f
 861  885  
 862  886          movq    %rax, DTR_BASE(%rsp)
 863  887          movw    $_MUL(NIDT, GATE_DESC_SIZE), DTR_LIMIT(%rsp)
 864  888          lidt    (%rsp)
 865  889  
 866  890          movl    $1, nopanicdebug
 867  891  
 868  892  1:      addq    $DESCTBR_SIZE, %rsp
 869  893          popq    %rax
 870      -        
      894 +
 871  895          DFTRAP_PUSH
 872  896  
 873  897          /*
 874  898           * freeze trap trace.
 875  899           */
 876  900  #ifdef TRAPTRACE
 877  901          leaq    trap_trace_freeze(%rip), %r11
 878  902          incl    (%r11)
 879  903  #endif
 880  904  
↓ open down ↓ 239 lines elided ↑ open up ↑
1120 1144           * We now know that this is the invalid opcode trap.
1121 1145           */
1122 1146          popl    %eax
1123 1147          addl    $4, %esp        /* pop error code */
1124 1148          jmp     invoptrap
1125 1149          SET_SIZE(pentium_pftrap)
1126 1150  
1127 1151  #endif  /* !__amd64 */
1128 1152  
1129 1153          ENTRY_NP(resvtrap)
1130      -        TRAP_NOERR(15)          /* (reserved)  */
     1154 +        TRAP_NOERR(T_RESVTRAP)  /* (reserved)  */
1131 1155          jmp     cmntrap
1132 1156          SET_SIZE(resvtrap)
1133 1157  
1134 1158          /*
1135 1159           * #MF
1136 1160           */
1137 1161          ENTRY_NP(ndperr)
1138 1162          TRAP_NOERR(T_EXTERRFLT) /* $16 */
1139 1163          jmp     cmninttrap
1140 1164          SET_SIZE(ndperr)
↓ open down ↓ 59 lines elided ↑ open up ↑
1200 1224  
1201 1225          /*
1202 1226           * #XF
1203 1227           */
1204 1228          ENTRY_NP(xmtrap)
1205 1229          TRAP_NOERR(T_SIMDFPE)   /* $19 */
1206 1230          jmp     cmninttrap
1207 1231          SET_SIZE(xmtrap)
1208 1232  
1209 1233          ENTRY_NP(invaltrap)
1210      -        TRAP_NOERR(30)          /* very invalid */
     1234 +        TRAP_NOERR(T_INVALTRAP) /* very invalid */
1211 1235          jmp     cmntrap
1212 1236          SET_SIZE(invaltrap)
1213 1237  
1214      -        ENTRY_NP(invalint)
1215      -        TRAP_NOERR(31)          /* even more so */
1216      -        jmp     cmnint
1217      -        SET_SIZE(invalint)
1218      -
1219 1238          .globl  fasttable
1220 1239  
1221 1240  #if defined(__amd64)
1222 1241  
1223 1242          ENTRY_NP(fasttrap)
1224 1243          cmpl    $T_LASTFAST, %eax
1225 1244          ja      1f
1226 1245          orl     %eax, %eax      /* (zero extend top 32-bits) */
1227 1246          leaq    fasttable(%rip), %r11
1228 1247          leaq    (%r11, %rax, CLONGSIZE), %r11
↓ open down ↓ 50 lines elided ↑ open up ↑
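
The fasttrap entry above bounds-checks the requested fast trap number against T_LASTFAST and then indexes fasttable (scaled by CLONGSIZE) to dispatch it; the actual indirect jump falls in the elided lines. A minimal C sketch of that dispatch, with the table and handler types being illustrative assumptions:

    #include <stddef.h>

    typedef void (*ex_fastfn_t)(void);

    static void
    example_fasttrap_dispatch(unsigned int t, const ex_fastfn_t table[],
        unsigned int lastfast, ex_fastfn_t out_of_range)
    {
            if (t > lastfast) {
                    out_of_range();     /* the "ja 1f" path in the assembly */
                    return;
            }
            table[t]();                 /* jump through fasttable[t] */
    }
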
1279 1298  
1280 1299  #if defined(__amd64)
1281 1300  
1282 1301          /*
1283 1302           * RFLAGS 24 bytes up the stack from %rsp.
1284 1303           * XXX a constant would be nicer.
1285 1304           */
1286 1305          ENTRY_NP(fast_null)
1287 1306          XPV_TRAP_POP
1288 1307          orq     $PS_C, 24(%rsp) /* set carry bit in user flags */
1289      -        IRET
     1308 +        jmp     tr_iret_auto
1290 1309          /*NOTREACHED*/
1291 1310          SET_SIZE(fast_null)
1292 1311  
1293 1312  #elif defined(__i386)
1294 1313  
1295 1314          ENTRY_NP(fast_null)
1296 1315          orw     $PS_C, 8(%esp)  /* set carry bit in user flags */
1297 1316          IRET
1298 1317          SET_SIZE(fast_null)
1299 1318  
↓ open down ↓ 238 lines elided ↑ open up ↑