8956 Implement KPTI
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>

          --- old/usr/src/uts/intel/ia32/ml/swtch.s
          +++ new/usr/src/uts/intel/ia32/ml/swtch.s
(16 lines elided)
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
  23   23   * Use is subject to license terms.
  24   24   */
  25   25  
  26   26  /*
  27      - * Copyright (c) 2013, Joyent, Inc. All rights reserved.
       27 + * Copyright (c) 2018 Joyent, Inc.
  28   28   */
  29   29  
  30   30  /*
  31   31   * Process switching routines.
  32   32   */
  33   33  
  34   34  #if defined(__lint)
  35   35  #include <sys/thread.h>
  36   36  #include <sys/systm.h>
  37   37  #include <sys/time.h>
(19 lines elided)
  57   57   * when they are the same thread. In this case where the threads
  58   58   * are the same, resume() on one processor will spin on the incoming
  59   59   * thread until resume() on the other processor has finished with
  60   60   * the outgoing thread.
  61   61   *
  62   62   * The MMU context changes when the resuming thread resides in a different
  63   63   * process.  Kernel threads are known by resume to reside in process 0.
  64   64   * The MMU context, therefore, only changes when resuming a thread in
  65   65   * a process different from curproc.
  66   66   *
  67      - * resume_from_intr() is called when the thread being resumed was not 
       67 + * resume_from_intr() is called when the thread being resumed was not
  68   68   * passivated by resume (e.g. was interrupted).  This means that the
  69   69   * resume lock is already held and that a restore context is not needed.
  70   70   * Also, the MMU context is not changed on the resume in this case.
  71   71   *
  72   72   * resume_from_zombie() is the same as resume except the calling thread
  73   73   * is a zombie and must be put on the deathrow list after the CPU is
  74   74   * off the stack.
  75   75   */
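
The comment above spells out the rule that the MMU context changes only when the resuming thread belongs to a different process, with kernel threads counted as part of process 0. A minimal C sketch of that rule, for illustration only: ttoproc() and curproc are real illumos constructs, while needs_mmu_switch() is a hypothetical helper name invented for this example.

    #include <sys/thread.h>
    #include <sys/proc.h>

    /*
     * Hypothetical helper (illustration only): kernel threads resolve
     * to p0 via ttoproc(), so the MMU context needs to change only when
     * the resuming thread's process differs from curproc.
     */
    static boolean_t
    needs_mmu_switch(kthread_t *t)
    {
            return (ttoproc(t) != curproc ? B_TRUE : B_FALSE);
    }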
  76   76  
  77   77  #if !defined(__lint)
(150 lines elided)
 228  228  
 229  229  /* ARGSUSED */
 230  230  void
 231  231  resume(kthread_t *t)
 232  232  {}
 233  233  
 234  234  #else   /* __lint */
 235  235  
 236  236  #if defined(__amd64)
 237  237  
      238 +        .global kpti_enable
      239 +
 238  240          ENTRY(resume)
 239  241          movq    %gs:CPU_THREAD, %rax
 240  242          leaq    resume_return(%rip), %r11
 241  243  
 242  244          /*
 243  245           * Deal with SMAP here. A thread may be switched out at any point while
 244  246           * it is executing. The thread could be under on_fault() or it could be
 245  247           * pre-empted while performing a copy interruption. If this happens and
 246  248           * we're not in the context of an interrupt which happens to handle
 247  249           * saving and restoring rflags correctly, we may lose our SMAP related
(50 lines elided)
 298  300  
 299  301          movq    %r14, %rdi              /* arg = proc pointer */
 300  302          call    savepctx                 /* call ctx ops */
 301  303  .nosavepctx:
 302  304  
 303  305          /*
 304  306           * Temporarily switch to the idle thread's stack
 305  307           */
 306  308          movq    CPU_IDLE_THREAD(%r15), %rax     /* idle thread pointer */
 307  309  
 308      -        /* 
      310 +        /*
 309  311           * Set the idle thread as the current thread
 310  312           */
 311  313          movq    T_SP(%rax), %rsp        /* It is safe to set rsp */
 312  314          movq    %rax, CPU_THREAD(%r15)
 313  315  
 314  316          /*
 315  317           * Switch in the hat context for the new thread
 316  318           *
 317  319           */
 318  320          GET_THREAD_HATP(%rdi, %r12, %r11)
 319  321          call    hat_switch
 320  322  
 321      -        /* 
      323 +        /*
 322  324           * Clear and unlock previous thread's t_lock
 323  325           * to allow it to be dispatched by another processor.
 324  326           */
 325  327          movb    $0, T_LOCK(%r13)
 326  328  
 327  329          /*
 328  330           * IMPORTANT: Registers at this point must be:
 329  331           *       %r12 = new thread
 330  332           *
 331  333           * Here we are in the idle thread, have dropped the old thread.
(23 lines elided)
 355  357          LOADCPU(%r13)                   /* load current CPU pointer */
 356  358          cmpq    %r13, T_CPU(%r12)
 357  359          je      .setup_cpu
 358  360  
 359  361          /* cp->cpu_stats.sys.cpumigrate++ */
 360  362          incq    CPU_STATS_SYS_CPUMIGRATE(%r13)
 361  363          movq    %r13, T_CPU(%r12)       /* set new thread's CPU pointer */
 362  364  
 363  365  .setup_cpu:
 364  366          /*
 365      -         * Setup rsp0 (kernel stack) in TSS to curthread's stack.
 366      -         * (Note: Since we don't have saved 'regs' structure for all
 367      -         *        the threads we can't easily determine if we need to
 368      -         *        change rsp0. So, we simply change the rsp0 to bottom 
 369      -         *        of the thread stack and it will work for all cases.)
      367 +         * Setup rsp0 (kernel stack) in TSS to curthread's saved regs
      368 +         * structure.  If this thread doesn't have a regs structure above
      369 +         * the stack -- that is, if lwp_stk_init() was never called for the
      370 +         * thread -- this will set rsp0 to the wrong value, but it's harmless
      371 +         * as it's a kernel thread, and it won't actually attempt to implicitly
      372 +         * use the rsp0 via a privilege change.
 370  373           *
 371      -         * XX64 - Is this correct?
      374 +         * Note that when we have KPTI enabled on amd64, we never use this
      375 +         * value at all (since all the interrupts have an IST set).
 372  376           */
 373  377          movq    CPU_TSS(%r13), %r14
      378 +#if !defined(__xpv)
      379 +        cmpq    $1, kpti_enable
      380 +        jne     1f
      381 +        leaq    CPU_KPTI_TR_RSP(%r13), %rax
      382 +        jmp     2f
      383 +1:
 374  384          movq    T_STACK(%r12), %rax
 375  385          addq    $REGSIZE+MINFRAME, %rax /* to the bottom of thread stack */
 376      -#if !defined(__xpv)
      386 +2:
 377  387          movq    %rax, TSS_RSP0(%r14)
 378  388  #else
      389 +        movq    T_STACK(%r12), %rax
      390 +        addq    $REGSIZE+MINFRAME, %rax /* to the bottom of thread stack */
 379  391          movl    $KDS_SEL, %edi
 380  392          movq    %rax, %rsi
 381  393          call    HYPERVISOR_stack_switch
 382  394  #endif  /* __xpv */
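
A minimal, self-contained C sketch of the rsp0 selection above, under the assumptions of this change: with kpti_enable set, rsp0 points into the per-CPU KPTI trampoline area (and, as the comment notes, is effectively unused since interrupts have an IST), while without KPTI it points at the bottom of the thread's kernel stack just above the saved regs. The struct and field names below are simplified stand-ins, not the real cpu_t/kthread_t layout.

    #include <stdint.h>

    #define REGSIZE         0xa8    /* illustrative sizes only */
    #define MINFRAME        8

    struct fake_tss    { uint64_t tss_rsp0; };
    struct fake_cpu    {
            struct fake_tss *cpu_tss;               /* CPU_TSS */
            uint64_t        cpu_kpti_tr_rsp;        /* CPU_KPTI_TR_RSP */
    };
    struct fake_thread { uintptr_t t_stk; };        /* T_STACK */

    extern int kpti_enable;

    static void
    set_rsp0(struct fake_cpu *cp, struct fake_thread *t)
    {
            uint64_t rsp0;

            if (kpti_enable == 1) {
                    /* KPTI: rsp0 targets the per-CPU trampoline stack. */
                    rsp0 = (uint64_t)&cp->cpu_kpti_tr_rsp;
            } else {
                    /* Pre-KPTI: bottom of the thread stack, above the regs. */
                    rsp0 = (uint64_t)(t->t_stk + REGSIZE + MINFRAME);
            }
            cp->cpu_tss->tss_rsp0 = rsp0;
    }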
 383  395  
 384  396          movq    %r12, CPU_THREAD(%r13)  /* set CPU's thread pointer */
 385  397          mfence                          /* synchronize with mutex_exit() */
 386  398          xorl    %ebp, %ebp              /* make $<threadlist behave better */
 387  399          movq    T_LWP(%r12), %rax       /* set associated lwp to  */
 388  400          movq    %rax, CPU_LWP(%r13)     /* CPU's lwp ptr */
(12 lines elided)
 401  413  
 402  414          /*
 403  415           * Call restorepctx if context ops have been installed for the proc.
 404  416           */
 405  417          movq    T_PROCP(%r12), %rcx
 406  418          cmpq    $0, P_PCTX(%rcx)
 407  419          jz      .norestorepctx
 408  420          movq    %rcx, %rdi
 409  421          call    restorepctx
 410  422  .norestorepctx:
 411      -        
      423 +
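
The restorepctx check above (like the matching savepctx call on the way out of the old thread) is the assembly form of a simple C test; restorepctx() takes the proc pointer, and the call is skipped when no context ops have been installed:

    if (p->p_pctx != NULL)
            restorepctx(p);         /* only when context ops are installed */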
 412  424          STORE_INTR_START(%r12)
 413  425  
 414  426          /*
 415  427           * If we came into swtch with the ability to access userland pages, go
 416  428           * ahead and restore that fact by disabling SMAP.  Clear the indicator
 417  429           * flag out of paranoia.
 418  430           */
 419  431          movq    T_USERACC(%r12), %rax   /* should we disable smap? */
 420  432          cmpq    $0, %rax                /* skip call when zero */
 421  433          jz      .nosmap
 422  434          xorq    %rax, %rax
 423  435          movq    %rax, T_USERACC(%r12)
 424  436          call    smap_disable
 425  437  .nosmap:
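
In C terms the SMAP restore above is roughly the following; t_useracc and smap_disable() are the names used by the assembly, with types elided for brevity:

    if (t->t_useracc != 0) {
            t->t_useracc = 0;       /* clear the indicator out of paranoia */
            smap_disable();         /* regain access to userland pages */
    }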
 426  438  
 427  439          /*
 428  440           * Restore non-volatile registers, then have spl0 return to the
 429  441           * resuming thread's PC after first setting the priority as low as
 430  442           * possible and blocking all interrupt threads that may be active.
 431  443           */
 432      -        movq    %r13, %rax      /* save return address */       
      444 +        movq    %r13, %rax      /* save return address */
 433  445          RESTORE_REGS(%r11)
 434  446          pushq   %rax            /* push return address for spl0() */
 435  447          call    __dtrace_probe___sched_on__cpu
 436  448          jmp     spl0
 437  449  
 438  450  resume_return:
 439  451          /*
 440  452           * Remove stack frame created in SAVE_REGS()
 441  453           */
 442  454          addq    $CLONGSIZE, %rsp
(41 lines elided)
 484  496           * Call savepctx if process has installed context ops.
 485  497           */
 486  498          movl    T_PROCP(%esi), %eax     /* %eax = proc */
 487  499          cmpl    $0, P_PCTX(%eax)        /* should current thread savectx? */
 488  500          je      .nosavepctx             /* skip call when zero */
 489  501          pushl   %eax                    /* arg = proc pointer */
 490  502          call    savepctx                /* call ctx ops */
 491  503          addl    $4, %esp
 492  504  .nosavepctx:
 493  505  
 494      -        /* 
      506 +        /*
 495  507           * Temporarily switch to the idle thread's stack
 496  508           */
 497  509          movl    CPU_IDLE_THREAD(%ebx), %eax     /* idle thread pointer */
 498  510  
 499      -        /* 
      511 +        /*
 500  512           * Set the idle thread as the current thread
 501  513           */
 502  514          movl    T_SP(%eax), %esp        /* It is safe to set esp */
 503  515          movl    %eax, CPU_THREAD(%ebx)
 504  516  
 505  517          /* switch in the hat context for the new thread */
 506  518          GET_THREAD_HATP(%ecx, %edi, %ecx)
 507  519          pushl   %ecx
 508  520          call    hat_switch
 509  521          addl    $4, %esp
 510      -        
 511      -        /* 
      522 +
      523 +        /*
 512  524           * Clear and unlock previous thread's t_lock
 513  525           * to allow it to be dispatched by another processor.
 514  526           */
 515  527          movb    $0, T_LOCK(%esi)
 516  528  
 517  529          /*
 518  530           * IMPORTANT: Registers at this point must be:
 519  531           *       %edi = new thread
 520  532           *
 521  533           * Here we are in the idle thread, have dropped the old thread.
(144 lines elided)
 666  678          movq    %cr0, %rax
 667  679          testq   $CR0_TS, %rax
 668  680          jnz     .zfpu_disabled          /* if TS already set, nothing to do */
 669  681          fninit                          /* init fpu & discard pending error */
 670  682          orq     $CR0_TS, %rax
 671  683          movq    %rax, %cr0
 672  684  .zfpu_disabled:
 673  685  
 674  686  #endif  /* __xpv */
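
A hedged C sketch of the FPU cleanup above: getcr0()/setcr0() and CR0_TS are real illumos names, and the fninit is written as inline assembly purely for illustration.

    ulong_t cr0 = getcr0();

    if ((cr0 & CR0_TS) == 0) {
            __asm__ __volatile__("fninit");   /* discard pending x87 error state */
            setcr0(cr0 | CR0_TS);             /* mark the FPU unavailable */
    }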
 675  687  
 676      -        /* 
      688 +        /*
 677  689           * Temporarily switch to the idle thread's stack so that the zombie
 678  690           * thread's stack can be reclaimed by the reaper.
 679  691           */
 680  692          movq    %gs:CPU_IDLE_THREAD, %rax /* idle thread pointer */
 681  693          movq    T_SP(%rax), %rsp        /* get onto idle thread stack */
 682  694  
 683  695          /*
 684  696           * Sigh. If the idle thread has never run thread_start()
 685  697           * then t_sp is mis-aligned by thread_load().
 686  698           */
 687  699          andq    $_BITNOT(STACK_ALIGN-1), %rsp
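
The andq above is the usual power-of-two round-down; in C, with STACK_ALIGN as used by the assembly:

    /* Round the borrowed idle-thread stack pointer down to STACK_ALIGN. */
    sp &= ~((uintptr_t)STACK_ALIGN - 1);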
 688  700  
 689      -        /* 
      701 +        /*
 690  702           * Set the idle thread as the current thread.
 691  703           */
 692  704          movq    %rax, %gs:CPU_THREAD
 693  705  
 694  706          /* switch in the hat context for the new thread */
 695  707          GET_THREAD_HATP(%rdi, %r12, %r11)
 696  708          call    hat_switch
 697  709  
 698      -        /* 
      710 +        /*
 699  711           * Put the zombie on death-row.
 700  712           */
 701  713          movq    %r13, %rdi
 702  714          call    reapq_add
 703  715  
 704  716          jmp     _resume_from_idle       /* finish job of resume */
 705  717  
 706  718  resume_from_zombie_return:
 707  719          RESTORE_REGS(%r11)              /* restore non-volatile registers */
 708  720          call    __dtrace_probe___sched_on__cpu
(27 lines elided)
 736  748          /* clean up the fp unit. It might be left enabled */
 737  749  
 738  750          movl    %cr0, %eax
 739  751          testl   $CR0_TS, %eax
 740  752          jnz     .zfpu_disabled          /* if TS already set, nothing to do */
 741  753          fninit                          /* init fpu & discard pending error */
 742  754          orl     $CR0_TS, %eax
 743  755          movl    %eax, %cr0
 744  756  .zfpu_disabled:
 745  757  
 746      -        /* 
      758 +        /*
 747  759           * Temporarily switch to the idle thread's stack so that the zombie
 748  760           * thread's stack can be reclaimed by the reaper.
 749  761           */
 750  762          movl    %gs:CPU_IDLE_THREAD, %eax /* idle thread pointer */
 751  763          movl    T_SP(%eax), %esp        /* get onto idle thread stack */
 752  764  
 753      -        /* 
      765 +        /*
 754  766           * Set the idle thread as the current thread.
 755  767           */
 756  768          movl    %eax, %gs:CPU_THREAD
 757  769  
 758  770          /*
 759  771           * switch in the hat context for the new thread
 760  772           */
 761  773          GET_THREAD_HATP(%ecx, %edi, %ecx)
 762  774          pushl   %ecx
 763  775          call    hat_switch
 764  776          addl    $4, %esp
 765  777  
 766      -        /* 
      778 +        /*
 767  779           * Put the zombie on death-row.
 768  780           */
 769  781          pushl   %esi
 770  782          call    reapq_add
 771  783          addl    $4, %esp
 772  784          jmp     _resume_from_idle       /* finish job of resume */
 773  785  
 774  786  resume_from_zombie_return:
 775  787          RESTORE_REGS(%ecx)              /* restore non-volatile registers */
 776  788          call    __dtrace_probe___sched_on__cpu
(30 lines elided)
 807  819           * %r12 = t (new thread) when done
 808  820           */
 809  821          SAVE_REGS(%rax, %r11)
 810  822  
 811  823          movq    %gs:CPU_THREAD, %r13    /* %r13 = curthread */
 812  824          movq    %r12, %gs:CPU_THREAD    /* set CPU's thread pointer */
 813  825          mfence                          /* synchronize with mutex_exit() */
 814  826          movq    T_SP(%r12), %rsp        /* restore resuming thread's sp */
 815  827          xorl    %ebp, %ebp              /* make $<threadlist behave better */
 816  828  
 817      -        /* 
      829 +        /*
 818  830           * Unlock outgoing thread's mutex dispatched by another processor.
 819  831           */
 820  832          xorl    %eax, %eax
 821  833          xchgb   %al, T_LOCK(%r13)
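
The xchgb above is an atomic byte swap that releases the outgoing thread's lock; a rough C rendering using the atomic_swap_uchar() primitive illumos provides:

    /* Atomically drop the outgoing thread's t_lock so it can be dispatched. */
    (void) atomic_swap_uchar(&t->t_lock, 0);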
 822  834  
 823  835          STORE_INTR_START(%r12)
 824  836  
 825  837          /*
 826  838           * Restore non-volatile registers, then have spl0 return to the
 827  839           * resuming thread's PC after first setting the priority as low as
(29 lines elided)
 857  869  
 858  870  #ifdef DEBUG
 859  871          call    assert_ints_enabled     /* panics if we are cli'd */
 860  872  #endif
 861  873          movl    %gs:CPU_THREAD, %esi    /* %esi = curthread */
 862  874          movl    %edi, %gs:CPU_THREAD    /* set CPU's thread pointer */
 863  875          mfence                          /* synchronize with mutex_exit() */
 864  876          movl    T_SP(%edi), %esp        /* restore resuming thread's sp */
 865  877          xorl    %ebp, %ebp              /* make $<threadlist behave better */
 866  878  
 867      -        /* 
      879 +        /*
 868  880           * Unlock outgoing thread's mutex dispatched by another processor.
 869  881           */
 870  882          xorl    %eax,%eax
 871  883          xchgb   %al, T_LOCK(%esi)
 872  884  
 873  885          STORE_INTR_START(%edi)
 874  886  
 875  887          /*
 876  888           * Restore non-volatile registers, then have spl0 return to the
 877  889           * resuming thread's PC after first setting the priority as low as
(54 lines elided)