10924 Need mitigation of L1TF (CVE-2018-3646)
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Peter Tribble <peter.tribble@gmail.com>


   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 /*
  27  * Copyright (c) 2018 Joyent, Inc.
  28  */
  29 
  30 /*
  31  * Process switching routines.
  32  */
  33 
  34 #if defined(__lint)
  35 #include <sys/thread.h>
  36 #include <sys/systm.h>
  37 #include <sys/time.h>
  38 #else   /* __lint */
  39 #include "assym.h"
  40 #endif  /* __lint */
  41 
  42 #include <sys/asm_linkage.h>
  43 #include <sys/asm_misc.h>
  44 #include <sys/regset.h>
  45 #include <sys/privregs.h>
  46 #include <sys/stack.h>
  47 #include <sys/segments.h>
  48 #include <sys/psw.h>
  49 


  50 /*
  51  * resume(thread_id_t t);
  52  *
  53  * a thread can only run on one processor at a time. there
  54  * exists a window on MPs where the current thread on one
  55  * processor is capable of being dispatched by another processor.
  56  * some overlap between outgoing and incoming threads can happen
  57  * when they are the same thread. in this case where the threads
  58  * are the same, resume() on one processor will spin on the incoming
  59  * thread until resume() on the other processor has finished with
  60  * the outgoing thread.
  61  *
  62  * The MMU context changes when the resuming thread resides in a different
  63  * process.  Kernel threads are known by resume to reside in process 0.
  64  * The MMU context, therefore, only changes when resuming a thread in
  65  * a process different from curproc.
  66  *
  67  * resume_from_intr() is called when the thread being resumed was not
  68  * passivated by resume (e.g. was interrupted).  This means that the
  69  * resume lock is already held and that a restore context is not needed.
  70  * Also, the MMU context is not changed on the resume in this case.
  71  *
  72  * resume_from_zombie() is the same as resume except the calling thread
  73  * is a zombie and must be put on the deathrow list after the CPU is
  74  * off the stack.
  75  */
  76 
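The cross-CPU handoff described above is easiest to picture in C. The sketch below is illustrative only: the sketch_* names stand in for kthread_t and its t_lock dispatcher byte, and the loop mirrors the lock btsl / cmpb / pause spin that _resume_from_idle performs further down in this file.

    #include <stdatomic.h>

    /* Stand-in for kthread_t; t_lock is the dispatcher lock byte. */
    typedef struct {
        atomic_uchar t_lock;            /* 0 = free, 1 = held by some CPU */
    } sketch_thread_t;

    /*
     * Spin until whichever CPU last ran the thread releases t_lock,
     * then take it; only after that is it safe to run the thread here.
     */
    static void
    sketch_thread_handoff(sketch_thread_t *t)
    {
        for (;;) {
            unsigned char unheld = 0;
            if (atomic_compare_exchange_weak(&t->t_lock, &unheld, 1))
                return;                 /* lock acquired */
            while (atomic_load(&t->t_lock) != 0)
                ;                       /* read-only spin; pause goes here */
        }
    }

Retesting with a plain load before retrying the locked operation is the usual test-and-test-and-set idea, which is why the assembly retests T_LOCK with cmpb/pause before going back to the locked btsl.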
  77 #if !defined(__lint)
  78 
  79 #if LWP_PCB_FPU != 0
  80 #error LWP_PCB_FPU MUST be defined as 0 for code in swtch.s to work
  81 #endif  /* LWP_PCB_FPU != 0 */
  82 
  83 #endif  /* !__lint */
  84 
  85 #if defined(__amd64)
  86 
  87 /*
  88  * Save non-volatile regs other than %rsp (%rbx, %rbp, and %r12 - %r15)
  89  *
  90  * The stack frame must be created before the save of %rsp so that tracebacks
  91  * of swtch()ed-out processes show the process as having last called swtch().
  92  */
  93 #define SAVE_REGS(thread_t, retaddr)                    \
  94         movq    %rbp, T_RBP(thread_t);                  \
  95         movq    %rbx, T_RBX(thread_t);                  \
  96         movq    %r12, T_R12(thread_t);                  \
  97         movq    %r13, T_R13(thread_t);                  \
  98         movq    %r14, T_R14(thread_t);                  \
  99         movq    %r15, T_R15(thread_t);                  \
 100         pushq   %rbp;                                   \
 101         movq    %rsp, %rbp;                             \
 102         movq    %rsp, T_SP(thread_t);                   \
 103         movq    retaddr, T_PC(thread_t);                \
 104         movq    %rdi, %r12;                             \
 105         call    __dtrace_probe___sched_off__cpu
 106 


 136         movq    %rax, %r14;
 137 
 138 /*
 139  * If we are resuming an interrupt thread, store a timestamp in the thread
 140  * structure.  If an interrupt occurs between tsc_read() and its subsequent
 141  * store, the timestamp will be stale by the time it is stored.  We can detect
 142  * this by doing a compare-and-swap on the thread's timestamp, since any
 143  * interrupt occurring in this window will put a new timestamp in the thread's
 144  * t_intr_start field.
 145  */
 146 #define STORE_INTR_START(thread_t)                      \
 147         testw   $T_INTR_THREAD, T_FLAGS(thread_t);      \
 148         jz      1f;                                     \
 149 0:                                                      \
 150         TSC_READ();                                     \
 151         movq    T_INTR_START(thread_t), %rax;           \
 152         cmpxchgq %r14, T_INTR_START(thread_t);          \
 153         jnz     0b;                                     \
 154 1:
 155 
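The same retry can be written as a hedged C sketch of STORE_INTR_START (not the kernel's code path; C11 atomics and the compiler's __rdtsc() intrinsic stand in for cmpxchgq and TSC_READ):

    #include <stdatomic.h>
    #include <stdint.h>
    #include <x86intrin.h>              /* __rdtsc(); assumes an x86 toolchain */

    /*
     * Publish a fresh TSC reading into t_intr_start, unless an interrupt
     * stored a newer one between the read and the compare-and-swap, in
     * which case loop and try again with a newer reading.
     */
    static void
    sketch_store_intr_start(_Atomic uint64_t *t_intr_start)
    {
        uint64_t seen, now;

        do {
            now = __rdtsc();
            seen = atomic_load(t_intr_start);
        } while (!atomic_compare_exchange_strong(t_intr_start, &seen, now));
    }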
 156 #elif defined (__i386)
 157 
 158 /*
 159  * Save non-volatile registers (%ebp, %esi, %edi and %ebx)
 160  *
 161  * The stack frame must be created before the save of %esp so that tracebacks
 162  * of swtch()ed-out processes show the process as having last called swtch().
 163  */
 164 #define SAVE_REGS(thread_t, retaddr)                    \
 165         movl    %ebp, T_EBP(thread_t);                  \
 166         movl    %ebx, T_EBX(thread_t);                  \
 167         movl    %esi, T_ESI(thread_t);                  \
 168         movl    %edi, T_EDI(thread_t);                  \
 169         pushl   %ebp;                                   \
 170         movl    %esp, %ebp;                             \
 171         movl    %esp, T_SP(thread_t);                   \
 172         movl    retaddr, T_PC(thread_t);                \
 173         movl    8(%ebp), %edi;                          \
 174         pushl   %edi;                                   \
 175         call    __dtrace_probe___sched_off__cpu;        \
 176         addl    $CLONGSIZE, %esp
 177 
 178 /*
 179  * Restore non-volatile registers (%ebp, %esi, %edi and %ebx)
 180  *
 181  * We don't do a 'leave,' because reloading %rsp/%rbp from the label_t
 182  * already has the effect of putting the stack back the way it was when
 183  * we came in.
 184  */
 185 #define RESTORE_REGS(scratch_reg)                       \
 186         movl    %gs:CPU_THREAD, scratch_reg;            \
 187         movl    T_EBP(scratch_reg), %ebp;               \
 188         movl    T_EBX(scratch_reg), %ebx;               \
 189         movl    T_ESI(scratch_reg), %esi;               \
 190         movl    T_EDI(scratch_reg), %edi
 191 
 192 /*
 193  * Get pointer to a thread's hat structure
 194  */
 195 #define GET_THREAD_HATP(hatp, thread_t, scratch_reg)    \
 196         movl    T_PROCP(thread_t), hatp;                \
 197         movl    P_AS(hatp), scratch_reg;                \
 198         movl    A_HAT(scratch_reg), hatp
 199 
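In C this macro is plain pointer chasing from the thread to the hat of its process's address space. A sketch with stand-in types, for orientation only (the real definitions live in the usual kernel headers):

    struct sketch_hat;
    struct sketch_as     { struct sketch_hat *a_hat; };
    struct sketch_proc   { struct sketch_as *p_as; };
    struct sketch_thread { struct sketch_proc *t_procp; };

    /* GET_THREAD_HATP in C: hatp = thread->t_procp->p_as->a_hat */
    static struct sketch_hat *
    sketch_thread_hatp(const struct sketch_thread *t)
    {
        return (t->t_procp->p_as->a_hat);
    }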
 200 /*
 201  * If we are resuming an interrupt thread, store a timestamp in the thread
 202  * structure.  If an interrupt occurs between tsc_read() and its subsequent
 203  * store, the timestamp will be stale by the time it is stored.  We can detect
 204  * this by doing a compare-and-swap on the thread's timestamp, since any
 205  * interrupt occurring in this window will put a new timestamp in the thread's
 206  * t_intr_start field.
 207  */
 208 #define STORE_INTR_START(thread_t)                      \
 209         testw   $T_INTR_THREAD, T_FLAGS(thread_t);      \
 210         jz      1f;                                     \
 211         pushl   %ecx;                                   \
 212 0:                                                      \
 213         pushl   T_INTR_START(thread_t);                 \
 214         pushl   T_INTR_START+4(thread_t);               \
 215         call    tsc_read;                               \
 216         movl    %eax, %ebx;                             \
 217         movl    %edx, %ecx;                             \
 218         popl    %edx;                                   \
 219         popl    %eax;                                   \
 220         cmpxchg8b T_INTR_START(thread_t);               \
 221         jnz     0b;                                     \
 222         popl    %ecx;                                   \
 223 1:
 224 
 225 #endif  /* __amd64 */
 226 
 227 #if defined(__lint)
 228 
 229 /* ARGSUSED */
 230 void
 231 resume(kthread_t *t)
 232 {}
 233 
 234 #else   /* __lint */
 235 
 236 #if defined(__amd64)
 237 
 238         .global kpti_enable
 239 
 240         ENTRY(resume)
 241         movq    %gs:CPU_THREAD, %rax
 242         leaq    resume_return(%rip), %r11
 243 
 244         /*
 245          * Deal with SMAP here. A thread may be switched out at any point while
 246          * it is executing. The thread could be under on_fault() or it could be
 247          * pre-empted while performing a copy interruption. If this happens and
 248          * we're not in the context of an interrupt which happens to handle
 249          * saving and restoring rflags correctly, we may lose our SMAP related
 250          * state.
 251          *
 252          * To handle this, as part of being switched out, we first save whether
 253          * or not userland access is allowed ($PS_ACHK in rflags) and store that
 254          * in t_useracc on the kthread_t and unconditionally enable SMAP to
 255          * protect the system.
 256          *
 257          * Later, when the thread finishes resuming, we potentially disable smap


 419         jz      .norestorepctx
 420         movq    %rcx, %rdi
 421         call    restorepctx
 422 .norestorepctx:
 423 
 424         STORE_INTR_START(%r12)
 425 
 426         /*
 427          * If we came into swtch with the ability to access userland pages, go
 428          * ahead and restore that fact by disabling SMAP.  Clear the indicator
 429          * flag out of paranoia.
 430          */
 431         movq    T_USERACC(%r12), %rax   /* should we disable smap? */
 432         cmpq    $0, %rax                /* skip call when zero */
 433         jz      .nosmap
 434         xorq    %rax, %rax
 435         movq    %rax, T_USERACC(%r12)
 436         call    smap_disable
 437 .nosmap:
 438 
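The .nosmap sequence reads naturally as the C sketch below. smap_disable() is the routine the assembly actually calls; the rest of the names are illustrative.

    #include <stdint.h>

    extern void smap_disable(void);     /* kernel routine; re-allows user access */

    /*
     * If the thread was switched out with user access allowed (t_useracc
     * non-zero), clear the indicator and drop the SMAP protection again
     * before the thread resumes.
     */
    static void
    sketch_restore_useracc(uint64_t *t_useracc)
    {
        if (*t_useracc != 0) {
            *t_useracc = 0;
            smap_disable();
        }
    }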


 439         /*
 440          * Restore non-volatile registers, then have spl0 return to the
 441          * resuming thread's PC after first setting the priority as low as
 442          * possible and blocking all interrupt threads that may be active.
 443          */
 444         movq    %r13, %rax      /* save return address */
 445         RESTORE_REGS(%r11)
 446         pushq   %rax            /* push return address for spl0() */
 447         call    __dtrace_probe___sched_on__cpu
 448         jmp     spl0
 449 
 450 resume_return:
 451         /*
 452          * Remove stack frame created in SAVE_REGS()
 453          */
 454         addq    $CLONGSIZE, %rsp
 455         ret
 456         SET_SIZE(_resume_from_idle)
 457         SET_SIZE(resume)
 458 
 459 #elif defined (__i386)
 460 
 461         ENTRY(resume)
 462         movl    %gs:CPU_THREAD, %eax
 463         movl    $resume_return, %ecx
 464 
 465         /*
 466          * Save non-volatile registers, and set return address for current
 467          * thread to resume_return.
 468          *
 469          * %edi = t (new thread) when done.
 470          */
 471         SAVE_REGS(%eax,  %ecx)
 472 
 473         LOADCPU(%ebx)                   /* %ebx = CPU */
 474         movl    CPU_THREAD(%ebx), %esi  /* %esi = curthread */
 475 
 476 #ifdef DEBUG
 477         call    assert_ints_enabled     /* panics if we are cli'd */
 478 #endif
 479         /*
 480          * Call savectx if thread has installed context ops.
 481          *
 482          * Note that if we have floating point context, the save op
 483          * (either fpsave_begin or fpxsave_begin) will issue the
 484          * async save instruction (fnsave or fxsave respectively)
 485          * that we fwait for below.
 486          */
 487         movl    T_CTX(%esi), %eax       /* should current thread savectx? */
 488         testl   %eax, %eax
 489         jz      .nosavectx              /* skip call when zero */
 490         pushl   %esi                    /* arg = thread pointer */
 491         call    savectx                 /* call ctx ops */
 492         addl    $4, %esp                /* restore stack pointer */
 493 .nosavectx:
 494 
 495         /*
 496          * Call savepctx if process has installed context ops.
 497          */
 498         movl    T_PROCP(%esi), %eax     /* %eax = proc */
  499         cmpl    $0, P_PCTX(%eax)        /* should current thread savepctx? */
 500         je      .nosavepctx             /* skip call when zero */
 501         pushl   %eax                    /* arg = proc pointer */
 502         call    savepctx                /* call ctx ops */
 503         addl    $4, %esp
 504 .nosavepctx:
 505 
 506         /*
 507          * Temporarily switch to the idle thread's stack
 508          */
 509         movl    CPU_IDLE_THREAD(%ebx), %eax     /* idle thread pointer */
 510 
 511         /*
 512          * Set the idle thread as the current thread
 513          */
 514         movl    T_SP(%eax), %esp        /* It is safe to set esp */
 515         movl    %eax, CPU_THREAD(%ebx)
 516 
 517         /* switch in the hat context for the new thread */
 518         GET_THREAD_HATP(%ecx, %edi, %ecx)
 519         pushl   %ecx
 520         call    hat_switch
 521         addl    $4, %esp
 522 
 523         /*
 524          * Clear and unlock previous thread's t_lock
 525          * to allow it to be dispatched by another processor.
 526          */
 527         movb    $0, T_LOCK(%esi)
 528 
 529         /*
 530          * IMPORTANT: Registers at this point must be:
 531          *       %edi = new thread
 532          *
 533          * Here we are in the idle thread, have dropped the old thread.
 534          */
 535         ALTENTRY(_resume_from_idle)
 536         /*
 537          * spin until dispatched thread's mutex has
 538          * been unlocked. this mutex is unlocked when
 539          * it becomes safe for the thread to run.
 540          */
 541 .L4:
 542         lock
 543         btsl    $0, T_LOCK(%edi) /* lock new thread's mutex */
 544         jc      .L4_2                   /* lock did not succeed */
 545 
 546         /*
 547          * Fix CPU structure to indicate new running thread.
 548          * Set pointer in new thread to the CPU structure.
 549          */
 550         LOADCPU(%esi)                   /* load current CPU pointer */
 551         movl    T_STACK(%edi), %eax     /* here to use v pipeline of */
  552                                         /* Pentium. Used a few lines below */
 553         cmpl    %esi, T_CPU(%edi)
 554         jne     .L5_2
 555 .L5_1:
 556         /*
 557          * Setup esp0 (kernel stack) in TSS to curthread's stack.
 558          * (Note: Since we don't have saved 'regs' structure for all
 559          *        the threads we can't easily determine if we need to
  560  *        change esp0. So, we simply change the esp0 to the bottom
 561          *        of the thread stack and it will work for all cases.)
 562          */
 563         movl    CPU_TSS(%esi), %ecx
 564         addl    $REGSIZE+MINFRAME, %eax /* to the bottom of thread stack */
 565 #if !defined(__xpv)
 566         movl    %eax, TSS_ESP0(%ecx)
 567 #else
 568         pushl   %eax
 569         pushl   $KDS_SEL
 570         call    HYPERVISOR_stack_switch
 571         addl    $8, %esp
 572 #endif  /* __xpv */
 573 
 574         movl    %edi, CPU_THREAD(%esi)  /* set CPU's thread pointer */
 575         mfence                          /* synchronize with mutex_exit() */
 576         xorl    %ebp, %ebp              /* make $<threadlist behave better */
 577         movl    T_LWP(%edi), %eax       /* set associated lwp to  */
 578         movl    %eax, CPU_LWP(%esi)     /* CPU's lwp ptr */
 579 
 580         movl    T_SP(%edi), %esp        /* switch to outgoing thread's stack */
 581         movl    T_PC(%edi), %esi        /* saved return addr */
 582 
 583         /*
 584          * Call restorectx if context ops have been installed.
 585          */
 586         movl    T_CTX(%edi), %eax       /* should resumed thread restorectx? */
 587         testl   %eax, %eax
 588         jz      .norestorectx           /* skip call when zero */
 589         pushl   %edi                    /* arg = thread pointer */
 590         call    restorectx              /* call ctx ops */
 591         addl    $4, %esp                /* restore stack pointer */
 592 .norestorectx:
 593 
 594         /*
 595          * Call restorepctx if context ops have been installed for the proc.
 596          */
 597         movl    T_PROCP(%edi), %eax
 598         cmpl    $0, P_PCTX(%eax)
 599         je      .norestorepctx
 600         pushl   %eax                    /* arg = proc pointer */
 601         call    restorepctx
 602         addl    $4, %esp                /* restore stack pointer */
 603 .norestorepctx:
 604 
 605         STORE_INTR_START(%edi)
 606 
 607         /*
 608          * Restore non-volatile registers, then have spl0 return to the
 609          * resuming thread's PC after first setting the priority as low as
 610          * possible and blocking all interrupt threads that may be active.
 611          */
 612         movl    %esi, %eax              /* save return address */
 613         RESTORE_REGS(%ecx)
 614         pushl   %eax                    /* push return address for spl0() */
 615         call    __dtrace_probe___sched_on__cpu
 616         jmp     spl0
 617 
 618 resume_return:
 619         /*
 620          * Remove stack frame created in SAVE_REGS()
 621          */
 622         addl    $CLONGSIZE, %esp
 623         ret
 624 
 625 .L4_2:
 626         pause
 627         cmpb    $0, T_LOCK(%edi)
 628         je      .L4
 629         jmp     .L4_2
 630 
 631 .L5_2:
 632         /* cp->cpu_stats.sys.cpumigrate++ */
 633         addl    $1, CPU_STATS_SYS_CPUMIGRATE(%esi)
 634         adcl    $0, CPU_STATS_SYS_CPUMIGRATE+4(%esi)
 635         movl    %esi, T_CPU(%edi)       /* set new thread's CPU pointer */
 636         jmp     .L5_1
 637 
 638         SET_SIZE(_resume_from_idle)
 639         SET_SIZE(resume)
 640 
 641 #endif  /* __amd64 */
 642 #endif  /* __lint */
 643 
 644 #if defined(__lint)
 645 
 646 /* ARGSUSED */
 647 void
 648 resume_from_zombie(kthread_t *t)
 649 {}
 650 
 651 #else   /* __lint */
 652 
 653 #if defined(__amd64)
 654 
 655         ENTRY(resume_from_zombie)
 656         movq    %gs:CPU_THREAD, %rax
 657         leaq    resume_from_zombie_return(%rip), %r11
 658 
 659         /*
 660          * Save non-volatile registers, and set return address for current
 661          * thread to resume_from_zombie_return.
 662          *
 663          * %r12 = t (new thread) when done
 664          */
 665         SAVE_REGS(%rax, %r11)
 666 
 667         movq    %gs:CPU_THREAD, %r13    /* %r13 = curthread */
 668 
 669         /* clean up the fp unit. It might be left enabled */
 670 
 671 #if defined(__xpv)              /* XXPV XXtclayton */
 672         /*
 673          * Remove this after bringup.
 674          * (Too many #gp's for an instrumented hypervisor.)


 709 
 710         /*
 711          * Put the zombie on death-row.
 712          */
 713         movq    %r13, %rdi
 714         call    reapq_add
 715 
 716         jmp     _resume_from_idle       /* finish job of resume */
 717 
 718 resume_from_zombie_return:
 719         RESTORE_REGS(%r11)              /* restore non-volatile registers */
 720         call    __dtrace_probe___sched_on__cpu
 721 
 722         /*
 723          * Remove stack frame created in SAVE_REGS()
 724          */
 725         addq    $CLONGSIZE, %rsp
 726         ret
 727         SET_SIZE(resume_from_zombie)
 728 
 729 #elif defined (__i386)
 730 
 731         ENTRY(resume_from_zombie)
 732         movl    %gs:CPU_THREAD, %eax
 733         movl    $resume_from_zombie_return, %ecx
 734 
 735         /*
 736          * Save non-volatile registers, and set return address for current
 737          * thread to resume_from_zombie_return.
 738          *
 739          * %edi = t (new thread) when done.
 740          */
 741         SAVE_REGS(%eax, %ecx)
 742 
 743 #ifdef DEBUG
 744         call    assert_ints_enabled     /* panics if we are cli'd */
 745 #endif
 746         movl    %gs:CPU_THREAD, %esi    /* %esi = curthread */
 747 
 748         /* clean up the fp unit. It might be left enabled */
 749 
 750         movl    %cr0, %eax
 751         testl   $CR0_TS, %eax
 752         jnz     .zfpu_disabled          /* if TS already set, nothing to do */
 753         fninit                          /* init fpu & discard pending error */
 754         orl     $CR0_TS, %eax
 755         movl    %eax, %cr0
 756 .zfpu_disabled:
 757 
 758         /*
 759          * Temporarily switch to the idle thread's stack so that the zombie
 760          * thread's stack can be reclaimed by the reaper.
 761          */
 762         movl    %gs:CPU_IDLE_THREAD, %eax /* idle thread pointer */
 763         movl    T_SP(%eax), %esp        /* get onto idle thread stack */
 764 
 765         /*
 766          * Set the idle thread as the current thread.
 767          */
 768         movl    %eax, %gs:CPU_THREAD
 769 
 770         /*
 771          * switch in the hat context for the new thread
 772          */
 773         GET_THREAD_HATP(%ecx, %edi, %ecx)
 774         pushl   %ecx
 775         call    hat_switch
 776         addl    $4, %esp
 777 
 778         /*
 779          * Put the zombie on death-row.
 780          */
 781         pushl   %esi
 782         call    reapq_add
 783         addl    $4, %esp
 784         jmp     _resume_from_idle       /* finish job of resume */
 785 
 786 resume_from_zombie_return:
 787         RESTORE_REGS(%ecx)              /* restore non-volatile registers */
 788         call    __dtrace_probe___sched_on__cpu
 789 
 790         /*
 791          * Remove stack frame created in SAVE_REGS()
 792          */
 793         addl    $CLONGSIZE, %esp
 794         ret
 795         SET_SIZE(resume_from_zombie)
 796 
 797 #endif  /* __amd64 */
 798 #endif  /* __lint */
 799 
 800 #if defined(__lint)
 801 
 802 /* ARGSUSED */
 803 void
 804 resume_from_intr(kthread_t *t)
 805 {}
 806 
 807 #else   /* __lint */
 808 
 809 #if defined(__amd64)
 810 
 811         ENTRY(resume_from_intr)
 812         movq    %gs:CPU_THREAD, %rax
 813         leaq    resume_from_intr_return(%rip), %r11
 814 
 815         /*
 816          * Save non-volatile registers, and set return address for current
 817          * thread to resume_from_intr_return.
 818          *
 819          * %r12 = t (new thread) when done
 820          */
 821         SAVE_REGS(%rax, %r11)
 822 
 823         movq    %gs:CPU_THREAD, %r13    /* %r13 = curthread */
 824         movq    %r12, %gs:CPU_THREAD    /* set CPU's thread pointer */
 825         mfence                          /* synchronize with mutex_exit() */
 826         movq    T_SP(%r12), %rsp        /* restore resuming thread's sp */
 827         xorl    %ebp, %ebp              /* make $<threadlist behave better */
 828 
 829         /*
  830          * Unlock the outgoing thread's mutex so it can be dispatched by another processor.
 831          */
 832         xorl    %eax, %eax
 833         xchgb   %al, T_LOCK(%r13)
 834 
 835         STORE_INTR_START(%r12)
 836 


 837         /*
 838          * Restore non-volatile registers, then have spl0 return to the
 839          * resuming thread's PC after first setting the priority as low as
 840          * possible and blocking all interrupt threads that may be active.
 841          */
 842         movq    T_PC(%r12), %rax        /* saved return addr */
 843         RESTORE_REGS(%r11);
 844         pushq   %rax                    /* push return address for spl0() */
 845         call    __dtrace_probe___sched_on__cpu
 846         jmp     spl0
 847 
 848 resume_from_intr_return:
 849         /*
 850          * Remove stack frame created in SAVE_REGS()
 851          */
 852         addq    $CLONGSIZE, %rsp
 853         ret
 854         SET_SIZE(resume_from_intr)
 855 
 856 #elif defined (__i386)
 857 
 858         ENTRY(resume_from_intr)
 859         movl    %gs:CPU_THREAD, %eax
 860         movl    $resume_from_intr_return, %ecx
 861 
 862         /*
 863          * Save non-volatile registers, and set return address for current
 864          * thread to resume_return.
 865          *
 866          * %edi = t (new thread) when done.
 867          */
 868         SAVE_REGS(%eax, %ecx)
 869 
 870 #ifdef DEBUG
 871         call    assert_ints_enabled     /* panics if we are cli'd */
 872 #endif
 873         movl    %gs:CPU_THREAD, %esi    /* %esi = curthread */
 874         movl    %edi, %gs:CPU_THREAD    /* set CPU's thread pointer */
 875         mfence                          /* synchronize with mutex_exit() */
 876         movl    T_SP(%edi), %esp        /* restore resuming thread's sp */
 877         xorl    %ebp, %ebp              /* make $<threadlist behave better */
 878 
 879         /*
  880          * Unlock the outgoing thread's mutex so it can be dispatched by another processor.
 881          */
 882         xorl    %eax,%eax
 883         xchgb   %al, T_LOCK(%esi)
 884 
 885         STORE_INTR_START(%edi)
 886 
 887         /*
 888          * Restore non-volatile registers, then have spl0 return to the
 889          * resuming thread's PC after first setting the priority as low as
 890          * possible and blocking all interrupt threads that may be active.
 891          */
 892         movl    T_PC(%edi), %eax        /* saved return addr */
 893         RESTORE_REGS(%ecx)
 894         pushl   %eax                    /* push return address for spl0() */
 895         call    __dtrace_probe___sched_on__cpu
 896         jmp     spl0
 897 
 898 resume_from_intr_return:
 899         /*
 900          * Remove stack frame created in SAVE_REGS()
 901          */
 902         addl    $CLONGSIZE, %esp
 903         ret
 904         SET_SIZE(resume_from_intr)
 905 
 906 #endif  /* __amd64 */
 907 #endif /* __lint */
 908 
 909 #if defined(__lint)
 910 
 911 void
 912 thread_start(void)
 913 {}
 914 
 915 #else   /* __lint */
 916 
 917 #if defined(__amd64)
 918 
 919         ENTRY(thread_start)
 920         popq    %rax            /* start() */
 921         popq    %rdi            /* arg */
 922         popq    %rsi            /* len */
 923         movq    %rsp, %rbp
 924         call    *%rax
 925         call    thread_exit     /* destroy thread if it returns. */
 926         /*NOTREACHED*/
 927         SET_SIZE(thread_start)
 928 
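For orientation, a C-level view of this entry point: the three popped values were placed on the new thread's stack when it was set up, and thread_exit() runs only if the start routine ever returns. The function type is an assumption read off the pop order above; it is a sketch, not the kernel's declaration.

    #include <stddef.h>

    extern void thread_exit(void);      /* kernel routine; does not return */

    /* Assumed shape of the start routine: start(arg, len). */
    typedef void (*sketch_start_fn_t)(void *arg, size_t len);

    static void
    sketch_thread_start(sketch_start_fn_t start, void *arg, size_t len)
    {
        start(arg, len);
        thread_exit();                  /* destroy the thread if start returns */
    }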
 929 #elif defined(__i386)
 930 
 931         ENTRY(thread_start)
 932         popl    %eax
 933         movl    %esp, %ebp
 934         addl    $8, %ebp
 935         call    *%eax
 936         addl    $8, %esp
 937         call    thread_exit     /* destroy thread if it returns. */
 938         /*NOTREACHED*/
 939         SET_SIZE(thread_start)
 940 
 941 #endif  /* __i386 */
 942 
 943 #endif  /* __lint */


   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 /*
  27  * Copyright 2019 Joyent, Inc.
  28  */
  29 
  30 /*
  31  * Process switching routines.
  32  */
  33 
  34 #include <sys/asm_linkage.h>
  35 #include <sys/asm_misc.h>
  36 #include <sys/regset.h>
  37 #include <sys/privregs.h>
  38 #include <sys/stack.h>
  39 #include <sys/segments.h>
  40 #include <sys/psw.h>
  41 
  42 #include "assym.h"
  43 
  44 /*
  45  * resume(thread_id_t t);
  46  *
  47  * a thread can only run on one processor at a time. there
  48  * exists a window on MPs where the current thread on one
  49  * processor is capable of being dispatched by another processor.
  50  * some overlap between outgoing and incoming threads can happen
  51  * when they are the same thread. in this case where the threads
  52  * are the same, resume() on one processor will spin on the incoming
  53  * thread until resume() on the other processor has finished with
  54  * the outgoing thread.
  55  *
  56  * The MMU context changes when the resuming thread resides in a different
  57  * process.  Kernel threads are known by resume to reside in process 0.
  58  * The MMU context, therefore, only changes when resuming a thread in
  59  * a process different from curproc.
  60  *
  61  * resume_from_intr() is called when the thread being resumed was not
  62  * passivated by resume (e.g. was interrupted).  This means that the
  63  * resume lock is already held and that a restore context is not needed.
  64  * Also, the MMU context is not changed on the resume in this case.
  65  *
  66  * resume_from_zombie() is the same as resume except the calling thread
  67  * is a zombie and must be put on the deathrow list after the CPU is
  68  * off the stack.
  69  */
  70 


  71 #if LWP_PCB_FPU != 0
  72 #error LWP_PCB_FPU MUST be defined as 0 for code in swtch.s to work
  73 #endif  /* LWP_PCB_FPU != 0 */
  74 
  75 /*
  76  * Save non-volatile regs other than %rsp (%rbx, %rbp, and %r12 - %r15)
  77  *
  78  * The stack frame must be created before the save of %rsp so that tracebacks
  79  * of swtch()ed-out processes show the process as having last called swtch().
  80  */
  81 #define SAVE_REGS(thread_t, retaddr)                    \
  82         movq    %rbp, T_RBP(thread_t);                  \
  83         movq    %rbx, T_RBX(thread_t);                  \
  84         movq    %r12, T_R12(thread_t);                  \
  85         movq    %r13, T_R13(thread_t);                  \
  86         movq    %r14, T_R14(thread_t);                  \
  87         movq    %r15, T_R15(thread_t);                  \
  88         pushq   %rbp;                                   \
  89         movq    %rsp, %rbp;                             \
  90         movq    %rsp, T_SP(thread_t);                   \
  91         movq    retaddr, T_PC(thread_t);                \
  92         movq    %rdi, %r12;                             \
  93         call    __dtrace_probe___sched_off__cpu
  94 


 124         movq    %rax, %r14;
 125 
 126 /*
 127  * If we are resuming an interrupt thread, store a timestamp in the thread
 128  * structure.  If an interrupt occurs between tsc_read() and its subsequent
 129  * store, the timestamp will be stale by the time it is stored.  We can detect
 130  * this by doing a compare-and-swap on the thread's timestamp, since any
 131  * interrupt occurring in this window will put a new timestamp in the thread's
 132  * t_intr_start field.
 133  */
 134 #define STORE_INTR_START(thread_t)                      \
 135         testw   $T_INTR_THREAD, T_FLAGS(thread_t);      \
 136         jz      1f;                                     \
 137 0:                                                      \
 138         TSC_READ();                                     \
 139         movq    T_INTR_START(thread_t), %rax;           \
 140         cmpxchgq %r14, T_INTR_START(thread_t);          \
 141         jnz     0b;                                     \
 142 1:
 143 
 144         .global kpti_enable
 145 
 146         ENTRY(resume)
 147         movq    %gs:CPU_THREAD, %rax
 148         leaq    resume_return(%rip), %r11
 149 
 150         /*
 151          * Deal with SMAP here. A thread may be switched out at any point while
 152          * it is executing. The thread could be under on_fault() or it could be
 153          * pre-empted while performing a copy interruption. If this happens and
 154          * we're not in the context of an interrupt which happens to handle
 155          * saving and restoring rflags correctly, we may lose our SMAP related
 156          * state.
 157          *
 158          * To handle this, as part of being switched out, we first save whether
 159          * or not userland access is allowed ($PS_ACHK in rflags) and store that
 160          * in t_useracc on the kthread_t and unconditionally enable SMAP to
 161          * protect the system.
 162          *
 163          * Later, when the thread finishes resuming, we potentially disable smap


 325         jz      .norestorepctx
 326         movq    %rcx, %rdi
 327         call    restorepctx
 328 .norestorepctx:
 329 
 330         STORE_INTR_START(%r12)
 331 
 332         /*
 333          * If we came into swtch with the ability to access userland pages, go
 334          * ahead and restore that fact by disabling SMAP.  Clear the indicator
 335          * flag out of paranoia.
 336          */
 337         movq    T_USERACC(%r12), %rax   /* should we disable smap? */
 338         cmpq    $0, %rax                /* skip call when zero */
 339         jz      .nosmap
 340         xorq    %rax, %rax
 341         movq    %rax, T_USERACC(%r12)
 342         call    smap_disable
 343 .nosmap:
 344 
 345         call    ht_mark
 346 
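The call ht_mark above (and the matching call in resume_from_intr) is the hook this change adds to the resume paths for the L1TF work: once the incoming thread is fully installed as curthread, the hyperthread-exclusion code is told what this CPU is now running so it can decide whether the sibling hyperthread may run alongside it. A hedged sketch of the call-site shape, assuming only the no-argument signature visible here; everything else is illustrative:

    extern void ht_mark(void);          /* record what this CPU now runs,
                                           for hyperthread exclusion */

    /*
     * Shared tail of the resume paths: the new thread is already
     * curthread and on its own stack; mark it before restoring the
     * non-volatile registers and dropping the priority via spl0().
     */
    static void
    sketch_resume_tail(void)
    {
        ht_mark();
        /* ... RESTORE_REGS, push saved PC, jmp spl0 ... */
    }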
 347         /*
 348          * Restore non-volatile registers, then have spl0 return to the
 349          * resuming thread's PC after first setting the priority as low as
 350          * possible and blocking all interrupt threads that may be active.
 351          */
 352         movq    %r13, %rax      /* save return address */
 353         RESTORE_REGS(%r11)
 354         pushq   %rax            /* push return address for spl0() */
 355         call    __dtrace_probe___sched_on__cpu
 356         jmp     spl0
 357 
 358 resume_return:
 359         /*
 360          * Remove stack frame created in SAVE_REGS()
 361          */
 362         addq    $CLONGSIZE, %rsp
 363         ret
 364         SET_SIZE(_resume_from_idle)
 365         SET_SIZE(resume)
 366 
 367         ENTRY(resume_from_zombie)
 368         movq    %gs:CPU_THREAD, %rax
 369         leaq    resume_from_zombie_return(%rip), %r11
 370 
 371         /*
 372          * Save non-volatile registers, and set return address for current
 373          * thread to resume_from_zombie_return.
 374          *
 375          * %r12 = t (new thread) when done
 376          */
 377         SAVE_REGS(%rax, %r11)
 378 
 379         movq    %gs:CPU_THREAD, %r13    /* %r13 = curthread */
 380 
 381         /* clean up the fp unit. It might be left enabled */
 382 
 383 #if defined(__xpv)              /* XXPV XXtclayton */
 384         /*
 385          * Remove this after bringup.
 386          * (Too many #gp's for an instrumented hypervisor.)


 421 
 422         /*
 423          * Put the zombie on death-row.
 424          */
 425         movq    %r13, %rdi
 426         call    reapq_add
 427 
 428         jmp     _resume_from_idle       /* finish job of resume */
 429 
 430 resume_from_zombie_return:
 431         RESTORE_REGS(%r11)              /* restore non-volatile registers */
 432         call    __dtrace_probe___sched_on__cpu
 433 
 434         /*
 435          * Remove stack frame created in SAVE_REGS()
 436          */
 437         addq    $CLONGSIZE, %rsp
 438         ret
 439         SET_SIZE(resume_from_zombie)
 440 
 441         ENTRY(resume_from_intr)
 442         movq    %gs:CPU_THREAD, %rax
 443         leaq    resume_from_intr_return(%rip), %r11
 444 
 445         /*
 446          * Save non-volatile registers, and set return address for current
 447          * thread to resume_from_intr_return.
 448          *
 449          * %r12 = t (new thread) when done
 450          */
 451         SAVE_REGS(%rax, %r11)
 452 
 453         movq    %gs:CPU_THREAD, %r13    /* %r13 = curthread */
 454         movq    %r12, %gs:CPU_THREAD    /* set CPU's thread pointer */
 455         mfence                          /* synchronize with mutex_exit() */
 456         movq    T_SP(%r12), %rsp        /* restore resuming thread's sp */
 457         xorl    %ebp, %ebp              /* make $<threadlist behave better */
 458 
 459         /*
  460          * Unlock the outgoing thread's mutex so it can be dispatched by another processor.
 461          */
 462         xorl    %eax, %eax
 463         xchgb   %al, T_LOCK(%r13)
 464 
 465         STORE_INTR_START(%r12)
 466 
 467         call    ht_mark
 468 
 469         /*
 470          * Restore non-volatile registers, then have spl0 return to the
 471          * resuming thread's PC after first setting the priority as low as
 472          * possible and blocking all interrupt threads that may be active.
 473          */
 474         movq    T_PC(%r12), %rax        /* saved return addr */
 475         RESTORE_REGS(%r11);
 476         pushq   %rax                    /* push return address for spl0() */
 477         call    __dtrace_probe___sched_on__cpu
 478         jmp     spl0
 479 
 480 resume_from_intr_return:
 481         /*
 482          * Remove stack frame created in SAVE_REGS()
 483          */
 484         addq    $CLONGSIZE, %rsp
 485         ret
 486         SET_SIZE(resume_from_intr)
 487 
 488         ENTRY(thread_start)
 489         popq    %rax            /* start() */
 490         popq    %rdi            /* arg */
 491         popq    %rsi            /* len */
 492         movq    %rsp, %rbp
 493         call    *%rax
 494         call    thread_exit     /* destroy thread if it returns. */
 495         /*NOTREACHED*/
 496         SET_SIZE(thread_start)