/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Copyright 2019 Joyent, Inc.
 */

/*
 * Process switching routines.
 */

#include <sys/asm_linkage.h>
#include <sys/asm_misc.h>
#include <sys/regset.h>
#include <sys/privregs.h>
#include <sys/stack.h>
#include <sys/segments.h>
#include <sys/psw.h>

#include "assym.h"

/*
 * resume(thread_id_t t);
 *
 * a thread can only run on one processor at a time. there
 * exists a window on MPs where the current thread on one
 * processor is capable of being dispatched by another processor.
 * some overlap between outgoing and incoming threads can happen
 * when they are the same thread. in this case where the threads
 * are the same, resume() on one processor will spin on the incoming
 * thread until resume() on the other processor has finished with
 * the outgoing thread.
 *
 * The MMU context changes when the resuming thread resides in a different
 * process.  Kernel threads are known by resume to reside in process 0.
 * The MMU context, therefore, only changes when resuming a thread in
 * a process different from curproc.
 *
 * resume_from_intr() is called when the thread being resumed was not
 * passivated by resume (e.g. was interrupted).  This means that the
 * resume lock is already held and that a restore context is not needed.
 * Also, the MMU context is not changed on the resume in this case.
 *
 * resume_from_zombie() is the same as resume except the calling thread
 * is a zombie and must be put on the deathrow list after the CPU is
 * off the stack.
 */
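
/*
 * For orientation, a rough C-level sketch of the resume() path below.  This
 * is illustrative only -- the real work is the hand-written assembly that
 * follows, and details such as SMAP, the TSS rsp0 setup and the interrupt
 * timestamp are omitted.  save_nonvolatile_regs()/restore_nonvolatile_regs()
 * are placeholders for the SAVE_REGS()/RESTORE_REGS() macros, not real
 * kernel functions:
 *
 *	void
 *	resume(kthread_t *t)
 *	{
 *		kthread_t *curt = curthread;
 *		cpu_t *cp = CPU;
 *
 *		save_nonvolatile_regs(curt);		// SAVE_REGS()
 *		if (curt->t_ctx != NULL)
 *			savectx(curt);
 *		if (ttoproc(curt)->p_pctx != NULL)
 *			savepctx(ttoproc(curt));
 *
 *		cp->cpu_thread = cp->cpu_idle_thread;	// run on idle stack
 *		hat_switch(ttoproc(t)->p_as->a_hat);	// MMU context
 *		curt->t_lock = 0;			// old thread may migrate
 *
 *		while (!lock_try(&t->t_lock))		// _resume_from_idle
 *			;				// spin until t is free
 *		cp->cpu_thread = t;
 *		if (t->t_ctx != NULL)
 *			restorectx(t);
 *		restore_nonvolatile_regs(t);		// RESTORE_REGS()
 *		spl0();					// returns to t's saved PC
 *	}
 */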

#if LWP_PCB_FPU != 0
#error LWP_PCB_FPU MUST be defined as 0 for code in swtch.s to work
#endif  /* LWP_PCB_FPU != 0 */

/*
 * Save non-volatile regs other than %rsp (%rbx, %rbp, and %r12 - %r15)
 *
 * The stack frame must be created before the save of %rsp so that tracebacks
 * of swtch()ed-out processes show the process as having last called swtch().
 */
#define SAVE_REGS(thread_t, retaddr)                    \
        movq    %rbp, T_RBP(thread_t);                  \
        movq    %rbx, T_RBX(thread_t);                  \
        movq    %r12, T_R12(thread_t);                  \
        movq    %r13, T_R13(thread_t);                  \
        movq    %r14, T_R14(thread_t);                  \
        movq    %r15, T_R15(thread_t);                  \
        pushq   %rbp;                                   \
        movq    %rsp, %rbp;                             \
        movq    %rsp, T_SP(thread_t);                   \
        movq    retaddr, T_PC(thread_t);                \
        movq    %rdi, %r12;                             \
        call    __dtrace_probe___sched_off__cpu

/*
 * Restore non-volatile regs other than %rsp (%rbx, %rbp, and %r12 - %r15)
 *
 * We load up %rsp from the label_t as part of the context switch, so
 * we don't repeat that here.
 *
 * We don't do a 'leave,' because reloading %rsp/%rbp from the label_t
 * already has the effect of putting the stack back the way it was when
 * we came in.
 */
#define RESTORE_REGS(scratch_reg)                       \
        movq    %gs:CPU_THREAD, scratch_reg;            \
        movq    T_RBP(scratch_reg), %rbp;               \
        movq    T_RBX(scratch_reg), %rbx;               \
        movq    T_R12(scratch_reg), %r12;               \
        movq    T_R13(scratch_reg), %r13;               \
        movq    T_R14(scratch_reg), %r14;               \
        movq    T_R15(scratch_reg), %r15
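
/*
 * The T_RBP/T_RBX/T_R12..T_R15/T_SP/T_PC offsets used above come from
 * assym.h and name the slots in the kthread_t where the switched-out state
 * lives.  Conceptually the two macros behave like the following C, where the
 * struct is purely illustrative (it does not exist under this name):
 *
 *	typedef struct resume_save {
 *		uintptr_t rbp, rbx, r12, r13, r14, r15;	// callee-saved regs
 *		uintptr_t sp;				// T_SP: saved %rsp
 *		uintptr_t pc;				// T_PC: resume_return etc.
 *	} resume_save_t;
 *
 * SAVE_REGS() fills this in for the outgoing thread (and fires the
 * sched:::off-cpu DTrace probe); RESTORE_REGS() reloads the callee-saved
 * registers of whichever thread is now in CPU_THREAD.
 */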

/*
 * Get pointer to a thread's hat structure
 */
#define GET_THREAD_HATP(hatp, thread_t, scratch_reg)    \
        movq    T_PROCP(thread_t), hatp;                \
        movq    P_AS(hatp), scratch_reg;                \
        movq    A_HAT(scratch_reg), hatp
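
/*
 * In C terms, GET_THREAD_HATP() is roughly (illustrative only):
 *
 *	hatp = ttoproc(thread_t)->p_as->a_hat;
 */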

#define TSC_READ()                                      \
        call    tsc_read;                               \
        movq    %rax, %r14;

/*
 * If we are resuming an interrupt thread, store a timestamp in the thread
 * structure.  If an interrupt occurs between tsc_read() and its subsequent
 * store, the timestamp will be stale by the time it is stored.  We can detect
 * this by doing a compare-and-swap on the thread's timestamp, since any
 * interrupt occurring in this window will put a new timestamp in the thread's
 * t_intr_start field.
 */
#define STORE_INTR_START(thread_t)                      \
        testw   $T_INTR_THREAD, T_FLAGS(thread_t);      \
        jz      1f;                                     \
0:                                                      \
        TSC_READ();                                     \
        movq    T_INTR_START(thread_t), %rax;           \
        cmpxchgq %r14, T_INTR_START(thread_t);          \
        jnz     0b;                                     \
1:
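
/*
 * A rough C equivalent of STORE_INTR_START(), with atomic_cas_64() from
 * <sys/atomic.h> standing in for the cmpxchgq loop (an illustrative sketch,
 * not the code that is actually assembled):
 *
 *	if (t->t_flags & T_INTR_THREAD) {
 *		uint64_t old, new;
 *		do {
 *			new = tsc_read();
 *			old = t->t_intr_start;
 *		} while (atomic_cas_64((uint64_t *)&t->t_intr_start,
 *		    old, new) != old);
 *	}
 */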

        .global kpti_enable

        ENTRY(resume)
        movq    %gs:CPU_THREAD, %rax
        leaq    resume_return(%rip), %r11

        /*
         * Deal with SMAP here. A thread may be switched out at any point while
         * it is executing. The thread could be under on_fault() or it could be
         * pre-empted while performing a copy. If this happens and we're not in
         * the context of an interrupt which happens to handle saving and
         * restoring rflags correctly, we may lose our SMAP related state.
         *
         * To handle this, as part of being switched out, we first save whether
         * or not userland access is allowed ($PS_ACHK in rflags) and store that
         * in t_useracc on the kthread_t and unconditionally enable SMAP to
         * protect the system.
         *
         * Later, when the thread finishes resuming, we potentially disable SMAP
         * if PS_ACHK was present in rflags. See uts/intel/ia32/ml/copy.s for
         * more information on rflags and SMAP.
         */
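        /*
         * In C terms, the next five instructions amount to (illustrative
         * only; read_rflags() is a stand-in for the pushfq/popq pair, not a
         * real kernel routine):
         *
         *	curthread->t_useracc = read_rflags() & PS_ACHK;
         *	smap_enable();
         */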
        pushfq
        popq    %rsi
        andq    $PS_ACHK, %rsi
        movq    %rsi, T_USERACC(%rax)
        call    smap_enable

        /*
         * Take a moment to potentially clear the RSB buffer. This is done to
         * prevent various Spectre variant 2 and SpectreRSB attacks. This may
         * not be sufficient. Please see uts/intel/ia32/ml/retpoline.s for more
         * information about this.
         */
        call    x86_rsb_stuff

        /*
         * Save non-volatile registers, and set return address for current
         * thread to resume_return.
         *
         * %r12 = t (new thread) when done
         */
        SAVE_REGS(%rax, %r11)


        LOADCPU(%r15)                           /* %r15 = CPU */
        movq    CPU_THREAD(%r15), %r13          /* %r13 = curthread */

        /*
         * Call savectx if thread has installed context ops.
         *
         * Note that if we have floating point context, the save op
         * (either fpsave_begin or fpxsave_begin) will issue the
         * async save instruction (fnsave or fxsave respectively)
         * that we fwait for below.
         */
        cmpq    $0, T_CTX(%r13)         /* should current thread savectx? */
        je      .nosavectx              /* skip call when zero */

        movq    %r13, %rdi              /* arg = thread pointer */
        call    savectx                 /* call ctx ops */
.nosavectx:

        /*
         * Call savepctx if process has installed context ops.
         */
        movq    T_PROCP(%r13), %r14     /* %r14 = proc */
        cmpq    $0, P_PCTX(%r14)        /* should current proc savepctx? */
        je      .nosavepctx             /* skip call when zero */

        movq    %r14, %rdi              /* arg = proc pointer */
        call    savepctx                /* call ctx ops */
.nosavepctx:

        /*
         * Temporarily switch to the idle thread's stack
         */
        movq    CPU_IDLE_THREAD(%r15), %rax     /* idle thread pointer */

        /*
         * Set the idle thread as the current thread
         */
        movq    T_SP(%rax), %rsp        /* It is safe to set rsp */
        movq    %rax, CPU_THREAD(%r15)

        /*
         * Switch in the hat context for the new thread
         */
        GET_THREAD_HATP(%rdi, %r12, %r11)
        call    hat_switch

        /*
         * Clear and unlock previous thread's t_lock
         * to allow it to be dispatched by another processor.
         */
        movb    $0, T_LOCK(%r13)

        /*
         * IMPORTANT: Registers at this point must be:
         *       %r12 = new thread
         *
         * Here we are in the idle thread, having dropped the old thread.
         */
        ALTENTRY(_resume_from_idle)
        /*
         * spin until dispatched thread's mutex has
         * been unlocked. this mutex is unlocked when
         * it becomes safe for the thread to run.
         */
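        /*
         * In effect (illustrative C only; SMT_PAUSE() stands in for the
         * pause instruction in the spin loop):
         *
         *	while (!lock_try(&t->t_lock)) {
         *		while (t->t_lock != 0)
         *			SMT_PAUSE();
         *	}
         */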
.lock_thread_mutex:
        lock
        btsl    $0, T_LOCK(%r12)        /* attempt to lock new thread's mutex */
        jnc     .thread_mutex_locked    /* got it */

.spin_thread_mutex:
        pause
        cmpb    $0, T_LOCK(%r12)        /* check mutex status */
        jz      .lock_thread_mutex      /* clear, retry lock */
        jmp     .spin_thread_mutex      /* still locked, spin... */

.thread_mutex_locked:
        /*
         * Fix CPU structure to indicate new running thread.
         * Set pointer in new thread to the CPU structure.
         */
        LOADCPU(%r13)                   /* load current CPU pointer */
        cmpq    %r13, T_CPU(%r12)
        je      .setup_cpu

        /* cp->cpu_stats.sys.cpumigrate++ */
        incq    CPU_STATS_SYS_CPUMIGRATE(%r13)
        movq    %r13, T_CPU(%r12)       /* set new thread's CPU pointer */

.setup_cpu:
        /*
         * Setup rsp0 (kernel stack) in TSS to curthread's saved regs
         * structure.  If this thread doesn't have a regs structure above
         * the stack -- that is, if lwp_stk_init() was never called for the
         * thread -- this will set rsp0 to the wrong value, but it's harmless
         * as it's a kernel thread, and it won't actually attempt to implicitly
         * use the rsp0 via a privilege change.
         *
         * Note that when we have KPTI enabled on amd64, we never use this
         * value at all (since all the interrupts have an IST set).
         */
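        /*
         * Roughly, in C (illustrative only; cpu_tss, kpti_tr_rsp and
         * tss_rsp0 are placeholders for the CPU_TSS, CPU_KPTI_TR_RSP and
         * TSS_RSP0 offsets, not necessarily the real field names):
         *
         *	if (kpti_enable == 1)
         *		rsp0 = (uintptr_t)&cp->kpti_tr_rsp;
         *	else
         *		rsp0 = (uintptr_t)t->t_stk + REGSIZE + MINFRAME;
         *	cp->cpu_tss->tss_rsp0 = rsp0;
         */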
        movq    CPU_TSS(%r13), %r14
#if !defined(__xpv)
        cmpq    $1, kpti_enable
        jne     1f
        leaq    CPU_KPTI_TR_RSP(%r13), %rax
        jmp     2f
1:
        movq    T_STACK(%r12), %rax
        addq    $REGSIZE+MINFRAME, %rax /* to the bottom of thread stack */
2:
        movq    %rax, TSS_RSP0(%r14)
#else
        movq    T_STACK(%r12), %rax
        addq    $REGSIZE+MINFRAME, %rax /* to the bottom of thread stack */
        movl    $KDS_SEL, %edi
        movq    %rax, %rsi
        call    HYPERVISOR_stack_switch
#endif  /* __xpv */

        movq    %r12, CPU_THREAD(%r13)  /* set CPU's thread pointer */
        mfence                          /* synchronize with mutex_exit() */
        xorl    %ebp, %ebp              /* make $<threadlist behave better */
        movq    T_LWP(%r12), %rax       /* set associated lwp to  */
        movq    %rax, CPU_LWP(%r13)     /* CPU's lwp ptr */

        movq    T_SP(%r12), %rsp        /* switch to resuming thread's stack */
        movq    T_PC(%r12), %r13        /* saved return addr */

        /*
         * Call restorectx if context ops have been installed.
         */
        cmpq    $0, T_CTX(%r12)         /* should resumed thread restorectx? */
        jz      .norestorectx           /* skip call when zero */
        movq    %r12, %rdi              /* arg = thread pointer */
        call    restorectx              /* call ctx ops */
.norestorectx:

        /*
         * Call restorepctx if context ops have been installed for the proc.
         */
        movq    T_PROCP(%r12), %rcx
        cmpq    $0, P_PCTX(%rcx)
        jz      .norestorepctx
        movq    %rcx, %rdi
        call    restorepctx
.norestorepctx:

        STORE_INTR_START(%r12)

        /*
         * If we came into swtch with the ability to access userland pages, go
         * ahead and restore that fact by disabling SMAP.  Clear the indicator
         * flag out of paranoia.
         */
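        /*
         * That is, roughly (illustrative C only):
         *
         *	if (t->t_useracc != 0) {
         *		t->t_useracc = 0;
         *		smap_disable();
         *	}
         */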
        movq    T_USERACC(%r12), %rax   /* should we disable smap? */
        cmpq    $0, %rax                /* skip call when zero */
        jz      .nosmap
        xorq    %rax, %rax
        movq    %rax, T_USERACC(%r12)
        call    smap_disable
.nosmap:

        call    smt_mark

        /*
         * Restore non-volatile registers, then have spl0 return to the
         * resuming thread's PC after first setting the priority as low as
         * possible and blocking all interrupt threads that may be active.
         */
        movq    %r13, %rax      /* save return address */
        RESTORE_REGS(%r11)
        pushq   %rax            /* push return address for spl0() */
        call    __dtrace_probe___sched_on__cpu
        jmp     spl0

resume_return:
        /*
         * Remove stack frame created in SAVE_REGS()
         */
        addq    $CLONGSIZE, %rsp
        ret
        SET_SIZE(_resume_from_idle)
        SET_SIZE(resume)

        ENTRY(resume_from_zombie)
        movq    %gs:CPU_THREAD, %rax
        leaq    resume_from_zombie_return(%rip), %r11

        /*
         * Save non-volatile registers, and set return address for current
         * thread to resume_from_zombie_return.
         *
         * %r12 = t (new thread) when done
         */
        SAVE_REGS(%rax, %r11)

        movq    %gs:CPU_THREAD, %r13    /* %r13 = curthread */

        /* clean up the fp unit. It might be left enabled */

#if defined(__xpv)              /* XXPV XXtclayton */
        /*
         * Remove this after bringup.
         * (Too many #gp's for an instrumented hypervisor.)
         */
        STTS(%rax)
#else
        movq    %cr0, %rax
        testq   $CR0_TS, %rax
        jnz     .zfpu_disabled          /* if TS already set, nothing to do */
        fninit                          /* init fpu & discard pending error */
        orq     $CR0_TS, %rax
        movq    %rax, %cr0
.zfpu_disabled:

#endif  /* __xpv */
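        /*
         * On bare metal the sequence above is roughly (illustrative C;
         * fninit() here names the raw instruction rather than a C function):
         *
         *	ulong_t cr0 = getcr0();
         *	if (!(cr0 & CR0_TS)) {		// FPU left enabled by zombie
         *		fninit();		// reset FPU, discard errors
         *		setcr0(cr0 | CR0_TS);	// disable until next use
         *	}
         */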

        /*
         * Temporarily switch to the idle thread's stack so that the zombie
         * thread's stack can be reclaimed by the reaper.
         */
        movq    %gs:CPU_IDLE_THREAD, %rax /* idle thread pointer */
        movq    T_SP(%rax), %rsp        /* get onto idle thread stack */

        /*
         * Sigh. If the idle thread has never run thread_start()
         * then t_sp is mis-aligned by thread_load().
         */
        andq    $_BITNOT(STACK_ALIGN-1), %rsp

        /*
         * Set the idle thread as the current thread.
         */
        movq    %rax, %gs:CPU_THREAD

        /* switch in the hat context for the new thread */
        GET_THREAD_HATP(%rdi, %r12, %r11)
        call    hat_switch

        /*
         * Put the zombie on death-row.
         */
        movq    %r13, %rdi
        call    reapq_add

        jmp     _resume_from_idle       /* finish job of resume */

resume_from_zombie_return:
        RESTORE_REGS(%r11)              /* restore non-volatile registers */
        call    __dtrace_probe___sched_on__cpu

        /*
         * Remove stack frame created in SAVE_REGS()
         */
        addq    $CLONGSIZE, %rsp
        ret
        SET_SIZE(resume_from_zombie)

        ENTRY(resume_from_intr)
        movq    %gs:CPU_THREAD, %rax
        leaq    resume_from_intr_return(%rip), %r11

        /*
         * Save non-volatile registers, and set return address for current
         * thread to resume_from_intr_return.
         *
         * %r12 = t (new thread) when done
         */
        SAVE_REGS(%rax, %r11)

        movq    %gs:CPU_THREAD, %r13    /* %r13 = curthread */
        movq    %r12, %gs:CPU_THREAD    /* set CPU's thread pointer */
        mfence                          /* synchronize with mutex_exit() */
        movq    T_SP(%r12), %rsp        /* restore resuming thread's sp */
        xorl    %ebp, %ebp              /* make $<threadlist behave better */

        /*
         * Unlock the outgoing thread's mutex so it can be dispatched
         * by another processor.
         */
        xorl    %eax, %eax
        xchgb   %al, T_LOCK(%r13)

        STORE_INTR_START(%r12)

        call    smt_mark

        /*
         * Restore non-volatile registers, then have spl0 return to the
         * resuming thread's PC after first setting the priority as low as
         * possible and blocking all interrupt threads that may be active.
         */
        movq    T_PC(%r12), %rax        /* saved return addr */
        RESTORE_REGS(%r11)
        pushq   %rax                    /* push return address for spl0() */
        call    __dtrace_probe___sched_on__cpu
        jmp     spl0

resume_from_intr_return:
        /*
         * Remove stack frame created in SAVE_REGS()
         */
        addq    $CLONGSIZE, %rsp
        ret
        SET_SIZE(resume_from_intr)

        ENTRY(thread_start)
        popq    %rax            /* start() */
        popq    %rdi            /* arg */
        popq    %rsi            /* len */
        movq    %rsp, %rbp
        INDIRECT_CALL_REG(rax)
        call    thread_exit     /* destroy thread if it returns. */
        /*NOTREACHED*/
        SET_SIZE(thread_start)