/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Copyright 2019 Joyent, Inc.
 */

/*
 * Process switching routines.
 */

#include <sys/asm_linkage.h>
#include <sys/asm_misc.h>
#include <sys/regset.h>
#include <sys/privregs.h>
#include <sys/stack.h>
#include <sys/segments.h>
#include <sys/psw.h>

#include "assym.h"

/*
 * resume(thread_id_t t);
 *
 * a thread can only run on one processor at a time. there
 * exists a window on MPs where the current thread on one
 * processor is capable of being dispatched by another processor.
 * some overlap between outgoing and incoming threads can happen
 * when they are the same thread. in this case where the threads
 * are the same, resume() on one processor will spin on the incoming
 * thread until resume() on the other processor has finished with
 * the outgoing thread.
 *
 * The MMU context changes when the resuming thread resides in a different
 * process.  Kernel threads are known by resume to reside in process 0.
 * The MMU context, therefore, only changes when resuming a thread in
 * a process different from curproc.
 *
 * resume_from_intr() is called when the thread being resumed was not
 * passivated by resume (e.g. was interrupted).  This means that the
 * resume lock is already held and that a restore context is not needed.
 * Also, the MMU context is not changed on the resume in this case.
 *
 * resume_from_zombie() is the same as resume except the calling thread
 * is a zombie and must be put on the deathrow list after the CPU is
 * off the stack.
 */
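
/*
 * For orientation, a rough C-level sketch of what resume() below does
 * (illustrative only -- the ordering and the real work live in the
 * hand-written assembly that follows):
 *
 *	resume(kthread_t *t)
 *	{
 *		save non-volatile regs, %rsp and return PC in curthread;
 *		savectx(curthread);		// if curthread->t_ctx != NULL
 *		savepctx(curproc);		// if curproc->p_pctx != NULL
 *		switch onto the idle thread's stack;
 *		hat_switch(<t's hat>);		// switch MMU context
 *		clear the old thread's t_lock;	// it may now run elsewhere
 *		spin until t->t_lock is acquired;
 *		set up TSS rsp0, cpu_thread and cpu_lwp for t;
 *		restorectx(t);			// if t->t_ctx != NULL
 *		restorepctx(t->t_procp);	// if p_pctx != NULL
 *		load t's saved %rsp/PC and return through spl0();
 *	}
 */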

#if LWP_PCB_FPU != 0
#error LWP_PCB_FPU MUST be defined as 0 for code in swtch.s to work
#endif  /* LWP_PCB_FPU != 0 */

/*
 * Save non-volatile regs other than %rsp (%rbx, %rbp, and %r12 - %r15)
 *
 * The stack frame must be created before the save of %rsp so that tracebacks
 * of swtch()ed-out processes show the process as having last called swtch().
 */
#define SAVE_REGS(thread_t, retaddr)                    \
        movq    %rbp, T_RBP(thread_t);                  \
        movq    %rbx, T_RBX(thread_t);                  \
        movq    %r12, T_R12(thread_t);                  \
        movq    %r13, T_R13(thread_t);                  \
        movq    %r14, T_R14(thread_t);                  \
        movq    %r15, T_R15(thread_t);                  \
        pushq   %rbp;                                   \
        movq    %rsp, %rbp;                             \
        movq    %rsp, T_SP(thread_t);                   \
        movq    retaddr, T_PC(thread_t);                \
        movq    %rdi, %r12;                             \
        call    __dtrace_probe___sched_off__cpu

/*
 * Restore non-volatile regs other than %rsp (%rbx, %rbp, and %r12 - %r15)
 *
 * We load up %rsp from the label_t as part of the context switch, so
 * we don't repeat that here.
 *
 * We don't do a 'leave,' because reloading %rsp/%rbp from the label_t
 * already has the effect of putting the stack back the way it was when
 * we came in.
 */
#define RESTORE_REGS(scratch_reg)                       \
        movq    %gs:CPU_THREAD, scratch_reg;            \
        movq    T_RBP(scratch_reg), %rbp;               \
        movq    T_RBX(scratch_reg), %rbx;               \
        movq    T_R12(scratch_reg), %r12;               \
        movq    T_R13(scratch_reg), %r13;               \
        movq    T_R14(scratch_reg), %r14;               \
        movq    T_R15(scratch_reg), %r15

/*
 * Get pointer to a thread's hat structure
 */
#define GET_THREAD_HATP(hatp, thread_t, scratch_reg)    \
        movq    T_PROCP(thread_t), hatp;                \
        movq    P_AS(hatp), scratch_reg;                \
        movq    A_HAT(scratch_reg), hatp
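
/*
 * For reference, the macro above is just the C-style dereference chain
 * hatp = thread_t->t_procp->p_as->a_hat.
 */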

#define TSC_READ()                                      \
        call    tsc_read;                               \
        movq    %rax, %r14;

/*
 * If we are resuming an interrupt thread, store a timestamp in the thread
 * structure.  If an interrupt occurs between tsc_read() and its subsequent
 * store, the timestamp will be stale by the time it is stored.  We can detect
 * this by doing a compare-and-swap on the thread's timestamp, since any
 * interrupt occurring in this window will put a new timestamp in the thread's
 * t_intr_start field.
 */
#define STORE_INTR_START(thread_t)                      \
        testw   $T_INTR_THREAD, T_FLAGS(thread_t);      \
        jz      1f;                                     \
0:                                                      \
        TSC_READ();                                     \
        movq    T_INTR_START(thread_t), %rax;           \
        cmpxchgq %r14, T_INTR_START(thread_t);          \
        jnz     0b;                                     \
1:
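
/*
 * A rough C equivalent of STORE_INTR_START() (illustrative sketch only;
 * atomic_cas_64() stands in for the cmpxchgq retry loop):
 *
 *	if (t->t_flags & T_INTR_THREAD) {
 *		hrtime_t old, new;
 *		do {
 *			new = tsc_read();
 *			old = t->t_intr_start;
 *		} while (atomic_cas_64((uint64_t *)&t->t_intr_start,
 *		    old, new) != old);
 *	}
 */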

        .global kpti_enable

        ENTRY(resume)
        movq    %gs:CPU_THREAD, %rax
        leaq    resume_return(%rip), %r11

        /*
         * Deal with SMAP here. A thread may be switched out at any point while
         * it is executing. The thread could be under on_fault() or it could be
         * pre-empted while performing a copy operation. If this happens and
         * we're not in the context of an interrupt which happens to handle
         * saving and restoring rflags correctly, we may lose our SMAP related
         * state.
         *
         * To handle this, as part of being switched out, we first save whether
         * or not userland access is allowed ($PS_ACHK in rflags) and store that
         * in t_useracc on the kthread_t and unconditionally enable SMAP to
         * protect the system.
         *
         * Later, when the thread finishes resuming, we potentially disable smap
         * if PS_ACHK was present in rflags. See uts/intel/ia32/ml/copy.s for
         * more information on rflags and SMAP.
         */
        pushfq
        popq    %rsi
        andq    $PS_ACHK, %rsi
        movq    %rsi, T_USERACC(%rax)
        call    smap_enable
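
        /*
         * In effect (an illustrative C sketch; read_rflags() is just
         * shorthand for the pushfq/popq pair above, while smap_enable()
         * and smap_disable() are the real entry points used by this file):
         *
         *	curthread->t_useracc = read_rflags() & PS_ACHK;
         *	smap_enable();			// forbid user access (clac)
         *	...
         *	// later, once this thread is back on CPU (see .nosmap):
         *	if (t->t_useracc != 0) {
         *		t->t_useracc = 0;
         *		smap_disable();		// re-allow user access (stac)
         *	}
         */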

        /*
         * Save non-volatile registers, and set return address for current
         * thread to resume_return.
         *
         * %r12 = t (new thread) when done
         */
        SAVE_REGS(%rax, %r11)


        LOADCPU(%r15)                           /* %r15 = CPU */
        movq    CPU_THREAD(%r15), %r13          /* %r13 = curthread */

        /*
         * Call savectx if thread has installed context ops.
         *
         * Note that if we have floating point context, the save op
         * (either fpsave_begin or fpxsave_begin) will issue the
         * async save instruction (fnsave or fxsave respectively)
         * that we fwait for below.
         */
        cmpq    $0, T_CTX(%r13)         /* should current thread savectx? */
        je      .nosavectx              /* skip call when zero */

        movq    %r13, %rdi              /* arg = thread pointer */
        call    savectx                 /* call ctx ops */
.nosavectx:

        /*
         * Call savepctx if process has installed context ops.
         */
        movq    T_PROCP(%r13), %r14     /* %r14 = proc */
        cmpq    $0, P_PCTX(%r14)        /* should current thread savepctx? */
        je      .nosavepctx             /* skip call when zero */

        movq    %r14, %rdi              /* arg = proc pointer */
        call    savepctx                /* call ctx ops */
.nosavepctx:

        /*
         * Temporarily switch to the idle thread's stack
         */
        movq    CPU_IDLE_THREAD(%r15), %rax     /* idle thread pointer */

        /*
         * Set the idle thread as the current thread
         */
        movq    T_SP(%rax), %rsp        /* It is safe to set rsp */
        movq    %rax, CPU_THREAD(%r15)

        /*
         * Switch in the hat context for the new thread
         */
        GET_THREAD_HATP(%rdi, %r12, %r11)
        call    hat_switch

        /*
         * Clear and unlock previous thread's t_lock
         * to allow it to be dispatched by another processor.
         */
        movb    $0, T_LOCK(%r13)

        /*
         * IMPORTANT: Registers at this point must be:
         *       %r12 = new thread
         *
         * Here we are in the idle thread, have dropped the old thread.
         */
        ALTENTRY(_resume_from_idle)
        /*
         * spin until dispatched thread's mutex has
         * been unlocked. this mutex is unlocked when
         * it becomes safe for the thread to run.
         */
.lock_thread_mutex:
        lock
        btsl    $0, T_LOCK(%r12)        /* attempt to lock new thread's mutex */
        jnc     .thread_mutex_locked    /* got it */

.spin_thread_mutex:
        pause
        cmpb    $0, T_LOCK(%r12)        /* check mutex status */
        jz      .lock_thread_mutex      /* clear, retry lock */
        jmp     .spin_thread_mutex      /* still locked, spin... */
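
        /*
         * The sequence above is a simple test-and-set spin loop on t_lock.
         * An illustrative C sketch (try_lock_bit() is not a real routine;
         * it just names the "lock; btsl" test-and-set):
         *
         *	while (!try_lock_bit(&t->t_lock)) {
         *		while (t->t_lock != 0)
         *			pause();	// cheap spin, then retry the lock
         *	}
         */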

.thread_mutex_locked:
        /*
         * Fix CPU structure to indicate new running thread.
         * Set pointer in new thread to the CPU structure.
         */
        LOADCPU(%r13)                   /* load current CPU pointer */
        cmpq    %r13, T_CPU(%r12)
        je      .setup_cpu

        /* cp->cpu_stats.sys.cpumigrate++ */
        incq    CPU_STATS_SYS_CPUMIGRATE(%r13)
        movq    %r13, T_CPU(%r12)       /* set new thread's CPU pointer */

.setup_cpu:
        /*
         * Setup rsp0 (kernel stack) in TSS to curthread's saved regs
         * structure.  If this thread doesn't have a regs structure above
         * the stack -- that is, if lwp_stk_init() was never called for the
         * thread -- this will set rsp0 to the wrong value, but it's harmless
         * as it's a kernel thread, and it won't actually attempt to implicitly
         * use the rsp0 via a privilege change.
         *
         * Note that when we have KPTI enabled on amd64, we never use this
         * value at all (since all the interrupts have an IST set).
         */
        movq    CPU_TSS(%r13), %r14
#if !defined(__xpv)
        cmpq    $1, kpti_enable
        jne     1f
        leaq    CPU_KPTI_TR_RSP(%r13), %rax
        jmp     2f
1:
        movq    T_STACK(%r12), %rax
        addq    $REGSIZE+MINFRAME, %rax /* to the bottom of thread stack */
2:
        movq    %rax, TSS_RSP0(%r14)
#else
        movq    T_STACK(%r12), %rax
        addq    $REGSIZE+MINFRAME, %rax /* to the bottom of thread stack */
        movl    $KDS_SEL, %edi
        movq    %rax, %rsi
        call    HYPERVISOR_stack_switch
#endif  /* __xpv */
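
        /*
         * The non-xpv logic above amounts to the following illustrative C
         * (the struct members are stand-ins for the assym offsets used here,
         * not the literal field names):
         *
         *	if (kpti_enable == 1)
         *		tss->tss_rsp0 = &cpu->cpu_kpti_tr_rsp;
         *	else
         *		tss->tss_rsp0 = t->t_stk + REGSIZE + MINFRAME;
         */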

        movq    %r12, CPU_THREAD(%r13)  /* set CPU's thread pointer */
        mfence                          /* synchronize with mutex_exit() */
        xorl    %ebp, %ebp              /* make $<threadlist behave better */
        movq    T_LWP(%r12), %rax       /* set associated lwp to  */
        movq    %rax, CPU_LWP(%r13)     /* CPU's lwp ptr */

        movq    T_SP(%r12), %rsp        /* switch to outgoing thread's stack */
        movq    T_PC(%r12), %r13        /* saved return addr */

        /*
         * Call restorectx if context ops have been installed.
         */
        cmpq    $0, T_CTX(%r12)         /* should resumed thread restorectx? */
        jz      .norestorectx           /* skip call when zero */
        movq    %r12, %rdi              /* arg = thread pointer */
        call    restorectx              /* call ctx ops */
.norestorectx:

        /*
         * Call restorepctx if context ops have been installed for the proc.
         */
        movq    T_PROCP(%r12), %rcx
        cmpq    $0, P_PCTX(%rcx)
        jz      .norestorepctx
        movq    %rcx, %rdi
        call    restorepctx
.norestorepctx:

        STORE_INTR_START(%r12)

        /*
         * If we came into swtch with the ability to access userland pages, go
         * ahead and restore that fact by disabling SMAP.  Clear the indicator
         * flag out of paranoia.
         */
        movq    T_USERACC(%r12), %rax   /* should we disable smap? */
        cmpq    $0, %rax                /* skip call when zero */
        jz      .nosmap
        xorq    %rax, %rax
        movq    %rax, T_USERACC(%r12)
        call    smap_disable
.nosmap:

        call    ht_mark

        /*
         * Restore non-volatile registers, then have spl0 return to the
         * resuming thread's PC after first setting the priority as low as
         * possible and blocking all interrupt threads that may be active.
         */
        movq    %r13, %rax      /* save return address */
        RESTORE_REGS(%r11)
        pushq   %rax            /* push return address for spl0() */
        call    __dtrace_probe___sched_on__cpu
        jmp     spl0

resume_return:
        /*
         * Remove stack frame created in SAVE_REGS()
         */
        addq    $CLONGSIZE, %rsp
        ret
        SET_SIZE(_resume_from_idle)
        SET_SIZE(resume)

        ENTRY(resume_from_zombie)
        movq    %gs:CPU_THREAD, %rax
        leaq    resume_from_zombie_return(%rip), %r11

        /*
         * Save non-volatile registers, and set return address for current
         * thread to resume_from_zombie_return.
         *
         * %r12 = t (new thread) when done
         */
        SAVE_REGS(%rax, %r11)

        movq    %gs:CPU_THREAD, %r13    /* %r13 = curthread */

        /* clean up the fp unit. It might be left enabled */

#if defined(__xpv)              /* XXPV XXtclayton */
        /*
         * Remove this after bringup.
         * (Too many #gp's for an instrumented hypervisor.)
         */
        STTS(%rax)
#else
        movq    %cr0, %rax
        testq   $CR0_TS, %rax
        jnz     .zfpu_disabled          /* if TS already set, nothing to do */
        fninit                          /* init fpu & discard pending error */
        orq     $CR0_TS, %rax
        movq    %rax, %cr0
.zfpu_disabled:

#endif  /* __xpv */
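
        /*
         * For the non-xpv path above, an illustrative C equivalent (getcr0()
         * and setcr0() are used here only as shorthand for the %cr0 moves):
         *
         *	if (!(getcr0() & CR0_TS)) {
         *		fninit();			// reset FPU state
         *		setcr0(getcr0() | CR0_TS);	// trap on next FPU use
         *	}
         */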

        /*
         * Temporarily switch to the idle thread's stack so that the zombie
         * thread's stack can be reclaimed by the reaper.
         */
        movq    %gs:CPU_IDLE_THREAD, %rax /* idle thread pointer */
        movq    T_SP(%rax), %rsp        /* get onto idle thread stack */

        /*
         * Sigh. If the idle thread has never run thread_start()
         * then t_sp is mis-aligned by thread_load().
         */
        andq    $_BITNOT(STACK_ALIGN-1), %rsp

        /*
         * Set the idle thread as the current thread.
         */
        movq    %rax, %gs:CPU_THREAD

        /* switch in the hat context for the new thread */
        GET_THREAD_HATP(%rdi, %r12, %r11)
        call    hat_switch

        /*
         * Put the zombie on death-row.
         */
        movq    %r13, %rdi
        call    reapq_add

        jmp     _resume_from_idle       /* finish job of resume */

resume_from_zombie_return:
        RESTORE_REGS(%r11)              /* restore non-volatile registers */
        call    __dtrace_probe___sched_on__cpu

        /*
         * Remove stack frame created in SAVE_REGS()
         */
        addq    $CLONGSIZE, %rsp
        ret
        SET_SIZE(resume_from_zombie)

        ENTRY(resume_from_intr)
        movq    %gs:CPU_THREAD, %rax
        leaq    resume_from_intr_return(%rip), %r11

        /*
         * Save non-volatile registers, and set return address for current
         * thread to resume_from_intr_return.
         *
         * %r12 = t (new thread) when done
         */
        SAVE_REGS(%rax, %r11)

        movq    %gs:CPU_THREAD, %r13    /* %r13 = curthread */
        movq    %r12, %gs:CPU_THREAD    /* set CPU's thread pointer */
        mfence                          /* synchronize with mutex_exit() */
        movq    T_SP(%r12), %rsp        /* restore resuming thread's sp */
        xorl    %ebp, %ebp              /* make $<threadlist behave better */

        /*
         * Unlock the outgoing thread's mutex so that it can be dispatched
         * by another processor.
         */
        xorl    %eax, %eax
        xchgb   %al, T_LOCK(%r13)

        STORE_INTR_START(%r12)

        call    ht_mark

        /*
         * Restore non-volatile registers, then have spl0 return to the
         * resuming thread's PC after first setting the priority as low as
         * possible and blocking all interrupt threads that may be active.
         */
        movq    T_PC(%r12), %rax        /* saved return addr */
        RESTORE_REGS(%r11);
        pushq   %rax                    /* push return address for spl0() */
        call    __dtrace_probe___sched_on__cpu
        jmp     spl0

resume_from_intr_return:
        /*
         * Remove stack frame created in SAVE_REGS()
         */
        addq    $CLONGSIZE, %rsp
        ret
        SET_SIZE(resume_from_intr)

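/*
 * thread_start() is the entry point for a newly created kernel thread:
 * thread_load() leaves the start function, its argument, and the argument
 * length on top of the new thread's stack.  In C terms this amounts to
 * (illustrative only):
 *
 *	(*start)(arg, len);
 *	thread_exit();		// only reached if start() returns
 */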
        ENTRY(thread_start)
        popq    %rax            /* start() */
        popq    %rdi            /* arg */
        popq    %rsi            /* len */
        movq    %rsp, %rbp
        call    *%rax
        call    thread_exit     /* destroy thread if it returns. */
        /*NOTREACHED*/
        SET_SIZE(thread_start)