/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Copyright 2019 Joyent, Inc.
 */

/*
 * Process switching routines.
 */

#include <sys/asm_linkage.h>
#include <sys/asm_misc.h>
#include <sys/regset.h>
#include <sys/privregs.h>
#include <sys/stack.h>
#include <sys/segments.h>
#include <sys/psw.h>

#include "assym.h"

/*
 * resume(thread_id_t t);
 *
 * a thread can only run on one processor at a time. there
 * exists a window on MPs where the current thread on one
 * processor is capable of being dispatched by another processor.
 * some overlap between outgoing and incoming threads can happen
 * when they are the same thread. in this case where the threads
 * are the same, resume() on one processor will spin on the incoming
 * thread until resume() on the other processor has finished with
 * the outgoing thread.
 *
 * The MMU context changes when the resuming thread resides in a different
 * process.  Kernel threads are known by resume to reside in process 0.
 * The MMU context, therefore, only changes when resuming a thread in
 * a process different from curproc.
 *
 * resume_from_intr() is called when the thread being resumed was not
 * passivated by resume (e.g. was interrupted).  This means that the
 * resume lock is already held and that a restore context is not needed.
 * Also, the MMU context is not changed on the resume in this case.
 *
 * resume_from_zombie() is the same as resume except the calling thread
 * is a zombie and must be put on the deathrow list after the CPU is
 * off the stack.
 */

#if LWP_PCB_FPU != 0
#error LWP_PCB_FPU MUST be defined as 0 for code in swtch.s to work
#endif	/* LWP_PCB_FPU != 0 */

/*
 * Save non-volatile regs other than %rsp (%rbx, %rbp, and %r12 - %r15)
 *
 * The stack frame must be created before the save of %rsp so that tracebacks
 * of swtch()ed-out processes show the process as having last called swtch().
 */
#define	SAVE_REGS(thread_t, retaddr)			\
	movq	%rbp, T_RBP(thread_t);			\
	movq	%rbx, T_RBX(thread_t);			\
	movq	%r12, T_R12(thread_t);			\
	movq	%r13, T_R13(thread_t);			\
	movq	%r14, T_R14(thread_t);			\
	movq	%r15, T_R15(thread_t);			\
	pushq	%rbp;					\
	movq	%rsp, %rbp;				\
	movq	%rsp, T_SP(thread_t);			\
	movq	retaddr, T_PC(thread_t);		\
	movq	%rdi, %r12;				\
	call	__dtrace_probe___sched_off__cpu

/*
 * Restore non-volatile regs other than %rsp (%rbx, %rbp, and %r12 - %r15)
 *
 * We load up %rsp from the label_t as part of the context switch, so
 * we don't repeat that here.
 *
 * We don't do a 'leave,' because reloading %rsp/%rbp from the label_t
 * already has the effect of putting the stack back the way it was when
 * we came in.
 */
#define	RESTORE_REGS(scratch_reg)			\
	movq	%gs:CPU_THREAD, scratch_reg;		\
	movq	T_RBP(scratch_reg), %rbp;		\
	movq	T_RBX(scratch_reg), %rbx;		\
	movq	T_R12(scratch_reg), %r12;		\
	movq	T_R13(scratch_reg), %r13;		\
	movq	T_R14(scratch_reg), %r14;		\
	movq	T_R15(scratch_reg), %r15

/*
 * Get pointer to a thread's hat structure
 */
#define	GET_THREAD_HATP(hatp, thread_t, scratch_reg)	\
	movq	T_PROCP(thread_t), hatp;		\
	movq	P_AS(hatp), scratch_reg;		\
	movq	A_HAT(scratch_reg), hatp

#define	TSC_READ()					\
	call	tsc_read;				\
	movq	%rax, %r14;

/*
 * If we are resuming an interrupt thread, store a timestamp in the thread
 * structure.  If an interrupt occurs between tsc_read() and its subsequent
 * store, the timestamp will be stale by the time it is stored.  We can detect
 * this by doing a compare-and-swap on the thread's timestamp, since any
 * interrupt occurring in this window will put a new timestamp in the thread's
 * t_intr_start field.
 */
#define	STORE_INTR_START(thread_t)			\
	testw	$T_INTR_THREAD, T_FLAGS(thread_t);	\
	jz	1f;					\
0:							\
	TSC_READ();					\
	movq	T_INTR_START(thread_t), %rax;		\
	cmpxchgq %r14, T_INTR_START(thread_t);		\
	jnz	0b;					\
1:
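/*
 * In C terms, the retry in STORE_INTR_START() above is roughly the sketch
 * below; atomic_cas_64() stands in for the cmpxchgq, and the field and
 * function names are the ones already used in this file.  An interrupt
 * taken between tsc_read() and the store writes its own timestamp into
 * t_intr_start, so the compare-and-swap fails and we simply try again
 * with a fresh timestamp:
 *
 *	if (t->t_flags & T_INTR_THREAD) {
 *		hrtime_t new, old;
 *
 *		do {
 *			new = tsc_read();
 *			old = t->t_intr_start;
 *		} while (atomic_cas_64((uint64_t *)&t->t_intr_start,
 *		    old, new) != old);
 *	}
 */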

	.global	kpti_enable

	ENTRY(resume)
	movq	%gs:CPU_THREAD, %rax
	leaq	resume_return(%rip), %r11

	/*
	 * Deal with SMAP here. A thread may be switched out at any point while
	 * it is executing. The thread could be under on_fault() or it could be
	 * pre-empted while performing a copy interruption. If this happens and
	 * we're not in the context of an interrupt which happens to handle
	 * saving and restoring rflags correctly, we may lose our SMAP related
	 * state.
	 *
	 * To handle this, as part of being switched out, we first save whether
	 * or not userland access is allowed ($PS_ACHK in rflags) and store that
	 * in t_useracc on the kthread_t and unconditionally enable SMAP to
	 * protect the system.
	 *
	 * Later, when the thread finishes resuming, we potentially disable smap
	 * if PS_ACHK was present in rflags. See uts/intel/ia32/ml/copy.s for
	 * more information on rflags and SMAP.
	 */
	pushfq
	popq	%rsi
	andq	$PS_ACHK, %rsi
	movq	%rsi, T_USERACC(%rax)
	call	smap_enable

	/*
	 * Take a moment to potentially clear the RSB buffer. This is done to
	 * prevent various Spectre variant 2 and SpectreRSB attacks. This may
	 * not be sufficient. Please see uts/intel/ia32/ml/retpoline.s for more
	 * information about this.
	 */
	call	x86_rsb_stuff

	/*
	 * Save non-volatile registers, and set return address for current
	 * thread to resume_return.
	 *
	 * %r12 = t (new thread) when done
	 */
	SAVE_REGS(%rax, %r11)


	LOADCPU(%r15)				/* %r15 = CPU */
	movq	CPU_THREAD(%r15), %r13		/* %r13 = curthread */

	/*
	 * Call savectx if thread has installed context ops.
	 *
	 * Note that if we have floating point context, the save op
	 * (either fpsave_begin or fpxsave_begin) will issue the
	 * async save instruction (fnsave or fxsave respectively)
	 * that we fwait for below.
	 */
	cmpq	$0, T_CTX(%r13)		/* should current thread savectx? */
	je	.nosavectx		/* skip call when zero */

	movq	%r13, %rdi		/* arg = thread pointer */
	call	savectx			/* call ctx ops */
.nosavectx:

	/*
	 * Call savepctx if process has installed context ops.
	 */
	movq	T_PROCP(%r13), %r14	/* %r14 = proc */
	cmpq	$0, P_PCTX(%r14)	/* should current thread savepctx? */
	je	.nosavepctx		/* skip call when zero */

	movq	%r14, %rdi		/* arg = proc pointer */
	call	savepctx		/* call ctx ops */
.nosavepctx:

	/*
	 * Temporarily switch to the idle thread's stack
	 */
	movq	CPU_IDLE_THREAD(%r15), %rax	/* idle thread pointer */

	/*
	 * Set the idle thread as the current thread
	 */
	movq	T_SP(%rax), %rsp	/* It is safe to set rsp */
	movq	%rax, CPU_THREAD(%r15)

	/*
	 * Switch in the hat context for the new thread
	 *
	 */
	GET_THREAD_HATP(%rdi, %r12, %r11)
	call	hat_switch

	/*
	 * Clear and unlock previous thread's t_lock
	 * to allow it to be dispatched by another processor.
	 */
	movb	$0, T_LOCK(%r13)

	/*
	 * IMPORTANT: Registers at this point must be:
	 *	%r12 = new thread
	 *
	 * Here we are in the idle thread, have dropped the old thread.
	 */
	ALTENTRY(_resume_from_idle)
	/*
	 * spin until dispatched thread's mutex has
	 * been unlocked. this mutex is unlocked when
	 * it becomes safe for the thread to run.
	 */
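	/*
	 * C-style sketch of the acquire loop below; test_and_set() and
	 * cpu_pause() are hypothetical stand-ins for the 'lock; btsl' and
	 * 'pause' instructions:
	 *
	 *	while (test_and_set(&t->t_lock) != 0) {
	 *		do {
	 *			cpu_pause();
	 *		} while (t->t_lock != 0);
	 *	}
	 */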
.lock_thread_mutex:
	lock
	btsl	$0, T_LOCK(%r12)	/* attempt to lock new thread's mutex */
	jnc	.thread_mutex_locked	/* got it */

.spin_thread_mutex:
	pause
	cmpb	$0, T_LOCK(%r12)	/* check mutex status */
	jz	.lock_thread_mutex	/* clear, retry lock */
	jmp	.spin_thread_mutex	/* still locked, spin... */

.thread_mutex_locked:
	/*
	 * Fix CPU structure to indicate new running thread.
	 * Set pointer in new thread to the CPU structure.
	 */
	LOADCPU(%r13)			/* load current CPU pointer */
	cmpq	%r13, T_CPU(%r12)
	je	.setup_cpu

	/* cp->cpu_stats.sys.cpumigrate++ */
	incq	CPU_STATS_SYS_CPUMIGRATE(%r13)
	movq	%r13, T_CPU(%r12)	/* set new thread's CPU pointer */

.setup_cpu:
	/*
	 * Setup rsp0 (kernel stack) in TSS to curthread's saved regs
	 * structure.  If this thread doesn't have a regs structure above
	 * the stack -- that is, if lwp_stk_init() was never called for the
	 * thread -- this will set rsp0 to the wrong value, but it's harmless
	 * as it's a kernel thread, and it won't actually attempt to implicitly
	 * use the rsp0 via a privilege change.
	 *
	 * Note that when we have KPTI enabled on amd64, we never use this
	 * value at all (since all the interrupts have an IST set).
	 */
	movq	CPU_TSS(%r13), %r14
#if !defined(__xpv)
	cmpq	$1, kpti_enable
	jne	1f
	leaq	CPU_KPTI_TR_RSP(%r13), %rax
	jmp	2f
1:
	movq	T_STACK(%r12), %rax
	addq	$REGSIZE+MINFRAME, %rax	/* to the bottom of thread stack */
2:
	movq	%rax, TSS_RSP0(%r14)
#else
	movq	T_STACK(%r12), %rax
	addq	$REGSIZE+MINFRAME, %rax	/* to the bottom of thread stack */
	movl	$KDS_SEL, %edi
	movq	%rax, %rsi
	call	HYPERVISOR_stack_switch
#endif	/* __xpv */
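	/*
	 * Using the same assym offsets, the non-xpv path above amounts to
	 * roughly the following; tss here is the pointer loaded from
	 * CPU_TSS, and in the KPTI case rsp0 simply points at the per-CPU
	 * KPTI trampoline frame:
	 *
	 *	if (kpti_enable == 1)
	 *		tss->tss_rsp0 = (uintptr_t)cpu + CPU_KPTI_TR_RSP;
	 *	else
	 *		tss->tss_rsp0 = (uintptr_t)t->t_stk + REGSIZE + MINFRAME;
	 */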

	movq	%r12, CPU_THREAD(%r13)	/* set CPU's thread pointer */
	mfence				/* synchronize with mutex_exit() */
	xorl	%ebp, %ebp		/* make $<threadlist behave better */
	movq	T_LWP(%r12), %rax	/* set associated lwp to */
	movq	%rax, CPU_LWP(%r13)	/* CPU's lwp ptr */

	movq	T_SP(%r12), %rsp	/* switch to resuming thread's stack */
	movq	T_PC(%r12), %r13	/* saved return addr */

	/*
	 * Call restorectx if context ops have been installed.
	 */
	cmpq	$0, T_CTX(%r12)		/* should resumed thread restorectx? */
	jz	.norestorectx		/* skip call when zero */
	movq	%r12, %rdi		/* arg = thread pointer */
	call	restorectx		/* call ctx ops */
.norestorectx:

	/*
	 * Call restorepctx if context ops have been installed for the proc.
	 */
	movq	T_PROCP(%r12), %rcx
	cmpq	$0, P_PCTX(%rcx)
	jz	.norestorepctx
	movq	%rcx, %rdi
	call	restorepctx
.norestorepctx:

	STORE_INTR_START(%r12)

	/*
	 * If we came into swtch with the ability to access userland pages, go
	 * ahead and restore that fact by disabling SMAP.  Clear the indicator
	 * flag out of paranoia.
	 */
	movq	T_USERACC(%r12), %rax	/* should we disable smap? */
	cmpq	$0, %rax		/* skip call when zero */
	jz	.nosmap
	xorq	%rax, %rax
	movq	%rax, T_USERACC(%r12)
	call	smap_disable
.nosmap:

	call	smt_mark

	/*
	 * Restore non-volatile registers, then have spl0 return to the
	 * resuming thread's PC after first setting the priority as low as
	 * possible and blocking all interrupt threads that may be active.
	 */
	movq	%r13, %rax	/* save return address */
	RESTORE_REGS(%r11)
	pushq	%rax		/* push return address for spl0() */
	call	__dtrace_probe___sched_on__cpu
	jmp	spl0

resume_return:
	/*
	 * Remove stack frame created in SAVE_REGS()
	 */
	addq	$CLONGSIZE, %rsp
	ret
	SET_SIZE(_resume_from_idle)
	SET_SIZE(resume)

	ENTRY(resume_from_zombie)
	movq	%gs:CPU_THREAD, %rax
	leaq	resume_from_zombie_return(%rip), %r11

	/*
	 * Save non-volatile registers, and set return address for current
	 * thread to resume_from_zombie_return.
	 *
	 * %r12 = t (new thread) when done
	 */
	SAVE_REGS(%rax, %r11)

	movq	%gs:CPU_THREAD, %r13	/* %r13 = curthread */

	/* clean up the fp unit. It might be left enabled */

#if defined(__xpv)		/* XXPV XXtclayton */
	/*
	 * Remove this after bringup.
	 * (Too many #gp's for an instrumented hypervisor.)
	 */
	STTS(%rax)
#else
	movq	%cr0, %rax
	testq	$CR0_TS, %rax
	jnz	.zfpu_disabled		/* if TS already set, nothing to do */
	fninit				/* init fpu & discard pending error */
	orq	$CR0_TS, %rax
	movq	%rax, %cr0
.zfpu_disabled:

#endif	/* __xpv */
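	/*
	 * The non-xpv branch above is, in rough C terms (getcr0()/setcr0()
	 * standing in for the direct %cr0 accesses):
	 *
	 *	if (!(getcr0() & CR0_TS)) {
	 *		... fninit: reset the FPU and discard pending errors ...
	 *		setcr0(getcr0() | CR0_TS);
	 *	}
	 *
	 * Setting CR0_TS marks the FPU as not owned, so the next thread to
	 * touch it takes a #NM fault rather than seeing stale state.
	 */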

	/*
	 * Temporarily switch to the idle thread's stack so that the zombie
	 * thread's stack can be reclaimed by the reaper.
	 */
	movq	%gs:CPU_IDLE_THREAD, %rax /* idle thread pointer */
	movq	T_SP(%rax), %rsp	/* get onto idle thread stack */

	/*
	 * Sigh. If the idle thread has never run thread_start()
	 * then t_sp is mis-aligned by thread_load().
	 */
	andq	$_BITNOT(STACK_ALIGN-1), %rsp

	/*
	 * Set the idle thread as the current thread.
	 */
	movq	%rax, %gs:CPU_THREAD

	/* switch in the hat context for the new thread */
	GET_THREAD_HATP(%rdi, %r12, %r11)
	call	hat_switch

	/*
	 * Put the zombie on death-row.
	 */
	movq	%r13, %rdi
	call	reapq_add

	jmp	_resume_from_idle	/* finish job of resume */

resume_from_zombie_return:
	RESTORE_REGS(%r11)		/* restore non-volatile registers */
	call	__dtrace_probe___sched_on__cpu

	/*
	 * Remove stack frame created in SAVE_REGS()
	 */
	addq	$CLONGSIZE, %rsp
	ret
	SET_SIZE(resume_from_zombie)

	ENTRY(resume_from_intr)
	movq	%gs:CPU_THREAD, %rax
	leaq	resume_from_intr_return(%rip), %r11

	/*
	 * Save non-volatile registers, and set return address for current
	 * thread to resume_from_intr_return.
	 *
	 * %r12 = t (new thread) when done
	 */
	SAVE_REGS(%rax, %r11)

	movq	%gs:CPU_THREAD, %r13	/* %r13 = curthread */
	movq	%r12, %gs:CPU_THREAD	/* set CPU's thread pointer */
	mfence				/* synchronize with mutex_exit() */
	movq	T_SP(%r12), %rsp	/* restore resuming thread's sp */
	xorl	%ebp, %ebp		/* make $<threadlist behave better */

	/*
	 * Unlock the outgoing thread's mutex so it may be dispatched
	 * by another processor.
	 */
	xorl	%eax, %eax
	xchgb	%al, T_LOCK(%r13)

	STORE_INTR_START(%r12)

	call	smt_mark

	/*
	 * Restore non-volatile registers, then have spl0 return to the
	 * resuming thread's PC after first setting the priority as low as
	 * possible and blocking all interrupt threads that may be active.
	 */
	movq	T_PC(%r12), %rax	/* saved return addr */
	RESTORE_REGS(%r11);
	pushq	%rax			/* push return address for spl0() */
	call	__dtrace_probe___sched_on__cpu
	jmp	spl0

resume_from_intr_return:
	/*
	 * Remove stack frame created in SAVE_REGS()
	 */
	addq	$CLONGSIZE, %rsp
	ret
	SET_SIZE(resume_from_intr)

	ENTRY(thread_start)
	popq	%rax		/* start() */
	popq	%rdi		/* arg */
	popq	%rsi		/* len */
	movq	%rsp, %rbp
	INDIRECT_CALL_REG(rax)
	call	thread_exit	/* destroy thread if it returns. */
	/*NOTREACHED*/
	SET_SIZE(thread_start)
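/*
 * In C terms, thread_start() above consumes the initial frame that
 * thread_load() built on the new thread's stack; it is roughly:
 *
 *	(*start)(arg, len);
 *	thread_exit();
 *
 * where start, arg and len are the three values popped above.
 */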