1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * Copyright 2019 Joyent, Inc. 28 */ 29 30 /* 31 * Process switching routines. 32 */ 33 34 #include <sys/asm_linkage.h> 35 #include <sys/asm_misc.h> 36 #include <sys/regset.h> 37 #include <sys/privregs.h> 38 #include <sys/stack.h> 39 #include <sys/segments.h> 40 #include <sys/psw.h> 41 42 #include "assym.h" 43 44 /* 45 * resume(thread_id_t t); 46 * 47 * a thread can only run on one processor at a time. there 48 * exists a window on MPs where the current thread on one 49 * processor is capable of being dispatched by another processor. 50 * some overlap between outgoing and incoming threads can happen 51 * when they are the same thread. in this case where the threads 52 * are the same, resume() on one processor will spin on the incoming 53 * thread until resume() on the other processor has finished with 54 * the outgoing thread. 55 * 56 * The MMU context changes when the resuming thread resides in a different 57 * process. Kernel threads are known by resume to reside in process 0. 58 * The MMU context, therefore, only changes when resuming a thread in 59 * a process different from curproc. 60 * 61 * resume_from_intr() is called when the thread being resumed was not 62 * passivated by resume (e.g. was interrupted). This means that the 63 * resume lock is already held and that a restore context is not needed. 64 * Also, the MMU context is not changed on the resume in this case. 65 * 66 * resume_from_zombie() is the same as resume except the calling thread 67 * is a zombie and must be put on the deathrow list after the CPU is 68 * off the stack. 69 */ 70 71 #if LWP_PCB_FPU != 0 72 #error LWP_PCB_FPU MUST be defined as 0 for code in swtch.s to work 73 #endif /* LWP_PCB_FPU != 0 */ 74 75 /* 76 * Save non-volatile regs other than %rsp (%rbx, %rbp, and %r12 - %r15) 77 * 78 * The stack frame must be created before the save of %rsp so that tracebacks 79 * of swtch()ed-out processes show the process as having last called swtch(). 80 */ 81 #define SAVE_REGS(thread_t, retaddr) \ 82 movq %rbp, T_RBP(thread_t); \ 83 movq %rbx, T_RBX(thread_t); \ 84 movq %r12, T_R12(thread_t); \ 85 movq %r13, T_R13(thread_t); \ 86 movq %r14, T_R14(thread_t); \ 87 movq %r15, T_R15(thread_t); \ 88 pushq %rbp; \ 89 movq %rsp, %rbp; \ 90 movq %rsp, T_SP(thread_t); \ 91 movq retaddr, T_PC(thread_t); \ 92 movq %rdi, %r12; \ 93 call __dtrace_probe___sched_off__cpu 94 95 /* 96 * Restore non-volatile regs other than %rsp (%rbx, %rbp, and %r12 - %r15) 97 * 98 * We load up %rsp from the label_t as part of the context switch, so 99 * we don't repeat that here. 100 * 101 * We don't do a 'leave,' because reloading %rsp/%rbp from the label_t 102 * already has the effect of putting the stack back the way it was when 103 * we came in. 104 */ 105 #define RESTORE_REGS(scratch_reg) \ 106 movq %gs:CPU_THREAD, scratch_reg; \ 107 movq T_RBP(scratch_reg), %rbp; \ 108 movq T_RBX(scratch_reg), %rbx; \ 109 movq T_R12(scratch_reg), %r12; \ 110 movq T_R13(scratch_reg), %r13; \ 111 movq T_R14(scratch_reg), %r14; \ 112 movq T_R15(scratch_reg), %r15 113 114 /* 115 * Get pointer to a thread's hat structure 116 */ 117 #define GET_THREAD_HATP(hatp, thread_t, scratch_reg) \ 118 movq T_PROCP(thread_t), hatp; \ 119 movq P_AS(hatp), scratch_reg; \ 120 movq A_HAT(scratch_reg), hatp 121 122 #define TSC_READ() \ 123 call tsc_read; \ 124 movq %rax, %r14; 125 126 /* 127 * If we are resuming an interrupt thread, store a timestamp in the thread 128 * structure. If an interrupt occurs between tsc_read() and its subsequent 129 * store, the timestamp will be stale by the time it is stored. We can detect 130 * this by doing a compare-and-swap on the thread's timestamp, since any 131 * interrupt occurring in this window will put a new timestamp in the thread's 132 * t_intr_start field. 133 */ 134 #define STORE_INTR_START(thread_t) \ 135 testw $T_INTR_THREAD, T_FLAGS(thread_t); \ 136 jz 1f; \ 137 0: \ 138 TSC_READ(); \ 139 movq T_INTR_START(thread_t), %rax; \ 140 cmpxchgq %r14, T_INTR_START(thread_t); \ 141 jnz 0b; \ 142 1: 143 144 .global kpti_enable 145 146 ENTRY(resume) 147 movq %gs:CPU_THREAD, %rax 148 leaq resume_return(%rip), %r11 149 150 /* 151 * Deal with SMAP here. A thread may be switched out at any point while 152 * it is executing. The thread could be under on_fault() or it could be 153 * pre-empted while performing a copy interruption. If this happens and 154 * we're not in the context of an interrupt which happens to handle 155 * saving and restoring rflags correctly, we may lose our SMAP related 156 * state. 157 * 158 * To handle this, as part of being switched out, we first save whether 159 * or not userland access is allowed ($PS_ACHK in rflags) and store that 160 * in t_useracc on the kthread_t and unconditionally enable SMAP to 161 * protect the system. 162 * 163 * Later, when the thread finishes resuming, we potentially disable smap 164 * if PS_ACHK was present in rflags. See uts/intel/ia32/ml/copy.s for 165 * more information on rflags and SMAP. 166 */ 167 pushfq 168 popq %rsi 169 andq $PS_ACHK, %rsi 170 movq %rsi, T_USERACC(%rax) 171 call smap_enable 172 173 /* 174 * Save non-volatile registers, and set return address for current 175 * thread to resume_return. 176 * 177 * %r12 = t (new thread) when done 178 */ 179 SAVE_REGS(%rax, %r11) 180 181 182 LOADCPU(%r15) /* %r15 = CPU */ 183 movq CPU_THREAD(%r15), %r13 /* %r13 = curthread */ 184 185 /* 186 * Call savectx if thread has installed context ops. 187 * 188 * Note that if we have floating point context, the save op 189 * (either fpsave_begin or fpxsave_begin) will issue the 190 * async save instruction (fnsave or fxsave respectively) 191 * that we fwait for below. 192 */ 193 cmpq $0, T_CTX(%r13) /* should current thread savectx? */ 194 je .nosavectx /* skip call when zero */ 195 196 movq %r13, %rdi /* arg = thread pointer */ 197 call savectx /* call ctx ops */ 198 .nosavectx: 199 200 /* 201 * Call savepctx if process has installed context ops. 202 */ 203 movq T_PROCP(%r13), %r14 /* %r14 = proc */ 204 cmpq $0, P_PCTX(%r14) /* should current thread savectx? */ 205 je .nosavepctx /* skip call when zero */ 206 207 movq %r14, %rdi /* arg = proc pointer */ 208 call savepctx /* call ctx ops */ 209 .nosavepctx: 210 211 /* 212 * Temporarily switch to the idle thread's stack 213 */ 214 movq CPU_IDLE_THREAD(%r15), %rax /* idle thread pointer */ 215 216 /* 217 * Set the idle thread as the current thread 218 */ 219 movq T_SP(%rax), %rsp /* It is safe to set rsp */ 220 movq %rax, CPU_THREAD(%r15) 221 222 /* 223 * Switch in the hat context for the new thread 224 * 225 */ 226 GET_THREAD_HATP(%rdi, %r12, %r11) 227 call hat_switch 228 229 /* 230 * Clear and unlock previous thread's t_lock 231 * to allow it to be dispatched by another processor. 232 */ 233 movb $0, T_LOCK(%r13) 234 235 /* 236 * IMPORTANT: Registers at this point must be: 237 * %r12 = new thread 238 * 239 * Here we are in the idle thread, have dropped the old thread. 240 */ 241 ALTENTRY(_resume_from_idle) 242 /* 243 * spin until dispatched thread's mutex has 244 * been unlocked. this mutex is unlocked when 245 * it becomes safe for the thread to run. 246 */ 247 .lock_thread_mutex: 248 lock 249 btsl $0, T_LOCK(%r12) /* attempt to lock new thread's mutex */ 250 jnc .thread_mutex_locked /* got it */ 251 252 .spin_thread_mutex: 253 pause 254 cmpb $0, T_LOCK(%r12) /* check mutex status */ 255 jz .lock_thread_mutex /* clear, retry lock */ 256 jmp .spin_thread_mutex /* still locked, spin... */ 257 258 .thread_mutex_locked: 259 /* 260 * Fix CPU structure to indicate new running thread. 261 * Set pointer in new thread to the CPU structure. 262 */ 263 LOADCPU(%r13) /* load current CPU pointer */ 264 cmpq %r13, T_CPU(%r12) 265 je .setup_cpu 266 267 /* cp->cpu_stats.sys.cpumigrate++ */ 268 incq CPU_STATS_SYS_CPUMIGRATE(%r13) 269 movq %r13, T_CPU(%r12) /* set new thread's CPU pointer */ 270 271 .setup_cpu: 272 /* 273 * Setup rsp0 (kernel stack) in TSS to curthread's saved regs 274 * structure. If this thread doesn't have a regs structure above 275 * the stack -- that is, if lwp_stk_init() was never called for the 276 * thread -- this will set rsp0 to the wrong value, but it's harmless 277 * as it's a kernel thread, and it won't actually attempt to implicitly 278 * use the rsp0 via a privilege change. 279 * 280 * Note that when we have KPTI enabled on amd64, we never use this 281 * value at all (since all the interrupts have an IST set). 282 */ 283 movq CPU_TSS(%r13), %r14 284 #if !defined(__xpv) 285 cmpq $1, kpti_enable 286 jne 1f 287 leaq CPU_KPTI_TR_RSP(%r13), %rax 288 jmp 2f 289 1: 290 movq T_STACK(%r12), %rax 291 addq $REGSIZE+MINFRAME, %rax /* to the bottom of thread stack */ 292 2: 293 movq %rax, TSS_RSP0(%r14) 294 #else 295 movq T_STACK(%r12), %rax 296 addq $REGSIZE+MINFRAME, %rax /* to the bottom of thread stack */ 297 movl $KDS_SEL, %edi 298 movq %rax, %rsi 299 call HYPERVISOR_stack_switch 300 #endif /* __xpv */ 301 302 movq %r12, CPU_THREAD(%r13) /* set CPU's thread pointer */ 303 mfence /* synchronize with mutex_exit() */ 304 xorl %ebp, %ebp /* make $<threadlist behave better */ 305 movq T_LWP(%r12), %rax /* set associated lwp to */ 306 movq %rax, CPU_LWP(%r13) /* CPU's lwp ptr */ 307 308 movq T_SP(%r12), %rsp /* switch to outgoing thread's stack */ 309 movq T_PC(%r12), %r13 /* saved return addr */ 310 311 /* 312 * Call restorectx if context ops have been installed. 313 */ 314 cmpq $0, T_CTX(%r12) /* should resumed thread restorectx? */ 315 jz .norestorectx /* skip call when zero */ 316 movq %r12, %rdi /* arg = thread pointer */ 317 call restorectx /* call ctx ops */ 318 .norestorectx: 319 320 /* 321 * Call restorepctx if context ops have been installed for the proc. 322 */ 323 movq T_PROCP(%r12), %rcx 324 cmpq $0, P_PCTX(%rcx) 325 jz .norestorepctx 326 movq %rcx, %rdi 327 call restorepctx 328 .norestorepctx: 329 330 STORE_INTR_START(%r12) 331 332 /* 333 * If we came into swtch with the ability to access userland pages, go 334 * ahead and restore that fact by disabling SMAP. Clear the indicator 335 * flag out of paranoia. 336 */ 337 movq T_USERACC(%r12), %rax /* should we disable smap? */ 338 cmpq $0, %rax /* skip call when zero */ 339 jz .nosmap 340 xorq %rax, %rax 341 movq %rax, T_USERACC(%r12) 342 call smap_disable 343 .nosmap: 344 345 call ht_mark 346 347 /* 348 * Restore non-volatile registers, then have spl0 return to the 349 * resuming thread's PC after first setting the priority as low as 350 * possible and blocking all interrupt threads that may be active. 351 */ 352 movq %r13, %rax /* save return address */ 353 RESTORE_REGS(%r11) 354 pushq %rax /* push return address for spl0() */ 355 call __dtrace_probe___sched_on__cpu 356 jmp spl0 357 358 resume_return: 359 /* 360 * Remove stack frame created in SAVE_REGS() 361 */ 362 addq $CLONGSIZE, %rsp 363 ret 364 SET_SIZE(_resume_from_idle) 365 SET_SIZE(resume) 366 367 ENTRY(resume_from_zombie) 368 movq %gs:CPU_THREAD, %rax 369 leaq resume_from_zombie_return(%rip), %r11 370 371 /* 372 * Save non-volatile registers, and set return address for current 373 * thread to resume_from_zombie_return. 374 * 375 * %r12 = t (new thread) when done 376 */ 377 SAVE_REGS(%rax, %r11) 378 379 movq %gs:CPU_THREAD, %r13 /* %r13 = curthread */ 380 381 /* clean up the fp unit. It might be left enabled */ 382 383 #if defined(__xpv) /* XXPV XXtclayton */ 384 /* 385 * Remove this after bringup. 386 * (Too many #gp's for an instrumented hypervisor.) 387 */ 388 STTS(%rax) 389 #else 390 movq %cr0, %rax 391 testq $CR0_TS, %rax 392 jnz .zfpu_disabled /* if TS already set, nothing to do */ 393 fninit /* init fpu & discard pending error */ 394 orq $CR0_TS, %rax 395 movq %rax, %cr0 396 .zfpu_disabled: 397 398 #endif /* __xpv */ 399 400 /* 401 * Temporarily switch to the idle thread's stack so that the zombie 402 * thread's stack can be reclaimed by the reaper. 403 */ 404 movq %gs:CPU_IDLE_THREAD, %rax /* idle thread pointer */ 405 movq T_SP(%rax), %rsp /* get onto idle thread stack */ 406 407 /* 408 * Sigh. If the idle thread has never run thread_start() 409 * then t_sp is mis-aligned by thread_load(). 410 */ 411 andq $_BITNOT(STACK_ALIGN-1), %rsp 412 413 /* 414 * Set the idle thread as the current thread. 415 */ 416 movq %rax, %gs:CPU_THREAD 417 418 /* switch in the hat context for the new thread */ 419 GET_THREAD_HATP(%rdi, %r12, %r11) 420 call hat_switch 421 422 /* 423 * Put the zombie on death-row. 424 */ 425 movq %r13, %rdi 426 call reapq_add 427 428 jmp _resume_from_idle /* finish job of resume */ 429 430 resume_from_zombie_return: 431 RESTORE_REGS(%r11) /* restore non-volatile registers */ 432 call __dtrace_probe___sched_on__cpu 433 434 /* 435 * Remove stack frame created in SAVE_REGS() 436 */ 437 addq $CLONGSIZE, %rsp 438 ret 439 SET_SIZE(resume_from_zombie) 440 441 ENTRY(resume_from_intr) 442 movq %gs:CPU_THREAD, %rax 443 leaq resume_from_intr_return(%rip), %r11 444 445 /* 446 * Save non-volatile registers, and set return address for current 447 * thread to resume_from_intr_return. 448 * 449 * %r12 = t (new thread) when done 450 */ 451 SAVE_REGS(%rax, %r11) 452 453 movq %gs:CPU_THREAD, %r13 /* %r13 = curthread */ 454 movq %r12, %gs:CPU_THREAD /* set CPU's thread pointer */ 455 mfence /* synchronize with mutex_exit() */ 456 movq T_SP(%r12), %rsp /* restore resuming thread's sp */ 457 xorl %ebp, %ebp /* make $<threadlist behave better */ 458 459 /* 460 * Unlock outgoing thread's mutex dispatched by another processor. 461 */ 462 xorl %eax, %eax 463 xchgb %al, T_LOCK(%r13) 464 465 STORE_INTR_START(%r12) 466 467 call ht_mark 468 469 /* 470 * Restore non-volatile registers, then have spl0 return to the 471 * resuming thread's PC after first setting the priority as low as 472 * possible and blocking all interrupt threads that may be active. 473 */ 474 movq T_PC(%r12), %rax /* saved return addr */ 475 RESTORE_REGS(%r11); 476 pushq %rax /* push return address for spl0() */ 477 call __dtrace_probe___sched_on__cpu 478 jmp spl0 479 480 resume_from_intr_return: 481 /* 482 * Remove stack frame created in SAVE_REGS() 483 */ 484 addq $CLONGSIZE, %rsp 485 ret 486 SET_SIZE(resume_from_intr) 487 488 ENTRY(thread_start) 489 popq %rax /* start() */ 490 popq %rdi /* arg */ 491 popq %rsi /* len */ 492 movq %rsp, %rbp 493 call *%rax 494 call thread_exit /* destroy thread if it returns. */ 495 /*NOTREACHED*/ 496 SET_SIZE(thread_start)