11787 Kernel needs to be built with retpolines
11788 Kernel needs to generally use RSB stuffing
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: John Levon <john.levon@joyent.com>
--- old/usr/src/uts/intel/ia32/ml/swtch.s
+++ new/usr/src/uts/intel/ia32/ml/swtch.s
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 24 */
25 25
26 26 /*
27 27 * Copyright 2019 Joyent, Inc.
28 28 */
29 29
30 30 /*
31 31 * Process switching routines.
32 32 */
33 33
34 34 #include <sys/asm_linkage.h>
35 35 #include <sys/asm_misc.h>
36 36 #include <sys/regset.h>
37 37 #include <sys/privregs.h>
38 38 #include <sys/stack.h>
39 39 #include <sys/segments.h>
40 40 #include <sys/psw.h>
41 41
42 42 #include "assym.h"
43 43
44 44 /*
45 45 * resume(thread_id_t t);
46 46 *
47 47 * a thread can only run on one processor at a time. there
48 48 * exists a window on MPs where the current thread on one
49 49 * processor is capable of being dispatched by another processor.
50 50 * some overlap between outgoing and incoming threads can happen
51 51 * when they are the same thread. in this case where the threads
52 52 * are the same, resume() on one processor will spin on the incoming
53 53 * thread until resume() on the other processor has finished with
54 54 * the outgoing thread.
55 55 *
56 56 * The MMU context changes when the resuming thread resides in a different
57 57 * process. Kernel threads are known by resume to reside in process 0.
58 58 * The MMU context, therefore, only changes when resuming a thread in
59 59 * a process different from curproc.
60 60 *
61 61 * resume_from_intr() is called when the thread being resumed was not
62 62 * passivated by resume (e.g. was interrupted). This means that the
63 63 * resume lock is already held and that a restore context is not needed.
64 64 * Also, the MMU context is not changed on the resume in this case.
65 65 *
66 66 * resume_from_zombie() is the same as resume except the calling thread
67 67 * is a zombie and must be put on the deathrow list after the CPU is
68 68 * off the stack.
69 69 */
70 70
71 71 #if LWP_PCB_FPU != 0
72 72 #error LWP_PCB_FPU MUST be defined as 0 for code in swtch.s to work
73 73 #endif /* LWP_PCB_FPU != 0 */
74 74
75 75 /*
76 76 * Save non-volatile regs other than %rsp (%rbx, %rbp, and %r12 - %r15)
77 77 *
78 78 * The stack frame must be created before the save of %rsp so that tracebacks
79 79 * of swtch()ed-out processes show the process as having last called swtch().
80 80 */
81 81 #define SAVE_REGS(thread_t, retaddr) \
82 82 movq %rbp, T_RBP(thread_t); \
83 83 movq %rbx, T_RBX(thread_t); \
84 84 movq %r12, T_R12(thread_t); \
85 85 movq %r13, T_R13(thread_t); \
86 86 movq %r14, T_R14(thread_t); \
87 87 movq %r15, T_R15(thread_t); \
88 88 pushq %rbp; \
89 89 movq %rsp, %rbp; \
90 90 movq %rsp, T_SP(thread_t); \
91 91 movq retaddr, T_PC(thread_t); \
92 92 movq %rdi, %r12; \
93 93 call __dtrace_probe___sched_off__cpu
94 94
95 95 /*
96 96 * Restore non-volatile regs other than %rsp (%rbx, %rbp, and %r12 - %r15)
97 97 *
98 98 * We load up %rsp from the label_t as part of the context switch, so
99 99 * we don't repeat that here.
100 100 *
101 101 * We don't do a 'leave,' because reloading %rsp/%rbp from the label_t
102 102 * already has the effect of putting the stack back the way it was when
103 103 * we came in.
104 104 */
105 105 #define RESTORE_REGS(scratch_reg) \
106 106 movq %gs:CPU_THREAD, scratch_reg; \
107 107 movq T_RBP(scratch_reg), %rbp; \
108 108 movq T_RBX(scratch_reg), %rbx; \
109 109 movq T_R12(scratch_reg), %r12; \
110 110 movq T_R13(scratch_reg), %r13; \
111 111 movq T_R14(scratch_reg), %r14; \
112 112 movq T_R15(scratch_reg), %r15
113 113
114 114 /*
115 115 * Get pointer to a thread's hat structure
116 116 */
117 117 #define GET_THREAD_HATP(hatp, thread_t, scratch_reg) \
118 118 movq T_PROCP(thread_t), hatp; \
119 119 movq P_AS(hatp), scratch_reg; \
120 120 movq A_HAT(scratch_reg), hatp
121 121
122 122 #define TSC_READ() \
123 123 call tsc_read; \
124 124 movq %rax, %r14;
125 125
126 126 /*
127 127 * If we are resuming an interrupt thread, store a timestamp in the thread
128 128 * structure. If an interrupt occurs between tsc_read() and its subsequent
129 129 * store, the timestamp will be stale by the time it is stored. We can detect
130 130 * this by doing a compare-and-swap on the thread's timestamp, since any
131 131 * interrupt occurring in this window will put a new timestamp in the thread's
132 132 * t_intr_start field.
133 133 */
134 134 #define STORE_INTR_START(thread_t) \
135 135 testw $T_INTR_THREAD, T_FLAGS(thread_t); \
136 136 jz 1f; \
137 137 0: \
138 138 TSC_READ(); \
139 139 movq T_INTR_START(thread_t), %rax; \
140 140 cmpxchgq %r14, T_INTR_START(thread_t); \
141 141 jnz 0b; \
142 142 1:
143 143
144 144 .global kpti_enable
145 145
146 146 ENTRY(resume)
147 147 movq %gs:CPU_THREAD, %rax
148 148 leaq resume_return(%rip), %r11
149 149
150 150 /*
151 151 * Deal with SMAP here. A thread may be switched out at any point while
152 152 * it is executing. The thread could be under on_fault() or it could be
153 153 * pre-empted while performing a copy interruption. If this happens and
154 154 * we're not in the context of an interrupt which happens to handle
155 155 * saving and restoring rflags correctly, we may lose our SMAP related
156 156 * state.
157 157 *
158 158 * To handle this, as part of being switched out, we first save whether
159 159 * or not userland access is allowed ($PS_ACHK in rflags) and store that
160 160 * in t_useracc on the kthread_t and unconditionally enable SMAP to
161 161 * protect the system.
162 162 *
163 163 * Later, when the thread finishes resuming, we potentially disable smap
164 164 * if PS_ACHK was present in rflags. See uts/intel/ia32/ml/copy.s for
165 165 * more information on rflags and SMAP.
166 166 */
167 167 pushfq
168 168 popq %rsi
169 169 andq $PS_ACHK, %rsi
170 170 movq %rsi, T_USERACC(%rax)
171 171 call smap_enable
172 172
173 173 /*
174 + * Take a moment to potentially clear the RSB buffer. This is done to
175 + * prevent various Spectre variant 2 and SpectreRSB attacks. This may
176 + * not be sufficient. Please see uts/intel/ia32/ml/retpoline.s for more
177 + * information about this.
178 + */
179 + call x86_rsb_stuff
180 +
181 + /*
174 182 * Save non-volatile registers, and set return address for current
175 183 * thread to resume_return.
176 184 *
177 185 * %r12 = t (new thread) when done
178 186 */
179 187 SAVE_REGS(%rax, %r11)
180 188
181 189
182 190 LOADCPU(%r15) /* %r15 = CPU */
183 191 movq CPU_THREAD(%r15), %r13 /* %r13 = curthread */
184 192
185 193 /*
186 194 * Call savectx if thread has installed context ops.
187 195 *
188 196 * Note that if we have floating point context, the save op
189 197 * (either fpsave_begin or fpxsave_begin) will issue the
190 198 * async save instruction (fnsave or fxsave respectively)
191 199 * that we fwait for below.
192 200 */
193 201 cmpq $0, T_CTX(%r13) /* should current thread savectx? */
194 202 je .nosavectx /* skip call when zero */
195 203
196 204 movq %r13, %rdi /* arg = thread pointer */
197 205 call savectx /* call ctx ops */
198 206 .nosavectx:
199 207
200 208 /*
201 209 * Call savepctx if process has installed context ops.
202 210 */
203 211 movq T_PROCP(%r13), %r14 /* %r14 = proc */
204 212 cmpq $0, P_PCTX(%r14) /* should current thread savectx? */
205 213 je .nosavepctx /* skip call when zero */
206 214
207 215 movq %r14, %rdi /* arg = proc pointer */
208 216 call savepctx /* call ctx ops */
209 217 .nosavepctx:
210 218
211 219 /*
212 220 * Temporarily switch to the idle thread's stack
213 221 */
214 - movq CPU_IDLE_THREAD(%r15), %rax /* idle thread pointer */
222 + movq CPU_IDLE_THREAD(%r15), %rax /* idle thread pointer */
215 223
216 224 /*
217 225 * Set the idle thread as the current thread
218 226 */
219 227 movq T_SP(%rax), %rsp /* It is safe to set rsp */
220 228 movq %rax, CPU_THREAD(%r15)
221 229
222 230 /*
223 231 * Switch in the hat context for the new thread
224 232 *
225 233 */
226 234 GET_THREAD_HATP(%rdi, %r12, %r11)
227 235 call hat_switch
228 236
229 237 /*
230 238 * Clear and unlock previous thread's t_lock
231 239 * to allow it to be dispatched by another processor.
232 240 */
233 241 movb $0, T_LOCK(%r13)
234 242
235 243 /*
236 244 * IMPORTANT: Registers at this point must be:
237 245 * %r12 = new thread
238 246 *
239 247 * Here we are in the idle thread, have dropped the old thread.
240 248 */
241 249 ALTENTRY(_resume_from_idle)
242 250 /*
243 251 * spin until dispatched thread's mutex has
244 252 * been unlocked. this mutex is unlocked when
245 253 * it becomes safe for the thread to run.
246 254 */
247 255 .lock_thread_mutex:
248 256 lock
249 - btsl $0, T_LOCK(%r12) /* attempt to lock new thread's mutex */
257 + btsl $0, T_LOCK(%r12) /* attempt to lock new thread's mutex */
250 258 jnc .thread_mutex_locked /* got it */
251 259
252 260 .spin_thread_mutex:
253 261 pause
254 262 cmpb $0, T_LOCK(%r12) /* check mutex status */
255 263 jz .lock_thread_mutex /* clear, retry lock */
256 264 jmp .spin_thread_mutex /* still locked, spin... */
257 265
258 266 .thread_mutex_locked:
259 267 /*
260 268 * Fix CPU structure to indicate new running thread.
261 269 * Set pointer in new thread to the CPU structure.
262 270 */
263 271 LOADCPU(%r13) /* load current CPU pointer */
264 272 cmpq %r13, T_CPU(%r12)
265 273 je .setup_cpu
266 274
267 275 /* cp->cpu_stats.sys.cpumigrate++ */
268 276 incq CPU_STATS_SYS_CPUMIGRATE(%r13)
269 277 movq %r13, T_CPU(%r12) /* set new thread's CPU pointer */
270 278
271 279 .setup_cpu:
272 280 /*
273 281 * Setup rsp0 (kernel stack) in TSS to curthread's saved regs
274 282 * structure. If this thread doesn't have a regs structure above
275 283 * the stack -- that is, if lwp_stk_init() was never called for the
276 284 * thread -- this will set rsp0 to the wrong value, but it's harmless
277 285 * as it's a kernel thread, and it won't actually attempt to implicitly
278 286 * use the rsp0 via a privilege change.
279 287 *
280 288 * Note that when we have KPTI enabled on amd64, we never use this
281 289 * value at all (since all the interrupts have an IST set).
282 290 */
283 291 movq CPU_TSS(%r13), %r14
284 292 #if !defined(__xpv)
285 293 cmpq $1, kpti_enable
286 294 jne 1f
287 295 leaq CPU_KPTI_TR_RSP(%r13), %rax
288 296 jmp 2f
289 297 1:
290 298 movq T_STACK(%r12), %rax
291 299 addq $REGSIZE+MINFRAME, %rax /* to the bottom of thread stack */
292 300 2:
293 301 movq %rax, TSS_RSP0(%r14)
294 302 #else
295 303 movq T_STACK(%r12), %rax
296 304 addq $REGSIZE+MINFRAME, %rax /* to the bottom of thread stack */
297 305 movl $KDS_SEL, %edi
298 306 movq %rax, %rsi
299 307 call HYPERVISOR_stack_switch
300 308 #endif /* __xpv */
301 309
302 310 movq %r12, CPU_THREAD(%r13) /* set CPU's thread pointer */
303 311 mfence /* synchronize with mutex_exit() */
304 312 xorl %ebp, %ebp /* make $<threadlist behave better */
305 - movq T_LWP(%r12), %rax /* set associated lwp to */
306 - movq %rax, CPU_LWP(%r13) /* CPU's lwp ptr */
313 + movq T_LWP(%r12), %rax /* set associated lwp to */
314 + movq %rax, CPU_LWP(%r13) /* CPU's lwp ptr */
307 315
308 316 movq T_SP(%r12), %rsp /* switch to outgoing thread's stack */
309 317 movq T_PC(%r12), %r13 /* saved return addr */
310 318
311 319 /*
312 320 * Call restorectx if context ops have been installed.
313 321 */
314 322 cmpq $0, T_CTX(%r12) /* should resumed thread restorectx? */
315 323 jz .norestorectx /* skip call when zero */
316 324 movq %r12, %rdi /* arg = thread pointer */
317 325 call restorectx /* call ctx ops */
318 326 .norestorectx:
319 327
320 328 /*
321 329 * Call restorepctx if context ops have been installed for the proc.
322 330 */
323 331 movq T_PROCP(%r12), %rcx
324 332 cmpq $0, P_PCTX(%rcx)
325 333 jz .norestorepctx
326 334 movq %rcx, %rdi
327 335 call restorepctx
328 336 .norestorepctx:
329 337
330 338 STORE_INTR_START(%r12)
331 339
332 340 /*
333 341 * If we came into swtch with the ability to access userland pages, go
334 342 * ahead and restore that fact by disabling SMAP. Clear the indicator
335 343 * flag out of paranoia.
336 344 */
337 345 movq T_USERACC(%r12), %rax /* should we disable smap? */
338 346 cmpq $0, %rax /* skip call when zero */
339 347 jz .nosmap
340 348 xorq %rax, %rax
341 349 movq %rax, T_USERACC(%r12)
342 350 call smap_disable
343 351 .nosmap:
344 352
345 353 call smt_mark
346 354
347 355 /*
348 356 * Restore non-volatile registers, then have spl0 return to the
349 357 * resuming thread's PC after first setting the priority as low as
350 358 * possible and blocking all interrupt threads that may be active.
351 359 */
352 360 movq %r13, %rax /* save return address */
353 361 RESTORE_REGS(%r11)
354 362 pushq %rax /* push return address for spl0() */
355 363 call __dtrace_probe___sched_on__cpu
356 364 jmp spl0
357 365
358 366 resume_return:
359 367 /*
360 368 * Remove stack frame created in SAVE_REGS()
361 369 */
362 370 addq $CLONGSIZE, %rsp
363 371 ret
364 372 SET_SIZE(_resume_from_idle)
365 373 SET_SIZE(resume)
366 374
367 375 ENTRY(resume_from_zombie)
368 376 movq %gs:CPU_THREAD, %rax
369 377 leaq resume_from_zombie_return(%rip), %r11
370 378
371 379 /*
372 380 * Save non-volatile registers, and set return address for current
373 381 * thread to resume_from_zombie_return.
374 382 *
375 383 * %r12 = t (new thread) when done
376 384 */
377 385 SAVE_REGS(%rax, %r11)
378 386
379 387 movq %gs:CPU_THREAD, %r13 /* %r13 = curthread */
380 388
381 389 /* clean up the fp unit. It might be left enabled */
382 390
383 391 #if defined(__xpv) /* XXPV XXtclayton */
384 392 /*
385 393 * Remove this after bringup.
386 394 * (Too many #gp's for an instrumented hypervisor.)
387 395 */
388 396 STTS(%rax)
389 397 #else
390 398 movq %cr0, %rax
391 399 testq $CR0_TS, %rax
392 400 jnz .zfpu_disabled /* if TS already set, nothing to do */
393 401 fninit /* init fpu & discard pending error */
394 402 orq $CR0_TS, %rax
395 403 movq %rax, %cr0
396 404 .zfpu_disabled:
397 405
398 406 #endif /* __xpv */
399 407
400 408 /*
401 409 * Temporarily switch to the idle thread's stack so that the zombie
402 410 * thread's stack can be reclaimed by the reaper.
403 411 */
404 412 movq %gs:CPU_IDLE_THREAD, %rax /* idle thread pointer */
405 413 movq T_SP(%rax), %rsp /* get onto idle thread stack */
406 414
407 415 /*
408 416 * Sigh. If the idle thread has never run thread_start()
409 417 * then t_sp is mis-aligned by thread_load().
410 418 */
411 419 andq $_BITNOT(STACK_ALIGN-1), %rsp
412 420
413 421 /*
414 422 * Set the idle thread as the current thread.
415 423 */
416 424 movq %rax, %gs:CPU_THREAD
417 425
418 426 /* switch in the hat context for the new thread */
419 427 GET_THREAD_HATP(%rdi, %r12, %r11)
420 428 call hat_switch
421 429
422 430 /*
423 431 * Put the zombie on death-row.
424 432 */
425 433 movq %r13, %rdi
426 434 call reapq_add
427 435
428 436 jmp _resume_from_idle /* finish job of resume */
429 437
430 438 resume_from_zombie_return:
431 439 RESTORE_REGS(%r11) /* restore non-volatile registers */
432 440 call __dtrace_probe___sched_on__cpu
433 441
434 442 /*
435 443 * Remove stack frame created in SAVE_REGS()
436 444 */
437 445 addq $CLONGSIZE, %rsp
438 446 ret
439 447 SET_SIZE(resume_from_zombie)
440 448
441 449 ENTRY(resume_from_intr)
442 450 movq %gs:CPU_THREAD, %rax
443 451 leaq resume_from_intr_return(%rip), %r11
444 452
445 453 /*
446 454 * Save non-volatile registers, and set return address for current
447 455 * thread to resume_from_intr_return.
448 456 *
449 457 * %r12 = t (new thread) when done
450 458 */
451 459 SAVE_REGS(%rax, %r11)
452 460
453 461 movq %gs:CPU_THREAD, %r13 /* %r13 = curthread */
454 462 movq %r12, %gs:CPU_THREAD /* set CPU's thread pointer */
455 463 mfence /* synchronize with mutex_exit() */
456 464 movq T_SP(%r12), %rsp /* restore resuming thread's sp */
457 465 xorl %ebp, %ebp /* make $<threadlist behave better */
458 466
459 467 /*
460 468 * Unlock outgoing thread's mutex dispatched by another processor.
461 469 */
462 470 xorl %eax, %eax
463 471 xchgb %al, T_LOCK(%r13)
464 472
465 473 STORE_INTR_START(%r12)
466 474
467 475 call smt_mark
468 476
469 477 /*
470 478 * Restore non-volatile registers, then have spl0 return to the
471 479 * resuming thread's PC after first setting the priority as low as
472 480 * possible and blocking all interrupt threads that may be active.
473 481 */
474 482 movq T_PC(%r12), %rax /* saved return addr */
475 483 RESTORE_REGS(%r11);
476 484 pushq %rax /* push return address for spl0() */
477 485 call __dtrace_probe___sched_on__cpu
478 486 jmp spl0
479 487
480 488 resume_from_intr_return:
481 489 /*
482 490 * Remove stack frame created in SAVE_REGS()
483 491 */
484 - addq $CLONGSIZE, %rsp
492 + addq $CLONGSIZE, %rsp
485 493 ret
486 494 SET_SIZE(resume_from_intr)
487 495
488 496 ENTRY(thread_start)
489 497 popq %rax /* start() */
490 498 popq %rdi /* arg */
491 499 popq %rsi /* len */
492 500 movq %rsp, %rbp
493 - call *%rax
501 + INDIRECT_CALL_REG(rax)
494 502 call thread_exit /* destroy thread if it returns. */
495 503 /*NOTREACHED*/
496 504 SET_SIZE(thread_start)
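
The x86_rsb_stuff call added at the top of resume() overwrites the CPU's return stack buffer before the outgoing thread is switched off, so the incoming thread cannot consume RSB entries trained by the old thread (the SpectreRSB variant). The real routine lives in uts/intel/ia32/ml/retpoline.s; purely as an illustration of the general stuffing technique (the label names, the 32-entry count, and the standalone directives below are assumptions, not the illumos code), a GNU as sketch looks like this:

	/*
	 * Illustrative sketch only: fill the RSB with 32 benign entries
	 * that point at a speculation trap, then discard the matching
	 * return addresses from the architectural stack.
	 */
	.text
	.globl	rsb_stuff_sketch
	.type	rsb_stuff_sketch, @function
rsb_stuff_sketch:
	movl	$16, %ecx		/* 16 iterations x 2 calls = 32 RSB entries */
1:
	call	2f			/* push a benign return address into the RSB */
3:
	pause				/* speculation trap: a speculative return */
	lfence				/* lands here and spins harmlessly */
	jmp	3b
2:
	call	4f			/* second call of the pair */
5:
	pause
	lfence
	jmp	5b
4:
	decl	%ecx
	jnz	1b
	addq	$(32 * 8), %rsp		/* drop the 32 return addresses pushed above */
	ret
	.size	rsb_stuff_sketch, . - rsb_stuff_sketch

Each iteration performs two calls that never return architecturally; once the loop finishes, the stale return addresses are popped from the stack in a single adjustment, leaving the RSB filled with entries that can only speculate into the pause/lfence trap.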
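thread_start() now reaches its start function through INDIRECT_CALL_REG(rax) instead of a bare call *%rax, routing the indirect branch through a retpoline thunk when the kernel is built with retpolines (issue 11787). The actual macro expansion and thunk bodies are defined in sys/asm_linkage.h and uts/intel/ia32/ml/retpoline.s; as an assumption-laden sketch of the well-known retpoline pattern only, a thunk for %rax has roughly this shape:

	/*
	 * Illustrative sketch only: a retpoline thunk for an indirect
	 * call or jump through %rax.
	 */
	.text
	.globl	indirect_thunk_rax_sketch
	.type	indirect_thunk_rax_sketch, @function
indirect_thunk_rax_sketch:
	call	2f			/* push the trap address; the RSB now points at 1 */
1:
	pause				/* any speculative return lands here */
	lfence				/* and spins instead of running gadgets */
	jmp	1b
2:
	movq	%rax, (%rsp)		/* replace the trap address with the real target */
	ret				/* architecturally transfers control to *%rax */
	.size	indirect_thunk_rax_sketch, . - indirect_thunk_rax_sketch

The call pushes the trap address, so the return predictor can only speculate into the pause/lfence loop; the architectural ret then consumes the overwritten stack slot and lands at the address in %rax, giving the same behavior as call *%rax without exposing the indirect branch predictor.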