de-linting of .s files
--- old/usr/src/uts/i86pc/ml/kpti_trampolines.s
+++ new/usr/src/uts/i86pc/ml/kpti_trampolines.s
1 1 /*
2 2 * This file and its contents are supplied under the terms of the
3 3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 4 * You may only use this file in accordance with the terms of version
5 5 * 1.0 of the CDDL.
6 6 *
7 7 * A full copy of the text of the CDDL should have accompanied this
8 8 * source. A copy of the CDDL is also available via the Internet at
9 9 * http://www.illumos.org/license/CDDL.
10 10 */
11 11 /*
12 12 * Copyright 2019 Joyent, Inc.
13 13 */
14 14
15 15 /*
16 16 * This file contains the trampolines that are used by KPTI in order to be
17 17 * able to take interrupts/trap/etc while on the "user" page table.
18 18 *
19 19 * We don't map the full kernel text into the user page table: instead we
20 20 * map this one small section of trampolines (which compiles to ~13 pages).
21 21 * These trampolines are always set in the IDT (so they will run whether
22 22 * we're on the kernel or the user page table), and their primary job is to
23 23 * pivot us to the kernel %cr3 and %rsp without ruining everything.
24 24 *
25 25 * All of these interrupts use the amd64 IST feature when we have KPTI enabled,
26 26 * meaning that they will execute with their %rsp set to a known location, even
27 27 * if we take them in the kernel.
28 28 *
29 29 * Over in desctbls.c (for cpu0) and mp_pc.c (other cpus) we set up the IST
30 30 * stack to point at &cpu->cpu_m.mcpu_kpti.kf_tr_rsp. You can see the mcpu_kpti
31 31 * (a struct kpti_frame) defined in machcpuvar.h. This struct is set up to be
32 32 * page-aligned, and we map the page it's on into both page tables. Using a
33 33 * struct attached to the cpu_t also means that we can use %rsp-relative
34 34 * addressing to find anything on the cpu_t, so we don't have to touch %gs or
35 35 * GSBASE at all on incoming interrupt trampolines (which can get pretty hairy).
36 36 *
37 37 * This little struct is where the CPU will push the actual interrupt frame.
38 38 * Then, in the trampoline, we change %cr3, then figure out our destination
39 39 * stack pointer and "pivot" to it (set %rsp and re-push the CPU's interrupt
40 40 * frame). Then we jump to the regular ISR in the kernel text and carry on as
41 41 * normal.
42 42 *
43 43 * We leave the original frame and any spilled regs behind in the kpti_frame
44 44 * lazily until we want to return to userland. Then, we clear any spilled
45 45 * regs from it, and overwrite the rest with our iret frame. When switching
46 46 * this cpu to a different process (in hat_switch), we bzero the whole region to
47 47 * make sure nothing can leak between processes.
48 48 *
49 49 * When we later return to the original place we took the interrupt
50 50 * (especially if it was in userland), we have to jmp back to the "return
51 51 * trampolines" here, since when we set %cr3 back to the user value, we need to
52 52 * be executing from code here in these shared pages and not the main kernel
53 53 * text again. Even though it should be fine to iret directly from kernel text
54 54 * when returning to kernel code, we make things jmp to a trampoline here just
55 55 * for consistency.
56 56 *
57 57 * Note that with IST, it's very important that we must always have pivoted
58 58 * away from the IST stack before we can possibly take any other interrupt
59 59 * on the same IST (unless it's an end-of-the-world fault and we don't care
60 60 * about coming back from it ever).
61 61 *
62 62 * This is particularly relevant to the dbgtrap/brktrap trampolines, as they
63 63 * regularly have to happen from within trampoline code (e.g. in the sysenter
64 64 * single-step case) and then return to the world normally. As a result, these
65 65 * two are IST'd to their own kpti_frame right above the normal one (in the same
66 66 * page), so they don't clobber their parent interrupt.
67 67 *
68 68 * To aid with debugging, we also IST the page fault (#PF/pftrap), general
69 69 * protection fault (#GP/gptrap) and stack fault (#SS/stktrap) interrupts to
70 70 * their own separate kpti_frame. This ensures that if we take one of these
71 71 * due to a bug in trampoline code, we preserve the original trampoline
72 72 * state that caused the trap.
73 73 *
74 74 * NMI, MCE and dblfault interrupts also are taken on their own dedicated IST
75 75 * stacks, since they can interrupt another ISR at any time. These stacks are
76 76 * full-sized, however, and not a little kpti_frame struct. We only set %cr3 in
77 77 * their trampolines (and do it unconditionally), and don't bother pivoting
78 78 * away. We're either going into the panic() path, or we're going to return
79 79 * straight away without rescheduling, so it's fine to not be on our real
80 80 * kthread stack (and some of the state we want to go find it with might be
81 81 * corrupt!)
82 82 *
83 83 * Finally, for these "special" interrupts (NMI/MCE/double fault) we use a
84 84 * special %cr3 value we stash here in the text (kpti_safe_cr3). We set this to
85 85 * point at the PML4 for kas early in boot and never touch it again. Hopefully
86 86 * it survives whatever corruption brings down the rest of the kernel!
87 87 *
88 88 * Syscalls are different to interrupts (at least in the SYSENTER/SYSCALL64
89 89 * cases) in that they do not push an interrupt frame (and also have some other
90 90 * effects). In the syscall trampolines, we assume that we can only be taking
91 91 * the call from userland and use swapgs and an unconditional overwrite of %cr3.
92 92 * We do not do any stack pivoting for syscalls (and we leave SYSENTER's
93 93 * existing %rsp pivot untouched) -- instead we spill registers into
94 94 * %gs:CPU_KPTI_* as we need to.
95 95 *
96 96 * Note that the normal %cr3 values do not cause invalidations with PCIDE - see
97 97 * hat_switch().
98 98 */
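For orientation, the kpti_frame referred to above looks roughly like the
simplified C sketch below. This is illustrative only: the authoritative
definition is struct kpti_frame in machcpuvar.h, and the exact field names,
order, and padding (e.g. redzones) are not reproduced here.

    /* Illustrative sketch only -- see machcpuvar.h for the real struct. */
    struct kpti_frame_sketch {
        uint64_t kf_tr_cr3;     /* %cr3 we arrived on (debug bookkeeping) */
        uint64_t kf_r14;        /* spill slots for the two registers the */
        uint64_t kf_r13;        /*   trampolines allow themselves to use */
        uint64_t kf_err;        /* error code, if the CPU pushed one */
        uint64_t kf_rip;        /* hardware interrupt frame: the CPU */
        uint64_t kf_cs;         /*   pushes these five words here since */
        uint64_t kf_rflags;     /*   the IDT entry's IST slot points at */
        uint64_t kf_rsp;        /*   kf_tr_rsp, directly below */
        uint64_t kf_ss;
        uint64_t kf_tr_rsp;     /* the IST stack pointer aims here */
        uint64_t kf_tr_flag;    /* "frame busy" flag (DBG trampolines) */
    };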
99 99
100 100 /*
101 101 * The macros here mostly line up with what's in kdi_idthdl.s, too, so if you
102 102 * fix bugs here check to see if they should be fixed there as well.
103 103 */
104 104
105 105 #include <sys/asm_linkage.h>
106 106 #include <sys/asm_misc.h>
107 107 #include <sys/regset.h>
108 108 #include <sys/privregs.h>
109 109 #include <sys/psw.h>
110 110 #include <sys/machbrand.h>
111 111 #include <sys/param.h>
112 112
113 -#if defined(__lint)
114 -
115 -#include <sys/types.h>
116 -#include <sys/thread.h>
117 -#include <sys/systm.h>
118 -
119 -#else /* __lint */
120 -
121 113 #include <sys/segments.h>
122 114 #include <sys/pcb.h>
123 115 #include <sys/trap.h>
124 116 #include <sys/ftrace.h>
125 117 #include <sys/traptrace.h>
126 118 #include <sys/clock.h>
127 119 #include <sys/model.h>
128 120 #include <sys/panic.h>
129 121
130 122 #if defined(__xpv)
131 123 #include <sys/hypervisor.h>
132 124 #endif
133 125
134 126 #include "assym.h"
135 127
136 128 .data
137 129 DGDEF3(kpti_enable, 8, 8)
138 130 .fill 1, 8, 1
139 131
140 132 #if DEBUG
141 133 .data
142 134 _bad_ts_panic_msg:
143 135 .string "kpti_trampolines.s: tr_iret_user but CR0.TS set"
144 136 #endif
145 137
146 138 .section ".text";
147 139 .align MMU_PAGESIZE
148 140
149 141 .global kpti_tramp_start
150 142 kpti_tramp_start:
151 143 nop
152 144
153 145 /* This will be set by mlsetup, and then double-checked later */
154 146 .global kpti_safe_cr3
155 147 kpti_safe_cr3:
156 148 .quad 0
157 149 SET_SIZE(kpti_safe_cr3)
158 150
159 151 /* startup_kmem() will overwrite this */
160 152 .global kpti_kbase
161 153 kpti_kbase:
162 154 .quad KERNELBASE
163 155 SET_SIZE(kpti_kbase)
164 156
165 157 #define SET_KERNEL_CR3(spillreg) \
166 158 mov %cr3, spillreg; \
167 159 mov spillreg, %gs:CPU_KPTI_TR_CR3; \
168 160 mov %gs:CPU_KPTI_KCR3, spillreg; \
169 161 cmp $0, spillreg; \
170 162 je 2f; \
171 163 mov spillreg, %cr3; \
172 164 2:
173 165
174 166 #if DEBUG
175 167 #define SET_USER_CR3(spillreg) \
176 168 mov %cr3, spillreg; \
177 169 mov spillreg, %gs:CPU_KPTI_TR_CR3; \
178 170 mov %gs:CPU_KPTI_UCR3, spillreg; \
179 171 mov spillreg, %cr3
180 172 #else
181 173 #define SET_USER_CR3(spillreg) \
182 174 mov %gs:CPU_KPTI_UCR3, spillreg; \
183 175 mov spillreg, %cr3
184 176 #endif
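In C-like pseudocode (helper and variable names here are hypothetical, for
illustration only), the two macros above amount to:

    /* Hypothetical rendering of SET_KERNEL_CR3 / SET_USER_CR3 (non-DEBUG). */
    void set_kernel_cr3(void) {
        kpti->tr_cr3 = read_cr3();      /* remember what we came in on */
        if (kpti->kcr3 != 0)            /* 0 => kernel %cr3 not set up yet */
            write_cr3(kpti->kcr3);
    }
    void set_user_cr3(void) {
        write_cr3(kpti->ucr3);          /* unconditional, unlike kcr3 */
    }

Note the asymmetry: the kernel variant tolerates a zero kcr3 (early boot),
while the user variant assumes ucr3 is valid, since we only head back to
userland once KPTI is fully set up. (The DEBUG build of SET_USER_CR3 also
records the incoming %cr3 in tr_cr3 first.)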
185 177
186 178 #define PIVOT_KPTI_STK(spillreg) \
187 179 mov %rsp, spillreg; \
188 180 mov %gs:CPU_KPTI_RET_RSP, %rsp; \
189 181 pushq T_FRAMERET_SS(spillreg); \
190 182 pushq T_FRAMERET_RSP(spillreg); \
191 183 pushq T_FRAMERET_RFLAGS(spillreg); \
192 184 pushq T_FRAMERET_CS(spillreg); \
193 185 pushq T_FRAMERET_RIP(spillreg)
194 186
195 187
196 188 #define INTERRUPT_TRAMPOLINE_P(errpush) \
197 189 pushq %r13; \
198 190 pushq %r14; \
199 191 subq $KPTI_R14, %rsp; \
200 192 /* Save current %cr3. */ \
201 193 mov %cr3, %r14; \
202 194 mov %r14, KPTI_TR_CR3(%rsp); \
203 195 \
204 196 cmpw $KCS_SEL, KPTI_CS(%rsp); \
205 197 je 3f; \
206 198 1: \
207 199 /* Change to the "kernel" %cr3 */ \
208 200 mov KPTI_KCR3(%rsp), %r14; \
209 201 cmp $0, %r14; \
210 202 je 2f; \
211 203 mov %r14, %cr3; \
212 204 2: \
213 205 /* Get our cpu_t in %r13 */ \
214 206 mov %rsp, %r13; \
215 207 and $(~(MMU_PAGESIZE - 1)), %r13; \
216 208 subq $CPU_KPTI_START, %r13; \
217 209 /* Use top of the kthread stk */ \
218 210 mov CPU_THREAD(%r13), %r14; \
219 211 mov T_STACK(%r14), %r14; \
220 212 addq $REGSIZE+MINFRAME, %r14; \
221 213 jmp 4f; \
222 214 3: \
223 215 /* Check the %rsp in the frame. */ \
224 216 /* Is it above kernel base? */ \
225 217 mov kpti_kbase, %r14; \
226 218 cmp %r14, KPTI_RSP(%rsp); \
227 219 jb 1b; \
228 220 /* Use the %rsp from the trap frame */ \
229 221 mov KPTI_RSP(%rsp), %r14; \
230 222 and $(~0xf), %r14; \
231 223 4: \
232 224 mov %rsp, %r13; \
233 225 /* %r14 contains our destination stk */ \
234 226 mov %r14, %rsp; \
235 227 pushq KPTI_SS(%r13); \
236 228 pushq KPTI_RSP(%r13); \
237 229 pushq KPTI_RFLAGS(%r13); \
238 230 pushq KPTI_CS(%r13); \
239 231 pushq KPTI_RIP(%r13); \
240 232 errpush; \
241 233 mov KPTI_R14(%r13), %r14; \
242 234 mov KPTI_R13(%r13), %r13
243 235
244 236 #define INTERRUPT_TRAMPOLINE_NOERR \
245 237 INTERRUPT_TRAMPOLINE_P(/**/)
246 238
247 239 #define INTERRUPT_TRAMPOLINE \
248 240 INTERRUPT_TRAMPOLINE_P(pushq KPTI_ERR(%r13))
249 241
250 242 /*
251 243 * This is used for all interrupts that can plausibly be taken inside another
252 244 * interrupt and are using a kpti_frame stack (so #BP, #DB, #GP, #PF, #SS).
253 245 *
254 246 * We also use this for #NP, even though it uses the standard IST: the
255 247 * additional %rsp checks below will catch when we get an exception doing an
256 248 * iret to userspace with a bad %cs/%ss. This appears as a kernel trap, and
257 249 * only later gets redirected via kern_gpfault().
258 250 *
259 251 * We check for whether we took the interrupt while in another trampoline, in
260 252 * which case we need to use the kthread stack.
261 253 */
262 254 #define DBG_INTERRUPT_TRAMPOLINE_P(errpush) \
263 255 pushq %r13; \
264 256 pushq %r14; \
265 257 subq $KPTI_R14, %rsp; \
266 258 /* Check for clobbering */ \
267 259 cmp $0, KPTI_FLAG(%rsp); \
268 260 je 1f; \
269 261 /* Don't worry, this totally works */ \
270 262 int $8; \
271 263 1: \
272 264 movq $1, KPTI_FLAG(%rsp); \
273 265 /* Save current %cr3. */ \
274 266 mov %cr3, %r14; \
275 267 mov %r14, KPTI_TR_CR3(%rsp); \
276 268 \
277 269 cmpw $KCS_SEL, KPTI_CS(%rsp); \
278 270 je 4f; \
279 271 2: \
280 272 /* Change to the "kernel" %cr3 */ \
281 273 mov KPTI_KCR3(%rsp), %r14; \
282 274 cmp $0, %r14; \
283 275 je 3f; \
284 276 mov %r14, %cr3; \
285 277 3: \
286 278 /* Get our cpu_t in %r13 */ \
287 279 mov %rsp, %r13; \
288 280 and $(~(MMU_PAGESIZE - 1)), %r13; \
289 281 subq $CPU_KPTI_START, %r13; \
290 282 /* Use top of the kthread stk */ \
291 283 mov CPU_THREAD(%r13), %r14; \
292 284 mov T_STACK(%r14), %r14; \
293 285 addq $REGSIZE+MINFRAME, %r14; \
294 286 jmp 6f; \
295 287 4: \
296 288 /* Check the %rsp in the frame. */ \
297 289 /* Is it above kernel base? */ \
298 290 /* If not, treat as user. */ \
299 291 mov kpti_kbase, %r14; \
300 292 cmp %r14, KPTI_RSP(%rsp); \
301 293 jb 2b; \
302 294 /* Is it within the kpti_frame page? */ \
303 295 /* If it is, treat as user interrupt */ \
304 296 mov %rsp, %r13; \
305 297 and $(~(MMU_PAGESIZE - 1)), %r13; \
306 298 mov KPTI_RSP(%rsp), %r14; \
307 299 and $(~(MMU_PAGESIZE - 1)), %r14; \
308 300 cmp %r13, %r14; \
309 301 je 2b; \
310 302 /* Were we in trampoline code? */ \
311 303 leaq kpti_tramp_start, %r14; \
312 304 cmp %r14, KPTI_RIP(%rsp); \
313 305 jb 5f; \
314 306 leaq kpti_tramp_end, %r14; \
315 307 cmp %r14, KPTI_RIP(%rsp); \
316 308 ja 5f; \
317 309 /* If we were, change %cr3: we might */ \
318 310 /* have interrupted before it did. */ \
319 311 mov KPTI_KCR3(%rsp), %r14; \
320 312 mov %r14, %cr3; \
321 313 5: \
322 314 /* Use the %rsp from the trap frame */ \
323 315 mov KPTI_RSP(%rsp), %r14; \
324 316 and $(~0xf), %r14; \
325 317 6: \
326 318 mov %rsp, %r13; \
327 319 /* %r14 contains our destination stk */ \
328 320 mov %r14, %rsp; \
329 321 pushq KPTI_SS(%r13); \
330 322 pushq KPTI_RSP(%r13); \
331 323 pushq KPTI_RFLAGS(%r13); \
332 324 pushq KPTI_CS(%r13); \
333 325 pushq KPTI_RIP(%r13); \
334 326 errpush; \
335 327 mov KPTI_R14(%r13), %r14; \
336 328 movq $0, KPTI_FLAG(%r13); \
337 329 mov KPTI_R13(%r13), %r13
338 330
339 331 #define DBG_INTERRUPT_TRAMPOLINE_NOERR \
340 332 DBG_INTERRUPT_TRAMPOLINE_P(/**/)
341 333
342 334 #define DBG_INTERRUPT_TRAMPOLINE \
343 335 DBG_INTERRUPT_TRAMPOLINE_P(pushq KPTI_ERR(%r13))
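The stack- and %cr3-selection logic in DBG_INTERRUPT_TRAMPOLINE_P is easier
to follow as C-like pseudocode (all names hypothetical; the asm above is
authoritative):

    /* Sketch of the decision tree; "frame" is the %rsp-relative kpti_frame. */
    if (frame->kf_tr_flag != 0)             /* frame already busy: re-entered */
        asm("int $8");                      /* escalate to the dblfault path */
    frame->kf_tr_flag = 1;
    frame->kf_tr_cr3 = read_cr3();
    if (frame->kf_cs != KCS_SEL ||          /* came from userland, or */
        frame->kf_rsp < kpti_kbase ||       /* kernel %cs but user %rsp, or */
        same_page(frame->kf_rsp, frame)) {  /* %rsp inside the kpti page */
        if (kpti->kcr3 != 0)
            write_cr3(kpti->kcr3);
        newsp = t_stack_top;                /* top of the kthread stack */
    } else {
        if (in_kpti_tramp_text(frame->kf_rip))  /* interrupted a trampoline */
            write_cr3(kpti->kcr3);          /* it may not have switched yet */
        newsp = frame->kf_rsp & ~0xfUL;     /* stay on the interrupted stack */
    }
    pivot_and_repush(newsp, frame);  /* re-push iret frame, clear flag, jmp isr */

INTERRUPT_TRAMPOLINE_P is the same shape minus the busy-flag, same-page, and
trampoline-text checks.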
344 336
345 337 /*
346 338 * These labels (_start and _end) are used by trap.c to determine if
347 339 * we took an interrupt like an NMI during the return process.
348 340 */
349 341 .global tr_sysc_ret_start
350 342 tr_sysc_ret_start:
351 343
352 344 /*
353 345 * Syscall return trampolines.
354 346 *
355 347 * These are expected to be called on the kernel %gs. tr_sysret[ql] are
356 348 * called after %rsp is changed back to the user value, so we have no
357 349 * stack to work with. tr_sysexit has a kernel stack (but has to
358 350 * preserve rflags, soooo).
359 351 */
360 352 ENTRY_NP(tr_sysretq)
361 353 cmpq $1, kpti_enable
362 354 jne 1f
363 355
364 356 mov %r13, %gs:CPU_KPTI_R13
365 357 SET_USER_CR3(%r13)
366 358 mov %gs:CPU_KPTI_R13, %r13
367 359 /* Zero these to make sure they didn't leak from a kernel trap */
368 360 movq $0, %gs:CPU_KPTI_R13
369 361 movq $0, %gs:CPU_KPTI_R14
370 362 1:
371 363 swapgs
372 364 sysretq
373 365 SET_SIZE(tr_sysretq)
374 366
375 367 ENTRY_NP(tr_sysretl)
376 368 cmpq $1, kpti_enable
377 369 jne 1f
378 370
379 371 mov %r13, %gs:CPU_KPTI_R13
380 372 SET_USER_CR3(%r13)
381 373 mov %gs:CPU_KPTI_R13, %r13
382 374 /* Zero these to make sure they didn't leak from a kernel trap */
383 375 movq $0, %gs:CPU_KPTI_R13
384 376 movq $0, %gs:CPU_KPTI_R14
385 377 1:
386 378 SWAPGS
387 379 SYSRETL
388 380 SET_SIZE(tr_sysretl)
389 381
390 382 ENTRY_NP(tr_sysexit)
391 383 /*
392 384 * Note: we want to preserve RFLAGS across this branch, since sysexit
393 385 * (unlike sysret above) does not restore RFLAGS for us.
394 386 *
395 387 * We still have the real kernel stack (sysexit does restore that), so
396 388 * we can use pushfq/popfq.
397 389 */
398 390 pushfq
399 391
400 392 cmpq $1, kpti_enable
401 393 jne 1f
402 394
403 395 /* Have to pop it back off now before we change %cr3! */
404 396 popfq
405 397 mov %r13, %gs:CPU_KPTI_R13
406 398 SET_USER_CR3(%r13)
407 399 mov %gs:CPU_KPTI_R13, %r13
408 400 /* Zero these to make sure they didn't leak from a kernel trap */
409 401 movq $0, %gs:CPU_KPTI_R13
410 402 movq $0, %gs:CPU_KPTI_R14
411 403 jmp 2f
412 404 1:
413 405 popfq
414 406 2:
415 407 swapgs
416 408 sti
417 409 sysexit
418 410 SET_SIZE(tr_sysexit)
419 411
420 412 .global tr_sysc_ret_end
421 413 tr_sysc_ret_end:
422 414
423 415 /*
424 416 * Syscall entry trampolines.
425 417 */
426 418
427 419 #if DEBUG
428 420 #define MK_SYSCALL_TRAMPOLINE(isr) \
429 421 ENTRY_NP(tr_/**/isr); \
430 422 swapgs; \
431 423 mov %r13, %gs:CPU_KPTI_R13; \
432 424 mov %cr3, %r13; \
433 425 mov %r13, %gs:CPU_KPTI_TR_CR3; \
434 426 mov %gs:CPU_KPTI_KCR3, %r13; \
435 427 mov %r13, %cr3; \
436 428 mov %gs:CPU_KPTI_R13, %r13; \
437 429 swapgs; \
438 430 jmp isr; \
439 431 SET_SIZE(tr_/**/isr)
440 432 #else
441 433 #define MK_SYSCALL_TRAMPOLINE(isr) \
442 434 ENTRY_NP(tr_/**/isr); \
443 435 swapgs; \
444 436 mov %r13, %gs:CPU_KPTI_R13; \
445 437 mov %gs:CPU_KPTI_KCR3, %r13; \
446 438 mov %r13, %cr3; \
447 439 mov %gs:CPU_KPTI_R13, %r13; \
448 440 swapgs; \
449 441 jmp isr; \
450 442 SET_SIZE(tr_/**/isr)
451 443 #endif
452 444
453 445 MK_SYSCALL_TRAMPOLINE(sys_syscall)
454 446 MK_SYSCALL_TRAMPOLINE(sys_syscall32)
455 447 MK_SYSCALL_TRAMPOLINE(brand_sys_syscall)
456 448 MK_SYSCALL_TRAMPOLINE(brand_sys_syscall32)
457 449
458 450 /*
459 451 * SYSENTER is special. The CPU is really not very helpful when it
460 452 * comes to preserving and restoring state with it, and as a result
461 453 * we have to do all of it by hand. So, since we want to preserve
462 454 * RFLAGS, we have to be very careful in these trampolines to not
463 455 * clobber any bits in it. That means no cmpqs or branches!
464 456 */
465 457 ENTRY_NP(tr_sys_sysenter)
466 458 swapgs
467 459 mov %r13, %gs:CPU_KPTI_R13
468 460 #if DEBUG
469 461 mov %cr3, %r13
470 462 mov %r13, %gs:CPU_KPTI_TR_CR3
471 463 #endif
472 464 mov %gs:CPU_KPTI_KCR3, %r13
473 465 mov %r13, %cr3
474 466 mov %gs:CPU_KPTI_R13, %r13
475 467 jmp _sys_sysenter_post_swapgs
476 468 SET_SIZE(tr_sys_sysenter)
477 469
478 470 ENTRY_NP(tr_brand_sys_sysenter)
479 471 swapgs
480 472 mov %r13, %gs:CPU_KPTI_R13
481 473 #if DEBUG
482 474 mov %cr3, %r13
483 475 mov %r13, %gs:CPU_KPTI_TR_CR3
484 476 #endif
485 477 mov %gs:CPU_KPTI_KCR3, %r13
486 478 mov %r13, %cr3
487 479 mov %gs:CPU_KPTI_R13, %r13
488 480 jmp _brand_sys_sysenter_post_swapgs
489 481 SET_SIZE(tr_brand_sys_sysenter)
490 482
491 483 #define MK_SYSCALL_INT_TRAMPOLINE(isr) \
492 484 ENTRY_NP(tr_/**/isr); \
493 485 swapgs; \
494 486 mov %r13, %gs:CPU_KPTI_R13; \
495 487 SET_KERNEL_CR3(%r13); \
496 488 mov %gs:CPU_THREAD, %r13; \
497 489 mov T_STACK(%r13), %r13; \
498 490 addq $REGSIZE+MINFRAME, %r13; \
499 491 mov %r13, %rsp; \
500 492 pushq %gs:CPU_KPTI_SS; \
501 493 pushq %gs:CPU_KPTI_RSP; \
502 494 pushq %gs:CPU_KPTI_RFLAGS; \
503 495 pushq %gs:CPU_KPTI_CS; \
504 496 pushq %gs:CPU_KPTI_RIP; \
505 497 mov %gs:CPU_KPTI_R13, %r13; \
506 498 swapgs; \
507 499 jmp isr; \
508 500 SET_SIZE(tr_/**/isr)
509 501
510 502 MK_SYSCALL_INT_TRAMPOLINE(brand_sys_syscall_int)
511 503 MK_SYSCALL_INT_TRAMPOLINE(sys_syscall_int)
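These gates' IST slots point into the kpti_frame, so by the time we run, the
CPU has already written the five-word interrupt frame into the per-CPU kpti
area; the macro just switches %cr3, pivots to the kthread stack, and
re-pushes that frame there. A hypothetical C rendering (names illustrative):

    swapgs();                   /* we can only arrive here from userland */
    kpti->r13 = r13;            /* spill, then switch to kernel %cr3 */
    set_kernel_cr3();
    rsp = t_stack_top;          /* T_STACK(curthread) + REGSIZE + MINFRAME */
    push(kpti->ss); push(kpti->rsp); push(kpti->rflags);
    push(kpti->cs); push(kpti->rip);    /* rebuild frame on the real stack */
    r13 = kpti->r13;
    swapgs();                   /* the isr expects to do its own swapgs */
    goto isr;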
512 504
513 505 /*
514 506 * Interrupt/trap return trampolines
515 507 */
516 508
517 509 .global tr_intr_ret_start
518 510 tr_intr_ret_start:
519 511
520 512 ENTRY_NP(tr_iret_auto)
521 513 cmpq $1, kpti_enable
522 514 jne tr_iret_kernel
523 515 cmpw $KCS_SEL, T_FRAMERET_CS(%rsp)
524 516 je tr_iret_kernel
525 517 jmp tr_iret_user
526 518 SET_SIZE(tr_iret_auto)
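In effect (C-like sketch, hypothetical names):

    if (kpti_enable != 1 || frame->cs == KCS_SEL)
        tr_iret_kernel();       /* plain iretq, no %cr3 or stack games */
    else
        tr_iret_user();         /* pivot to kpti stack, set user %cr3 */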
527 519
528 520 ENTRY_NP(tr_iret_kernel)
529 521 /*
530 522 * Yes, this does nothing extra. But this way we know if we see iret
531 523 * elsewhere, then we've failed to properly consider trampolines there.
532 524 */
533 525 iretq
534 526 SET_SIZE(tr_iret_kernel)
535 527
536 528 ENTRY_NP(tr_iret_user)
537 529 #if DEBUG
538 530 /*
539 531 * Panic if we find CR0.TS set. We're still on the kernel stack and
540 532 * %cr3, but we do need to swap back to the kernel gs. (We don't worry
541 533 * about swapgs speculation here.)
542 534 */
543 535 pushq %rax
544 536 mov %cr0, %rax
545 537 testq $CR0_TS, %rax
546 538 jz 1f
547 539 swapgs
548 540 popq %rax
549 541 leaq _bad_ts_panic_msg(%rip), %rdi
550 542 xorl %eax, %eax
551 543 pushq %rbp
552 544 movq %rsp, %rbp
553 545 call panic
554 546 1:
555 547 popq %rax
556 548 #endif
557 549
558 550 cmpq $1, kpti_enable
559 551 jne 1f
560 552
561 553 /*
562 554 * KPTI enabled: we're on the user gsbase at this point, so we
563 555 * need to swap back so we can pivot stacks.
564 556 *
565 557 * The swapgs lfence mitigation is probably not needed here
566 558 * since a mis-speculation of the above branch would imply KPTI
567 559 * is disabled, but we'll do so anyway.
568 560 */
569 561 swapgs
570 562 lfence
571 563 mov %r13, %gs:CPU_KPTI_R13
572 564 PIVOT_KPTI_STK(%r13)
573 565 SET_USER_CR3(%r13)
574 566 mov %gs:CPU_KPTI_R13, %r13
575 567 /* Zero these to make sure they didn't leak from a kernel trap. */
576 568 movq $0, %gs:CPU_KPTI_R13
577 569 movq $0, %gs:CPU_KPTI_R14
578 570 /* And back to user gsbase again. */
579 571 swapgs
580 572 1:
581 573 iretq
582 574 SET_SIZE(tr_iret_user)
583 575
584 576 /*
585 577 * This special return trampoline is for KDI's use only (with kmdb).
586 578 *
587 579 * KDI/kmdb do not use swapgs -- they directly write the GSBASE MSR
588 580 * instead. This trampoline runs after GSBASE has already been changed
589 581 * back to the userland value (so we can't use %gs).
590 582 *
591 583 * Instead, the caller gives us a pointer to the kpti_dbg frame in %r13.
592 584 * The KPTI_R13 member in the kpti_dbg has already been set to what the
593 585 * real %r13 should be before we IRET.
594 586 *
595 587 * Additionally, KDI keeps a copy of the incoming %cr3 value when it
596 588 * took an interrupt, and has put that back in the kpti_dbg area for us
597 589 * to use, so we don't do any sniffing of %cs here. This is important
598 590 * so that debugging code that changes %cr3 is possible.
599 591 */
600 592 ENTRY_NP(tr_iret_kdi)
601 593 movq %r14, KPTI_R14(%r13) /* %r14 has to be preserved by us */
602 594
603 595 movq %rsp, %r14 /* original %rsp is pointing at IRET frame */
604 596 leaq KPTI_TOP(%r13), %rsp
605 597 pushq T_FRAMERET_SS(%r14)
606 598 pushq T_FRAMERET_RSP(%r14)
607 599 pushq T_FRAMERET_RFLAGS(%r14)
608 600 pushq T_FRAMERET_CS(%r14)
609 601 pushq T_FRAMERET_RIP(%r14)
610 602
611 603 movq KPTI_TR_CR3(%r13), %r14
612 604 movq %r14, %cr3
613 605
614 606 movq KPTI_R14(%r13), %r14
615 607 movq KPTI_R13(%r13), %r13 /* preserved by our caller */
616 608
617 609 iretq
618 610 SET_SIZE(tr_iret_kdi)
619 611
620 612 .global tr_intr_ret_end
621 613 tr_intr_ret_end:
622 614
623 615 /*
624 616 * Interrupt/trap entry trampolines
625 617 */
626 618
627 619 /* CPU pushed an error code, and ISR wants one */
628 620 #define MK_INTR_TRAMPOLINE(isr) \
629 621 ENTRY_NP(tr_/**/isr); \
630 622 INTERRUPT_TRAMPOLINE; \
631 623 jmp isr; \
632 624 SET_SIZE(tr_/**/isr)
633 625
634 626 /* CPU didn't push an error code, and ISR doesn't want one */
635 627 #define MK_INTR_TRAMPOLINE_NOERR(isr) \
636 628 ENTRY_NP(tr_/**/isr); \
637 629 push $0; \
638 630 INTERRUPT_TRAMPOLINE_NOERR; \
639 631 jmp isr; \
640 632 SET_SIZE(tr_/**/isr)
641 633
642 634 /* CPU pushed an error code, and ISR wants one */
643 635 #define MK_DBG_INTR_TRAMPOLINE(isr) \
644 636 ENTRY_NP(tr_/**/isr); \
645 637 DBG_INTERRUPT_TRAMPOLINE; \
646 638 jmp isr; \
647 639 SET_SIZE(tr_/**/isr)
648 640
649 641 /* CPU didn't push an error code, and ISR doesn't want one */
650 642 #define MK_DBG_INTR_TRAMPOLINE_NOERR(isr) \
651 643 ENTRY_NP(tr_/**/isr); \
652 644 push $0; \
653 645 DBG_INTERRUPT_TRAMPOLINE_NOERR; \
654 646 jmp isr; \
655 647 SET_SIZE(tr_/**/isr)
656 648
657 649
658 650 MK_INTR_TRAMPOLINE_NOERR(div0trap)
659 651 MK_DBG_INTR_TRAMPOLINE_NOERR(dbgtrap)
660 652 MK_DBG_INTR_TRAMPOLINE_NOERR(brktrap)
661 653 MK_INTR_TRAMPOLINE_NOERR(ovflotrap)
662 654 MK_INTR_TRAMPOLINE_NOERR(boundstrap)
663 655 MK_INTR_TRAMPOLINE_NOERR(invoptrap)
664 656 MK_INTR_TRAMPOLINE_NOERR(ndptrap)
665 657 MK_INTR_TRAMPOLINE(invtsstrap)
666 658 MK_DBG_INTR_TRAMPOLINE(segnptrap)
667 659 MK_DBG_INTR_TRAMPOLINE(stktrap)
668 660 MK_DBG_INTR_TRAMPOLINE(gptrap)
669 661 MK_DBG_INTR_TRAMPOLINE(pftrap)
670 662 MK_INTR_TRAMPOLINE_NOERR(resvtrap)
671 663 MK_INTR_TRAMPOLINE_NOERR(ndperr)
672 664 MK_INTR_TRAMPOLINE(achktrap)
673 665 MK_INTR_TRAMPOLINE_NOERR(xmtrap)
674 666 MK_INTR_TRAMPOLINE_NOERR(invaltrap)
675 667 MK_INTR_TRAMPOLINE_NOERR(fasttrap)
676 668 MK_INTR_TRAMPOLINE_NOERR(dtrace_ret)
677 669
678 670 /*
679 671 * These are special because they can interrupt other traps, and
680 672 * each other. We don't need to pivot their stacks, because they have
681 673 * dedicated IST stack space, but we need to change %cr3.
682 674 */
683 675 ENTRY_NP(tr_nmiint)
684 676 pushq %r13
685 677 mov kpti_safe_cr3, %r13
686 678 mov %r13, %cr3
687 679 popq %r13
688 680 jmp nmiint
689 681 SET_SIZE(tr_nmiint)
690 682
691 683 #if !defined(__xpv)
692 684 ENTRY_NP(tr_syserrtrap)
693 685 /*
694 686 * If we got here we should always have a zero error code pushed.
695 687 * The INT $0x8 instr doesn't seem to push one, though, which we use
696 688 * as an emergency panic in the other trampolines. So adjust things
697 689 * here.
698 690 */
699 691 cmpq $0, (%rsp)
700 692 je 1f
701 693 pushq $0
702 694 1:
703 695 pushq %r13
704 696 mov kpti_safe_cr3, %r13
705 697 mov %r13, %cr3
706 698 popq %r13
707 699 jmp syserrtrap
708 700 SET_SIZE(tr_syserrtrap)
709 701 #endif
710 702
711 703 ENTRY_NP(tr_mcetrap)
712 704 pushq %r13
713 705 mov kpti_safe_cr3, %r13
714 706 mov %r13, %cr3
715 707 popq %r13
716 708 jmp mcetrap
717 709 SET_SIZE(tr_mcetrap)
718 710
719 711 /*
720 712 * Interrupts start at 32
721 713 */
722 714 #define MKIVCT(n) \
723 715 ENTRY_NP(tr_ivct/**/n) \
724 716 push $0; \
725 717 INTERRUPT_TRAMPOLINE; \
726 718 push $n - 0x20; \
727 719 jmp cmnint; \
728 720 SET_SIZE(tr_ivct/**/n)
729 721
730 722 MKIVCT(32); MKIVCT(33); MKIVCT(34); MKIVCT(35);
731 723 MKIVCT(36); MKIVCT(37); MKIVCT(38); MKIVCT(39);
732 724 MKIVCT(40); MKIVCT(41); MKIVCT(42); MKIVCT(43);
733 725 MKIVCT(44); MKIVCT(45); MKIVCT(46); MKIVCT(47);
734 726 MKIVCT(48); MKIVCT(49); MKIVCT(50); MKIVCT(51);
735 727 MKIVCT(52); MKIVCT(53); MKIVCT(54); MKIVCT(55);
736 728 MKIVCT(56); MKIVCT(57); MKIVCT(58); MKIVCT(59);
737 729 MKIVCT(60); MKIVCT(61); MKIVCT(62); MKIVCT(63);
738 730 MKIVCT(64); MKIVCT(65); MKIVCT(66); MKIVCT(67);
739 731 MKIVCT(68); MKIVCT(69); MKIVCT(70); MKIVCT(71);
740 732 MKIVCT(72); MKIVCT(73); MKIVCT(74); MKIVCT(75);
741 733 MKIVCT(76); MKIVCT(77); MKIVCT(78); MKIVCT(79);
742 734 MKIVCT(80); MKIVCT(81); MKIVCT(82); MKIVCT(83);
743 735 MKIVCT(84); MKIVCT(85); MKIVCT(86); MKIVCT(87);
744 736 MKIVCT(88); MKIVCT(89); MKIVCT(90); MKIVCT(91);
745 737 MKIVCT(92); MKIVCT(93); MKIVCT(94); MKIVCT(95);
746 738 MKIVCT(96); MKIVCT(97); MKIVCT(98); MKIVCT(99);
747 739 MKIVCT(100); MKIVCT(101); MKIVCT(102); MKIVCT(103);
748 740 MKIVCT(104); MKIVCT(105); MKIVCT(106); MKIVCT(107);
749 741 MKIVCT(108); MKIVCT(109); MKIVCT(110); MKIVCT(111);
750 742 MKIVCT(112); MKIVCT(113); MKIVCT(114); MKIVCT(115);
751 743 MKIVCT(116); MKIVCT(117); MKIVCT(118); MKIVCT(119);
752 744 MKIVCT(120); MKIVCT(121); MKIVCT(122); MKIVCT(123);
753 745 MKIVCT(124); MKIVCT(125); MKIVCT(126); MKIVCT(127);
754 746 MKIVCT(128); MKIVCT(129); MKIVCT(130); MKIVCT(131);
755 747 MKIVCT(132); MKIVCT(133); MKIVCT(134); MKIVCT(135);
756 748 MKIVCT(136); MKIVCT(137); MKIVCT(138); MKIVCT(139);
757 749 MKIVCT(140); MKIVCT(141); MKIVCT(142); MKIVCT(143);
758 750 MKIVCT(144); MKIVCT(145); MKIVCT(146); MKIVCT(147);
759 751 MKIVCT(148); MKIVCT(149); MKIVCT(150); MKIVCT(151);
760 752 MKIVCT(152); MKIVCT(153); MKIVCT(154); MKIVCT(155);
761 753 MKIVCT(156); MKIVCT(157); MKIVCT(158); MKIVCT(159);
762 754 MKIVCT(160); MKIVCT(161); MKIVCT(162); MKIVCT(163);
763 755 MKIVCT(164); MKIVCT(165); MKIVCT(166); MKIVCT(167);
764 756 MKIVCT(168); MKIVCT(169); MKIVCT(170); MKIVCT(171);
765 757 MKIVCT(172); MKIVCT(173); MKIVCT(174); MKIVCT(175);
766 758 MKIVCT(176); MKIVCT(177); MKIVCT(178); MKIVCT(179);
767 759 MKIVCT(180); MKIVCT(181); MKIVCT(182); MKIVCT(183);
768 760 MKIVCT(184); MKIVCT(185); MKIVCT(186); MKIVCT(187);
769 761 MKIVCT(188); MKIVCT(189); MKIVCT(190); MKIVCT(191);
770 762 MKIVCT(192); MKIVCT(193); MKIVCT(194); MKIVCT(195);
771 763 MKIVCT(196); MKIVCT(197); MKIVCT(198); MKIVCT(199);
772 764 MKIVCT(200); MKIVCT(201); MKIVCT(202); MKIVCT(203);
773 765 MKIVCT(204); MKIVCT(205); MKIVCT(206); MKIVCT(207);
774 766 MKIVCT(208); MKIVCT(209); MKIVCT(210); MKIVCT(211);
775 767 MKIVCT(212); MKIVCT(213); MKIVCT(214); MKIVCT(215);
776 768 MKIVCT(216); MKIVCT(217); MKIVCT(218); MKIVCT(219);
777 769 MKIVCT(220); MKIVCT(221); MKIVCT(222); MKIVCT(223);
778 770 MKIVCT(224); MKIVCT(225); MKIVCT(226); MKIVCT(227);
779 771 MKIVCT(228); MKIVCT(229); MKIVCT(230); MKIVCT(231);
780 772 MKIVCT(232); MKIVCT(233); MKIVCT(234); MKIVCT(235);
781 773 MKIVCT(236); MKIVCT(237); MKIVCT(238); MKIVCT(239);
782 774 MKIVCT(240); MKIVCT(241); MKIVCT(242); MKIVCT(243);
783 775 MKIVCT(244); MKIVCT(245); MKIVCT(246); MKIVCT(247);
784 776 MKIVCT(248); MKIVCT(249); MKIVCT(250); MKIVCT(251);
785 777 MKIVCT(252); MKIVCT(253); MKIVCT(254); MKIVCT(255);
786 778
787 779 /*
788 780 * We're PCIDE, but we don't have INVPCID. The only way to invalidate a
789 781 * PCID other than the current one, then, is to load its cr3 then
790 782 * invlpg. But loading kf_user_cr3 means we can no longer access our
791 783 * caller's text mapping (or indeed, its stack). So this little helper
792 784 * has to live within our trampoline text region.
793 785 *
794 786 * Called as tr_mmu_flush_user_range(addr, len, pgsz, cr3)
795 787 */
796 788 ENTRY_NP(tr_mmu_flush_user_range)
797 789 push %rbx
798 790 /* When we read cr3, it never has the NOINVL bit set. */
799 791 mov %cr3, %rax
800 792 movq $CR3_NOINVL_BIT, %rbx
801 793 orq %rbx, %rax
802 794
803 795 mov %rcx, %cr3
804 796 add %rdi, %rsi
805 797 .align ASM_ENTRY_ALIGN
806 798 1:
807 799 invlpg (%rdi)
808 800 add %rdx, %rdi
809 801 cmp %rsi, %rdi
810 802 jb 1b
811 803 mov %rax, %cr3
812 804 pop %rbx
813 805 retq
814 806 SET_SIZE(tr_mmu_flush_user_range)
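In C terms the helper behaves roughly like the sketch below (a hedged
transliteration of the asm above; read_cr3/write_cr3/invlpg stand in for the
corresponding instructions):

    void tr_mmu_flush_user_range(uintptr_t addr, size_t len,
        size_t pgsz, uint64_t user_cr3)
    {
        /* Re-loading this value later must not flush our own PCID. */
        uint64_t kcr3 = read_cr3() | CR3_NOINVL_BIT;
        write_cr3(user_cr3);            /* switch to the target PCID */
        for (uintptr_t va = addr; va < addr + len; va += pgsz)
            invlpg(va);                 /* flush one page at a time */
        write_cr3(kcr3);                /* and switch back */
    }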
815 807
816 808 .align MMU_PAGESIZE
817 809 .global kpti_tramp_end
818 810 kpti_tramp_end:
819 811 nop
820 812
821 -#endif /* __lint */