/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */
/*
 * Copyright 2019 Joyent, Inc.
 */

/*
 * This file contains the trampolines that are used by KPTI in order to be
 * able to take interrupts/trap/etc while on the "user" page table.
 *
 * We don't map the full kernel text into the user page table: instead we
 * map this one small section of trampolines (which compiles to ~13 pages).
 * These trampolines are set in the IDT always (so they will run no matter
 * whether we're on the kernel or user page table), and their primary job is to
 * pivot us to the kernel %cr3 and %rsp without ruining everything.
 *
 * All of these interrupts use the amd64 IST feature when we have KPTI enabled,
 * meaning that they will execute with their %rsp set to a known location, even
 * if we take them in the kernel.
 *
 * Over in desctbls.c (for cpu0) and mp_pc.c (other cpus) we set up the IST
 * stack to point at &cpu->cpu_m.mcpu_kpti.kf_tr_rsp. You can see the mcpu_kpti
 * (a struct kpti_frame) defined in machcpuvar.h. This struct is set up to be
 * page-aligned, and we map the page it's on into both page tables. Using a
 * struct attached to the cpu_t also means that we can use %rsp-relative
 * addressing to find anything on the cpu_t, so we don't have to touch %gs or
 * GSBASE at all on incoming interrupt trampolines (which can get pretty hairy).
 *
 * This little struct is where the CPU will push the actual interrupt frame.
 * Then, in the trampoline, we change %cr3, then figure out our destination
 * stack pointer and "pivot" to it (set %rsp and re-push the CPU's interrupt
 * frame). Then we jump to the regular ISR in the kernel text and carry on as
 * normal.
 *
 * We leave the original frame and any spilled regs behind in the kpti_frame
 * lazily until we want to return to userland. Then, we clear any spilled
 * regs from it, and overwrite the rest with our iret frame. When switching
 * this cpu to a different process (in hat_switch), we bzero the whole region to
 * make sure nothing can leak between processes.
 *
 * When we're returning back to the original place we took the interrupt later
 * (especially if it was in userland), we have to jmp back to the "return
 * trampolines" here, since when we set %cr3 back to the user value, we need to
 * be executing from code here in these shared pages and not the main kernel
 * text again. Even though it should be fine to iret directly from kernel text
 * when returning to kernel code, we make things jmp to a trampoline here just
 * for consistency.
 *
 * Note that with IST, it's very important that we must always have pivoted
 * away from the IST stack before we can possibly take any other interrupt
 * on the same IST (unless it's an end-of-the-world fault and we don't care
 * about ever coming back from it).
 *
 * This is particularly relevant to the dbgtrap/brktrap trampolines, as they
 * regularly have to happen from within trampoline code (e.g. in the sysenter
 * single-step case) and then return to the world normally. As a result, these
 * two are IST'd to their own kpti_frame right above the normal one (in the same
 * page), so they don't clobber their parent interrupt.
 *
 * To aid with debugging, we also IST the page fault (#PF/pftrap), general
 * protection fault (#GP/gptrap) and stack fault (#SS/stktrap) interrupts to
 * their own separate kpti_frame. This ensures that if we take one of these
 * due to a bug in trampoline code, we preserve the original trampoline
 * state that caused the trap.
 *
 * NMI, MCE and dblfault interrupts also are taken on their own dedicated IST
 * stacks, since they can interrupt another ISR at any time. These stacks are
 * full-sized, however, and not a little kpti_frame struct. We only set %cr3 in
 * their trampolines (and do it unconditionally), and don't bother pivoting
 * away. We're either going into the panic() path, or we're going to return
 * straight away without rescheduling, so it's fine to not be on our real
 * kthread stack (and some of the state we want to go find it with might be
 * corrupt!)
 *
 * Finally, for these "special" interrupts (NMI/MCE/double fault) we use a
 * special %cr3 value we stash here in the text (kpti_safe_cr3). We set this to
 * point at the PML4 for kas early in boot and never touch it again. Hopefully
 * it survives whatever corruption brings down the rest of the kernel!
 *
 * Syscalls are different to interrupts (at least in the SYSENTER/SYSCALL64
 * cases) in that they do not push an interrupt frame (and also have some other
 * effects). In the syscall trampolines, we assume that we can only be taking
 * the call from userland and use swapgs and an unconditional overwrite of %cr3.
 * We do not do any stack pivoting for syscalls (and we leave SYSENTER's
 * existing %rsp pivot untouched) -- instead we spill registers into
 * %gs:CPU_KPTI_* as we need to.
 *
 * Note that the normal %cr3 values do not cause invalidations with PCIDE - see
 * hat_switch().
 */

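/*
 * In rough outline, a normal interrupt taken from userland flows through a
 * trampoline below like this (see INTERRUPT_TRAMPOLINE for the details):
 *
 *	- the CPU pushes its SS/RSP/RFLAGS/CS/RIP frame (and any error code)
 *	  onto the IST stack, i.e. into this cpu's kpti_frame;
 *	- we spill %r13/%r14 into the kpti_frame and record the incoming %cr3;
 *	- we load the kernel %cr3 from the kpti_frame;
 *	- we locate the cpu_t from the (page-aligned) kpti_frame address and
 *	  pick a destination stack (the kthread stack, for user interrupts);
 *	- we re-push the interrupt frame there, restore %r13/%r14 and jmp to
 *	  the real ISR in kernel text.
 */
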
/*
 * The macros here mostly line up with what's in kdi_idthdl.s, too, so if you
 * fix bugs here check to see if they should be fixed there as well.
 */

#include <sys/asm_linkage.h>
#include <sys/asm_misc.h>
#include <sys/regset.h>
#include <sys/privregs.h>
#include <sys/psw.h>
#include <sys/machbrand.h>
#include <sys/param.h>

#include <sys/segments.h>
#include <sys/pcb.h>
#include <sys/trap.h>
#include <sys/ftrace.h>
#include <sys/traptrace.h>
#include <sys/clock.h>
#include <sys/model.h>
#include <sys/panic.h>

#if defined(__xpv)
#include <sys/hypervisor.h>
#endif

#include "assym.h"

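/*
 * Global KPTI enable flag, non-zero by default (see the .fill below). The
 * syscall and iret return trampolines check this before doing any %cr3
 * switching on the way back to userland.
 */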
	.data
	DGDEF3(kpti_enable, 8, 8)
	.fill	1, 8, 1

#if DEBUG
	.data
_bad_ts_panic_msg:
	.string "kpti_trampolines.s: tr_iret_user but CR0.TS set"
#endif

	.section ".text";
	.align MMU_PAGESIZE

	.global kpti_tramp_start
kpti_tramp_start:
	nop

	/* This will be set by mlsetup, and then double-checked later */
	.global kpti_safe_cr3
kpti_safe_cr3:
	.quad 0
	SET_SIZE(kpti_safe_cr3)

	/* startup_kmem() will overwrite this */
	.global kpti_kbase
kpti_kbase:
	.quad KERNELBASE
	SET_SIZE(kpti_kbase)

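/*
 * SET_KERNEL_CR3 stashes the incoming %cr3 in %gs:CPU_KPTI_TR_CR3 and then
 * switches to the kernel %cr3 from %gs:CPU_KPTI_KCR3, leaving %cr3 alone if
 * that value is zero (i.e. there is no separate kernel %cr3 to switch to).
 * SET_USER_CR3 switches back to the user %cr3; on DEBUG kernels it records
 * the trampoline %cr3 first. Both clobber only the named spill register.
 */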
#define SET_KERNEL_CR3(spillreg) \
	mov %cr3, spillreg; \
	mov spillreg, %gs:CPU_KPTI_TR_CR3; \
	mov %gs:CPU_KPTI_KCR3, spillreg; \
	cmp $0, spillreg; \
	je 2f; \
	mov spillreg, %cr3; \
2:

#if DEBUG
#define SET_USER_CR3(spillreg) \
	mov %cr3, spillreg; \
	mov spillreg, %gs:CPU_KPTI_TR_CR3; \
	mov %gs:CPU_KPTI_UCR3, spillreg; \
	mov spillreg, %cr3
#else
#define SET_USER_CR3(spillreg) \
	mov %gs:CPU_KPTI_UCR3, spillreg; \
	mov spillreg, %cr3
#endif

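/*
 * PIVOT_KPTI_STK switches from the kpti_frame stack to the return stack
 * saved in %gs:CPU_KPTI_RET_RSP, re-pushing the five-word iret frame from
 * the old stack (which is still addressable through the spill register).
 */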
#define PIVOT_KPTI_STK(spillreg) \
	mov %rsp, spillreg; \
	mov %gs:CPU_KPTI_RET_RSP, %rsp; \
	pushq T_FRAMERET_SS(spillreg); \
	pushq T_FRAMERET_RSP(spillreg); \
	pushq T_FRAMERET_RFLAGS(spillreg); \
	pushq T_FRAMERET_CS(spillreg); \
	pushq T_FRAMERET_RIP(spillreg)


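/*
 * This is the body shared by the plain interrupt trampolines. We spill
 * %r13/%r14 into the kpti_frame and drop %rsp to the base of the frame so
 * the KPTI_* offsets line up, record the incoming %cr3, and switch to the
 * kernel %cr3 if one is set. If we came from userland, or from a kernel
 * %rsp below kpti_kbase, we pivot onto the top of the current kthread's
 * stack (found via the cpu_t, which we locate from the page-aligned
 * kpti_frame address); otherwise we re-use the interrupted kernel %rsp.
 * Either way we re-push the CPU's interrupt frame (and error code, via
 * errpush), restore %r13/%r14, and let the caller jmp to the real ISR.
 */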
#define INTERRUPT_TRAMPOLINE_P(errpush) \
	pushq %r13; \
	pushq %r14; \
	subq $KPTI_R14, %rsp; \
	/* Save current %cr3. */ \
	mov %cr3, %r14; \
	mov %r14, KPTI_TR_CR3(%rsp); \
	\
	cmpw $KCS_SEL, KPTI_CS(%rsp); \
	je 3f; \
1: \
	/* Change to the "kernel" %cr3 */ \
	mov KPTI_KCR3(%rsp), %r14; \
	cmp $0, %r14; \
	je 2f; \
	mov %r14, %cr3; \
2: \
	/* Get our cpu_t in %r13 */ \
	mov %rsp, %r13; \
	and $(~(MMU_PAGESIZE - 1)), %r13; \
	subq $CPU_KPTI_START, %r13; \
	/* Use top of the kthread stk */ \
	mov CPU_THREAD(%r13), %r14; \
	mov T_STACK(%r14), %r14; \
	addq $REGSIZE+MINFRAME, %r14; \
	jmp 4f; \
3: \
	/* Check the %rsp in the frame. */ \
	/* Is it above kernel base? */ \
	mov kpti_kbase, %r14; \
	cmp %r14, KPTI_RSP(%rsp); \
	jb 1b; \
	/* Use the %rsp from the trap frame */ \
	mov KPTI_RSP(%rsp), %r14; \
	and $(~0xf), %r14; \
4: \
	mov %rsp, %r13; \
	/* %r14 contains our destination stk */ \
	mov %r14, %rsp; \
	pushq KPTI_SS(%r13); \
	pushq KPTI_RSP(%r13); \
	pushq KPTI_RFLAGS(%r13); \
	pushq KPTI_CS(%r13); \
	pushq KPTI_RIP(%r13); \
	errpush; \
	mov KPTI_R14(%r13), %r14; \
	mov KPTI_R13(%r13), %r13

#define INTERRUPT_TRAMPOLINE_NOERR \
	INTERRUPT_TRAMPOLINE_P(/**/)

#define INTERRUPT_TRAMPOLINE \
	INTERRUPT_TRAMPOLINE_P(pushq KPTI_ERR(%r13))

/*
 * This is used for all interrupts that can plausibly be taken inside another
 * interrupt and are using a kpti_frame stack (so #BP, #DB, #GP, #PF, #SS).
 *
 * We also use this for #NP, even though it uses the standard IST: the
 * additional %rsp checks below will catch when we get an exception doing an
 * iret to userspace with a bad %cs/%ss. This appears as a kernel trap, and
 * only later gets redirected via kern_gpfault().
 *
 * We check whether we took the interrupt while in another trampoline, in
 * which case we need to use the kthread stack.
 */
#define DBG_INTERRUPT_TRAMPOLINE_P(errpush) \
	pushq %r13; \
	pushq %r14; \
	subq $KPTI_R14, %rsp; \
	/* Check for clobbering */ \
	cmp $0, KPTI_FLAG(%rsp); \
	je 1f; \
	/* Don't worry, this totally works */ \
	int $8; \
1: \
	movq $1, KPTI_FLAG(%rsp); \
	/* Save current %cr3. */ \
	mov %cr3, %r14; \
	mov %r14, KPTI_TR_CR3(%rsp); \
	\
	cmpw $KCS_SEL, KPTI_CS(%rsp); \
	je 4f; \
2: \
	/* Change to the "kernel" %cr3 */ \
	mov KPTI_KCR3(%rsp), %r14; \
	cmp $0, %r14; \
	je 3f; \
	mov %r14, %cr3; \
3: \
	/* Get our cpu_t in %r13 */ \
	mov %rsp, %r13; \
	and $(~(MMU_PAGESIZE - 1)), %r13; \
	subq $CPU_KPTI_START, %r13; \
	/* Use top of the kthread stk */ \
	mov CPU_THREAD(%r13), %r14; \
	mov T_STACK(%r14), %r14; \
	addq $REGSIZE+MINFRAME, %r14; \
	jmp 6f; \
4: \
	/* Check the %rsp in the frame. */ \
	/* Is it above kernel base? */ \
	/* If not, treat as user. */ \
	mov kpti_kbase, %r14; \
	cmp %r14, KPTI_RSP(%rsp); \
	jb 2b; \
	/* Is it within the kpti_frame page? */ \
	/* If it is, treat as user interrupt */ \
	mov %rsp, %r13; \
	and $(~(MMU_PAGESIZE - 1)), %r13; \
	mov KPTI_RSP(%rsp), %r14; \
	and $(~(MMU_PAGESIZE - 1)), %r14; \
	cmp %r13, %r14; \
	je 2b; \
	/* Were we in trampoline code? */ \
	leaq kpti_tramp_start, %r14; \
	cmp %r14, KPTI_RIP(%rsp); \
	jb 5f; \
	leaq kpti_tramp_end, %r14; \
	cmp %r14, KPTI_RIP(%rsp); \
	ja 5f; \
	/* If we were, change %cr3: we might */ \
	/* have interrupted before it did. */ \
	mov KPTI_KCR3(%rsp), %r14; \
	mov %r14, %cr3; \
5: \
	/* Use the %rsp from the trap frame */ \
	mov KPTI_RSP(%rsp), %r14; \
	and $(~0xf), %r14; \
6: \
	mov %rsp, %r13; \
	/* %r14 contains our destination stk */ \
	mov %r14, %rsp; \
	pushq KPTI_SS(%r13); \
	pushq KPTI_RSP(%r13); \
	pushq KPTI_RFLAGS(%r13); \
	pushq KPTI_CS(%r13); \
	pushq KPTI_RIP(%r13); \
	errpush; \
	mov KPTI_R14(%r13), %r14; \
	movq $0, KPTI_FLAG(%r13); \
	mov KPTI_R13(%r13), %r13

#define DBG_INTERRUPT_TRAMPOLINE_NOERR \
	DBG_INTERRUPT_TRAMPOLINE_P(/**/)

#define DBG_INTERRUPT_TRAMPOLINE \
	DBG_INTERRUPT_TRAMPOLINE_P(pushq KPTI_ERR(%r13))

/*
 * These labels (_start and _end) are used by trap.c to determine if
 * we took an interrupt like an NMI during the return process.
 */
	.global tr_sysc_ret_start
tr_sysc_ret_start:

/*
 * Syscall return trampolines.
 *
 * These are expected to be called on the kernel %gs. tr_sysret[ql] are
 * called after %rsp is changed back to the user value, so we have no
 * stack to work with. tr_sysexit has a kernel stack (but has to
 * preserve rflags, soooo).
 */
	ENTRY_NP(tr_sysretq)
	cmpq $1, kpti_enable
	jne 1f

	mov %r13, %gs:CPU_KPTI_R13
	SET_USER_CR3(%r13)
	mov %gs:CPU_KPTI_R13, %r13
	/* Zero these to make sure they didn't leak from a kernel trap */
	movq $0, %gs:CPU_KPTI_R13
	movq $0, %gs:CPU_KPTI_R14
1:
	swapgs
	sysretq
	SET_SIZE(tr_sysretq)

	ENTRY_NP(tr_sysretl)
	cmpq $1, kpti_enable
	jne 1f

	mov %r13, %gs:CPU_KPTI_R13
	SET_USER_CR3(%r13)
	mov %gs:CPU_KPTI_R13, %r13
	/* Zero these to make sure they didn't leak from a kernel trap */
	movq $0, %gs:CPU_KPTI_R13
	movq $0, %gs:CPU_KPTI_R14
1:
	SWAPGS
	SYSRETL
	SET_SIZE(tr_sysretl)

	ENTRY_NP(tr_sysexit)
	/*
	 * Note: we want to preserve RFLAGS across this branch, since sysexit
	 * (unlike sysret above) does not restore RFLAGS for us.
	 *
	 * We still have the real kernel stack (sysexit does restore that), so
	 * we can use pushfq/popfq.
	 */
	pushfq

	cmpq $1, kpti_enable
	jne 1f

	/* Have to pop it back off now before we change %cr3! */
	popfq
	mov %r13, %gs:CPU_KPTI_R13
	SET_USER_CR3(%r13)
	mov %gs:CPU_KPTI_R13, %r13
	/* Zero these to make sure they didn't leak from a kernel trap */
	movq $0, %gs:CPU_KPTI_R13
	movq $0, %gs:CPU_KPTI_R14
	jmp 2f
1:
	popfq
2:
	swapgs
	sti
	sysexit
	SET_SIZE(tr_sysexit)

	.global tr_sysc_ret_end
tr_sysc_ret_end:

/*
 * Syscall entry trampolines.
 */

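/*
 * MK_SYSCALL_TRAMPOLINE wraps a SYSCALL-style handler (sys_syscall and
 * friends): swapgs onto the kernel gsbase, spill %r13, switch to the kernel
 * %cr3 (recording the incoming %cr3 first on DEBUG kernels), restore %r13,
 * swapgs back and jump to the real handler, which does its own swapgs as if
 * it had been entered directly.
 */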
#if DEBUG
#define MK_SYSCALL_TRAMPOLINE(isr) \
	ENTRY_NP(tr_/**/isr); \
	swapgs; \
	mov %r13, %gs:CPU_KPTI_R13; \
	mov %cr3, %r13; \
	mov %r13, %gs:CPU_KPTI_TR_CR3; \
	mov %gs:CPU_KPTI_KCR3, %r13; \
	mov %r13, %cr3; \
	mov %gs:CPU_KPTI_R13, %r13; \
	swapgs; \
	jmp isr; \
	SET_SIZE(tr_/**/isr)
#else
#define MK_SYSCALL_TRAMPOLINE(isr) \
	ENTRY_NP(tr_/**/isr); \
	swapgs; \
	mov %r13, %gs:CPU_KPTI_R13; \
	mov %gs:CPU_KPTI_KCR3, %r13; \
	mov %r13, %cr3; \
	mov %gs:CPU_KPTI_R13, %r13; \
	swapgs; \
	jmp isr; \
	SET_SIZE(tr_/**/isr)
#endif

MK_SYSCALL_TRAMPOLINE(sys_syscall)
MK_SYSCALL_TRAMPOLINE(sys_syscall32)
MK_SYSCALL_TRAMPOLINE(brand_sys_syscall)
MK_SYSCALL_TRAMPOLINE(brand_sys_syscall32)

/*
 * SYSENTER is special. The CPU is really not very helpful when it
 * comes to preserving and restoring state with it, and as a result
 * we have to do all of it by hand. So, since we want to preserve
 * RFLAGS, we have to be very careful in these trampolines to not
 * clobber any bits in it. That means no cmpqs or branches!
 */
	ENTRY_NP(tr_sys_sysenter)
	swapgs
	mov %r13, %gs:CPU_KPTI_R13
#if DEBUG
	mov %cr3, %r13
	mov %r13, %gs:CPU_KPTI_TR_CR3
#endif
	mov %gs:CPU_KPTI_KCR3, %r13
	mov %r13, %cr3
	mov %gs:CPU_KPTI_R13, %r13
	jmp _sys_sysenter_post_swapgs
	SET_SIZE(tr_sys_sysenter)

	ENTRY_NP(tr_brand_sys_sysenter)
	swapgs
	mov %r13, %gs:CPU_KPTI_R13
#if DEBUG
	mov %cr3, %r13
	mov %r13, %gs:CPU_KPTI_TR_CR3
#endif
	mov %gs:CPU_KPTI_KCR3, %r13
	mov %r13, %cr3
	mov %gs:CPU_KPTI_R13, %r13
	jmp _brand_sys_sysenter_post_swapgs
	SET_SIZE(tr_brand_sys_sysenter)

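/*
 * MK_SYSCALL_INT_TRAMPOLINE handles the int-style system call entries.
 * These arrive through the IDT, so the CPU has already pushed a frame into
 * the kpti area; since a system call can only come from userland we skip
 * the %cs checks the interrupt trampolines do, switch %cr3, pivot straight
 * onto the kthread stack, re-push the frame from %gs:CPU_KPTI_*, and jump
 * to the handler.
 */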
#define MK_SYSCALL_INT_TRAMPOLINE(isr) \
	ENTRY_NP(tr_/**/isr); \
	swapgs; \
	mov %r13, %gs:CPU_KPTI_R13; \
	SET_KERNEL_CR3(%r13); \
	mov %gs:CPU_THREAD, %r13; \
	mov T_STACK(%r13), %r13; \
	addq $REGSIZE+MINFRAME, %r13; \
	mov %r13, %rsp; \
	pushq %gs:CPU_KPTI_SS; \
	pushq %gs:CPU_KPTI_RSP; \
	pushq %gs:CPU_KPTI_RFLAGS; \
	pushq %gs:CPU_KPTI_CS; \
	pushq %gs:CPU_KPTI_RIP; \
	mov %gs:CPU_KPTI_R13, %r13; \
	swapgs; \
	jmp isr; \
	SET_SIZE(tr_/**/isr)

MK_SYSCALL_INT_TRAMPOLINE(brand_sys_syscall_int)
MK_SYSCALL_INT_TRAMPOLINE(sys_syscall_int)

/*
 * Interrupt/trap return trampolines
 */

	.global tr_intr_ret_start
tr_intr_ret_start:

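/*
 * tr_iret_auto picks the right return path at runtime: if KPTI is disabled,
 * or we are returning to kernel %cs, a plain iretq (via tr_iret_kernel) is
 * fine; otherwise we go through tr_iret_user to pivot stacks and restore
 * the user %cr3.
 */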
	ENTRY_NP(tr_iret_auto)
	cmpq $1, kpti_enable
	jne tr_iret_kernel
	cmpw $KCS_SEL, T_FRAMERET_CS(%rsp)
	je tr_iret_kernel
	jmp tr_iret_user
	SET_SIZE(tr_iret_auto)

	ENTRY_NP(tr_iret_kernel)
	/*
	 * Yes, this does nothing extra. But this way we know if we see iret
	 * elsewhere, then we've failed to properly consider trampolines there.
	 */
	iretq
	SET_SIZE(tr_iret_kernel)

	ENTRY_NP(tr_iret_user)
#if DEBUG
	/*
	 * Panic if we find CR0.TS set. We're still on the kernel stack and
	 * %cr3, but we do need to swap back to the kernel gs. (We don't worry
	 * about swapgs speculation here.)
	 */
	pushq %rax
	mov %cr0, %rax
	testq $CR0_TS, %rax
	jz 1f
	swapgs
	popq %rax
	leaq _bad_ts_panic_msg(%rip), %rdi
	xorl %eax, %eax
	pushq %rbp
	movq %rsp, %rbp
	call panic
1:
	popq %rax
#endif

	cmpq $1, kpti_enable
	jne 1f

	/*
	 * KPTI enabled: we're on the user gsbase at this point, so we
	 * need to swap back so we can pivot stacks.
	 *
	 * The swapgs lfence mitigation is probably not needed here
	 * since a mis-speculation of the above branch would imply KPTI
	 * is disabled, but we'll do so anyway.
	 */
	swapgs
	lfence
	mov %r13, %gs:CPU_KPTI_R13
	PIVOT_KPTI_STK(%r13)
	SET_USER_CR3(%r13)
	mov %gs:CPU_KPTI_R13, %r13
	/* Zero these to make sure they didn't leak from a kernel trap. */
	movq $0, %gs:CPU_KPTI_R13
	movq $0, %gs:CPU_KPTI_R14
	/* And back to user gsbase again. */
	swapgs
1:
	iretq
	SET_SIZE(tr_iret_user)

/*
 * This special return trampoline is for KDI's use only (with kmdb).
 *
 * KDI/kmdb do not use swapgs -- they directly write the GSBASE MSR
 * instead. This trampoline runs after GSBASE has already been changed
 * back to the userland value (so we can't use %gs).
 *
 * Instead, the caller gives us a pointer to the kpti_dbg frame in %r13.
 * The KPTI_R13 member in the kpti_dbg has already been set to what the
 * real %r13 should be before we IRET.
 *
 * Additionally, KDI keeps a copy of the incoming %cr3 value when it
 * took an interrupt, and has put that back in the kpti_dbg area for us
 * to use, so we don't do any sniffing of %cs here. This is important
 * so that debugging code that changes %cr3 is possible.
 */
	ENTRY_NP(tr_iret_kdi)
	movq %r14, KPTI_R14(%r13)	/* %r14 has to be preserved by us */

	movq %rsp, %r14		/* original %rsp is pointing at IRET frame */
	leaq KPTI_TOP(%r13), %rsp
	pushq T_FRAMERET_SS(%r14)
	pushq T_FRAMERET_RSP(%r14)
	pushq T_FRAMERET_RFLAGS(%r14)
	pushq T_FRAMERET_CS(%r14)
	pushq T_FRAMERET_RIP(%r14)

	movq KPTI_TR_CR3(%r13), %r14
	movq %r14, %cr3

	movq KPTI_R14(%r13), %r14
	movq KPTI_R13(%r13), %r13	/* preserved by our caller */

	iretq
	SET_SIZE(tr_iret_kdi)

	.global tr_intr_ret_end
tr_intr_ret_end:

/*
 * Interrupt/trap entry trampolines
 */

/* CPU pushed an error code, and ISR wants one */
#define MK_INTR_TRAMPOLINE(isr) \
	ENTRY_NP(tr_/**/isr); \
	INTERRUPT_TRAMPOLINE; \
	jmp isr; \
	SET_SIZE(tr_/**/isr)

/* CPU didn't push an error code, and ISR doesn't want one */
#define MK_INTR_TRAMPOLINE_NOERR(isr) \
	ENTRY_NP(tr_/**/isr); \
	push $0; \
	INTERRUPT_TRAMPOLINE_NOERR; \
	jmp isr; \
	SET_SIZE(tr_/**/isr)

/* CPU pushed an error code, and ISR wants one */
#define MK_DBG_INTR_TRAMPOLINE(isr) \
	ENTRY_NP(tr_/**/isr); \
	DBG_INTERRUPT_TRAMPOLINE; \
	jmp isr; \
	SET_SIZE(tr_/**/isr)

/* CPU didn't push an error code, and ISR doesn't want one */
#define MK_DBG_INTR_TRAMPOLINE_NOERR(isr) \
	ENTRY_NP(tr_/**/isr); \
	push $0; \
	DBG_INTERRUPT_TRAMPOLINE_NOERR; \
	jmp isr; \
	SET_SIZE(tr_/**/isr)


MK_INTR_TRAMPOLINE_NOERR(div0trap)
MK_DBG_INTR_TRAMPOLINE_NOERR(dbgtrap)
MK_DBG_INTR_TRAMPOLINE_NOERR(brktrap)
MK_INTR_TRAMPOLINE_NOERR(ovflotrap)
MK_INTR_TRAMPOLINE_NOERR(boundstrap)
MK_INTR_TRAMPOLINE_NOERR(invoptrap)
MK_INTR_TRAMPOLINE_NOERR(ndptrap)
MK_INTR_TRAMPOLINE(invtsstrap)
MK_DBG_INTR_TRAMPOLINE(segnptrap)
MK_DBG_INTR_TRAMPOLINE(stktrap)
MK_DBG_INTR_TRAMPOLINE(gptrap)
MK_DBG_INTR_TRAMPOLINE(pftrap)
MK_INTR_TRAMPOLINE_NOERR(resvtrap)
MK_INTR_TRAMPOLINE_NOERR(ndperr)
MK_INTR_TRAMPOLINE(achktrap)
MK_INTR_TRAMPOLINE_NOERR(xmtrap)
MK_INTR_TRAMPOLINE_NOERR(invaltrap)
MK_INTR_TRAMPOLINE_NOERR(fasttrap)
MK_INTR_TRAMPOLINE_NOERR(dtrace_ret)

/*
 * These are special because they can interrupt other traps, and
 * each other. We don't need to pivot their stacks, because they have
 * dedicated IST stack space, but we need to change %cr3.
 */
	ENTRY_NP(tr_nmiint)
	pushq %r13
	mov kpti_safe_cr3, %r13
	mov %r13, %cr3
	popq %r13
	jmp nmiint
	SET_SIZE(tr_nmiint)

#if !defined(__xpv)
	ENTRY_NP(tr_syserrtrap)
	/*
	 * If we got here we should always have a zero error code pushed.
	 * The INT $0x8 instr doesn't seem to push one, though, which we use
	 * as an emergency panic in the other trampolines. So adjust things
	 * here.
	 */
	cmpq $0, (%rsp)
	je 1f
	pushq $0
1:
	pushq %r13
	mov kpti_safe_cr3, %r13
	mov %r13, %cr3
	popq %r13
	jmp syserrtrap
	SET_SIZE(tr_syserrtrap)
#endif

	ENTRY_NP(tr_mcetrap)
	pushq %r13
	mov kpti_safe_cr3, %r13
	mov %r13, %cr3
	popq %r13
	jmp mcetrap
	SET_SIZE(tr_mcetrap)

/*
 * Interrupts start at 32
 */
#define MKIVCT(n) \
	ENTRY_NP(tr_ivct/**/n) \
	push $0; \
	INTERRUPT_TRAMPOLINE; \
	push $n - 0x20; \
	jmp cmnint; \
	SET_SIZE(tr_ivct/**/n)

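/*
 * Each tr_ivct<n> pushes a zero "error code", runs the common interrupt
 * trampoline, then pushes the vector number relative to the base external
 * interrupt vector (0x20) before jumping to cmnint.
 */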
MKIVCT(32);	MKIVCT(33);	MKIVCT(34);	MKIVCT(35);
MKIVCT(36);	MKIVCT(37);	MKIVCT(38);	MKIVCT(39);
MKIVCT(40);	MKIVCT(41);	MKIVCT(42);	MKIVCT(43);
MKIVCT(44);	MKIVCT(45);	MKIVCT(46);	MKIVCT(47);
MKIVCT(48);	MKIVCT(49);	MKIVCT(50);	MKIVCT(51);
MKIVCT(52);	MKIVCT(53);	MKIVCT(54);	MKIVCT(55);
MKIVCT(56);	MKIVCT(57);	MKIVCT(58);	MKIVCT(59);
MKIVCT(60);	MKIVCT(61);	MKIVCT(62);	MKIVCT(63);
MKIVCT(64);	MKIVCT(65);	MKIVCT(66);	MKIVCT(67);
MKIVCT(68);	MKIVCT(69);	MKIVCT(70);	MKIVCT(71);
MKIVCT(72);	MKIVCT(73);	MKIVCT(74);	MKIVCT(75);
MKIVCT(76);	MKIVCT(77);	MKIVCT(78);	MKIVCT(79);
MKIVCT(80);	MKIVCT(81);	MKIVCT(82);	MKIVCT(83);
MKIVCT(84);	MKIVCT(85);	MKIVCT(86);	MKIVCT(87);
MKIVCT(88);	MKIVCT(89);	MKIVCT(90);	MKIVCT(91);
MKIVCT(92);	MKIVCT(93);	MKIVCT(94);	MKIVCT(95);
MKIVCT(96);	MKIVCT(97);	MKIVCT(98);	MKIVCT(99);
MKIVCT(100);	MKIVCT(101);	MKIVCT(102);	MKIVCT(103);
MKIVCT(104);	MKIVCT(105);	MKIVCT(106);	MKIVCT(107);
MKIVCT(108);	MKIVCT(109);	MKIVCT(110);	MKIVCT(111);
MKIVCT(112);	MKIVCT(113);	MKIVCT(114);	MKIVCT(115);
MKIVCT(116);	MKIVCT(117);	MKIVCT(118);	MKIVCT(119);
MKIVCT(120);	MKIVCT(121);	MKIVCT(122);	MKIVCT(123);
MKIVCT(124);	MKIVCT(125);	MKIVCT(126);	MKIVCT(127);
MKIVCT(128);	MKIVCT(129);	MKIVCT(130);	MKIVCT(131);
MKIVCT(132);	MKIVCT(133);	MKIVCT(134);	MKIVCT(135);
MKIVCT(136);	MKIVCT(137);	MKIVCT(138);	MKIVCT(139);
MKIVCT(140);	MKIVCT(141);	MKIVCT(142);	MKIVCT(143);
MKIVCT(144);	MKIVCT(145);	MKIVCT(146);	MKIVCT(147);
MKIVCT(148);	MKIVCT(149);	MKIVCT(150);	MKIVCT(151);
MKIVCT(152);	MKIVCT(153);	MKIVCT(154);	MKIVCT(155);
MKIVCT(156);	MKIVCT(157);	MKIVCT(158);	MKIVCT(159);
MKIVCT(160);	MKIVCT(161);	MKIVCT(162);	MKIVCT(163);
MKIVCT(164);	MKIVCT(165);	MKIVCT(166);	MKIVCT(167);
MKIVCT(168);	MKIVCT(169);	MKIVCT(170);	MKIVCT(171);
MKIVCT(172);	MKIVCT(173);	MKIVCT(174);	MKIVCT(175);
MKIVCT(176);	MKIVCT(177);	MKIVCT(178);	MKIVCT(179);
MKIVCT(180);	MKIVCT(181);	MKIVCT(182);	MKIVCT(183);
MKIVCT(184);	MKIVCT(185);	MKIVCT(186);	MKIVCT(187);
MKIVCT(188);	MKIVCT(189);	MKIVCT(190);	MKIVCT(191);
MKIVCT(192);	MKIVCT(193);	MKIVCT(194);	MKIVCT(195);
MKIVCT(196);	MKIVCT(197);	MKIVCT(198);	MKIVCT(199);
MKIVCT(200);	MKIVCT(201);	MKIVCT(202);	MKIVCT(203);
MKIVCT(204);	MKIVCT(205);	MKIVCT(206);	MKIVCT(207);
MKIVCT(208);	MKIVCT(209);	MKIVCT(210);	MKIVCT(211);
MKIVCT(212);	MKIVCT(213);	MKIVCT(214);	MKIVCT(215);
MKIVCT(216);	MKIVCT(217);	MKIVCT(218);	MKIVCT(219);
MKIVCT(220);	MKIVCT(221);	MKIVCT(222);	MKIVCT(223);
MKIVCT(224);	MKIVCT(225);	MKIVCT(226);	MKIVCT(227);
MKIVCT(228);	MKIVCT(229);	MKIVCT(230);	MKIVCT(231);
MKIVCT(232);	MKIVCT(233);	MKIVCT(234);	MKIVCT(235);
MKIVCT(236);	MKIVCT(237);	MKIVCT(238);	MKIVCT(239);
MKIVCT(240);	MKIVCT(241);	MKIVCT(242);	MKIVCT(243);
MKIVCT(244);	MKIVCT(245);	MKIVCT(246);	MKIVCT(247);
MKIVCT(248);	MKIVCT(249);	MKIVCT(250);	MKIVCT(251);
MKIVCT(252);	MKIVCT(253);	MKIVCT(254);	MKIVCT(255);

/*
 * We're PCIDE, but we don't have INVPCID. The only way to invalidate a
 * PCID other than the current one, then, is to load its cr3 then
 * invlpg. But loading kf_user_cr3 means we can no longer access our
 * caller's text mapping (or indeed, its stack). So this little helper
 * has to live within our trampoline text region.
 *
 * Called as tr_mmu_flush_user_range(addr, len, pgsz, cr3)
 */
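/*
 * Note: we OR CR3_NOINVL_BIT into the %cr3 value we will restore at the
 * end, so that switching back does not flush the TLB entries for our own
 * (current) PCID.
 */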
	ENTRY_NP(tr_mmu_flush_user_range)
	push %rbx
	/* When we read cr3, it never has the NOINVL bit set. */
	mov %cr3, %rax
	movq $CR3_NOINVL_BIT, %rbx
	orq %rbx, %rax

	mov %rcx, %cr3
	add %rdi, %rsi
	.align ASM_ENTRY_ALIGN
1:
	invlpg (%rdi)
	add %rdx, %rdi
	cmp %rsi, %rdi
	jb 1b
	mov %rax, %cr3
	pop %rbx
	retq
	SET_SIZE(tr_mmu_flush_user_range)

	.align MMU_PAGESIZE
	.global kpti_tramp_end
kpti_tramp_end:
	nop
