/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */
/*
 * Copyright 2019 Joyent, Inc.
 */

/*
 * This file contains the trampolines that are used by KPTI in order to be
 * able to take interrupts/trap/etc while on the "user" page table.
 *
 * We don't map the full kernel text into the user page table: instead we
 * map this one small section of trampolines (which compiles to ~13 pages).
 * These trampolines are set in the IDT always (so they will run no matter
 * whether we're on the kernel or user page table), and their primary job is to
 * pivot us to the kernel %cr3 and %rsp without ruining everything.
 *
 * All of these interrupts use the amd64 IST feature when we have KPTI enabled,
 * meaning that they will execute with their %rsp set to a known location, even
 * if we take them in the kernel.
 *
 * Over in desctbls.c (for cpu0) and mp_pc.c (other cpus) we set up the IST
 * stack to point at &cpu->cpu_m.mcpu_kpti.kf_tr_rsp. You can see the mcpu_kpti
 * (a struct kpti_frame) defined in machcpuvar.h. This struct is set up to be
 * page-aligned, and we map the page it's on into both page tables. Using a
 * struct attached to the cpu_t also means that we can use %rsp-relative
 * addressing to find anything on the cpu_t, so we don't have to touch %gs or
 * GSBASE at all on incoming interrupt trampolines (which can get pretty hairy).
 *
 * This little struct is where the CPU will push the actual interrupt frame.
 * Then, in the trampoline, we change %cr3, then figure out our destination
 * stack pointer and "pivot" to it (set %rsp and re-push the CPU's interrupt
 * frame). Then we jump to the regular ISR in the kernel text and carry on as
 * normal.
 *
 * We leave the original frame and any spilled regs behind in the kpti_frame
 * lazily until we want to return to userland. Then, we clear any spilled
 * regs from it, and overwrite the rest with our iret frame. When switching
 * this cpu to a different process (in hat_switch), we bzero the whole region to
 * make sure nothing can leak between processes.
 *
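 * As a rough sketch (not the literal instruction sequence), the common
 * interrupt path through a trampoline below looks like this:
 *
 *	CPU delivers the trap; IST %rsp = &cpu->cpu_m.mcpu_kpti.kf_tr_rsp
 *	save %r13/%r14 into the kpti_frame; record the trap-time %cr3
 *	if (frame %cs != KCS_SEL || frame %rsp is below kpti_kbase)
 *		switch to the kernel %cr3 (KPTI_KCR3), if one is set
 *		destination stack = top of curthread's kernel stack
 *	else
 *		destination stack = the %rsp from the trap frame
 *	re-push the CPU's 5-word frame (plus error code) on that stack
 *	restore %r13/%r14 and jmp to the real ISR in kernel text
 *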
 * When we're returning back to the original place we took the interrupt later
 * (especially if it was in userland), we have to jmp back to the "return
 * trampolines" here, since when we set %cr3 back to the user value, we need to
 * be executing from code here in these shared pages and not the main kernel
 * text again. Even though it should be fine to iret directly from kernel text
 * when returning to kernel code, we make things jmp to a trampoline here just
 * for consistency.
 *
 * Note that with IST, it is critical that we have always pivoted away from
 * the IST stack before we can possibly take any other interrupt on the same
 * IST (unless it's an end-of-the-world fault and we don't care about coming
 * back from it ever).
 *
 * This is particularly relevant to the dbgtrap/brktrap trampolines, as they
 * are regularly taken from within trampoline code (e.g. in the sysenter
 * single-step case) and then have to return to the world normally. As a
 * result, these two are IST'd to their own kpti_frame right above the normal
 * one (in the same page), so they don't clobber their parent interrupt.
 *
 * To aid with debugging, we also IST the page fault (#PF/pftrap), general
 * protection fault (#GP/gptrap) and stack fault (#SS/stktrap) interrupts to
 * their own separate kpti_frame. This ensures that if we take one of these
 * due to a bug in trampoline code, we preserve the original trampoline
 * state that caused the trap.
 *
 * NMI, MCE and dblfault interrupts are also taken on their own dedicated IST
 * stacks, since they can interrupt another ISR at any time. These stacks are
 * full-sized, however, and not a little kpti_frame struct. We only set %cr3 in
 * their trampolines (and do it unconditionally), and don't bother pivoting
 * away. We're either going into the panic() path, or we're going to return
 * straight away without rescheduling, so it's fine to not be on our real
 * kthread stack (and some of the state we want to go find it with might be
 * corrupt!).
 *
 * Finally, for these "special" interrupts (NMI/MCE/double fault) we use a
 * special %cr3 value we stash here in the text (kpti_safe_cr3). We set this to
 * point at the PML4 for kas early in boot and never touch it again. Hopefully
 * it survives whatever corruption brings down the rest of the kernel!
 *
 * Syscalls are different from interrupts (at least in the SYSENTER/SYSCALL64
 * cases) in that they do not push an interrupt frame (and also have some other
 * effects). In the syscall trampolines, we assume that we can only be taking
 * the call from userland and use swapgs and an unconditional overwrite of %cr3.
 * We do not do any stack pivoting for syscalls (and we leave SYSENTER's
 * existing %rsp pivot untouched) -- instead we spill registers into
 * %gs:CPU_KPTI_* as we need to.
 *
 * Note that the normal %cr3 values do not cause invalidations with PCIDE --
 * see hat_switch().
 */

/*
 * The macros here mostly line up with what's in kdi_idthdl.s, too, so if you
 * fix bugs here, check to see if they should be fixed there as well.
 */

#include <sys/asm_linkage.h>
#include <sys/asm_misc.h>
#include <sys/regset.h>
#include <sys/privregs.h>
#include <sys/psw.h>
#include <sys/machbrand.h>
#include <sys/param.h>

#if defined(__lint)

#include <sys/types.h>
#include <sys/thread.h>
#include <sys/systm.h>

#else	/* __lint */

#include <sys/segments.h>
#include <sys/pcb.h>
#include <sys/trap.h>
#include <sys/ftrace.h>
#include <sys/traptrace.h>
#include <sys/clock.h>
#include <sys/model.h>
#include <sys/panic.h>

#if defined(__xpv)
#include <sys/hypervisor.h>
#endif

#include "assym.h"

	.data
	DGDEF3(kpti_enable, 8, 8)
	.fill	1, 8, 1

#if DEBUG
	.data
_bad_ts_panic_msg:
	.string "kpti_trampolines.s: tr_iret_user but CR0.TS set"
#endif

.section ".text";
.align MMU_PAGESIZE

.global kpti_tramp_start
kpti_tramp_start:
	nop

	/* This will be set by mlsetup, and then double-checked later */
.global kpti_safe_cr3
kpti_safe_cr3:
	.quad 0
	SET_SIZE(kpti_safe_cr3)

	/* startup_kmem() will overwrite this */
.global kpti_kbase
kpti_kbase:
	.quad KERNELBASE
	SET_SIZE(kpti_kbase)

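/*
 * Small helpers used by the trampolines below; each uses its "spillreg"
 * argument as its only scratch register and expects the kernel GSBASE to be
 * live. SET_KERNEL_CR3 records the trap-time %cr3 and switches to the
 * kernel page table if one has been set up (KPTI_KCR3 != 0); SET_USER_CR3
 * switches to the user page table (recording the old %cr3 first on DEBUG
 * kernels); PIVOT_KPTI_STK copies the 5-word iret frame that spillreg
 * points at onto the per-CPU KPTI return stack and moves %rsp there.
 */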
#define	SET_KERNEL_CR3(spillreg)		\
	mov	%cr3, spillreg;			\
	mov	spillreg, %gs:CPU_KPTI_TR_CR3;	\
	mov	%gs:CPU_KPTI_KCR3, spillreg;	\
	cmp	$0, spillreg;			\
	je	2f;				\
	mov	spillreg, %cr3;			\
2:

#if DEBUG
#define	SET_USER_CR3(spillreg)			\
	mov	%cr3, spillreg;			\
	mov	spillreg, %gs:CPU_KPTI_TR_CR3;	\
	mov	%gs:CPU_KPTI_UCR3, spillreg;	\
	mov	spillreg, %cr3
#else
#define	SET_USER_CR3(spillreg)			\
	mov	%gs:CPU_KPTI_UCR3, spillreg;	\
	mov	spillreg, %cr3
#endif

#define	PIVOT_KPTI_STK(spillreg)		\
	mov	%rsp, spillreg;			\
	mov	%gs:CPU_KPTI_RET_RSP, %rsp;	\
	pushq	T_FRAMERET_SS(spillreg);	\
	pushq	T_FRAMERET_RSP(spillreg);	\
	pushq	T_FRAMERET_RFLAGS(spillreg);	\
	pushq	T_FRAMERET_CS(spillreg);	\
	pushq	T_FRAMERET_RIP(spillreg)

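/*
 * The body shared by the interrupt/trap entry trampolines below. It saves
 * %r13/%r14 into the kpti_frame the CPU pushed onto, records the trap-time
 * %cr3, switches to the kernel %cr3 and the kthread stack if we arrived
 * from userland (or from a suspicious-looking kernel %rsp), re-pushes the
 * interrupt frame (and error code, via "errpush") on the destination stack,
 * and restores %r13/%r14 so the real ISR sees the original registers.
 */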
#define	INTERRUPT_TRAMPOLINE_P(errpush)		\
	pushq	%r13;				\
	pushq	%r14;				\
	subq	$KPTI_R14, %rsp;		\
	/* Save current %cr3. */		\
	mov	%cr3, %r14;			\
	mov	%r14, KPTI_TR_CR3(%rsp);	\
						\
	cmpw	$KCS_SEL, KPTI_CS(%rsp);	\
	je	3f;				\
1:						\
	/* Change to the "kernel" %cr3 */	\
	mov	KPTI_KCR3(%rsp), %r14;		\
	cmp	$0, %r14;			\
	je	2f;				\
	mov	%r14, %cr3;			\
2:						\
	/* Get our cpu_t in %r13 */		\
	mov	%rsp, %r13;			\
	and	$(~(MMU_PAGESIZE - 1)), %r13;	\
	subq	$CPU_KPTI_START, %r13;		\
	/* Use top of the kthread stk */	\
	mov	CPU_THREAD(%r13), %r14;		\
	mov	T_STACK(%r14), %r14;		\
	addq	$REGSIZE+MINFRAME, %r14;	\
	jmp	4f;				\
3:						\
	/* Check the %rsp in the frame. */	\
	/* Is it above kernel base? */		\
	mov	kpti_kbase, %r14;		\
	cmp	%r14, KPTI_RSP(%rsp);		\
	jb	1b;				\
	/* Use the %rsp from the trap frame */	\
	mov	KPTI_RSP(%rsp), %r14;		\
	and	$(~0xf), %r14;			\
4:						\
	mov	%rsp, %r13;			\
	/* %r14 contains our destination stk */	\
	mov	%r14, %rsp;			\
	pushq	KPTI_SS(%r13);			\
	pushq	KPTI_RSP(%r13);			\
	pushq	KPTI_RFLAGS(%r13);		\
	pushq	KPTI_CS(%r13);			\
	pushq	KPTI_RIP(%r13);			\
	errpush;				\
	mov	KPTI_R14(%r13), %r14;		\
	mov	KPTI_R13(%r13), %r13

#define	INTERRUPT_TRAMPOLINE_NOERR		\
	INTERRUPT_TRAMPOLINE_P(/**/)

#define	INTERRUPT_TRAMPOLINE			\
	INTERRUPT_TRAMPOLINE_P(pushq KPTI_ERR(%r13))

/*
 * This is used for all interrupts that can plausibly be taken inside another
 * interrupt and are using a kpti_frame stack (so #BP, #DB, #GP, #PF, #SS).
 *
 * We also use this for #NP, even though it uses the standard IST: the
 * additional %rsp checks below will catch when we get an exception doing an
 * iret to userspace with a bad %cs/%ss. This appears as a kernel trap, and
 * only later gets redirected via kern_gpfault().
 *
 * We check for whether we took the interrupt while in another trampoline, in
 * which case we need to use the kthread stack.
 */
#define	DBG_INTERRUPT_TRAMPOLINE_P(errpush)	\
	pushq	%r13;				\
	pushq	%r14;				\
	subq	$KPTI_R14, %rsp;		\
	/* Check for clobbering */		\
	cmp	$0, KPTI_FLAG(%rsp);		\
	je	1f;				\
	/* Don't worry, this totally works */	\
	int	$8;				\
1:						\
	movq	$1, KPTI_FLAG(%rsp);		\
	/* Save current %cr3. */		\
	mov	%cr3, %r14;			\
	mov	%r14, KPTI_TR_CR3(%rsp);	\
						\
	cmpw	$KCS_SEL, KPTI_CS(%rsp);	\
	je	4f;				\
2:						\
	/* Change to the "kernel" %cr3 */	\
	mov	KPTI_KCR3(%rsp), %r14;		\
	cmp	$0, %r14;			\
	je	3f;				\
	mov	%r14, %cr3;			\
3:						\
	/* Get our cpu_t in %r13 */		\
	mov	%rsp, %r13;			\
	and	$(~(MMU_PAGESIZE - 1)), %r13;	\
	subq	$CPU_KPTI_START, %r13;		\
	/* Use top of the kthread stk */	\
	mov	CPU_THREAD(%r13), %r14;		\
	mov	T_STACK(%r14), %r14;		\
	addq	$REGSIZE+MINFRAME, %r14;	\
	jmp	6f;				\
4:						\
	/* Check the %rsp in the frame. */	\
	/* Is it above kernel base? */		\
	/* If not, treat as user. */		\
	mov	kpti_kbase, %r14;		\
	cmp	%r14, KPTI_RSP(%rsp);		\
	jb	2b;				\
	/* Is it within the kpti_frame page? */	\
	/* If it is, treat as user interrupt */	\
	mov	%rsp, %r13;			\
	and	$(~(MMU_PAGESIZE - 1)), %r13;	\
	mov	KPTI_RSP(%rsp), %r14;		\
	and	$(~(MMU_PAGESIZE - 1)), %r14;	\
	cmp	%r13, %r14;			\
	je	2b;				\
	/* Were we in trampoline code? */	\
	leaq	kpti_tramp_start, %r14;		\
	cmp	%r14, KPTI_RIP(%rsp);		\
	jb	5f;				\
	leaq	kpti_tramp_end, %r14;		\
	cmp	%r14, KPTI_RIP(%rsp);		\
	ja	5f;				\
	/* If we were, change %cr3: we might */	\
	/* have interrupted before it did. */	\
	mov	KPTI_KCR3(%rsp), %r14;		\
	mov	%r14, %cr3;			\
5:						\
	/* Use the %rsp from the trap frame */	\
	mov	KPTI_RSP(%rsp), %r14;		\
	and	$(~0xf), %r14;			\
6:						\
	mov	%rsp, %r13;			\
	/* %r14 contains our destination stk */	\
	mov	%r14, %rsp;			\
	pushq	KPTI_SS(%r13);			\
	pushq	KPTI_RSP(%r13);			\
	pushq	KPTI_RFLAGS(%r13);		\
	pushq	KPTI_CS(%r13);			\
	pushq	KPTI_RIP(%r13);			\
	errpush;				\
	mov	KPTI_R14(%r13), %r14;		\
	movq	$0, KPTI_FLAG(%r13);		\
	mov	KPTI_R13(%r13), %r13

#define	DBG_INTERRUPT_TRAMPOLINE_NOERR		\
	DBG_INTERRUPT_TRAMPOLINE_P(/**/)

#define	DBG_INTERRUPT_TRAMPOLINE		\
	DBG_INTERRUPT_TRAMPOLINE_P(pushq KPTI_ERR(%r13))

/*
 * These labels (_start and _end) are used by trap.c to determine if
 * we took an interrupt like an NMI during the return process.
 */
.global	tr_sysc_ret_start
tr_sysc_ret_start:

/*
 * Syscall return trampolines.
 *
 * These are expected to be called on the kernel %gs. tr_sysret[ql] are
 * called after %rsp is changed back to the user value, so we have no
 * stack to work with. tr_sysexit has a kernel stack (but has to
 * preserve rflags, soooo).
 */
	ENTRY_NP(tr_sysretq)
	cmpq	$1, kpti_enable
	jne	1f

	mov	%r13, %gs:CPU_KPTI_R13
	SET_USER_CR3(%r13)
	mov	%gs:CPU_KPTI_R13, %r13
	/* Zero these to make sure they didn't leak from a kernel trap */
	movq	$0, %gs:CPU_KPTI_R13
	movq	$0, %gs:CPU_KPTI_R14
1:
	swapgs
	sysretq
	SET_SIZE(tr_sysretq)

	ENTRY_NP(tr_sysretl)
	cmpq	$1, kpti_enable
	jne	1f

	mov	%r13, %gs:CPU_KPTI_R13
	SET_USER_CR3(%r13)
	mov	%gs:CPU_KPTI_R13, %r13
	/* Zero these to make sure they didn't leak from a kernel trap */
	movq	$0, %gs:CPU_KPTI_R13
	movq	$0, %gs:CPU_KPTI_R14
1:
	SWAPGS
	SYSRETL
	SET_SIZE(tr_sysretl)

	ENTRY_NP(tr_sysexit)
	/*
	 * Note: we want to preserve RFLAGS across this branch, since sysexit
	 * (unlike sysret above) does not restore RFLAGS for us.
	 *
	 * We still have the real kernel stack (sysexit does restore that), so
	 * we can use pushfq/popfq.
	 */
	pushfq

	cmpq	$1, kpti_enable
	jne	1f

	/* Have to pop it back off now before we change %cr3! */
	popfq
	mov	%r13, %gs:CPU_KPTI_R13
	SET_USER_CR3(%r13)
	mov	%gs:CPU_KPTI_R13, %r13
	/* Zero these to make sure they didn't leak from a kernel trap */
	movq	$0, %gs:CPU_KPTI_R13
	movq	$0, %gs:CPU_KPTI_R14
	jmp	2f
1:
	popfq
2:
	swapgs
	sti
	sysexit
	SET_SIZE(tr_sysexit)

.global	tr_sysc_ret_end
tr_sysc_ret_end:

/*
 * Syscall entry trampolines.
 */

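/*
 * SYSCALL/SYSCALL64 entry. SYSCALL pushes no frame and leaves %rsp alone,
 * so all we do here is spill %r13 into the per-CPU kpti area, switch to the
 * kernel %cr3 (recording the old one on DEBUG kernels), and jump to the
 * real handler with the user %rsp still intact. We swapgs to reach the
 * per-CPU area and swapgs back again so the real handler's own swapgs
 * still does the right thing.
 */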
#if DEBUG
#define	MK_SYSCALL_TRAMPOLINE(isr)		\
	ENTRY_NP(tr_/**/isr);			\
	swapgs;					\
	mov	%r13, %gs:CPU_KPTI_R13;		\
	mov	%cr3, %r13;			\
	mov	%r13, %gs:CPU_KPTI_TR_CR3;	\
	mov	%gs:CPU_KPTI_KCR3, %r13;	\
	mov	%r13, %cr3;			\
	mov	%gs:CPU_KPTI_R13, %r13;		\
	swapgs;					\
	jmp	isr;				\
	SET_SIZE(tr_/**/isr)
#else
#define	MK_SYSCALL_TRAMPOLINE(isr)		\
	ENTRY_NP(tr_/**/isr);			\
	swapgs;					\
	mov	%r13, %gs:CPU_KPTI_R13;		\
	mov	%gs:CPU_KPTI_KCR3, %r13;	\
	mov	%r13, %cr3;			\
	mov	%gs:CPU_KPTI_R13, %r13;		\
	swapgs;					\
	jmp	isr;				\
	SET_SIZE(tr_/**/isr)
#endif

	MK_SYSCALL_TRAMPOLINE(sys_syscall)
	MK_SYSCALL_TRAMPOLINE(sys_syscall32)
	MK_SYSCALL_TRAMPOLINE(brand_sys_syscall)
	MK_SYSCALL_TRAMPOLINE(brand_sys_syscall32)

	/*
	 * SYSENTER is special. The CPU is really not very helpful when it
	 * comes to preserving and restoring state with it, and as a result
	 * we have to do all of it by hand. So, since we want to preserve
	 * RFLAGS, we have to be very careful in these trampolines to not
	 * clobber any bits in it. That means no cmpqs or branches!
	 */
	ENTRY_NP(tr_sys_sysenter)
	swapgs
	mov	%r13, %gs:CPU_KPTI_R13
#if DEBUG
	mov	%cr3, %r13
	mov	%r13, %gs:CPU_KPTI_TR_CR3
#endif
	mov	%gs:CPU_KPTI_KCR3, %r13
	mov	%r13, %cr3
	mov	%gs:CPU_KPTI_R13, %r13
	jmp	_sys_sysenter_post_swapgs
	SET_SIZE(tr_sys_sysenter)

	ENTRY_NP(tr_brand_sys_sysenter)
	swapgs
	mov	%r13, %gs:CPU_KPTI_R13
#if DEBUG
	mov	%cr3, %r13
	mov	%r13, %gs:CPU_KPTI_TR_CR3
#endif
	mov	%gs:CPU_KPTI_KCR3, %r13
	mov	%r13, %cr3
	mov	%gs:CPU_KPTI_R13, %r13
	jmp	_brand_sys_sysenter_post_swapgs
	SET_SIZE(tr_brand_sys_sysenter)

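/*
 * Trampoline for the int-gate syscall entries (e.g. sys_syscall_int).
 * These arrive through the IDT, so with KPTI the CPU has already pushed
 * its interrupt frame into the per-CPU kpti area. Switch to the kernel
 * %cr3, pivot onto the top of the kthread stack, and re-push that frame
 * there before jumping to the real handler.
 */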
#define	MK_SYSCALL_INT_TRAMPOLINE(isr)		\
	ENTRY_NP(tr_/**/isr);			\
	swapgs;					\
	mov	%r13, %gs:CPU_KPTI_R13;		\
	SET_KERNEL_CR3(%r13);			\
	mov	%gs:CPU_THREAD, %r13;		\
	mov	T_STACK(%r13), %r13;		\
	addq	$REGSIZE+MINFRAME, %r13;	\
	mov	%r13, %rsp;			\
	pushq	%gs:CPU_KPTI_SS;		\
	pushq	%gs:CPU_KPTI_RSP;		\
	pushq	%gs:CPU_KPTI_RFLAGS;		\
	pushq	%gs:CPU_KPTI_CS;		\
	pushq	%gs:CPU_KPTI_RIP;		\
	mov	%gs:CPU_KPTI_R13, %r13;		\
	swapgs;					\
	jmp	isr;				\
	SET_SIZE(tr_/**/isr)

	MK_SYSCALL_INT_TRAMPOLINE(brand_sys_syscall_int)
	MK_SYSCALL_INT_TRAMPOLINE(sys_syscall_int)

/*
 * Interrupt/trap return trampolines
 */

.global	tr_intr_ret_start
tr_intr_ret_start:

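	/*
	 * Pick the right return path: if KPTI is disabled, or we're
	 * returning to kernel code (KCS_SEL in the frame's %cs), a plain
	 * iretq is fine; otherwise take the user return path, which pivots
	 * onto the kpti stack and switches back to the user %cr3.
	 */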
	ENTRY_NP(tr_iret_auto)
	cmpq	$1, kpti_enable
	jne	tr_iret_kernel
	cmpw	$KCS_SEL, T_FRAMERET_CS(%rsp)
	je	tr_iret_kernel
	jmp	tr_iret_user
	SET_SIZE(tr_iret_auto)

	ENTRY_NP(tr_iret_kernel)
	/*
	 * Yes, this does nothing extra. But this way we know if we see iret
	 * elsewhere, then we've failed to properly consider trampolines there.
	 */
	iretq
	SET_SIZE(tr_iret_kernel)

	ENTRY_NP(tr_iret_user)
#if DEBUG
	/*
	 * Panic if we find CR0.TS set. We're still on the kernel stack and
	 * %cr3, but we do need to swap back to the kernel gs. (We don't worry
	 * about swapgs speculation here.)
	 */
	pushq	%rax
	mov	%cr0, %rax
	testq	$CR0_TS, %rax
	jz	1f
	swapgs
	popq	%rax
	leaq	_bad_ts_panic_msg(%rip), %rdi
	xorl	%eax, %eax
	pushq	%rbp
	movq	%rsp, %rbp
	call	panic
1:
	popq	%rax
#endif

	cmpq	$1, kpti_enable
	jne	1f

	/*
	 * KPTI enabled: we're on the user gsbase at this point, so we
	 * need to swap back so we can pivot stacks.
	 *
	 * The swapgs lfence mitigation is probably not needed here
	 * since a mis-speculation of the above branch would imply KPTI
	 * is disabled, but we'll do so anyway.
	 */
	swapgs
	lfence
	mov	%r13, %gs:CPU_KPTI_R13
	PIVOT_KPTI_STK(%r13)
	SET_USER_CR3(%r13)
	mov	%gs:CPU_KPTI_R13, %r13
	/* Zero these to make sure they didn't leak from a kernel trap. */
	movq	$0, %gs:CPU_KPTI_R13
	movq	$0, %gs:CPU_KPTI_R14
	/* And back to user gsbase again. */
	swapgs
1:
	iretq
	SET_SIZE(tr_iret_user)

	/*
	 * This special return trampoline is for KDI's use only (with kmdb).
	 *
	 * KDI/kmdb do not use swapgs -- they directly write the GSBASE MSR
	 * instead. This trampoline runs after GSBASE has already been changed
	 * back to the userland value (so we can't use %gs).
	 *
	 * Instead, the caller gives us a pointer to the kpti_dbg frame in %r13.
	 * The KPTI_R13 member in the kpti_dbg has already been set to what the
	 * real %r13 should be before we IRET.
	 *
	 * Additionally, KDI keeps a copy of the incoming %cr3 value when it
	 * took an interrupt, and has put that back in the kpti_dbg area for us
	 * to use, so we don't do any sniffing of %cs here. This is important
	 * so that debugging code that changes %cr3 is possible.
	 */
	ENTRY_NP(tr_iret_kdi)
	movq	%r14, KPTI_R14(%r13)	/* %r14 has to be preserved by us */

	movq	%rsp, %r14	/* original %rsp is pointing at IRET frame */
	leaq	KPTI_TOP(%r13), %rsp
	pushq	T_FRAMERET_SS(%r14)
	pushq	T_FRAMERET_RSP(%r14)
	pushq	T_FRAMERET_RFLAGS(%r14)
	pushq	T_FRAMERET_CS(%r14)
	pushq	T_FRAMERET_RIP(%r14)

	movq	KPTI_TR_CR3(%r13), %r14
	movq	%r14, %cr3

	movq	KPTI_R14(%r13), %r14
	movq	KPTI_R13(%r13), %r13	/* preserved by our caller */

	iretq
	SET_SIZE(tr_iret_kdi)

.global	tr_intr_ret_end
tr_intr_ret_end:

/*
 * Interrupt/trap entry trampolines
 */

	/* CPU pushed an error code, and ISR wants one */
#define	MK_INTR_TRAMPOLINE(isr)			\
	ENTRY_NP(tr_/**/isr);			\
	INTERRUPT_TRAMPOLINE;			\
	jmp	isr;				\
	SET_SIZE(tr_/**/isr)

	/* CPU didn't push an error code, and ISR doesn't want one */
#define	MK_INTR_TRAMPOLINE_NOERR(isr)		\
	ENTRY_NP(tr_/**/isr);			\
	push	$0;				\
	INTERRUPT_TRAMPOLINE_NOERR;		\
	jmp	isr;				\
	SET_SIZE(tr_/**/isr)

	/* CPU pushed an error code, and ISR wants one */
#define	MK_DBG_INTR_TRAMPOLINE(isr)		\
	ENTRY_NP(tr_/**/isr);			\
	DBG_INTERRUPT_TRAMPOLINE;		\
	jmp	isr;				\
	SET_SIZE(tr_/**/isr)

	/* CPU didn't push an error code, and ISR doesn't want one */
#define	MK_DBG_INTR_TRAMPOLINE_NOERR(isr)	\
	ENTRY_NP(tr_/**/isr);			\
	push	$0;				\
	DBG_INTERRUPT_TRAMPOLINE_NOERR;		\
	jmp	isr;				\
	SET_SIZE(tr_/**/isr)


	MK_INTR_TRAMPOLINE_NOERR(div0trap)
	MK_DBG_INTR_TRAMPOLINE_NOERR(dbgtrap)
	MK_DBG_INTR_TRAMPOLINE_NOERR(brktrap)
	MK_INTR_TRAMPOLINE_NOERR(ovflotrap)
	MK_INTR_TRAMPOLINE_NOERR(boundstrap)
	MK_INTR_TRAMPOLINE_NOERR(invoptrap)
	MK_INTR_TRAMPOLINE_NOERR(ndptrap)
	MK_INTR_TRAMPOLINE(invtsstrap)
	MK_DBG_INTR_TRAMPOLINE(segnptrap)
	MK_DBG_INTR_TRAMPOLINE(stktrap)
	MK_DBG_INTR_TRAMPOLINE(gptrap)
	MK_DBG_INTR_TRAMPOLINE(pftrap)
	MK_INTR_TRAMPOLINE_NOERR(resvtrap)
	MK_INTR_TRAMPOLINE_NOERR(ndperr)
	MK_INTR_TRAMPOLINE(achktrap)
	MK_INTR_TRAMPOLINE_NOERR(xmtrap)
	MK_INTR_TRAMPOLINE_NOERR(invaltrap)
	MK_INTR_TRAMPOLINE_NOERR(fasttrap)
	MK_INTR_TRAMPOLINE_NOERR(dtrace_ret)

	/*
	 * These are special because they can interrupt other traps, and
	 * each other. We don't need to pivot their stacks, because they have
	 * dedicated IST stack space, but we need to change %cr3.
	 */
	ENTRY_NP(tr_nmiint)
	pushq	%r13
	mov	kpti_safe_cr3, %r13
	mov	%r13, %cr3
	popq	%r13
	jmp	nmiint
	SET_SIZE(tr_nmiint)

#if !defined(__xpv)
	ENTRY_NP(tr_syserrtrap)
	/*
	 * If we got here we should always have a zero error code pushed.
	 * The INT $0x8 instruction (which we use as an emergency panic in
	 * the other trampolines) doesn't seem to push one, though. So
	 * adjust things here.
	 */
	cmpq	$0, (%rsp)
	je	1f
	pushq	$0
1:
	pushq	%r13
	mov	kpti_safe_cr3, %r13
	mov	%r13, %cr3
	popq	%r13
	jmp	syserrtrap
	SET_SIZE(tr_syserrtrap)
#endif

	ENTRY_NP(tr_mcetrap)
	pushq	%r13
	mov	kpti_safe_cr3, %r13
	mov	%r13, %cr3
	popq	%r13
	jmp	mcetrap
	SET_SIZE(tr_mcetrap)

/*
 * Interrupts start at 32
 */
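/*
 * Each MKIVCT(n) emits tr_ivctN: push a dummy error code, run the common
 * trampoline, push the vector number (less the 0x20 base above), and jump
 * to cmnint.
 */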
#define	MKIVCT(n)			\
	ENTRY_NP(tr_ivct/**/n)		\
	push	$0;			\
	INTERRUPT_TRAMPOLINE;		\
	push	$n - 0x20;		\
	jmp	cmnint;			\
	SET_SIZE(tr_ivct/**/n)

	MKIVCT(32);	MKIVCT(33);	MKIVCT(34);	MKIVCT(35);
	MKIVCT(36);	MKIVCT(37);	MKIVCT(38);	MKIVCT(39);
	MKIVCT(40);	MKIVCT(41);	MKIVCT(42);	MKIVCT(43);
	MKIVCT(44);	MKIVCT(45);	MKIVCT(46);	MKIVCT(47);
	MKIVCT(48);	MKIVCT(49);	MKIVCT(50);	MKIVCT(51);
	MKIVCT(52);	MKIVCT(53);	MKIVCT(54);	MKIVCT(55);
	MKIVCT(56);	MKIVCT(57);	MKIVCT(58);	MKIVCT(59);
	MKIVCT(60);	MKIVCT(61);	MKIVCT(62);	MKIVCT(63);
	MKIVCT(64);	MKIVCT(65);	MKIVCT(66);	MKIVCT(67);
	MKIVCT(68);	MKIVCT(69);	MKIVCT(70);	MKIVCT(71);
	MKIVCT(72);	MKIVCT(73);	MKIVCT(74);	MKIVCT(75);
	MKIVCT(76);	MKIVCT(77);	MKIVCT(78);	MKIVCT(79);
	MKIVCT(80);	MKIVCT(81);	MKIVCT(82);	MKIVCT(83);
	MKIVCT(84);	MKIVCT(85);	MKIVCT(86);	MKIVCT(87);
	MKIVCT(88);	MKIVCT(89);	MKIVCT(90);	MKIVCT(91);
	MKIVCT(92);	MKIVCT(93);	MKIVCT(94);	MKIVCT(95);
	MKIVCT(96);	MKIVCT(97);	MKIVCT(98);	MKIVCT(99);
	MKIVCT(100);	MKIVCT(101);	MKIVCT(102);	MKIVCT(103);
	MKIVCT(104);	MKIVCT(105);	MKIVCT(106);	MKIVCT(107);
	MKIVCT(108);	MKIVCT(109);	MKIVCT(110);	MKIVCT(111);
	MKIVCT(112);	MKIVCT(113);	MKIVCT(114);	MKIVCT(115);
	MKIVCT(116);	MKIVCT(117);	MKIVCT(118);	MKIVCT(119);
	MKIVCT(120);	MKIVCT(121);	MKIVCT(122);	MKIVCT(123);
	MKIVCT(124);	MKIVCT(125);	MKIVCT(126);	MKIVCT(127);
	MKIVCT(128);	MKIVCT(129);	MKIVCT(130);	MKIVCT(131);
	MKIVCT(132);	MKIVCT(133);	MKIVCT(134);	MKIVCT(135);
	MKIVCT(136);	MKIVCT(137);	MKIVCT(138);	MKIVCT(139);
	MKIVCT(140);	MKIVCT(141);	MKIVCT(142);	MKIVCT(143);
	MKIVCT(144);	MKIVCT(145);	MKIVCT(146);	MKIVCT(147);
	MKIVCT(148);	MKIVCT(149);	MKIVCT(150);	MKIVCT(151);
	MKIVCT(152);	MKIVCT(153);	MKIVCT(154);	MKIVCT(155);
	MKIVCT(156);	MKIVCT(157);	MKIVCT(158);	MKIVCT(159);
	MKIVCT(160);	MKIVCT(161);	MKIVCT(162);	MKIVCT(163);
	MKIVCT(164);	MKIVCT(165);	MKIVCT(166);	MKIVCT(167);
	MKIVCT(168);	MKIVCT(169);	MKIVCT(170);	MKIVCT(171);
	MKIVCT(172);	MKIVCT(173);	MKIVCT(174);	MKIVCT(175);
	MKIVCT(176);	MKIVCT(177);	MKIVCT(178);	MKIVCT(179);
	MKIVCT(180);	MKIVCT(181);	MKIVCT(182);	MKIVCT(183);
	MKIVCT(184);	MKIVCT(185);	MKIVCT(186);	MKIVCT(187);
	MKIVCT(188);	MKIVCT(189);	MKIVCT(190);	MKIVCT(191);
	MKIVCT(192);	MKIVCT(193);	MKIVCT(194);	MKIVCT(195);
	MKIVCT(196);	MKIVCT(197);	MKIVCT(198);	MKIVCT(199);
	MKIVCT(200);	MKIVCT(201);	MKIVCT(202);	MKIVCT(203);
	MKIVCT(204);	MKIVCT(205);	MKIVCT(206);	MKIVCT(207);
	MKIVCT(208);	MKIVCT(209);	MKIVCT(210);	MKIVCT(211);
	MKIVCT(212);	MKIVCT(213);	MKIVCT(214);	MKIVCT(215);
	MKIVCT(216);	MKIVCT(217);	MKIVCT(218);	MKIVCT(219);
	MKIVCT(220);	MKIVCT(221);	MKIVCT(222);	MKIVCT(223);
	MKIVCT(224);	MKIVCT(225);	MKIVCT(226);	MKIVCT(227);
	MKIVCT(228);	MKIVCT(229);	MKIVCT(230);	MKIVCT(231);
	MKIVCT(232);	MKIVCT(233);	MKIVCT(234);	MKIVCT(235);
	MKIVCT(236);	MKIVCT(237);	MKIVCT(238);	MKIVCT(239);
	MKIVCT(240);	MKIVCT(241);	MKIVCT(242);	MKIVCT(243);
	MKIVCT(244);	MKIVCT(245);	MKIVCT(246);	MKIVCT(247);
	MKIVCT(248);	MKIVCT(249);	MKIVCT(250);	MKIVCT(251);
	MKIVCT(252);	MKIVCT(253);	MKIVCT(254);	MKIVCT(255);

/*
 * We're PCIDE, but we don't have INVPCID. The only way to invalidate a
 * PCID other than the current one, then, is to load its cr3 then
 * invlpg. But loading kf_user_cr3 means we can no longer access our
 * caller's text mapping (or indeed, its stack). So this little helper
 * has to live within our trampoline text region.
 *
 * Called as tr_mmu_flush_user_range(addr, len, pgsz, cr3)
 */
	ENTRY_NP(tr_mmu_flush_user_range)
	push	%rbx
	/* When we read cr3, it never has the NOINVL bit set. */
	mov	%cr3, %rax
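	/*
	 * Set NOINVL in the value we'll restore below, so that switching
	 * back to our own %cr3 doesn't flush this PCID's TLB entries.
	 */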
	movq	$CR3_NOINVL_BIT, %rbx
	orq	%rbx, %rax

	mov	%rcx, %cr3
	add	%rdi, %rsi
	.align	ASM_ENTRY_ALIGN
1:
	invlpg	(%rdi)
	add	%rdx, %rdi
	cmp	%rsi, %rdi
	jb	1b
	mov	%rax, %cr3
	pop	%rbx
	retq
	SET_SIZE(tr_mmu_flush_user_range)

.align MMU_PAGESIZE
.global kpti_tramp_end
kpti_tramp_end:
	nop

#endif	/* __lint */