1 /* 2 * This file and its contents are supplied under the terms of the 3 * Common Development and Distribution License ("CDDL"), version 1.0. 4 * You may only use this file in accordance with the terms of version 5 * 1.0 of the CDDL. 6 * 7 * A full copy of the text of the CDDL should have accompanied this 8 * source. A copy of the CDDL is also available via the Internet at 9 * http://www.illumos.org/license/CDDL. 10 */ 11 /* 12 * Copyright 2018 Joyent, Inc. 13 */ 14 15 /* 16 * This file contains the trampolines that are used by KPTI in order to be 17 * able to take interrupts/trap/etc while on the "user" page table. 18 * 19 * We don't map the full kernel text into the user page table: instead we 20 * map this one small section of trampolines (which compiles to ~13 pages). 21 * These trampolines are set in the IDT always (so they will run no matter 22 * whether we're on the kernel or user page table), and their primary job is to 23 * pivot us to the kernel %cr3 and %rsp without ruining everything. 24 * 25 * All of these interrupts use the amd64 IST feature when we have KPTI enabled, 26 * meaning that they will execute with their %rsp set to a known location, even 27 * if we take them in the kernel. 28 * 29 * Over in desctbls.c (for cpu0) and mp_pc.c (other cpus) we set up the IST 30 * stack to point at &cpu->cpu_m.mcpu_kpti.kf_tr_rsp. You can see the mcpu_kpti 31 * (a struct kpti_frame) defined in machcpuvar.h. This struct is set up to be 32 * page-aligned, and we map the page it's on into both page tables. Using a 33 * struct attached to the cpu_t also means that we can use %rsp-relative 34 * addressing to find anything on the cpu_t, so we don't have to touch %gs or 35 * GSBASE at all on incoming interrupt trampolines (which can get pretty hairy). 36 * 37 * This little struct is where the CPU will push the actual interrupt frame. 38 * Then, in the trampoline, we change %cr3, then figure out our destination 39 * stack pointer and "pivot" to it (set %rsp and re-push the CPU's interrupt 40 * frame). Then we jump to the regular ISR in the kernel text and carry on as 41 * normal. 42 * 43 * We leave the original frame and any spilled regs behind in the kpti_frame 44 * lazily until we want to return to userland. Then, we clear any spilled 45 * regs from it, and overwrite the rest with our iret frame. When switching 46 * this cpu to a different process (in hat_switch), we bzero the whole region to 47 * make sure nothing can leak between processes. 48 * 49 * When we're returning back to the original place we took the interrupt later 50 * (especially if it was in userland), we have to jmp back to the "return 51 * trampolines" here, since when we set %cr3 back to the user value, we need to 52 * be executing from code here in these shared pages and not the main kernel 53 * text again. Even though it should be fine to iret directly from kernel text 54 * when returning to kernel code, we make things jmp to a trampoline here just 55 * for consistency. 56 * 57 * Note that with IST, it's very important that we always must have pivoted 58 * away from the IST stack before we could possibly take any other interrupt 59 * on the same IST (unless it's an end-of-the-world fault and we don't care 60 * about coming back from it ever). 61 * 62 * This is particularly relevant to the dbgtrap/brktrap trampolines, as they 63 * regularly have to happen from within trampoline code (e.g. in the sysenter 64 * single-step case) and then return to the world normally. As a result, these 65 * two are IST'd to their own kpti_frame right above the normal one (in the same 66 * page), so they don't clobber their parent interrupt. 67 * 68 * To aid with debugging, we also IST the page fault (#PF/pftrap), general 69 * protection fault (#GP/gptrap) and stack fault (#SS/stktrap) interrupts to 70 * their own separate kpti_frame. This ensures that if we take one of these 71 * due to a bug in trampoline code, we preserve the original trampoline 72 * state that caused the trap. 73 * 74 * NMI, MCE and dblfault interrupts also are taken on their own dedicated IST 75 * stacks, since they can interrupt another ISR at any time. These stacks are 76 * full-sized, however, and not a little kpti_frame struct. We only set %cr3 in 77 * their trampolines (and do it unconditionally), and don't bother pivoting 78 * away. We're either going into the panic() path, or we're going to return 79 * straight away without rescheduling, so it's fine to not be on our real 80 * kthread stack (and some of the state we want to go find it with might be 81 * corrupt!) 82 * 83 * Finally, for these "special" interrupts (NMI/MCE/double fault) we use a 84 * special %cr3 value we stash here in the text (kpti_safe_cr3). We set this to 85 * point at the PML4 for kas early in boot and never touch it again. Hopefully 86 * it survives whatever corruption brings down the rest of the kernel! 87 * 88 * Syscalls are different to interrupts (at least in the SYSENTER/SYSCALL64 89 * cases) in that they do not push an interrupt frame (and also have some other 90 * effects). In the syscall trampolines, we assume that we can only be taking 91 * the call from userland and use SWAPGS and an unconditional overwrite of %cr3. 92 * We do not do any stack pivoting for syscalls (and we leave SYSENTER's 93 * existing %rsp pivot untouched) -- instead we spill registers into 94 * %gs:CPU_KPTI_* as we need to. 95 * 96 * Note that the normal %cr3 values do not cause invalidations with PCIDE - see 97 * hat_switch(). 98 */ 99 100 /* 101 * The macros here mostly line up with what's in kdi_idthdl.s, too, so if you 102 * fix bugs here check to see if they should be fixed there as well. 103 */ 104 105 #include <sys/asm_linkage.h> 106 #include <sys/asm_misc.h> 107 #include <sys/regset.h> 108 #include <sys/privregs.h> 109 #include <sys/psw.h> 110 #include <sys/machbrand.h> 111 #include <sys/param.h> 112 113 #if defined(__lint) 114 115 #include <sys/types.h> 116 #include <sys/thread.h> 117 #include <sys/systm.h> 118 119 #else /* __lint */ 120 121 #include <sys/segments.h> 122 #include <sys/pcb.h> 123 #include <sys/trap.h> 124 #include <sys/ftrace.h> 125 #include <sys/traptrace.h> 126 #include <sys/clock.h> 127 #include <sys/model.h> 128 #include <sys/panic.h> 129 130 #if defined(__xpv) 131 #include <sys/hypervisor.h> 132 #endif 133 134 #include "assym.h" 135 136 .data 137 DGDEF3(kpti_enable, 8, 8) 138 .fill 1, 8, 1 139 140 #if DEBUG 141 .data 142 _bad_ts_panic_msg: 143 .string "kpti_trampolines.s: tr_iret_user but CR0.TS set" 144 #endif 145 146 .section ".text"; 147 .align MMU_PAGESIZE 148 149 .global kpti_tramp_start 150 kpti_tramp_start: 151 nop 152 153 /* This will be set by mlsetup, and then double-checked later */ 154 .global kpti_safe_cr3 155 kpti_safe_cr3: 156 .quad 0 157 SET_SIZE(kpti_safe_cr3) 158 159 /* startup_kmem() will overwrite this */ 160 .global kpti_kbase 161 kpti_kbase: 162 .quad KERNELBASE 163 SET_SIZE(kpti_kbase) 164 165 #define SET_KERNEL_CR3(spillreg) \ 166 mov %cr3, spillreg; \ 167 mov spillreg, %gs:CPU_KPTI_TR_CR3; \ 168 mov %gs:CPU_KPTI_KCR3, spillreg; \ 169 cmp $0, spillreg; \ 170 je 2f; \ 171 mov spillreg, %cr3; \ 172 2: 173 174 #if DEBUG 175 #define SET_USER_CR3(spillreg) \ 176 mov %cr3, spillreg; \ 177 mov spillreg, %gs:CPU_KPTI_TR_CR3; \ 178 mov %gs:CPU_KPTI_UCR3, spillreg; \ 179 mov spillreg, %cr3 180 #else 181 #define SET_USER_CR3(spillreg) \ 182 mov %gs:CPU_KPTI_UCR3, spillreg; \ 183 mov spillreg, %cr3 184 #endif 185 186 #define PIVOT_KPTI_STK(spillreg) \ 187 mov %rsp, spillreg; \ 188 mov %gs:CPU_KPTI_RET_RSP, %rsp; \ 189 pushq T_FRAMERET_SS(spillreg); \ 190 pushq T_FRAMERET_RSP(spillreg); \ 191 pushq T_FRAMERET_RFLAGS(spillreg); \ 192 pushq T_FRAMERET_CS(spillreg); \ 193 pushq T_FRAMERET_RIP(spillreg) 194 195 196 #define INTERRUPT_TRAMPOLINE_P(errpush) \ 197 pushq %r13; \ 198 pushq %r14; \ 199 subq $KPTI_R14, %rsp; \ 200 /* Save current %cr3. */ \ 201 mov %cr3, %r14; \ 202 mov %r14, KPTI_TR_CR3(%rsp); \ 203 \ 204 cmpw $KCS_SEL, KPTI_CS(%rsp); \ 205 je 3f; \ 206 1: \ 207 /* Change to the "kernel" %cr3 */ \ 208 mov KPTI_KCR3(%rsp), %r14; \ 209 cmp $0, %r14; \ 210 je 2f; \ 211 mov %r14, %cr3; \ 212 2: \ 213 /* Get our cpu_t in %r13 */ \ 214 mov %rsp, %r13; \ 215 and $(~(MMU_PAGESIZE - 1)), %r13; \ 216 subq $CPU_KPTI_START, %r13; \ 217 /* Use top of the kthread stk */ \ 218 mov CPU_THREAD(%r13), %r14; \ 219 mov T_STACK(%r14), %r14; \ 220 addq $REGSIZE+MINFRAME, %r14; \ 221 jmp 4f; \ 222 3: \ 223 /* Check the %rsp in the frame. */ \ 224 /* Is it above kernel base? */ \ 225 mov kpti_kbase, %r14; \ 226 cmp %r14, KPTI_RSP(%rsp); \ 227 jb 1b; \ 228 /* Use the %rsp from the trap frame */ \ 229 mov KPTI_RSP(%rsp), %r14; \ 230 and $(~0xf), %r14; \ 231 4: \ 232 mov %rsp, %r13; \ 233 /* %r14 contains our destination stk */ \ 234 mov %r14, %rsp; \ 235 pushq KPTI_SS(%r13); \ 236 pushq KPTI_RSP(%r13); \ 237 pushq KPTI_RFLAGS(%r13); \ 238 pushq KPTI_CS(%r13); \ 239 pushq KPTI_RIP(%r13); \ 240 errpush; \ 241 mov KPTI_R14(%r13), %r14; \ 242 mov KPTI_R13(%r13), %r13 243 244 #define INTERRUPT_TRAMPOLINE_NOERR \ 245 INTERRUPT_TRAMPOLINE_P(/**/) 246 247 #define INTERRUPT_TRAMPOLINE \ 248 INTERRUPT_TRAMPOLINE_P(pushq KPTI_ERR(%r13)) 249 250 /* 251 * This is used for all interrupts that can plausibly be taken inside another 252 * interrupt and are using a kpti_frame stack (so #BP, #DB, #GP, #PF, #SS). 253 * 254 * We check for whether we took the interrupt while in another trampoline, in 255 * which case we need to use the kthread stack. 256 */ 257 #define DBG_INTERRUPT_TRAMPOLINE_P(errpush) \ 258 pushq %r13; \ 259 pushq %r14; \ 260 subq $KPTI_R14, %rsp; \ 261 /* Check for clobbering */ \ 262 cmp $0, KPTI_FLAG(%rsp); \ 263 je 1f; \ 264 /* Don't worry, this totally works */ \ 265 int $8; \ 266 1: \ 267 movq $1, KPTI_FLAG(%rsp); \ 268 /* Save current %cr3. */ \ 269 mov %cr3, %r14; \ 270 mov %r14, KPTI_TR_CR3(%rsp); \ 271 \ 272 cmpw $KCS_SEL, KPTI_CS(%rsp); \ 273 je 4f; \ 274 2: \ 275 /* Change to the "kernel" %cr3 */ \ 276 mov KPTI_KCR3(%rsp), %r14; \ 277 cmp $0, %r14; \ 278 je 3f; \ 279 mov %r14, %cr3; \ 280 3: \ 281 /* Get our cpu_t in %r13 */ \ 282 mov %rsp, %r13; \ 283 and $(~(MMU_PAGESIZE - 1)), %r13; \ 284 subq $CPU_KPTI_START, %r13; \ 285 /* Use top of the kthread stk */ \ 286 mov CPU_THREAD(%r13), %r14; \ 287 mov T_STACK(%r14), %r14; \ 288 addq $REGSIZE+MINFRAME, %r14; \ 289 jmp 6f; \ 290 4: \ 291 /* Check the %rsp in the frame. */ \ 292 /* Is it above kernel base? */ \ 293 /* If not, treat as user. */ \ 294 mov kpti_kbase, %r14; \ 295 cmp %r14, KPTI_RSP(%rsp); \ 296 jb 2b; \ 297 /* Is it within the kpti_frame page? */ \ 298 /* If it is, treat as user interrupt */ \ 299 mov %rsp, %r13; \ 300 and $(~(MMU_PAGESIZE - 1)), %r13; \ 301 mov KPTI_RSP(%rsp), %r14; \ 302 and $(~(MMU_PAGESIZE - 1)), %r14; \ 303 cmp %r13, %r14; \ 304 je 2b; \ 305 /* Were we in trampoline code? */ \ 306 leaq kpti_tramp_start, %r14; \ 307 cmp %r14, KPTI_RIP(%rsp); \ 308 jb 5f; \ 309 leaq kpti_tramp_end, %r14; \ 310 cmp %r14, KPTI_RIP(%rsp); \ 311 ja 5f; \ 312 /* If we were, change %cr3: we might */ \ 313 /* have interrupted before it did. */ \ 314 mov KPTI_KCR3(%rsp), %r14; \ 315 mov %r14, %cr3; \ 316 5: \ 317 /* Use the %rsp from the trap frame */ \ 318 mov KPTI_RSP(%rsp), %r14; \ 319 and $(~0xf), %r14; \ 320 6: \ 321 mov %rsp, %r13; \ 322 /* %r14 contains our destination stk */ \ 323 mov %r14, %rsp; \ 324 pushq KPTI_SS(%r13); \ 325 pushq KPTI_RSP(%r13); \ 326 pushq KPTI_RFLAGS(%r13); \ 327 pushq KPTI_CS(%r13); \ 328 pushq KPTI_RIP(%r13); \ 329 errpush; \ 330 mov KPTI_R14(%r13), %r14; \ 331 movq $0, KPTI_FLAG(%r13); \ 332 mov KPTI_R13(%r13), %r13 333 334 #define DBG_INTERRUPT_TRAMPOLINE_NOERR \ 335 DBG_INTERRUPT_TRAMPOLINE_P(/**/) 336 337 #define DBG_INTERRUPT_TRAMPOLINE \ 338 DBG_INTERRUPT_TRAMPOLINE_P(pushq KPTI_ERR(%r13)) 339 340 /* 341 * These labels (_start and _end) are used by trap.c to determine if 342 * we took an interrupt like an NMI during the return process. 343 */ 344 .global tr_sysc_ret_start 345 tr_sysc_ret_start: 346 347 /* 348 * Syscall return trampolines. 349 * 350 * These are expected to be called on the kernel %gs. tr_sysret[ql] are 351 * called after %rsp is changed back to the user value, so we have no 352 * stack to work with. tr_sysexit has a kernel stack (but has to 353 * preserve rflags, soooo). 354 */ 355 ENTRY_NP(tr_sysretq) 356 cmpq $1, kpti_enable 357 jne 1f 358 359 mov %r13, %gs:CPU_KPTI_R13 360 SET_USER_CR3(%r13) 361 mov %gs:CPU_KPTI_R13, %r13 362 /* Zero these to make sure they didn't leak from a kernel trap */ 363 movq $0, %gs:CPU_KPTI_R13 364 movq $0, %gs:CPU_KPTI_R14 365 1: 366 swapgs 367 sysretq 368 SET_SIZE(tr_sysretq) 369 370 ENTRY_NP(tr_sysretl) 371 cmpq $1, kpti_enable 372 jne 1f 373 374 mov %r13, %gs:CPU_KPTI_R13 375 SET_USER_CR3(%r13) 376 mov %gs:CPU_KPTI_R13, %r13 377 /* Zero these to make sure they didn't leak from a kernel trap */ 378 movq $0, %gs:CPU_KPTI_R13 379 movq $0, %gs:CPU_KPTI_R14 380 1: 381 SWAPGS 382 SYSRETL 383 SET_SIZE(tr_sysretl) 384 385 ENTRY_NP(tr_sysexit) 386 /* 387 * Note: we want to preserve RFLAGS across this branch, since sysexit 388 * (unlike sysret above) does not restore RFLAGS for us. 389 * 390 * We still have the real kernel stack (sysexit does restore that), so 391 * we can use pushfq/popfq. 392 */ 393 pushfq 394 395 cmpq $1, kpti_enable 396 jne 1f 397 398 /* Have to pop it back off now before we change %cr3! */ 399 popfq 400 mov %r13, %gs:CPU_KPTI_R13 401 SET_USER_CR3(%r13) 402 mov %gs:CPU_KPTI_R13, %r13 403 /* Zero these to make sure they didn't leak from a kernel trap */ 404 movq $0, %gs:CPU_KPTI_R13 405 movq $0, %gs:CPU_KPTI_R14 406 jmp 2f 407 1: 408 popfq 409 2: 410 swapgs 411 sti 412 sysexit 413 SET_SIZE(tr_sysexit) 414 415 .global tr_sysc_ret_end 416 tr_sysc_ret_end: 417 418 /* 419 * Syscall entry trampolines. 420 */ 421 422 #if DEBUG 423 #define MK_SYSCALL_TRAMPOLINE(isr) \ 424 ENTRY_NP(tr_/**/isr); \ 425 swapgs; \ 426 mov %r13, %gs:CPU_KPTI_R13; \ 427 mov %cr3, %r13; \ 428 mov %r13, %gs:CPU_KPTI_TR_CR3; \ 429 mov %gs:CPU_KPTI_KCR3, %r13; \ 430 mov %r13, %cr3; \ 431 mov %gs:CPU_KPTI_R13, %r13; \ 432 swapgs; \ 433 jmp isr; \ 434 SET_SIZE(tr_/**/isr) 435 #else 436 #define MK_SYSCALL_TRAMPOLINE(isr) \ 437 ENTRY_NP(tr_/**/isr); \ 438 swapgs; \ 439 mov %r13, %gs:CPU_KPTI_R13; \ 440 mov %gs:CPU_KPTI_KCR3, %r13; \ 441 mov %r13, %cr3; \ 442 mov %gs:CPU_KPTI_R13, %r13; \ 443 swapgs; \ 444 jmp isr; \ 445 SET_SIZE(tr_/**/isr) 446 #endif 447 448 MK_SYSCALL_TRAMPOLINE(sys_syscall) 449 MK_SYSCALL_TRAMPOLINE(sys_syscall32) 450 MK_SYSCALL_TRAMPOLINE(brand_sys_syscall) 451 MK_SYSCALL_TRAMPOLINE(brand_sys_syscall32) 452 453 /* 454 * SYSENTER is special. The CPU is really not very helpful when it 455 * comes to preserving and restoring state with it, and as a result 456 * we have to do all of it by hand. So, since we want to preserve 457 * RFLAGS, we have to be very careful in these trampolines to not 458 * clobber any bits in it. That means no cmpqs or branches! 459 */ 460 ENTRY_NP(tr_sys_sysenter) 461 swapgs 462 mov %r13, %gs:CPU_KPTI_R13 463 #if DEBUG 464 mov %cr3, %r13 465 mov %r13, %gs:CPU_KPTI_TR_CR3 466 #endif 467 mov %gs:CPU_KPTI_KCR3, %r13 468 mov %r13, %cr3 469 mov %gs:CPU_KPTI_R13, %r13 470 jmp _sys_sysenter_post_swapgs 471 SET_SIZE(tr_sys_sysenter) 472 473 ENTRY_NP(tr_brand_sys_sysenter) 474 swapgs 475 mov %r13, %gs:CPU_KPTI_R13 476 #if DEBUG 477 mov %cr3, %r13 478 mov %r13, %gs:CPU_KPTI_TR_CR3 479 #endif 480 mov %gs:CPU_KPTI_KCR3, %r13 481 mov %r13, %cr3 482 mov %gs:CPU_KPTI_R13, %r13 483 jmp _brand_sys_sysenter_post_swapgs 484 SET_SIZE(tr_brand_sys_sysenter) 485 486 #define MK_SYSCALL_INT_TRAMPOLINE(isr) \ 487 ENTRY_NP(tr_/**/isr); \ 488 swapgs; \ 489 mov %r13, %gs:CPU_KPTI_R13; \ 490 SET_KERNEL_CR3(%r13); \ 491 mov %gs:CPU_THREAD, %r13; \ 492 mov T_STACK(%r13), %r13; \ 493 addq $REGSIZE+MINFRAME, %r13; \ 494 mov %r13, %rsp; \ 495 pushq %gs:CPU_KPTI_SS; \ 496 pushq %gs:CPU_KPTI_RSP; \ 497 pushq %gs:CPU_KPTI_RFLAGS; \ 498 pushq %gs:CPU_KPTI_CS; \ 499 pushq %gs:CPU_KPTI_RIP; \ 500 mov %gs:CPU_KPTI_R13, %r13; \ 501 SWAPGS; \ 502 jmp isr; \ 503 SET_SIZE(tr_/**/isr) 504 505 MK_SYSCALL_INT_TRAMPOLINE(brand_sys_syscall_int) 506 MK_SYSCALL_INT_TRAMPOLINE(sys_syscall_int) 507 508 /* 509 * Interrupt/trap return trampolines 510 */ 511 512 .global tr_intr_ret_start 513 tr_intr_ret_start: 514 515 ENTRY_NP(tr_iret_auto) 516 cmpq $1, kpti_enable 517 jne tr_iret_kernel 518 cmpw $KCS_SEL, T_FRAMERET_CS(%rsp) 519 je tr_iret_kernel 520 jmp tr_iret_user 521 SET_SIZE(tr_iret_auto) 522 523 ENTRY_NP(tr_iret_kernel) 524 /* 525 * Yes, this does nothing extra. But this way we know if we see iret 526 * elsewhere, then we've failed to properly consider trampolines there. 527 */ 528 iretq 529 SET_SIZE(tr_iret_kernel) 530 531 ENTRY_NP(tr_iret_user) 532 #if DEBUG 533 /* 534 * Ensure that we return to user land with CR0.TS clear. We do this 535 * before we trampoline back and pivot the stack and %cr3. This way 536 * we're still on the kernel stack and kernel %cr3, though we are on the 537 * user GSBASE. 538 */ 539 pushq %rax 540 mov %cr0, %rax 541 testq $CR0_TS, %rax 542 jz 1f 543 swapgs 544 popq %rax 545 leaq _bad_ts_panic_msg(%rip), %rdi 546 xorl %eax, %eax 547 pushq %rbp 548 movq %rsp, %rbp 549 call panic 550 1: 551 popq %rax 552 #endif 553 554 cmpq $1, kpti_enable 555 jne 1f 556 557 swapgs 558 mov %r13, %gs:CPU_KPTI_R13 559 PIVOT_KPTI_STK(%r13) 560 SET_USER_CR3(%r13) 561 mov %gs:CPU_KPTI_R13, %r13 562 /* Zero these to make sure they didn't leak from a kernel trap */ 563 movq $0, %gs:CPU_KPTI_R13 564 movq $0, %gs:CPU_KPTI_R14 565 swapgs 566 1: 567 iretq 568 SET_SIZE(tr_iret_user) 569 570 /* 571 * This special return trampoline is for KDI's use only (with kmdb). 572 * 573 * KDI/kmdb do not use swapgs -- they directly write the GSBASE MSR 574 * instead. This trampoline runs after GSBASE has already been changed 575 * back to the userland value (so we can't use %gs). 576 * 577 * Instead, the caller gives us a pointer to the kpti_dbg frame in %r13. 578 * The KPTI_R13 member in the kpti_dbg has already been set to what the 579 * real %r13 should be before we IRET. 580 * 581 * Additionally, KDI keeps a copy of the incoming %cr3 value when it 582 * took an interrupt, and has put that back in the kpti_dbg area for us 583 * to use, so we don't do any sniffing of %cs here. This is important 584 * so that debugging code that changes %cr3 is possible. 585 */ 586 ENTRY_NP(tr_iret_kdi) 587 movq %r14, KPTI_R14(%r13) /* %r14 has to be preserved by us */ 588 589 movq %rsp, %r14 /* original %rsp is pointing at IRET frame */ 590 leaq KPTI_TOP(%r13), %rsp 591 pushq T_FRAMERET_SS(%r14) 592 pushq T_FRAMERET_RSP(%r14) 593 pushq T_FRAMERET_RFLAGS(%r14) 594 pushq T_FRAMERET_CS(%r14) 595 pushq T_FRAMERET_RIP(%r14) 596 597 movq KPTI_TR_CR3(%r13), %r14 598 movq %r14, %cr3 599 600 movq KPTI_R14(%r13), %r14 601 movq KPTI_R13(%r13), %r13 /* preserved by our caller */ 602 603 iretq 604 SET_SIZE(tr_iret_kdi) 605 606 .global tr_intr_ret_end 607 tr_intr_ret_end: 608 609 /* 610 * Interrupt/trap entry trampolines 611 */ 612 613 /* CPU pushed an error code, and ISR wants one */ 614 #define MK_INTR_TRAMPOLINE(isr) \ 615 ENTRY_NP(tr_/**/isr); \ 616 INTERRUPT_TRAMPOLINE; \ 617 jmp isr; \ 618 SET_SIZE(tr_/**/isr) 619 620 /* CPU didn't push an error code, and ISR doesn't want one */ 621 #define MK_INTR_TRAMPOLINE_NOERR(isr) \ 622 ENTRY_NP(tr_/**/isr); \ 623 push $0; \ 624 INTERRUPT_TRAMPOLINE_NOERR; \ 625 jmp isr; \ 626 SET_SIZE(tr_/**/isr) 627 628 /* CPU pushed an error code, and ISR wants one */ 629 #define MK_DBG_INTR_TRAMPOLINE(isr) \ 630 ENTRY_NP(tr_/**/isr); \ 631 DBG_INTERRUPT_TRAMPOLINE; \ 632 jmp isr; \ 633 SET_SIZE(tr_/**/isr) 634 635 /* CPU didn't push an error code, and ISR doesn't want one */ 636 #define MK_DBG_INTR_TRAMPOLINE_NOERR(isr) \ 637 ENTRY_NP(tr_/**/isr); \ 638 push $0; \ 639 DBG_INTERRUPT_TRAMPOLINE_NOERR; \ 640 jmp isr; \ 641 SET_SIZE(tr_/**/isr) 642 643 644 MK_INTR_TRAMPOLINE_NOERR(div0trap) 645 MK_DBG_INTR_TRAMPOLINE_NOERR(dbgtrap) 646 MK_DBG_INTR_TRAMPOLINE_NOERR(brktrap) 647 MK_INTR_TRAMPOLINE_NOERR(ovflotrap) 648 MK_INTR_TRAMPOLINE_NOERR(boundstrap) 649 MK_INTR_TRAMPOLINE_NOERR(invoptrap) 650 MK_INTR_TRAMPOLINE_NOERR(ndptrap) 651 MK_INTR_TRAMPOLINE(invtsstrap) 652 MK_INTR_TRAMPOLINE(segnptrap) 653 MK_DBG_INTR_TRAMPOLINE(stktrap) 654 MK_DBG_INTR_TRAMPOLINE(gptrap) 655 MK_DBG_INTR_TRAMPOLINE(pftrap) 656 MK_INTR_TRAMPOLINE_NOERR(resvtrap) 657 MK_INTR_TRAMPOLINE_NOERR(ndperr) 658 MK_INTR_TRAMPOLINE(achktrap) 659 MK_INTR_TRAMPOLINE_NOERR(xmtrap) 660 MK_INTR_TRAMPOLINE_NOERR(invaltrap) 661 MK_INTR_TRAMPOLINE_NOERR(fasttrap) 662 MK_INTR_TRAMPOLINE_NOERR(dtrace_ret) 663 664 /* 665 * These are special because they can interrupt other traps, and 666 * each other. We don't need to pivot their stacks, because they have 667 * dedicated IST stack space, but we need to change %cr3. 668 */ 669 ENTRY_NP(tr_nmiint) 670 pushq %r13 671 mov kpti_safe_cr3, %r13 672 mov %r13, %cr3 673 popq %r13 674 jmp nmiint 675 SET_SIZE(tr_nmiint) 676 677 #if !defined(__xpv) 678 ENTRY_NP(tr_syserrtrap) 679 /* 680 * If we got here we should always have a zero error code pushed. 681 * The INT $0x8 instr doesn't seem to push one, though, which we use 682 * as an emergency panic in the other trampolines. So adjust things 683 * here. 684 */ 685 cmpq $0, (%rsp) 686 je 1f 687 pushq $0 688 1: 689 pushq %r13 690 mov kpti_safe_cr3, %r13 691 mov %r13, %cr3 692 popq %r13 693 jmp syserrtrap 694 SET_SIZE(tr_syserrtrap) 695 #endif 696 697 ENTRY_NP(tr_mcetrap) 698 pushq %r13 699 mov kpti_safe_cr3, %r13 700 mov %r13, %cr3 701 popq %r13 702 jmp mcetrap 703 SET_SIZE(tr_mcetrap) 704 705 /* 706 * Interrupts start at 32 707 */ 708 #define MKIVCT(n) \ 709 ENTRY_NP(tr_ivct/**/n) \ 710 push $0; \ 711 INTERRUPT_TRAMPOLINE; \ 712 push $n - 0x20; \ 713 jmp cmnint; \ 714 SET_SIZE(tr_ivct/**/n) 715 716 MKIVCT(32); MKIVCT(33); MKIVCT(34); MKIVCT(35); 717 MKIVCT(36); MKIVCT(37); MKIVCT(38); MKIVCT(39); 718 MKIVCT(40); MKIVCT(41); MKIVCT(42); MKIVCT(43); 719 MKIVCT(44); MKIVCT(45); MKIVCT(46); MKIVCT(47); 720 MKIVCT(48); MKIVCT(49); MKIVCT(50); MKIVCT(51); 721 MKIVCT(52); MKIVCT(53); MKIVCT(54); MKIVCT(55); 722 MKIVCT(56); MKIVCT(57); MKIVCT(58); MKIVCT(59); 723 MKIVCT(60); MKIVCT(61); MKIVCT(62); MKIVCT(63); 724 MKIVCT(64); MKIVCT(65); MKIVCT(66); MKIVCT(67); 725 MKIVCT(68); MKIVCT(69); MKIVCT(70); MKIVCT(71); 726 MKIVCT(72); MKIVCT(73); MKIVCT(74); MKIVCT(75); 727 MKIVCT(76); MKIVCT(77); MKIVCT(78); MKIVCT(79); 728 MKIVCT(80); MKIVCT(81); MKIVCT(82); MKIVCT(83); 729 MKIVCT(84); MKIVCT(85); MKIVCT(86); MKIVCT(87); 730 MKIVCT(88); MKIVCT(89); MKIVCT(90); MKIVCT(91); 731 MKIVCT(92); MKIVCT(93); MKIVCT(94); MKIVCT(95); 732 MKIVCT(96); MKIVCT(97); MKIVCT(98); MKIVCT(99); 733 MKIVCT(100); MKIVCT(101); MKIVCT(102); MKIVCT(103); 734 MKIVCT(104); MKIVCT(105); MKIVCT(106); MKIVCT(107); 735 MKIVCT(108); MKIVCT(109); MKIVCT(110); MKIVCT(111); 736 MKIVCT(112); MKIVCT(113); MKIVCT(114); MKIVCT(115); 737 MKIVCT(116); MKIVCT(117); MKIVCT(118); MKIVCT(119); 738 MKIVCT(120); MKIVCT(121); MKIVCT(122); MKIVCT(123); 739 MKIVCT(124); MKIVCT(125); MKIVCT(126); MKIVCT(127); 740 MKIVCT(128); MKIVCT(129); MKIVCT(130); MKIVCT(131); 741 MKIVCT(132); MKIVCT(133); MKIVCT(134); MKIVCT(135); 742 MKIVCT(136); MKIVCT(137); MKIVCT(138); MKIVCT(139); 743 MKIVCT(140); MKIVCT(141); MKIVCT(142); MKIVCT(143); 744 MKIVCT(144); MKIVCT(145); MKIVCT(146); MKIVCT(147); 745 MKIVCT(148); MKIVCT(149); MKIVCT(150); MKIVCT(151); 746 MKIVCT(152); MKIVCT(153); MKIVCT(154); MKIVCT(155); 747 MKIVCT(156); MKIVCT(157); MKIVCT(158); MKIVCT(159); 748 MKIVCT(160); MKIVCT(161); MKIVCT(162); MKIVCT(163); 749 MKIVCT(164); MKIVCT(165); MKIVCT(166); MKIVCT(167); 750 MKIVCT(168); MKIVCT(169); MKIVCT(170); MKIVCT(171); 751 MKIVCT(172); MKIVCT(173); MKIVCT(174); MKIVCT(175); 752 MKIVCT(176); MKIVCT(177); MKIVCT(178); MKIVCT(179); 753 MKIVCT(180); MKIVCT(181); MKIVCT(182); MKIVCT(183); 754 MKIVCT(184); MKIVCT(185); MKIVCT(186); MKIVCT(187); 755 MKIVCT(188); MKIVCT(189); MKIVCT(190); MKIVCT(191); 756 MKIVCT(192); MKIVCT(193); MKIVCT(194); MKIVCT(195); 757 MKIVCT(196); MKIVCT(197); MKIVCT(198); MKIVCT(199); 758 MKIVCT(200); MKIVCT(201); MKIVCT(202); MKIVCT(203); 759 MKIVCT(204); MKIVCT(205); MKIVCT(206); MKIVCT(207); 760 MKIVCT(208); MKIVCT(209); MKIVCT(210); MKIVCT(211); 761 MKIVCT(212); MKIVCT(213); MKIVCT(214); MKIVCT(215); 762 MKIVCT(216); MKIVCT(217); MKIVCT(218); MKIVCT(219); 763 MKIVCT(220); MKIVCT(221); MKIVCT(222); MKIVCT(223); 764 MKIVCT(224); MKIVCT(225); MKIVCT(226); MKIVCT(227); 765 MKIVCT(228); MKIVCT(229); MKIVCT(230); MKIVCT(231); 766 MKIVCT(232); MKIVCT(233); MKIVCT(234); MKIVCT(235); 767 MKIVCT(236); MKIVCT(237); MKIVCT(238); MKIVCT(239); 768 MKIVCT(240); MKIVCT(241); MKIVCT(242); MKIVCT(243); 769 MKIVCT(244); MKIVCT(245); MKIVCT(246); MKIVCT(247); 770 MKIVCT(248); MKIVCT(249); MKIVCT(250); MKIVCT(251); 771 MKIVCT(252); MKIVCT(253); MKIVCT(254); MKIVCT(255); 772 773 /* 774 * We're PCIDE, but we don't have INVPCID. The only way to invalidate a 775 * PCID other than the current one, then, is to load its cr3 then 776 * invlpg. But loading kf_user_cr3 means we can longer access our 777 * caller's text mapping (or indeed, its stack). So this little helper 778 * has to live within our trampoline text region. 779 * 780 * Called as tr_mmu_flush_user_range(addr, len, pgsz, cr3) 781 */ 782 ENTRY_NP(tr_mmu_flush_user_range) 783 push %rbx 784 /* When we read cr3, it never has the NOINVL bit set. */ 785 mov %cr3, %rax 786 movq $CR3_NOINVL_BIT, %rbx 787 orq %rbx, %rax 788 789 mov %rcx, %cr3 790 add %rdi, %rsi 791 .align ASM_ENTRY_ALIGN 792 1: 793 invlpg (%rdi) 794 add %rdx, %rdi 795 cmp %rsi, %rdi 796 jb 1b 797 mov %rax, %cr3 798 pop %rbx 799 retq 800 SET_SIZE(tr_mmu_flush_user_range) 801 802 .align MMU_PAGESIZE 803 .global kpti_tramp_end 804 kpti_tramp_end: 805 nop 806 807 #endif /* __lint */