1 /* 2 * This file and its contents are supplied under the terms of the 3 * Common Development and Distribution License ("CDDL"), version 1.0. 4 * You may only use this file in accordance with the terms of version 5 * 1.0 of the CDDL. 6 * 7 * A full copy of the text of the CDDL should have accompanied this 8 * source. A copy of the CDDL is also available via the Internet at 9 * http://www.illumos.org/license/CDDL. 10 */ 11 /* 12 * Copyright 2018 Joyent, Inc. 13 */ 14 15 /* 16 * This file contains the trampolines that are used by KPTI in order to be 17 * able to take interrupts/trap/etc while on the "user" page table. 18 * 19 * We don't map the full kernel text into the user page table: instead we 20 * map this one small section of trampolines (which compiles to ~13 pages). 21 * These trampolines are set in the IDT always (so they will run no matter 22 * whether we're on the kernel or user page table), and their primary job is to 23 * pivot us to the kernel %cr3 and %rsp without ruining everything. 24 * 25 * All of these interrupts use the amd64 IST feature when we have KPTI enabled, 26 * meaning that they will execute with their %rsp set to a known location, even 27 * if we take them in the kernel. 28 * 29 * Over in desctbls.c (for cpu0) and mp_pc.c (other cpus) we set up the IST 30 * stack to point at &cpu->cpu_m.mcpu_kpti.kf_tr_rsp. You can see the mcpu_kpti 31 * (a struct kpti_frame) defined in machcpuvar.h. This struct is set up to be 32 * page-aligned, and we map the page it's on into both page tables. Using a 33 * struct attached to the cpu_t also means that we can use %rsp-relative 34 * addressing to find anything on the cpu_t, so we don't have to touch %gs or 35 * GSBASE at all on incoming interrupt trampolines (which can get pretty hairy). 36 * 37 * This little struct is where the CPU will push the actual interrupt frame. 38 * Then, in the trampoline, we change %cr3, then figure out our destination 39 * stack pointer and "pivot" to it (set %rsp and re-push the CPU's interrupt 40 * frame). Then we jump to the regular ISR in the kernel text and carry on as 41 * normal. 42 * 43 * We leave the original frame and any spilled regs behind in the kpti_frame 44 * lazily until we want to return to userland. Then, we clear any spilled 45 * regs from it, and overwrite the rest with our iret frame. When switching 46 * this cpu to a different process (in hat_switch), we bzero the whole region to 47 * make sure nothing can leak between processes. 48 * 49 * When we're returning back to the original place we took the interrupt later 50 * (especially if it was in userland), we have to jmp back to the "return 51 * trampolines" here, since when we set %cr3 back to the user value, we need to 52 * be executing from code here in these shared pages and not the main kernel 53 * text again. Even though it should be fine to iret directly from kernel text 54 * when returning to kernel code, we make things jmp to a trampoline here just 55 * for consistency. 56 * 57 * Note that with IST, it's very important that we always must have pivoted 58 * away from the IST stack before we could possibly take any other interrupt 59 * on the same IST (unless it's an end-of-the-world fault and we don't care 60 * about coming back from it ever). 61 * 62 * This is particularly relevant to the dbgtrap/brktrap trampolines, as they 63 * regularly have to happen from within trampoline code (e.g. in the sysenter 64 * single-step case) and then return to the world normally. As a result, these 65 * two are IST'd to their own kpti_frame right above the normal one (in the same 66 * page), so they don't clobber their parent interrupt. 67 * 68 * To aid with debugging, we also IST the page fault (#PF/pftrap), general 69 * protection fault (#GP/gptrap) and stack fault (#SS/stktrap) interrupts to 70 * their own separate kpti_frame. This ensures that if we take one of these 71 * due to a bug in trampoline code, we preserve the original trampoline 72 * state that caused the trap. 73 * 74 * NMI, MCE and dblfault interrupts also are taken on their own dedicated IST 75 * stacks, since they can interrupt another ISR at any time. These stacks are 76 * full-sized, however, and not a little kpti_frame struct. We only set %cr3 in 77 * their trampolines (and do it unconditionally), and don't bother pivoting 78 * away. We're either going into the panic() path, or we're going to return 79 * straight away without rescheduling, so it's fine to not be on our real 80 * kthread stack (and some of the state we want to go find it with might be 81 * corrupt!) 82 * 83 * Finally, for these "special" interrupts (NMI/MCE/double fault) we use a 84 * special %cr3 value we stash here in the text (kpti_safe_cr3). We set this to 85 * point at the PML4 for kas early in boot and never touch it again. Hopefully 86 * it survives whatever corruption brings down the rest of the kernel! 87 * 88 * Syscalls are different to interrupts (at least in the SYSENTER/SYSCALL64 89 * cases) in that they do not push an interrupt frame (and also have some other 90 * effects). In the syscall trampolines, we assume that we can only be taking 91 * the call from userland and use SWAPGS and an unconditional overwrite of %cr3. 92 * We do not do any stack pivoting for syscalls (and we leave SYSENTER's 93 * existing %rsp pivot untouched) -- instead we spill registers into 94 * %gs:CPU_KPTI_* as we need to. 95 * 96 * Note that the normal %cr3 values do not cause invalidations with PCIDE - see 97 * hat_switch(). 98 */ 99 100 /* 101 * The macros here mostly line up with what's in kdi_idthdl.s, too, so if you 102 * fix bugs here check to see if they should be fixed there as well. 103 */ 104 105 #include <sys/asm_linkage.h> 106 #include <sys/asm_misc.h> 107 #include <sys/regset.h> 108 #include <sys/privregs.h> 109 #include <sys/psw.h> 110 #include <sys/machbrand.h> 111 #include <sys/param.h> 112 113 #if defined(__lint) 114 115 #include <sys/types.h> 116 #include <sys/thread.h> 117 #include <sys/systm.h> 118 119 #else /* __lint */ 120 121 #include <sys/segments.h> 122 #include <sys/pcb.h> 123 #include <sys/trap.h> 124 #include <sys/ftrace.h> 125 #include <sys/traptrace.h> 126 #include <sys/clock.h> 127 #include <sys/model.h> 128 #include <sys/panic.h> 129 130 #if defined(__xpv) 131 #include <sys/hypervisor.h> 132 #endif 133 134 #include "assym.h" 135 136 .data 137 DGDEF3(kpti_enable, 8, 8) 138 .fill 1, 8, 1 139 140 .section ".text"; 141 .align MMU_PAGESIZE 142 143 .global kpti_tramp_start 144 kpti_tramp_start: 145 nop 146 147 /* This will be set by mlsetup, and then double-checked later */ 148 .global kpti_safe_cr3 149 kpti_safe_cr3: 150 .quad 0 151 SET_SIZE(kpti_safe_cr3) 152 153 /* startup_kmem() will overwrite this */ 154 .global kpti_kbase 155 kpti_kbase: 156 .quad KERNELBASE 157 SET_SIZE(kpti_kbase) 158 159 #define SET_KERNEL_CR3(spillreg) \ 160 mov %cr3, spillreg; \ 161 mov spillreg, %gs:CPU_KPTI_TR_CR3; \ 162 mov %gs:CPU_KPTI_KCR3, spillreg; \ 163 cmp $0, spillreg; \ 164 je 2f; \ 165 mov spillreg, %cr3; \ 166 2: 167 168 #if DEBUG 169 #define SET_USER_CR3(spillreg) \ 170 mov %cr3, spillreg; \ 171 mov spillreg, %gs:CPU_KPTI_TR_CR3; \ 172 mov %gs:CPU_KPTI_UCR3, spillreg; \ 173 mov spillreg, %cr3 174 #else 175 #define SET_USER_CR3(spillreg) \ 176 mov %gs:CPU_KPTI_UCR3, spillreg; \ 177 mov spillreg, %cr3 178 #endif 179 180 #define PIVOT_KPTI_STK(spillreg) \ 181 mov %rsp, spillreg; \ 182 mov %gs:CPU_KPTI_RET_RSP, %rsp; \ 183 pushq T_FRAMERET_SS(spillreg); \ 184 pushq T_FRAMERET_RSP(spillreg); \ 185 pushq T_FRAMERET_RFLAGS(spillreg); \ 186 pushq T_FRAMERET_CS(spillreg); \ 187 pushq T_FRAMERET_RIP(spillreg) 188 189 190 #define INTERRUPT_TRAMPOLINE_P(errpush) \ 191 pushq %r13; \ 192 pushq %r14; \ 193 subq $KPTI_R14, %rsp; \ 194 /* Save current %cr3. */ \ 195 mov %cr3, %r14; \ 196 mov %r14, KPTI_TR_CR3(%rsp); \ 197 \ 198 cmpw $KCS_SEL, KPTI_CS(%rsp); \ 199 je 3f; \ 200 1: \ 201 /* Change to the "kernel" %cr3 */ \ 202 mov KPTI_KCR3(%rsp), %r14; \ 203 cmp $0, %r14; \ 204 je 2f; \ 205 mov %r14, %cr3; \ 206 2: \ 207 /* Get our cpu_t in %r13 */ \ 208 mov %rsp, %r13; \ 209 and $(~(MMU_PAGESIZE - 1)), %r13; \ 210 subq $CPU_KPTI_START, %r13; \ 211 /* Use top of the kthread stk */ \ 212 mov CPU_THREAD(%r13), %r14; \ 213 mov T_STACK(%r14), %r14; \ 214 addq $REGSIZE+MINFRAME, %r14; \ 215 jmp 4f; \ 216 3: \ 217 /* Check the %rsp in the frame. */ \ 218 /* Is it above kernel base? */ \ 219 mov kpti_kbase, %r14; \ 220 cmp %r14, KPTI_RSP(%rsp); \ 221 jb 1b; \ 222 /* Use the %rsp from the trap frame */ \ 223 mov KPTI_RSP(%rsp), %r14; \ 224 and $(~0xf), %r14; \ 225 4: \ 226 mov %rsp, %r13; \ 227 /* %r14 contains our destination stk */ \ 228 mov %r14, %rsp; \ 229 pushq KPTI_SS(%r13); \ 230 pushq KPTI_RSP(%r13); \ 231 pushq KPTI_RFLAGS(%r13); \ 232 pushq KPTI_CS(%r13); \ 233 pushq KPTI_RIP(%r13); \ 234 errpush; \ 235 mov KPTI_R14(%r13), %r14; \ 236 mov KPTI_R13(%r13), %r13 237 238 #define INTERRUPT_TRAMPOLINE_NOERR \ 239 INTERRUPT_TRAMPOLINE_P(/**/) 240 241 #define INTERRUPT_TRAMPOLINE \ 242 INTERRUPT_TRAMPOLINE_P(pushq KPTI_ERR(%r13)) 243 244 /* 245 * This is used for all interrupts that can plausibly be taken inside another 246 * interrupt and are using a kpti_frame stack (so #BP, #DB, #GP, #PF, #SS). 247 * 248 * We check for whether we took the interrupt while in another trampoline, in 249 * which case we need to use the kthread stack. 250 */ 251 #define DBG_INTERRUPT_TRAMPOLINE_P(errpush) \ 252 pushq %r13; \ 253 pushq %r14; \ 254 subq $KPTI_R14, %rsp; \ 255 /* Check for clobbering */ \ 256 cmp $0, KPTI_FLAG(%rsp); \ 257 je 1f; \ 258 /* Don't worry, this totally works */ \ 259 int $8; \ 260 1: \ 261 movq $1, KPTI_FLAG(%rsp); \ 262 /* Save current %cr3. */ \ 263 mov %cr3, %r14; \ 264 mov %r14, KPTI_TR_CR3(%rsp); \ 265 \ 266 cmpw $KCS_SEL, KPTI_CS(%rsp); \ 267 je 4f; \ 268 2: \ 269 /* Change to the "kernel" %cr3 */ \ 270 mov KPTI_KCR3(%rsp), %r14; \ 271 cmp $0, %r14; \ 272 je 3f; \ 273 mov %r14, %cr3; \ 274 3: \ 275 /* Get our cpu_t in %r13 */ \ 276 mov %rsp, %r13; \ 277 and $(~(MMU_PAGESIZE - 1)), %r13; \ 278 subq $CPU_KPTI_START, %r13; \ 279 /* Use top of the kthread stk */ \ 280 mov CPU_THREAD(%r13), %r14; \ 281 mov T_STACK(%r14), %r14; \ 282 addq $REGSIZE+MINFRAME, %r14; \ 283 jmp 6f; \ 284 4: \ 285 /* Check the %rsp in the frame. */ \ 286 /* Is it above kernel base? */ \ 287 /* If not, treat as user. */ \ 288 mov kpti_kbase, %r14; \ 289 cmp %r14, KPTI_RSP(%rsp); \ 290 jb 2b; \ 291 /* Is it within the kpti_frame page? */ \ 292 /* If it is, treat as user interrupt */ \ 293 mov %rsp, %r13; \ 294 and $(~(MMU_PAGESIZE - 1)), %r13; \ 295 mov KPTI_RSP(%rsp), %r14; \ 296 and $(~(MMU_PAGESIZE - 1)), %r14; \ 297 cmp %r13, %r14; \ 298 je 2b; \ 299 /* Were we in trampoline code? */ \ 300 leaq kpti_tramp_start, %r14; \ 301 cmp %r14, KPTI_RIP(%rsp); \ 302 jb 5f; \ 303 leaq kpti_tramp_end, %r14; \ 304 cmp %r14, KPTI_RIP(%rsp); \ 305 ja 5f; \ 306 /* If we were, change %cr3: we might */ \ 307 /* have interrupted before it did. */ \ 308 mov KPTI_KCR3(%rsp), %r14; \ 309 mov %r14, %cr3; \ 310 5: \ 311 /* Use the %rsp from the trap frame */ \ 312 mov KPTI_RSP(%rsp), %r14; \ 313 and $(~0xf), %r14; \ 314 6: \ 315 mov %rsp, %r13; \ 316 /* %r14 contains our destination stk */ \ 317 mov %r14, %rsp; \ 318 pushq KPTI_SS(%r13); \ 319 pushq KPTI_RSP(%r13); \ 320 pushq KPTI_RFLAGS(%r13); \ 321 pushq KPTI_CS(%r13); \ 322 pushq KPTI_RIP(%r13); \ 323 errpush; \ 324 mov KPTI_R14(%r13), %r14; \ 325 movq $0, KPTI_FLAG(%r13); \ 326 mov KPTI_R13(%r13), %r13 327 328 #define DBG_INTERRUPT_TRAMPOLINE_NOERR \ 329 DBG_INTERRUPT_TRAMPOLINE_P(/**/) 330 331 #define DBG_INTERRUPT_TRAMPOLINE \ 332 DBG_INTERRUPT_TRAMPOLINE_P(pushq KPTI_ERR(%r13)) 333 334 /* 335 * These labels (_start and _end) are used by trap.c to determine if 336 * we took an interrupt like an NMI during the return process. 337 */ 338 .global tr_sysc_ret_start 339 tr_sysc_ret_start: 340 341 /* 342 * Syscall return trampolines. 343 * 344 * These are expected to be called on the kernel %gs. tr_sysret[ql] are 345 * called after %rsp is changed back to the user value, so we have no 346 * stack to work with. tr_sysexit has a kernel stack (but has to 347 * preserve rflags, soooo). 348 */ 349 ENTRY_NP(tr_sysretq) 350 cmpq $1, kpti_enable 351 jne 1f 352 353 mov %r13, %gs:CPU_KPTI_R13 354 SET_USER_CR3(%r13) 355 mov %gs:CPU_KPTI_R13, %r13 356 /* Zero these to make sure they didn't leak from a kernel trap */ 357 movq $0, %gs:CPU_KPTI_R13 358 movq $0, %gs:CPU_KPTI_R14 359 1: 360 swapgs 361 sysretq 362 SET_SIZE(tr_sysretq) 363 364 ENTRY_NP(tr_sysretl) 365 cmpq $1, kpti_enable 366 jne 1f 367 368 mov %r13, %gs:CPU_KPTI_R13 369 SET_USER_CR3(%r13) 370 mov %gs:CPU_KPTI_R13, %r13 371 /* Zero these to make sure they didn't leak from a kernel trap */ 372 movq $0, %gs:CPU_KPTI_R13 373 movq $0, %gs:CPU_KPTI_R14 374 1: 375 SWAPGS 376 SYSRETL 377 SET_SIZE(tr_sysretl) 378 379 ENTRY_NP(tr_sysexit) 380 /* 381 * Note: we want to preserve RFLAGS across this branch, since sysexit 382 * (unlike sysret above) does not restore RFLAGS for us. 383 * 384 * We still have the real kernel stack (sysexit does restore that), so 385 * we can use pushfq/popfq. 386 */ 387 pushfq 388 389 cmpq $1, kpti_enable 390 jne 1f 391 392 /* Have to pop it back off now before we change %cr3! */ 393 popfq 394 mov %r13, %gs:CPU_KPTI_R13 395 SET_USER_CR3(%r13) 396 mov %gs:CPU_KPTI_R13, %r13 397 /* Zero these to make sure they didn't leak from a kernel trap */ 398 movq $0, %gs:CPU_KPTI_R13 399 movq $0, %gs:CPU_KPTI_R14 400 jmp 2f 401 1: 402 popfq 403 2: 404 swapgs 405 sti 406 sysexit 407 SET_SIZE(tr_sysexit) 408 409 .global tr_sysc_ret_end 410 tr_sysc_ret_end: 411 412 /* 413 * Syscall entry trampolines. 414 */ 415 416 #if DEBUG 417 #define MK_SYSCALL_TRAMPOLINE(isr) \ 418 ENTRY_NP(tr_/**/isr); \ 419 swapgs; \ 420 mov %r13, %gs:CPU_KPTI_R13; \ 421 mov %cr3, %r13; \ 422 mov %r13, %gs:CPU_KPTI_TR_CR3; \ 423 mov %gs:CPU_KPTI_KCR3, %r13; \ 424 mov %r13, %cr3; \ 425 mov %gs:CPU_KPTI_R13, %r13; \ 426 swapgs; \ 427 jmp isr; \ 428 SET_SIZE(tr_/**/isr) 429 #else 430 #define MK_SYSCALL_TRAMPOLINE(isr) \ 431 ENTRY_NP(tr_/**/isr); \ 432 swapgs; \ 433 mov %r13, %gs:CPU_KPTI_R13; \ 434 mov %gs:CPU_KPTI_KCR3, %r13; \ 435 mov %r13, %cr3; \ 436 mov %gs:CPU_KPTI_R13, %r13; \ 437 swapgs; \ 438 jmp isr; \ 439 SET_SIZE(tr_/**/isr) 440 #endif 441 442 MK_SYSCALL_TRAMPOLINE(sys_syscall) 443 MK_SYSCALL_TRAMPOLINE(sys_syscall32) 444 MK_SYSCALL_TRAMPOLINE(brand_sys_syscall) 445 MK_SYSCALL_TRAMPOLINE(brand_sys_syscall32) 446 447 /* 448 * SYSENTER is special. The CPU is really not very helpful when it 449 * comes to preserving and restoring state with it, and as a result 450 * we have to do all of it by hand. So, since we want to preserve 451 * RFLAGS, we have to be very careful in these trampolines to not 452 * clobber any bits in it. That means no cmpqs or branches! 453 */ 454 ENTRY_NP(tr_sys_sysenter) 455 swapgs 456 mov %r13, %gs:CPU_KPTI_R13 457 #if DEBUG 458 mov %cr3, %r13 459 mov %r13, %gs:CPU_KPTI_TR_CR3 460 #endif 461 mov %gs:CPU_KPTI_KCR3, %r13 462 mov %r13, %cr3 463 mov %gs:CPU_KPTI_R13, %r13 464 jmp _sys_sysenter_post_swapgs 465 SET_SIZE(tr_sys_sysenter) 466 467 ENTRY_NP(tr_brand_sys_sysenter) 468 swapgs 469 mov %r13, %gs:CPU_KPTI_R13 470 #if DEBUG 471 mov %cr3, %r13 472 mov %r13, %gs:CPU_KPTI_TR_CR3 473 #endif 474 mov %gs:CPU_KPTI_KCR3, %r13 475 mov %r13, %cr3 476 mov %gs:CPU_KPTI_R13, %r13 477 jmp _brand_sys_sysenter_post_swapgs 478 SET_SIZE(tr_brand_sys_sysenter) 479 480 #define MK_SYSCALL_INT_TRAMPOLINE(isr) \ 481 ENTRY_NP(tr_/**/isr); \ 482 swapgs; \ 483 mov %r13, %gs:CPU_KPTI_R13; \ 484 SET_KERNEL_CR3(%r13); \ 485 mov %gs:CPU_THREAD, %r13; \ 486 mov T_STACK(%r13), %r13; \ 487 addq $REGSIZE+MINFRAME, %r13; \ 488 mov %r13, %rsp; \ 489 pushq %gs:CPU_KPTI_SS; \ 490 pushq %gs:CPU_KPTI_RSP; \ 491 pushq %gs:CPU_KPTI_RFLAGS; \ 492 pushq %gs:CPU_KPTI_CS; \ 493 pushq %gs:CPU_KPTI_RIP; \ 494 mov %gs:CPU_KPTI_R13, %r13; \ 495 SWAPGS; \ 496 jmp isr; \ 497 SET_SIZE(tr_/**/isr) 498 499 MK_SYSCALL_INT_TRAMPOLINE(brand_sys_syscall_int) 500 MK_SYSCALL_INT_TRAMPOLINE(sys_syscall_int) 501 502 /* 503 * Interrupt/trap return trampolines 504 */ 505 506 .global tr_intr_ret_start 507 tr_intr_ret_start: 508 509 ENTRY_NP(tr_iret_auto) 510 cmpq $1, kpti_enable 511 jne tr_iret_kernel 512 cmpw $KCS_SEL, T_FRAMERET_CS(%rsp) 513 je tr_iret_kernel 514 jmp tr_iret_user 515 SET_SIZE(tr_iret_auto) 516 517 ENTRY_NP(tr_iret_kernel) 518 /* 519 * Yes, this does nothing extra. But this way we know if we see iret 520 * elsewhere, then we've failed to properly consider trampolines there. 521 */ 522 iretq 523 SET_SIZE(tr_iret_kernel) 524 525 ENTRY_NP(tr_iret_user) 526 cmpq $1, kpti_enable 527 jne 1f 528 529 swapgs 530 mov %r13, %gs:CPU_KPTI_R13 531 PIVOT_KPTI_STK(%r13) 532 SET_USER_CR3(%r13) 533 mov %gs:CPU_KPTI_R13, %r13 534 /* Zero these to make sure they didn't leak from a kernel trap */ 535 movq $0, %gs:CPU_KPTI_R13 536 movq $0, %gs:CPU_KPTI_R14 537 swapgs 538 1: 539 iretq 540 SET_SIZE(tr_iret_user) 541 542 /* 543 * This special return trampoline is for KDI's use only (with kmdb). 544 * 545 * KDI/kmdb do not use swapgs -- they directly write the GSBASE MSR 546 * instead. This trampoline runs after GSBASE has already been changed 547 * back to the userland value (so we can't use %gs). 548 * 549 * Instead, the caller gives us a pointer to the kpti_dbg frame in %r13. 550 * The KPTI_R13 member in the kpti_dbg has already been set to what the 551 * real %r13 should be before we IRET. 552 * 553 * Additionally, KDI keeps a copy of the incoming %cr3 value when it 554 * took an interrupt, and has put that back in the kpti_dbg area for us 555 * to use, so we don't do any sniffing of %cs here. This is important 556 * so that debugging code that changes %cr3 is possible. 557 */ 558 ENTRY_NP(tr_iret_kdi) 559 movq %r14, KPTI_R14(%r13) /* %r14 has to be preserved by us */ 560 561 movq %rsp, %r14 /* original %rsp is pointing at IRET frame */ 562 leaq KPTI_TOP(%r13), %rsp 563 pushq T_FRAMERET_SS(%r14) 564 pushq T_FRAMERET_RSP(%r14) 565 pushq T_FRAMERET_RFLAGS(%r14) 566 pushq T_FRAMERET_CS(%r14) 567 pushq T_FRAMERET_RIP(%r14) 568 569 movq KPTI_TR_CR3(%r13), %r14 570 movq %r14, %cr3 571 572 movq KPTI_R14(%r13), %r14 573 movq KPTI_R13(%r13), %r13 /* preserved by our caller */ 574 575 iretq 576 SET_SIZE(tr_iret_kdi) 577 578 .global tr_intr_ret_end 579 tr_intr_ret_end: 580 581 /* 582 * Interrupt/trap entry trampolines 583 */ 584 585 /* CPU pushed an error code, and ISR wants one */ 586 #define MK_INTR_TRAMPOLINE(isr) \ 587 ENTRY_NP(tr_/**/isr); \ 588 INTERRUPT_TRAMPOLINE; \ 589 jmp isr; \ 590 SET_SIZE(tr_/**/isr) 591 592 /* CPU didn't push an error code, and ISR doesn't want one */ 593 #define MK_INTR_TRAMPOLINE_NOERR(isr) \ 594 ENTRY_NP(tr_/**/isr); \ 595 push $0; \ 596 INTERRUPT_TRAMPOLINE_NOERR; \ 597 jmp isr; \ 598 SET_SIZE(tr_/**/isr) 599 600 /* CPU pushed an error code, and ISR wants one */ 601 #define MK_DBG_INTR_TRAMPOLINE(isr) \ 602 ENTRY_NP(tr_/**/isr); \ 603 DBG_INTERRUPT_TRAMPOLINE; \ 604 jmp isr; \ 605 SET_SIZE(tr_/**/isr) 606 607 /* CPU didn't push an error code, and ISR doesn't want one */ 608 #define MK_DBG_INTR_TRAMPOLINE_NOERR(isr) \ 609 ENTRY_NP(tr_/**/isr); \ 610 push $0; \ 611 DBG_INTERRUPT_TRAMPOLINE_NOERR; \ 612 jmp isr; \ 613 SET_SIZE(tr_/**/isr) 614 615 616 MK_INTR_TRAMPOLINE_NOERR(div0trap) 617 MK_DBG_INTR_TRAMPOLINE_NOERR(dbgtrap) 618 MK_DBG_INTR_TRAMPOLINE_NOERR(brktrap) 619 MK_INTR_TRAMPOLINE_NOERR(ovflotrap) 620 MK_INTR_TRAMPOLINE_NOERR(boundstrap) 621 MK_INTR_TRAMPOLINE_NOERR(invoptrap) 622 MK_INTR_TRAMPOLINE_NOERR(ndptrap) 623 MK_INTR_TRAMPOLINE(invtsstrap) 624 MK_INTR_TRAMPOLINE(segnptrap) 625 MK_DBG_INTR_TRAMPOLINE(stktrap) 626 MK_DBG_INTR_TRAMPOLINE(gptrap) 627 MK_DBG_INTR_TRAMPOLINE(pftrap) 628 MK_INTR_TRAMPOLINE_NOERR(resvtrap) 629 MK_INTR_TRAMPOLINE_NOERR(ndperr) 630 MK_INTR_TRAMPOLINE(achktrap) 631 MK_INTR_TRAMPOLINE_NOERR(xmtrap) 632 MK_INTR_TRAMPOLINE_NOERR(invaltrap) 633 MK_INTR_TRAMPOLINE_NOERR(fasttrap) 634 MK_INTR_TRAMPOLINE_NOERR(dtrace_ret) 635 636 /* 637 * These are special because they can interrupt other traps, and 638 * each other. We don't need to pivot their stacks, because they have 639 * dedicated IST stack space, but we need to change %cr3. 640 */ 641 ENTRY_NP(tr_nmiint) 642 pushq %r13 643 mov kpti_safe_cr3, %r13 644 mov %r13, %cr3 645 popq %r13 646 jmp nmiint 647 SET_SIZE(tr_nmiint) 648 649 #if !defined(__xpv) 650 ENTRY_NP(tr_syserrtrap) 651 /* 652 * If we got here we should always have a zero error code pushed. 653 * The INT $0x8 instr doesn't seem to push one, though, which we use 654 * as an emergency panic in the other trampolines. So adjust things 655 * here. 656 */ 657 cmpq $0, (%rsp) 658 je 1f 659 pushq $0 660 1: 661 pushq %r13 662 mov kpti_safe_cr3, %r13 663 mov %r13, %cr3 664 popq %r13 665 jmp syserrtrap 666 SET_SIZE(tr_syserrtrap) 667 #endif 668 669 ENTRY_NP(tr_mcetrap) 670 pushq %r13 671 mov kpti_safe_cr3, %r13 672 mov %r13, %cr3 673 popq %r13 674 jmp mcetrap 675 SET_SIZE(tr_mcetrap) 676 677 /* 678 * Interrupts start at 32 679 */ 680 #define MKIVCT(n) \ 681 ENTRY_NP(tr_ivct/**/n) \ 682 push $0; \ 683 INTERRUPT_TRAMPOLINE; \ 684 push $n - 0x20; \ 685 jmp cmnint; \ 686 SET_SIZE(tr_ivct/**/n) 687 688 MKIVCT(32); MKIVCT(33); MKIVCT(34); MKIVCT(35); 689 MKIVCT(36); MKIVCT(37); MKIVCT(38); MKIVCT(39); 690 MKIVCT(40); MKIVCT(41); MKIVCT(42); MKIVCT(43); 691 MKIVCT(44); MKIVCT(45); MKIVCT(46); MKIVCT(47); 692 MKIVCT(48); MKIVCT(49); MKIVCT(50); MKIVCT(51); 693 MKIVCT(52); MKIVCT(53); MKIVCT(54); MKIVCT(55); 694 MKIVCT(56); MKIVCT(57); MKIVCT(58); MKIVCT(59); 695 MKIVCT(60); MKIVCT(61); MKIVCT(62); MKIVCT(63); 696 MKIVCT(64); MKIVCT(65); MKIVCT(66); MKIVCT(67); 697 MKIVCT(68); MKIVCT(69); MKIVCT(70); MKIVCT(71); 698 MKIVCT(72); MKIVCT(73); MKIVCT(74); MKIVCT(75); 699 MKIVCT(76); MKIVCT(77); MKIVCT(78); MKIVCT(79); 700 MKIVCT(80); MKIVCT(81); MKIVCT(82); MKIVCT(83); 701 MKIVCT(84); MKIVCT(85); MKIVCT(86); MKIVCT(87); 702 MKIVCT(88); MKIVCT(89); MKIVCT(90); MKIVCT(91); 703 MKIVCT(92); MKIVCT(93); MKIVCT(94); MKIVCT(95); 704 MKIVCT(96); MKIVCT(97); MKIVCT(98); MKIVCT(99); 705 MKIVCT(100); MKIVCT(101); MKIVCT(102); MKIVCT(103); 706 MKIVCT(104); MKIVCT(105); MKIVCT(106); MKIVCT(107); 707 MKIVCT(108); MKIVCT(109); MKIVCT(110); MKIVCT(111); 708 MKIVCT(112); MKIVCT(113); MKIVCT(114); MKIVCT(115); 709 MKIVCT(116); MKIVCT(117); MKIVCT(118); MKIVCT(119); 710 MKIVCT(120); MKIVCT(121); MKIVCT(122); MKIVCT(123); 711 MKIVCT(124); MKIVCT(125); MKIVCT(126); MKIVCT(127); 712 MKIVCT(128); MKIVCT(129); MKIVCT(130); MKIVCT(131); 713 MKIVCT(132); MKIVCT(133); MKIVCT(134); MKIVCT(135); 714 MKIVCT(136); MKIVCT(137); MKIVCT(138); MKIVCT(139); 715 MKIVCT(140); MKIVCT(141); MKIVCT(142); MKIVCT(143); 716 MKIVCT(144); MKIVCT(145); MKIVCT(146); MKIVCT(147); 717 MKIVCT(148); MKIVCT(149); MKIVCT(150); MKIVCT(151); 718 MKIVCT(152); MKIVCT(153); MKIVCT(154); MKIVCT(155); 719 MKIVCT(156); MKIVCT(157); MKIVCT(158); MKIVCT(159); 720 MKIVCT(160); MKIVCT(161); MKIVCT(162); MKIVCT(163); 721 MKIVCT(164); MKIVCT(165); MKIVCT(166); MKIVCT(167); 722 MKIVCT(168); MKIVCT(169); MKIVCT(170); MKIVCT(171); 723 MKIVCT(172); MKIVCT(173); MKIVCT(174); MKIVCT(175); 724 MKIVCT(176); MKIVCT(177); MKIVCT(178); MKIVCT(179); 725 MKIVCT(180); MKIVCT(181); MKIVCT(182); MKIVCT(183); 726 MKIVCT(184); MKIVCT(185); MKIVCT(186); MKIVCT(187); 727 MKIVCT(188); MKIVCT(189); MKIVCT(190); MKIVCT(191); 728 MKIVCT(192); MKIVCT(193); MKIVCT(194); MKIVCT(195); 729 MKIVCT(196); MKIVCT(197); MKIVCT(198); MKIVCT(199); 730 MKIVCT(200); MKIVCT(201); MKIVCT(202); MKIVCT(203); 731 MKIVCT(204); MKIVCT(205); MKIVCT(206); MKIVCT(207); 732 MKIVCT(208); MKIVCT(209); MKIVCT(210); MKIVCT(211); 733 MKIVCT(212); MKIVCT(213); MKIVCT(214); MKIVCT(215); 734 MKIVCT(216); MKIVCT(217); MKIVCT(218); MKIVCT(219); 735 MKIVCT(220); MKIVCT(221); MKIVCT(222); MKIVCT(223); 736 MKIVCT(224); MKIVCT(225); MKIVCT(226); MKIVCT(227); 737 MKIVCT(228); MKIVCT(229); MKIVCT(230); MKIVCT(231); 738 MKIVCT(232); MKIVCT(233); MKIVCT(234); MKIVCT(235); 739 MKIVCT(236); MKIVCT(237); MKIVCT(238); MKIVCT(239); 740 MKIVCT(240); MKIVCT(241); MKIVCT(242); MKIVCT(243); 741 MKIVCT(244); MKIVCT(245); MKIVCT(246); MKIVCT(247); 742 MKIVCT(248); MKIVCT(249); MKIVCT(250); MKIVCT(251); 743 MKIVCT(252); MKIVCT(253); MKIVCT(254); MKIVCT(255); 744 745 /* 746 * We're PCIDE, but we don't have INVPCID. The only way to invalidate a 747 * PCID other than the current one, then, is to load its cr3 then 748 * invlpg. But loading kf_user_cr3 means we can longer access our 749 * caller's text mapping (or indeed, its stack). So this little helper 750 * has to live within our trampoline text region. 751 * 752 * Called as tr_mmu_flush_user_range(addr, len, pgsz, cr3) 753 */ 754 ENTRY_NP(tr_mmu_flush_user_range) 755 push %rbx 756 /* When we read cr3, it never has the NOINVL bit set. */ 757 mov %cr3, %rax 758 movq $CR3_NOINVL_BIT, %rbx 759 orq %rbx, %rax 760 761 mov %rcx, %cr3 762 add %rdi, %rsi 763 .align ASM_ENTRY_ALIGN 764 1: 765 invlpg (%rdi) 766 add %rdx, %rdi 767 cmp %rsi, %rdi 768 jb 1b 769 mov %rax, %cr3 770 pop %rbx 771 retq 772 SET_SIZE(tr_mmu_flush_user_range) 773 774 .align MMU_PAGESIZE 775 .global kpti_tramp_end 776 kpti_tramp_end: 777 nop 778 779 #endif /* __lint */