9685 KPTI %cr3 handling needs fixes
--- old/usr/src/uts/i86pc/ml/kpti_trampolines.s
+++ new/usr/src/uts/i86pc/ml/kpti_trampolines.s
1 1 /*
2 2 * This file and its contents are supplied under the terms of the
3 3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 4 * You may only use this file in accordance with the terms of version
5 5 * 1.0 of the CDDL.
6 6 *
7 7 * A full copy of the text of the CDDL should have accompanied this
8 8 * source. A copy of the CDDL is also available via the Internet at
9 9 * http://www.illumos.org/license/CDDL.
10 10 */
11 11 /*
12 12 * Copyright 2018 Joyent, Inc.
13 13 */
14 14
15 15 /*
16 16 * This file contains the trampolines that are used by KPTI in order to be
17 17 * able to take interrupts/traps/etc while on the "user" page table.
18 18 *
19 19 * We don't map the full kernel text into the user page table: instead we
20 20 * map this one small section of trampolines (which compiles to ~13 pages).
21 21 * These trampolines are set in the IDT always (so they will run no matter
22 22 * whether we're on the kernel or user page table), and their primary job is to
23 23 * pivot us to the kernel %cr3 and %rsp without ruining everything.
24 24 *
25 25 * All of these interrupts use the amd64 IST feature when we have KPTI enabled,
26 26 * meaning that they will execute with their %rsp set to a known location, even
27 27 * if we take them in the kernel.
28 28 *
29 29 * Over in desctbls.c (for cpu0) and mp_pc.c (other cpus) we set up the IST
30 30 * stack to point at &cpu->cpu_m.mcpu_kpti.kf_tr_rsp. You can see the mcpu_kpti
31 31 * (a struct kpti_frame) defined in machcpuvar.h. This struct is set up to be
32 32 * page-aligned, and we map the page it's on into both page tables. Using a
33 33 * struct attached to the cpu_t also means that we can use %rsp-relative
34 34 * addressing to find anything on the cpu_t, so we don't have to touch %gs or
35 35 * GSBASE at all on incoming interrupt trampolines (which can get pretty hairy).
36 36 *
37 37 * This little struct is where the CPU will push the actual interrupt frame.
38 38 * Then, in the trampoline, we change %cr3, then figure out our destination
39 39 * stack pointer and "pivot" to it (set %rsp and re-push the CPU's interrupt
40 40 * frame). Then we jump to the regular ISR in the kernel text and carry on as
41 41 * normal.
42 42 *
43 43 * We leave the original frame and any spilled regs behind in the kpti_frame
44 44 * lazily until we want to return to userland. Then, we clear any spilled
45 45 * regs from it, and overwrite the rest with our iret frame. When switching
46 46 * this cpu to a different process (in hat_switch), we bzero the whole region to
47 47 * make sure nothing can leak between processes.
48 48 *
49 49 * When we're returning to the original place we took the interrupt later
50 50 * (especially if it was in userland), we have to jmp back to the "return
51 51 * trampolines" here, since when we set %cr3 back to the user value, we need to
52 52 * be executing from code here in these shared pages and not the main kernel
53 53 * text again. Even though it should be fine to iret directly from kernel text
54 54 * when returning to kernel code, we make things jmp to a trampoline here just
55 55 * for consistency.
56 56 *
57 57 * Note that with IST, it's very important that we always pivot away
58 58 * from the IST stack before we can possibly take any other interrupt
59 59 * on the same IST (unless it's an end-of-the-world fault and we don't
60 60 * care about coming back from it ever).
61 61 *
62 62 * This is particularly relevant to the dbgtrap/brktrap trampolines, as they
63 63 * regularly have to happen from within trampoline code (e.g. in the sysenter
64 64 * single-step case) and then return to the world normally. As a result, these
65 65 * two are IST'd to their own kpti_frame right above the normal one (in the same
66 66 * page), so they don't clobber their parent interrupt.
67 67 *
68 68 * To aid with debugging, we also IST the page fault (#PF/pftrap), general
69 69 * protection fault (#GP/gptrap) and stack fault (#SS/stktrap) interrupts to
70 70 * their own separate kpti_frame. This ensures that if we take one of these
71 71 * due to a bug in trampoline code, we preserve the original trampoline
72 72 * state that caused the trap.
73 73 *
74 74 * NMI, MCE and dblfault interrupts are also taken on their own dedicated IST
75 75 * stacks, since they can interrupt another ISR at any time. These stacks are
76 76 * full-sized, however, and not a little kpti_frame struct. We only set %cr3 in
77 77 * their trampolines (and do it unconditionally), and don't bother pivoting
78 78 * away. We're either going into the panic() path, or we're going to return
79 79 * straight away without rescheduling, so it's fine to not be on our real
80 80 * kthread stack (and some of the state we want to go find it with might be
81 81 * corrupt!)
82 82 *
83 83 * Finally, for these "special" interrupts (NMI/MCE/double fault) we use a
84 84 * special %cr3 value we stash here in the text (kpti_safe_cr3). We set this to
85 85 * point at the PML4 for kas early in boot and never touch it again. Hopefully
86 86 * it survives whatever corruption brings down the rest of the kernel!
87 87 *
88 88 * Syscalls are different to interrupts (at least in the SYSENTER/SYSCALL64
89 89 * cases) in that they do not push an interrupt frame (and also have some other
90 90 * effects). In the syscall trampolines, we assume that we can only be taking
91 91 * the call from userland and use SWAPGS and an unconditional overwrite of %cr3.
92 92 * We do not do any stack pivoting for syscalls (and we leave SYSENTER's
93 93 * existing %rsp pivot untouched) -- instead we spill registers into
94 94 * %gs:CPU_KPTI_* as we need to.
95 95 *
96 96 * Note that the normal %cr3 values do not cause invalidations with PCIDE - see
97 97 * hat_switch().
98 98 */
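
A rough map of the layout the trampolines below index through the KPTI_*
assym offsets, as a hedged C sketch. The authoritative definition is
struct kpti_frame in machcpuvar.h; apart from kf_tr_rsp (named above),
the field names and their ordering here are illustrative stand-ins only.

	#include <stdint.h>

	struct kpti_frame_sketch {
		uint64_t kf_r13;	/* spill slot (KPTI_R13) */
		uint64_t kf_r14;	/* spill slot (KPTI_R14) */
		uint64_t kf_err;	/* error code slot (KPTI_ERR) */
		uint64_t kf_rip;	/* CPU-pushed interrupt frame; the */
		uint64_t kf_cs;		/* CPU pushes downward from */
		uint64_t kf_rflags;	/* &kf_tr_rsp, so these fields end */
		uint64_t kf_rsp;	/* just below that label in memory */
		uint64_t kf_ss;
		uint64_t kf_tr_rsp;	/* the IDT IST entry points here */
		uint64_t kf_tr_cr3;	/* %cr3 seen at trampoline entry */
		uint64_t kf_kcr3;	/* kernel %cr3 to pivot to */
		uint64_t kf_ucr3;	/* user %cr3 to return with */
		uint64_t kf_flag;	/* re-entry guard (KPTI_FLAG) */
	};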
99 99
100 100 /*
101 101 * The macros here mostly line up with what's in kdi_idthdl.s, too, so if you
102 102 * fix bugs here check to see if they should be fixed there as well.
103 103 */
104 104
105 105 #include <sys/asm_linkage.h>
106 106 #include <sys/asm_misc.h>
107 107 #include <sys/regset.h>
108 108 #include <sys/privregs.h>
109 109 #include <sys/psw.h>
110 110 #include <sys/machbrand.h>
111 111 #include <sys/param.h>
112 112
113 113 #if defined(__lint)
114 114
115 115 #include <sys/types.h>
116 116 #include <sys/thread.h>
117 117 #include <sys/systm.h>
118 118
119 119 #else /* __lint */
120 120
121 121 #include <sys/segments.h>
122 122 #include <sys/pcb.h>
123 123 #include <sys/trap.h>
124 124 #include <sys/ftrace.h>
125 125 #include <sys/traptrace.h>
126 126 #include <sys/clock.h>
127 127 #include <sys/model.h>
128 128 #include <sys/panic.h>
129 129
130 130 #if defined(__xpv)
131 131 #include <sys/hypervisor.h>
132 132 #endif
133 133
134 134 #include "assym.h"
135 135
136 136 .data
137 137 DGDEF3(kpti_enable, 8, 8)
138 138 .fill 1, 8, 1
139 139
140 140 #if DEBUG
141 141 .data
142 142 _bad_ts_panic_msg:
143 143 .string "kpti_trampolines.s: tr_iret_user but CR0.TS set"
144 144 #endif
145 145
146 146 .section ".text";
147 147 .align MMU_PAGESIZE
148 148
149 149 .global kpti_tramp_start
150 150 kpti_tramp_start:
151 151 nop
152 152
153 153 /* This will be set by mlsetup, and then double-checked later */
154 154 .global kpti_safe_cr3
155 155 kpti_safe_cr3:
156 156 .quad 0
157 157 SET_SIZE(kpti_safe_cr3)
158 158
159 159 /* startup_kmem() will overwrite this */
160 160 .global kpti_kbase
161 161 kpti_kbase:
162 162 .quad KERNELBASE
163 163 SET_SIZE(kpti_kbase)
164 164
165 165 #define SET_KERNEL_CR3(spillreg) \
166 166 mov %cr3, spillreg; \
167 167 mov spillreg, %gs:CPU_KPTI_TR_CR3; \
168 168 mov %gs:CPU_KPTI_KCR3, spillreg; \
169 169 cmp $0, spillreg; \
170 170 je 2f; \
171 171 mov spillreg, %cr3; \
172 172 2:
173 173
174 174 #if DEBUG
175 175 #define SET_USER_CR3(spillreg) \
176 176 mov %cr3, spillreg; \
177 177 mov spillreg, %gs:CPU_KPTI_TR_CR3; \
178 178 mov %gs:CPU_KPTI_UCR3, spillreg; \
179 179 mov spillreg, %cr3
180 180 #else
181 181 #define SET_USER_CR3(spillreg) \
182 182 mov %gs:CPU_KPTI_UCR3, spillreg; \
183 183 mov spillreg, %cr3
184 184 #endif
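
In C terms the two macros behave roughly as below (a sketch: read_cr3()
and write_cr3() are hypothetical stand-ins for the mov-from/to-%cr3
instructions, and the struct models the %gs:CPU_KPTI_* slots). The zero
check in SET_KERNEL_CR3 presumably covers the window before the per-cpu
KPTI state has been initialized:

	#include <stdint.h>

	extern uint64_t read_cr3(void);		/* assumed: mov %cr3, reg */
	extern void write_cr3(uint64_t);	/* assumed: mov reg, %cr3 */

	struct kpti_cr3_sketch {		/* models %gs:CPU_KPTI_* */
		uint64_t tr_cr3, kcr3, ucr3;
	};

	static void
	set_kernel_cr3_model(struct kpti_cr3_sketch *k)
	{
		k->tr_cr3 = read_cr3();	/* record entry %cr3 for debugging */
		if (k->kcr3 != 0)	/* zero: nothing to pivot to yet */
			write_cr3(k->kcr3);
	}

	static void
	set_user_cr3_model(struct kpti_cr3_sketch *k)
	{
	#ifdef DEBUG
		k->tr_cr3 = read_cr3();	/* DEBUG builds record it here too */
	#endif
		write_cr3(k->ucr3);	/* unconditional on the return path */
	}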
185 185
186 186 #define PIVOT_KPTI_STK(spillreg) \
187 187 mov %rsp, spillreg; \
188 188 mov %gs:CPU_KPTI_RET_RSP, %rsp; \
189 189 pushq T_FRAMERET_SS(spillreg); \
190 190 pushq T_FRAMERET_RSP(spillreg); \
191 191 pushq T_FRAMERET_RFLAGS(spillreg); \
192 192 pushq T_FRAMERET_CS(spillreg); \
193 193 pushq T_FRAMERET_RIP(spillreg)
194 194
195 195
196 196 #define INTERRUPT_TRAMPOLINE_P(errpush) \
197 197 pushq %r13; \
198 198 pushq %r14; \
199 199 subq $KPTI_R14, %rsp; \
200 200 /* Save current %cr3. */ \
201 201 mov %cr3, %r14; \
202 202 mov %r14, KPTI_TR_CR3(%rsp); \
203 203 \
204 204 cmpw $KCS_SEL, KPTI_CS(%rsp); \
205 205 je 3f; \
206 206 1: \
207 207 /* Change to the "kernel" %cr3 */ \
208 208 mov KPTI_KCR3(%rsp), %r14; \
209 209 cmp $0, %r14; \
210 210 je 2f; \
211 211 mov %r14, %cr3; \
212 212 2: \
213 213 /* Get our cpu_t in %r13 */ \
214 214 mov %rsp, %r13; \
215 215 and $(~(MMU_PAGESIZE - 1)), %r13; \
216 216 subq $CPU_KPTI_START, %r13; \
217 217 /* Use top of the kthread stk */ \
218 218 mov CPU_THREAD(%r13), %r14; \
219 219 mov T_STACK(%r14), %r14; \
220 220 addq $REGSIZE+MINFRAME, %r14; \
221 221 jmp 4f; \
222 222 3: \
223 223 /* Check the %rsp in the frame. */ \
224 224 /* Is it above kernel base? */ \
225 225 mov kpti_kbase, %r14; \
226 226 cmp %r14, KPTI_RSP(%rsp); \
227 227 jb 1b; \
228 228 /* Use the %rsp from the trap frame */ \
229 229 mov KPTI_RSP(%rsp), %r14; \
230 230 and $(~0xf), %r14; \
231 231 4: \
232 232 mov %rsp, %r13; \
233 233 /* %r14 contains our destination stk */ \
234 234 mov %r14, %rsp; \
235 235 pushq KPTI_SS(%r13); \
236 236 pushq KPTI_RSP(%r13); \
237 237 pushq KPTI_RFLAGS(%r13); \
238 238 pushq KPTI_CS(%r13); \
239 239 pushq KPTI_RIP(%r13); \
240 240 errpush; \
241 241 mov KPTI_R14(%r13), %r14; \
242 242 mov KPTI_R13(%r13), %r13
243 243
244 244 #define INTERRUPT_TRAMPOLINE_NOERR \
245 245 INTERRUPT_TRAMPOLINE_P(/**/)
246 246
247 247 #define INTERRUPT_TRAMPOLINE \
248 248 INTERRUPT_TRAMPOLINE_P(pushq KPTI_ERR(%r13))
249 249
250 250 /*
251 251 * This is used for all interrupts that can plausibly be taken inside another
252 252 * interrupt and are using a kpti_frame stack (so #BP, #DB, #GP, #PF, #SS).
253 253 *
254 + * We also use this for #NP, even though it uses the standard IST: the
255 + * additional %rsp checks below will catch the case where we take an
256 + * exception doing an iret to userspace with a bad %cs/%ss. This appears
257 + * as a kernel trap, and is only later redirected via kern_gpfault().
258 + *
254 259 * We check for whether we took the interrupt while in another trampoline, in
255 260 * which case we need to use the kthread stack.
256 261 */
257 262 #define DBG_INTERRUPT_TRAMPOLINE_P(errpush) \
258 263 pushq %r13; \
259 264 pushq %r14; \
260 265 subq $KPTI_R14, %rsp; \
261 266 /* Check for clobbering */ \
262 267 cmp $0, KPTI_FLAG(%rsp); \
263 268 je 1f; \
264 269 /* Don't worry, this totally works */ \
265 270 int $8; \
266 271 1: \
267 272 movq $1, KPTI_FLAG(%rsp); \
268 273 /* Save current %cr3. */ \
269 274 mov %cr3, %r14; \
270 275 mov %r14, KPTI_TR_CR3(%rsp); \
271 276 \
272 277 cmpw $KCS_SEL, KPTI_CS(%rsp); \
273 278 je 4f; \
274 279 2: \
275 280 /* Change to the "kernel" %cr3 */ \
276 281 mov KPTI_KCR3(%rsp), %r14; \
277 282 cmp $0, %r14; \
278 283 je 3f; \
279 284 mov %r14, %cr3; \
280 285 3: \
281 286 /* Get our cpu_t in %r13 */ \
282 287 mov %rsp, %r13; \
283 288 and $(~(MMU_PAGESIZE - 1)), %r13; \
284 289 subq $CPU_KPTI_START, %r13; \
285 290 /* Use top of the kthread stk */ \
286 291 mov CPU_THREAD(%r13), %r14; \
287 292 mov T_STACK(%r14), %r14; \
288 293 addq $REGSIZE+MINFRAME, %r14; \
289 294 jmp 6f; \
290 295 4: \
291 296 /* Check the %rsp in the frame. */ \
292 297 /* Is it above kernel base? */ \
293 298 /* If not, treat as user. */ \
294 299 mov kpti_kbase, %r14; \
295 300 cmp %r14, KPTI_RSP(%rsp); \
296 301 jb 2b; \
297 302 /* Is it within the kpti_frame page? */ \
298 303 /* If it is, treat as user interrupt */ \
299 304 mov %rsp, %r13; \
300 305 and $(~(MMU_PAGESIZE - 1)), %r13; \
301 306 mov KPTI_RSP(%rsp), %r14; \
302 307 and $(~(MMU_PAGESIZE - 1)), %r14; \
303 308 cmp %r13, %r14; \
304 309 je 2b; \
305 310 /* Were we in trampoline code? */ \
306 311 leaq kpti_tramp_start, %r14; \
307 312 cmp %r14, KPTI_RIP(%rsp); \
308 313 jb 5f; \
309 314 leaq kpti_tramp_end, %r14; \
310 315 cmp %r14, KPTI_RIP(%rsp); \
311 316 ja 5f; \
312 317 /* If we were, change %cr3: we might */ \
313 318 /* have interrupted before it did. */ \
314 319 mov KPTI_KCR3(%rsp), %r14; \
315 320 mov %r14, %cr3; \
316 321 5: \
317 322 /* Use the %rsp from the trap frame */ \
318 323 mov KPTI_RSP(%rsp), %r14; \
319 324 and $(~0xf), %r14; \
320 325 6: \
321 326 mov %rsp, %r13; \
322 327 /* %r14 contains our destination stk */ \
323 328 mov %r14, %rsp; \
324 329 pushq KPTI_SS(%r13); \
325 330 pushq KPTI_RSP(%r13); \
326 331 pushq KPTI_RFLAGS(%r13); \
327 332 pushq KPTI_CS(%r13); \
328 333 pushq KPTI_RIP(%r13); \
329 334 errpush; \
330 335 mov KPTI_R14(%r13), %r14; \
331 336 movq $0, KPTI_FLAG(%r13); \
332 337 mov KPTI_R13(%r13), %r13
333 338
334 339 #define DBG_INTERRUPT_TRAMPOLINE_NOERR \
335 340 DBG_INTERRUPT_TRAMPOLINE_P(/**/)
336 341
337 342 #define DBG_INTERRUPT_TRAMPOLINE \
338 343 DBG_INTERRUPT_TRAMPOLINE_P(pushq KPTI_ERR(%r13))
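
The destination-stack decision in these macros can be summarized in C.
This is a sketch with hypothetical names: kcs_sel, kbase, frame_page and
the trampoline bounds stand for KCS_SEL, kpti_kbase, the kpti_frame page
and kpti_tramp_start/kpti_tramp_end. The plain INTERRUPT_TRAMPOLINE
performs only the first two checks:

	#include <stdint.h>
	#include <stdbool.h>

	#define	PAGEMASK_SKETCH	(~(uint64_t)(4096 - 1))	/* assumes 4K pages */

	struct iframe_sketch {		/* fields the trampoline inspects */
		uint64_t cs, rsp, rip;
	};

	/*
	 * Returns the stack to pivot to. *pivot_cr3 is set when we
	 * interrupted trampoline text that may not have switched to the
	 * kernel %cr3 yet, in which case we must do it ourselves.
	 */
	static uint64_t
	choose_stack(const struct iframe_sketch *f, uint64_t kcs_sel,
	    uint64_t kbase, uint64_t frame_page, uint64_t tramp_start,
	    uint64_t tramp_end, uint64_t kthread_stk_top, bool *pivot_cr3)
	{
		*pivot_cr3 = false;
		if (f->cs != kcs_sel)		/* taken from userland */
			return (kthread_stk_top);
		if (f->rsp < kbase)		/* bogus %rsp: treat as user */
			return (kthread_stk_top);
		if ((f->rsp & PAGEMASK_SKETCH) == frame_page)
			return (kthread_stk_top); /* hit another kpti_frame */
		if (f->rip >= tramp_start && f->rip <= tramp_end)
			*pivot_cr3 = true;	/* interrupted a trampoline */
		return (f->rsp & ~(uint64_t)0xf); /* stay on kernel stack */
	}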
339 344
340 345 /*
341 346 * These labels (_start and _end) are used by trap.c to determine if
342 347 * we took an interrupt like an NMI during the return process.
343 348 */
344 349 .global tr_sysc_ret_start
345 350 tr_sysc_ret_start:
346 351
347 352 /*
348 353 * Syscall return trampolines.
349 354 *
350 355 * These are expected to be called on the kernel %gs. tr_sysret[ql] are
351 356 * called after %rsp is changed back to the user value, so we have no
352 357 * stack to work with. tr_sysexit has a kernel stack (but has to
353 358 * preserve rflags, soooo).
354 359 */
355 360 ENTRY_NP(tr_sysretq)
356 361 cmpq $1, kpti_enable
357 362 jne 1f
358 363
359 364 mov %r13, %gs:CPU_KPTI_R13
360 365 SET_USER_CR3(%r13)
361 366 mov %gs:CPU_KPTI_R13, %r13
362 367 /* Zero these to make sure they didn't leak from a kernel trap */
363 368 movq $0, %gs:CPU_KPTI_R13
364 369 movq $0, %gs:CPU_KPTI_R14
365 370 1:
366 371 swapgs
367 372 sysretq
368 373 SET_SIZE(tr_sysretq)
369 374
370 375 ENTRY_NP(tr_sysretl)
371 376 cmpq $1, kpti_enable
372 377 jne 1f
373 378
374 379 mov %r13, %gs:CPU_KPTI_R13
375 380 SET_USER_CR3(%r13)
376 381 mov %gs:CPU_KPTI_R13, %r13
377 382 /* Zero these to make sure they didn't leak from a kernel trap */
378 383 movq $0, %gs:CPU_KPTI_R13
379 384 movq $0, %gs:CPU_KPTI_R14
380 385 1:
381 386 SWAPGS
382 387 SYSRETL
383 388 SET_SIZE(tr_sysretl)
384 389
385 390 ENTRY_NP(tr_sysexit)
386 391 /*
387 392 * Note: we want to preserve RFLAGS across this branch, since sysexit
388 393 * (unlike sysret above) does not restore RFLAGS for us.
389 394 *
390 395 * We still have the real kernel stack (sysexit does restore that), so
391 396 * we can use pushfq/popfq.
392 397 */
393 398 pushfq
394 399
395 400 cmpq $1, kpti_enable
396 401 jne 1f
397 402
398 403 /* Have to pop it back off now before we change %cr3! */
399 404 popfq
400 405 mov %r13, %gs:CPU_KPTI_R13
401 406 SET_USER_CR3(%r13)
402 407 mov %gs:CPU_KPTI_R13, %r13
403 408 /* Zero these to make sure they didn't leak from a kernel trap */
404 409 movq $0, %gs:CPU_KPTI_R13
405 410 movq $0, %gs:CPU_KPTI_R14
406 411 jmp 2f
407 412 1:
408 413 popfq
409 414 2:
410 415 swapgs
411 416 sti
412 417 sysexit
413 418 SET_SIZE(tr_sysexit)
414 419
415 420 .global tr_sysc_ret_end
416 421 tr_sysc_ret_end:
417 422
418 423 /*
419 424 * Syscall entry trampolines.
420 425 */
421 426
422 427 #if DEBUG
423 428 #define MK_SYSCALL_TRAMPOLINE(isr) \
424 429 ENTRY_NP(tr_/**/isr); \
425 430 swapgs; \
426 431 mov %r13, %gs:CPU_KPTI_R13; \
427 432 mov %cr3, %r13; \
428 433 mov %r13, %gs:CPU_KPTI_TR_CR3; \
429 434 mov %gs:CPU_KPTI_KCR3, %r13; \
430 435 mov %r13, %cr3; \
431 436 mov %gs:CPU_KPTI_R13, %r13; \
432 437 swapgs; \
433 438 jmp isr; \
434 439 SET_SIZE(tr_/**/isr)
435 440 #else
436 441 #define MK_SYSCALL_TRAMPOLINE(isr) \
437 442 ENTRY_NP(tr_/**/isr); \
438 443 swapgs; \
439 444 mov %r13, %gs:CPU_KPTI_R13; \
440 445 mov %gs:CPU_KPTI_KCR3, %r13; \
441 446 mov %r13, %cr3; \
442 447 mov %gs:CPU_KPTI_R13, %r13; \
443 448 swapgs; \
444 449 jmp isr; \
445 450 SET_SIZE(tr_/**/isr)
446 451 #endif
447 452
448 453 MK_SYSCALL_TRAMPOLINE(sys_syscall)
449 454 MK_SYSCALL_TRAMPOLINE(sys_syscall32)
450 455 MK_SYSCALL_TRAMPOLINE(brand_sys_syscall)
451 456 MK_SYSCALL_TRAMPOLINE(brand_sys_syscall32)
452 457
453 458 /*
454 459 * SYSENTER is special. The CPU is really not very helpful when it
455 460 * comes to preserving and restoring state with it, and as a result
456 461 * we have to do all of it by hand. So, since we want to preserve
457 462 * RFLAGS, we have to be very careful in these trampolines to not
458 463 * clobber any bits in it. That means no cmpqs or branches!
459 464 */
460 465 ENTRY_NP(tr_sys_sysenter)
461 466 swapgs
462 467 mov %r13, %gs:CPU_KPTI_R13
463 468 #if DEBUG
464 469 mov %cr3, %r13
465 470 mov %r13, %gs:CPU_KPTI_TR_CR3
466 471 #endif
467 472 mov %gs:CPU_KPTI_KCR3, %r13
468 473 mov %r13, %cr3
469 474 mov %gs:CPU_KPTI_R13, %r13
470 475 jmp _sys_sysenter_post_swapgs
471 476 SET_SIZE(tr_sys_sysenter)
472 477
473 478 ENTRY_NP(tr_brand_sys_sysenter)
474 479 swapgs
475 480 mov %r13, %gs:CPU_KPTI_R13
476 481 #if DEBUG
477 482 mov %cr3, %r13
478 483 mov %r13, %gs:CPU_KPTI_TR_CR3
479 484 #endif
480 485 mov %gs:CPU_KPTI_KCR3, %r13
481 486 mov %r13, %cr3
482 487 mov %gs:CPU_KPTI_R13, %r13
483 488 jmp _brand_sys_sysenter_post_swapgs
484 489 SET_SIZE(tr_brand_sys_sysenter)
485 490
486 491 #define MK_SYSCALL_INT_TRAMPOLINE(isr) \
487 492 ENTRY_NP(tr_/**/isr); \
488 493 swapgs; \
489 494 mov %r13, %gs:CPU_KPTI_R13; \
490 495 SET_KERNEL_CR3(%r13); \
491 496 mov %gs:CPU_THREAD, %r13; \
492 497 mov T_STACK(%r13), %r13; \
493 498 addq $REGSIZE+MINFRAME, %r13; \
494 499 mov %r13, %rsp; \
495 500 pushq %gs:CPU_KPTI_SS; \
496 501 pushq %gs:CPU_KPTI_RSP; \
497 502 pushq %gs:CPU_KPTI_RFLAGS; \
498 503 pushq %gs:CPU_KPTI_CS; \
499 504 pushq %gs:CPU_KPTI_RIP; \
500 505 mov %gs:CPU_KPTI_R13, %r13; \
501 506 SWAPGS; \
502 507 jmp isr; \
503 508 SET_SIZE(tr_/**/isr)
504 509
505 510 MK_SYSCALL_INT_TRAMPOLINE(brand_sys_syscall_int)
506 511 MK_SYSCALL_INT_TRAMPOLINE(sys_syscall_int)
507 512
508 513 /*
509 514 * Interrupt/trap return trampolines
510 515 */
511 516
512 517 .global tr_intr_ret_start
513 518 tr_intr_ret_start:
514 519
515 520 ENTRY_NP(tr_iret_auto)
516 521 cmpq $1, kpti_enable
517 522 jne tr_iret_kernel
518 523 cmpw $KCS_SEL, T_FRAMERET_CS(%rsp)
519 524 je tr_iret_kernel
520 525 jmp tr_iret_user
521 526 SET_SIZE(tr_iret_auto)
522 527
523 528 ENTRY_NP(tr_iret_kernel)
524 529 /*
525 530 * Yes, this does nothing extra. But this way we know that if we see an
526 531 * iret elsewhere, we've failed to properly consider trampolines there.
527 532 */
528 533 iretq
529 534 SET_SIZE(tr_iret_kernel)
530 535
531 536 ENTRY_NP(tr_iret_user)
532 537 #if DEBUG
533 538 /*
534 539 * Ensure that we return to userland with CR0.TS clear. We do this
535 540 * before we trampoline back and pivot the stack and %cr3. This way
536 541 * we're still on the kernel stack and kernel %cr3, though we are on the
537 542 * user GSBASE.
538 543 */
539 544 pushq %rax
540 545 mov %cr0, %rax
541 546 testq $CR0_TS, %rax
542 547 jz 1f
543 548 swapgs
544 549 popq %rax
545 550 leaq _bad_ts_panic_msg(%rip), %rdi
546 551 xorl %eax, %eax
547 552 pushq %rbp
548 553 movq %rsp, %rbp
549 554 call panic
550 555 1:
551 556 popq %rax
552 557 #endif
553 558
554 559 cmpq $1, kpti_enable
555 560 jne 1f
556 561
557 562 swapgs
558 563 mov %r13, %gs:CPU_KPTI_R13
559 564 PIVOT_KPTI_STK(%r13)
560 565 SET_USER_CR3(%r13)
561 566 mov %gs:CPU_KPTI_R13, %r13
562 567 /* Zero these to make sure they didn't leak from a kernel trap */
563 568 movq $0, %gs:CPU_KPTI_R13
564 569 movq $0, %gs:CPU_KPTI_R14
565 570 swapgs
566 571 1:
567 572 iretq
568 573 SET_SIZE(tr_iret_user)
569 574
570 575 /*
571 576 * This special return trampoline is for KDI's use only (with kmdb).
572 577 *
573 578 * KDI/kmdb do not use swapgs -- they directly write the GSBASE MSR
574 579 * instead. This trampoline runs after GSBASE has already been changed
575 580 * back to the userland value (so we can't use %gs).
576 581 *
577 582 * Instead, the caller gives us a pointer to the kpti_dbg frame in %r13.
578 583 * The KPTI_R13 member in the kpti_dbg has already been set to what the
579 584 * real %r13 should be before we IRET.
580 585 *
581 586 * Additionally, KDI keeps a copy of the incoming %cr3 value when it
582 587 * took an interrupt, and has put that back in the kpti_dbg area for us
583 588 * to use, so we don't do any sniffing of %cs here. This is important
584 589 * so that debugging code that changes %cr3 is possible.
585 590 */
586 591 ENTRY_NP(tr_iret_kdi)
587 592 movq %r14, KPTI_R14(%r13) /* %r14 has to be preserved by us */
588 593
589 594 movq %rsp, %r14 /* original %rsp is pointing at IRET frame */
590 595 leaq KPTI_TOP(%r13), %rsp
591 596 pushq T_FRAMERET_SS(%r14)
592 597 pushq T_FRAMERET_RSP(%r14)
593 598 pushq T_FRAMERET_RFLAGS(%r14)
594 599 pushq T_FRAMERET_CS(%r14)
595 600 pushq T_FRAMERET_RIP(%r14)
596 601
597 602 movq KPTI_TR_CR3(%r13), %r14
598 603 movq %r14, %cr3
599 604
600 605 movq KPTI_R14(%r13), %r14
601 606 movq KPTI_R13(%r13), %r13 /* preserved by our caller */
602 607
603 608 iretq
604 609 SET_SIZE(tr_iret_kdi)
605 610
606 611 .global tr_intr_ret_end
607 612 tr_intr_ret_end:
608 613
609 614 /*
610 615 * Interrupt/trap entry trampolines
611 616 */
612 617
613 618 /* CPU pushed an error code, and ISR wants one */
614 619 #define MK_INTR_TRAMPOLINE(isr) \
615 620 ENTRY_NP(tr_/**/isr); \
616 621 INTERRUPT_TRAMPOLINE; \
617 622 jmp isr; \
618 623 SET_SIZE(tr_/**/isr)
619 624
620 625 /* CPU didn't push an error code, and ISR doesn't want one */
621 626 #define MK_INTR_TRAMPOLINE_NOERR(isr) \
622 627 ENTRY_NP(tr_/**/isr); \
623 628 push $0; \
624 629 INTERRUPT_TRAMPOLINE_NOERR; \
625 630 jmp isr; \
626 631 SET_SIZE(tr_/**/isr)
627 632
628 633 /* CPU pushed an error code, and ISR wants one */
629 634 #define MK_DBG_INTR_TRAMPOLINE(isr) \
630 635 ENTRY_NP(tr_/**/isr); \
631 636 DBG_INTERRUPT_TRAMPOLINE; \
632 637 jmp isr; \
633 638 SET_SIZE(tr_/**/isr)
634 639
635 640 /* CPU didn't push an error code, and ISR doesn't want one */
636 641 #define MK_DBG_INTR_TRAMPOLINE_NOERR(isr) \
637 642 ENTRY_NP(tr_/**/isr); \
638 643 push $0; \
639 644 DBG_INTERRUPT_TRAMPOLINE_NOERR; \
640 645 jmp isr; \
641 646 SET_SIZE(tr_/**/isr)
642 647
643 648
644 649 MK_INTR_TRAMPOLINE_NOERR(div0trap)
645 650 MK_DBG_INTR_TRAMPOLINE_NOERR(dbgtrap)
646 651 MK_DBG_INTR_TRAMPOLINE_NOERR(brktrap)
647 652 MK_INTR_TRAMPOLINE_NOERR(ovflotrap)
648 653 MK_INTR_TRAMPOLINE_NOERR(boundstrap)
649 654 MK_INTR_TRAMPOLINE_NOERR(invoptrap)
650 655 MK_INTR_TRAMPOLINE_NOERR(ndptrap)
651 656 MK_INTR_TRAMPOLINE(invtsstrap)
652 - MK_INTR_TRAMPOLINE(segnptrap)
657 + MK_DBG_INTR_TRAMPOLINE(segnptrap)
653 658 MK_DBG_INTR_TRAMPOLINE(stktrap)
654 659 MK_DBG_INTR_TRAMPOLINE(gptrap)
655 660 MK_DBG_INTR_TRAMPOLINE(pftrap)
656 661 MK_INTR_TRAMPOLINE_NOERR(resvtrap)
657 662 MK_INTR_TRAMPOLINE_NOERR(ndperr)
658 663 MK_INTR_TRAMPOLINE(achktrap)
659 664 MK_INTR_TRAMPOLINE_NOERR(xmtrap)
660 665 MK_INTR_TRAMPOLINE_NOERR(invaltrap)
661 666 MK_INTR_TRAMPOLINE_NOERR(fasttrap)
662 667 MK_INTR_TRAMPOLINE_NOERR(dtrace_ret)
663 668
664 669 /*
665 670 * These are special because they can interrupt other traps, and
666 671 * each other. We don't need to pivot their stacks, because they have
667 672 * dedicated IST stack space, but we need to change %cr3.
668 673 */
669 674 ENTRY_NP(tr_nmiint)
670 675 pushq %r13
671 676 mov kpti_safe_cr3, %r13
672 677 mov %r13, %cr3
673 678 popq %r13
674 679 jmp nmiint
675 680 SET_SIZE(tr_nmiint)
676 681
677 682 #if !defined(__xpv)
678 683 ENTRY_NP(tr_syserrtrap)
679 684 /*
680 685 * If we got here we should always have a zero error code pushed.
681 686 * The INT $0x8 instruction (which we use as an emergency panic in the
682 687 * other trampolines) doesn't seem to push one, though. So adjust
683 688 * things here.
684 689 */
685 690 cmpq $0, (%rsp)
686 691 je 1f
687 692 pushq $0
688 693 1:
689 694 pushq %r13
690 695 mov kpti_safe_cr3, %r13
691 696 mov %r13, %cr3
692 697 popq %r13
693 698 jmp syserrtrap
694 699 SET_SIZE(tr_syserrtrap)
695 700 #endif
696 701
697 702 ENTRY_NP(tr_mcetrap)
698 703 pushq %r13
699 704 mov kpti_safe_cr3, %r13
700 705 mov %r13, %cr3
701 706 popq %r13
702 707 jmp mcetrap
703 708 SET_SIZE(tr_mcetrap)
704 709
705 710 /*
706 711 * Interrupts start at 32
707 712 */
708 713 #define MKIVCT(n) \
709 714 ENTRY_NP(tr_ivct/**/n) \
710 715 push $0; \
711 716 INTERRUPT_TRAMPOLINE; \
712 717 push $n - 0x20; \
713 718 jmp cmnint; \
714 719 SET_SIZE(tr_ivct/**/n)
715 720
716 721 MKIVCT(32); MKIVCT(33); MKIVCT(34); MKIVCT(35);
717 722 MKIVCT(36); MKIVCT(37); MKIVCT(38); MKIVCT(39);
718 723 MKIVCT(40); MKIVCT(41); MKIVCT(42); MKIVCT(43);
719 724 MKIVCT(44); MKIVCT(45); MKIVCT(46); MKIVCT(47);
720 725 MKIVCT(48); MKIVCT(49); MKIVCT(50); MKIVCT(51);
721 726 MKIVCT(52); MKIVCT(53); MKIVCT(54); MKIVCT(55);
722 727 MKIVCT(56); MKIVCT(57); MKIVCT(58); MKIVCT(59);
723 728 MKIVCT(60); MKIVCT(61); MKIVCT(62); MKIVCT(63);
724 729 MKIVCT(64); MKIVCT(65); MKIVCT(66); MKIVCT(67);
725 730 MKIVCT(68); MKIVCT(69); MKIVCT(70); MKIVCT(71);
726 731 MKIVCT(72); MKIVCT(73); MKIVCT(74); MKIVCT(75);
727 732 MKIVCT(76); MKIVCT(77); MKIVCT(78); MKIVCT(79);
728 733 MKIVCT(80); MKIVCT(81); MKIVCT(82); MKIVCT(83);
729 734 MKIVCT(84); MKIVCT(85); MKIVCT(86); MKIVCT(87);
730 735 MKIVCT(88); MKIVCT(89); MKIVCT(90); MKIVCT(91);
731 736 MKIVCT(92); MKIVCT(93); MKIVCT(94); MKIVCT(95);
732 737 MKIVCT(96); MKIVCT(97); MKIVCT(98); MKIVCT(99);
733 738 MKIVCT(100); MKIVCT(101); MKIVCT(102); MKIVCT(103);
734 739 MKIVCT(104); MKIVCT(105); MKIVCT(106); MKIVCT(107);
735 740 MKIVCT(108); MKIVCT(109); MKIVCT(110); MKIVCT(111);
736 741 MKIVCT(112); MKIVCT(113); MKIVCT(114); MKIVCT(115);
737 742 MKIVCT(116); MKIVCT(117); MKIVCT(118); MKIVCT(119);
738 743 MKIVCT(120); MKIVCT(121); MKIVCT(122); MKIVCT(123);
739 744 MKIVCT(124); MKIVCT(125); MKIVCT(126); MKIVCT(127);
740 745 MKIVCT(128); MKIVCT(129); MKIVCT(130); MKIVCT(131);
741 746 MKIVCT(132); MKIVCT(133); MKIVCT(134); MKIVCT(135);
742 747 MKIVCT(136); MKIVCT(137); MKIVCT(138); MKIVCT(139);
743 748 MKIVCT(140); MKIVCT(141); MKIVCT(142); MKIVCT(143);
744 749 MKIVCT(144); MKIVCT(145); MKIVCT(146); MKIVCT(147);
745 750 MKIVCT(148); MKIVCT(149); MKIVCT(150); MKIVCT(151);
746 751 MKIVCT(152); MKIVCT(153); MKIVCT(154); MKIVCT(155);
747 752 MKIVCT(156); MKIVCT(157); MKIVCT(158); MKIVCT(159);
748 753 MKIVCT(160); MKIVCT(161); MKIVCT(162); MKIVCT(163);
749 754 MKIVCT(164); MKIVCT(165); MKIVCT(166); MKIVCT(167);
750 755 MKIVCT(168); MKIVCT(169); MKIVCT(170); MKIVCT(171);
751 756 MKIVCT(172); MKIVCT(173); MKIVCT(174); MKIVCT(175);
752 757 MKIVCT(176); MKIVCT(177); MKIVCT(178); MKIVCT(179);
753 758 MKIVCT(180); MKIVCT(181); MKIVCT(182); MKIVCT(183);
754 759 MKIVCT(184); MKIVCT(185); MKIVCT(186); MKIVCT(187);
755 760 MKIVCT(188); MKIVCT(189); MKIVCT(190); MKIVCT(191);
756 761 MKIVCT(192); MKIVCT(193); MKIVCT(194); MKIVCT(195);
757 762 MKIVCT(196); MKIVCT(197); MKIVCT(198); MKIVCT(199);
758 763 MKIVCT(200); MKIVCT(201); MKIVCT(202); MKIVCT(203);
759 764 MKIVCT(204); MKIVCT(205); MKIVCT(206); MKIVCT(207);
760 765 MKIVCT(208); MKIVCT(209); MKIVCT(210); MKIVCT(211);
761 766 MKIVCT(212); MKIVCT(213); MKIVCT(214); MKIVCT(215);
762 767 MKIVCT(216); MKIVCT(217); MKIVCT(218); MKIVCT(219);
763 768 MKIVCT(220); MKIVCT(221); MKIVCT(222); MKIVCT(223);
764 769 MKIVCT(224); MKIVCT(225); MKIVCT(226); MKIVCT(227);
765 770 MKIVCT(228); MKIVCT(229); MKIVCT(230); MKIVCT(231);
766 771 MKIVCT(232); MKIVCT(233); MKIVCT(234); MKIVCT(235);
767 772 MKIVCT(236); MKIVCT(237); MKIVCT(238); MKIVCT(239);
768 773 MKIVCT(240); MKIVCT(241); MKIVCT(242); MKIVCT(243);
769 774 MKIVCT(244); MKIVCT(245); MKIVCT(246); MKIVCT(247);
770 775 MKIVCT(248); MKIVCT(249); MKIVCT(250); MKIVCT(251);
771 776 MKIVCT(252); MKIVCT(253); MKIVCT(254); MKIVCT(255);
772 777
773 778 /*
774 779 * We're PCIDE, but we don't have INVPCID. The only way to invalidate a
775 780 * PCID other than the current one, then, is to load its cr3 then
776 781 * invlpg. But loading kf_user_cr3 means we can no longer access our
777 782 * caller's text mapping (or indeed, its stack). So this little helper
778 783 * has to live within our trampoline text region.
779 784 *
780 785 * Called as tr_mmu_flush_user_range(addr, len, pgsz, cr3)
781 786 */
782 787 ENTRY_NP(tr_mmu_flush_user_range)
783 788 push %rbx
784 789 /* When we read cr3, it never has the NOINVL bit set. */
785 790 mov %cr3, %rax
786 791 movq $CR3_NOINVL_BIT, %rbx
787 792 orq %rbx, %rax
788 793
789 794 mov %rcx, %cr3
790 795 add %rdi, %rsi
791 796 .align ASM_ENTRY_ALIGN
792 797 1:
793 798 invlpg (%rdi)
794 799 add %rdx, %rdi
795 800 cmp %rsi, %rdi
796 801 jb 1b
797 802 mov %rax, %cr3
798 803 pop %rbx
799 804 retq
800 805 SET_SIZE(tr_mmu_flush_user_range)
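
The same flow in C, for clarity. This is a ring-0-only sketch; the
inline-asm helpers model the privileged instructions used above, and
bit 63 is the PCIDE "no invalidate" bit that CR3_NOINVL_BIT refers to:

	#include <stdint.h>
	#include <stddef.h>

	#define	CR3_NOINVL_BIT	(1ULL << 63)	/* PCIDE: don't flush on load */

	static inline uint64_t
	read_cr3(void)
	{
		uint64_t v;
		__asm__ __volatile__("mov %%cr3, %0" : "=r" (v));
		return (v);
	}

	static inline void
	write_cr3(uint64_t v)
	{
		__asm__ __volatile__("mov %0, %%cr3" : : "r" (v) : "memory");
	}

	static inline void
	invlpg(uintptr_t va)
	{
		__asm__ __volatile__("invlpg (%0)" : : "r" (va) : "memory");
	}

	/* Model of tr_mmu_flush_user_range(addr, len, pgsz, cr3). */
	void
	mmu_flush_user_range_model(uintptr_t addr, size_t len, size_t pgsz,
	    uint64_t user_cr3)
	{
		/* Our own cr3, with NOINVL so reloading it keeps our TLB. */
		uint64_t saved = read_cr3() | CR3_NOINVL_BIT;

		write_cr3(user_cr3);		/* activate the user PCID */
		for (uintptr_t va = addr; va < addr + len; va += pgsz)
			invlpg(va);		/* invalidate each page */
		write_cr3(saved);		/* back to our page table */
	}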
801 806
802 807 .align MMU_PAGESIZE
803 808 .global kpti_tramp_end
804 809 kpti_tramp_end:
805 810 nop
806 811
807 812 #endif /* __lint */