8956 Implement KPTI
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
--- old/usr/src/uts/intel/ia32/os/desctbls.c
+++ new/usr/src/uts/intel/ia32/os/desctbls.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
24 24 */
25 25
26 26 /*
27 - * Copyright 2011 Joyent, Inc. All rights reserved.
27 + * Copyright 2018 Joyent, Inc. All rights reserved.
28 28 */
29 29
30 30 /*
31 31 * Copyright (c) 1992 Terrence R. Lambert.
32 32 * Copyright (c) 1990 The Regents of the University of California.
33 33 * All rights reserved.
34 34 *
35 35 * This code is derived from software contributed to Berkeley by
36 36 * William Jolitz.
37 37 *
38 38 * Redistribution and use in source and binary forms, with or without
39 39 * modification, are permitted provided that the following conditions
40 40 * are met:
41 41 * 1. Redistributions of source code must retain the above copyright
42 42 * notice, this list of conditions and the following disclaimer.
43 43 * 2. Redistributions in binary form must reproduce the above copyright
44 44 * notice, this list of conditions and the following disclaimer in the
45 45 * documentation and/or other materials provided with the distribution.
46 46 * 3. All advertising materials mentioning features or use of this software
47 47 * must display the following acknowledgement:
48 48 * This product includes software developed by the University of
49 49 * California, Berkeley and its contributors.
50 50 * 4. Neither the name of the University nor the names of its contributors
51 51 * may be used to endorse or promote products derived from this software
52 52 * without specific prior written permission.
53 53 *
54 54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55 55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56 56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57 57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58 58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59 59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60 60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61 61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62 62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63 63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64 64 * SUCH DAMAGE.
65 65 *
66 66 * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91
67 67 */
68 68
69 69 #include <sys/types.h>
70 70 #include <sys/sysmacros.h>
71 71 #include <sys/tss.h>
72 72 #include <sys/segments.h>
73 73 #include <sys/trap.h>
74 74 #include <sys/cpuvar.h>
75 75 #include <sys/bootconf.h>
76 76 #include <sys/x86_archext.h>
77 77 #include <sys/controlregs.h>
78 78 #include <sys/archsystm.h>
79 79 #include <sys/machsystm.h>
80 80 #include <sys/kobj.h>
81 81 #include <sys/cmn_err.h>
82 82 #include <sys/reboot.h>
83 83 #include <sys/kdi.h>
84 84 #include <sys/mach_mmu.h>
85 85 #include <sys/systm.h>
86 +#include <sys/note.h>
86 87
87 88 #ifdef __xpv
88 89 #include <sys/hypervisor.h>
89 90 #include <vm/as.h>
90 91 #endif
91 92
92 93 #include <sys/promif.h>
93 94 #include <sys/bootinfo.h>
94 95 #include <vm/kboot_mmu.h>
95 96 #include <vm/hat_pte.h>
96 97
97 98 /*
98 99 * cpu0 and default tables and structures.
99 100 */
100 101 user_desc_t *gdt0;
101 102 #if !defined(__xpv)
102 103 desctbr_t gdt0_default_r;
103 104 #endif
104 105
105 106 gate_desc_t *idt0; /* interrupt descriptor table */
106 107 #if defined(__i386)
107 108 desctbr_t idt0_default_r; /* describes idt0 in IDTR format */
108 109 #endif
109 110
110 111 tss_t *ktss0; /* kernel task state structure */
111 112
112 113 #if defined(__i386)
113 114 tss_t *dftss0; /* #DF double-fault exception */
114 115 #endif /* __i386 */
115 116
116 117 user_desc_t zero_udesc; /* base zero user desc native procs */
117 118 user_desc_t null_udesc; /* null user descriptor */
118 119 system_desc_t null_sdesc; /* null system descriptor */
119 120
120 121 #if defined(__amd64)
121 122 user_desc_t zero_u32desc; /* 32-bit compatibility procs */
122 123 #endif /* __amd64 */
123 124
124 125 #if defined(__amd64)
125 126 user_desc_t ucs_on;
126 127 user_desc_t ucs_off;
127 128 user_desc_t ucs32_on;
128 129 user_desc_t ucs32_off;
129 130 #endif /* __amd64 */
130 131
131 -#pragma align 16(dblfault_stack0)
132 -char dblfault_stack0[DEFAULTSTKSZ];
132 +/*
133 + * If the size of this is changed, you must update hat_pcp_setup() and the
134 + * definitions in exception.s
135 + */
136 +extern char dblfault_stack0[DEFAULTSTKSZ];
137 +extern char nmi_stack0[DEFAULTSTKSZ];
138 +extern char mce_stack0[DEFAULTSTKSZ];
133 139
134 140 extern void fast_null(void);
135 141 extern hrtime_t get_hrtime(void);
136 142 extern hrtime_t gethrvtime(void);
137 143 extern hrtime_t get_hrestime(void);
138 144 extern uint64_t getlgrp(void);
139 145
140 146 void (*(fasttable[]))(void) = {
141 147 fast_null, /* T_FNULL routine */
142 148 fast_null, /* T_FGETFP routine (initially null) */
143 149 fast_null, /* T_FSETFP routine (initially null) */
144 150 (void (*)())get_hrtime, /* T_GETHRTIME */
145 151 (void (*)())gethrvtime, /* T_GETHRVTIME */
146 152 (void (*)())get_hrestime, /* T_GETHRESTIME */
147 153 (void (*)())getlgrp /* T_GETLGRP */
148 154 };
149 155
150 156 /*
151 157 * Structure containing pre-computed descriptors to allow us to temporarily
152 158 * interpose on a standard handler.
153 159 */
154 160 struct interposing_handler {
155 161 int ih_inum;
156 162 gate_desc_t ih_interp_desc;
157 163 gate_desc_t ih_default_desc;
158 164 };
159 165
160 166 /*
161 167 * The brand infrastructure interposes on two handlers, and we use one as a
162 168 * NULL signpost.
163 169 */
164 170 static struct interposing_handler brand_tbl[2];
165 171
166 172 /*
167 173 * software prototypes for default local descriptor table
168 174 */
169 175
170 176 /*
171 177  * Routines for loading segment descriptors in a format the hardware
172 178 * can understand.
173 179 */
174 180
175 181 #if defined(__amd64)
176 182
177 183 /*
178 184 * In long mode we have the new L or long mode attribute bit
179 185 * for code segments. Only the conforming bit in type is used along
180 186  * with descriptor privilege and present bits. Default operand size must
181 187 * be zero when in long mode. In 32-bit compatibility mode all fields
182 188 * are treated as in legacy mode. For data segments while in long mode
183 189 * only the present bit is loaded.
184 190 */
185 191 void
186 192 set_usegd(user_desc_t *dp, uint_t lmode, void *base, size_t size,
187 193 uint_t type, uint_t dpl, uint_t gran, uint_t defopsz)
188 194 {
189 195 ASSERT(lmode == SDP_SHORT || lmode == SDP_LONG);
190 196
191 197 /*
192 198 * 64-bit long mode.
193 199 */
194 200 if (lmode == SDP_LONG)
195 201 dp->usd_def32 = 0; /* 32-bit operands only */
196 202 else
197 203 /*
198 204 * 32-bit compatibility mode.
199 205 */
200 206 dp->usd_def32 = defopsz; /* 0 = 16, 1 = 32-bit ops */
201 207
202 208 dp->usd_long = lmode; /* 64-bit mode */
203 209 dp->usd_type = type;
204 210 dp->usd_dpl = dpl;
205 211 dp->usd_p = 1;
206 212 dp->usd_gran = gran; /* 0 = bytes, 1 = pages */
207 213
208 214 dp->usd_lobase = (uintptr_t)base;
209 215 dp->usd_midbase = (uintptr_t)base >> 16;
210 216 dp->usd_hibase = (uintptr_t)base >> (16 + 8);
211 217 dp->usd_lolimit = size;
212 218 dp->usd_hilimit = (uintptr_t)size >> 16;
213 219 }
214 220
215 221 #elif defined(__i386)
216 222
217 223 /*
218 224 * Install user segment descriptor for code and data.
219 225 */
220 226 void
221 227 set_usegd(user_desc_t *dp, void *base, size_t size, uint_t type,
222 228 uint_t dpl, uint_t gran, uint_t defopsz)
223 229 {
224 230 dp->usd_lolimit = size;
225 231 dp->usd_hilimit = (uintptr_t)size >> 16;
226 232
227 233 dp->usd_lobase = (uintptr_t)base;
228 234 dp->usd_midbase = (uintptr_t)base >> 16;
229 235 dp->usd_hibase = (uintptr_t)base >> (16 + 8);
230 236
231 237 dp->usd_type = type;
232 238 dp->usd_dpl = dpl;
233 239 dp->usd_p = 1;
234 240 dp->usd_def32 = defopsz; /* 0 = 16, 1 = 32 bit operands */
235 241 dp->usd_gran = gran; /* 0 = bytes, 1 = pages */
236 242 }
237 243
238 244 #endif /* __i386 */
239 245
240 246 /*
241 247 * Install system segment descriptor for LDT and TSS segments.
242 248 */
243 249
244 250 #if defined(__amd64)
245 251
246 252 void
247 253 set_syssegd(system_desc_t *dp, void *base, size_t size, uint_t type,
248 254 uint_t dpl)
249 255 {
250 256 dp->ssd_lolimit = size;
251 257 dp->ssd_hilimit = (uintptr_t)size >> 16;
252 258
253 259 dp->ssd_lobase = (uintptr_t)base;
254 260 dp->ssd_midbase = (uintptr_t)base >> 16;
255 261 dp->ssd_hibase = (uintptr_t)base >> (16 + 8);
256 262 dp->ssd_hi64base = (uintptr_t)base >> (16 + 8 + 8);
257 263
258 264 dp->ssd_type = type;
259 265 dp->ssd_zero1 = 0; /* must be zero */
260 266 dp->ssd_zero2 = 0;
261 267 dp->ssd_dpl = dpl;
262 268 dp->ssd_p = 1;
263 269 dp->ssd_gran = 0; /* force byte units */
264 270 }
265 271
266 272 void *
267 273 get_ssd_base(system_desc_t *dp)
268 274 {
269 275 uintptr_t base;
270 276
271 277 base = (uintptr_t)dp->ssd_lobase |
272 278 (uintptr_t)dp->ssd_midbase << 16 |
273 279 (uintptr_t)dp->ssd_hibase << (16 + 8) |
274 280 (uintptr_t)dp->ssd_hi64base << (16 + 8 + 8);
275 281 return ((void *)base);
276 282 }
277 283
278 284 #elif defined(__i386)
279 285
280 286 void
281 287 set_syssegd(system_desc_t *dp, void *base, size_t size, uint_t type,
282 288 uint_t dpl)
283 289 {
284 290 dp->ssd_lolimit = size;
285 291 dp->ssd_hilimit = (uintptr_t)size >> 16;
286 292
287 293 dp->ssd_lobase = (uintptr_t)base;
288 294 dp->ssd_midbase = (uintptr_t)base >> 16;
289 295 dp->ssd_hibase = (uintptr_t)base >> (16 + 8);
290 296
291 297 dp->ssd_type = type;
292 298 dp->ssd_zero = 0; /* must be zero */
293 299 dp->ssd_dpl = dpl;
294 300 dp->ssd_p = 1;
295 301 dp->ssd_gran = 0; /* force byte units */
296 302 }
297 303
298 304 void *
299 305 get_ssd_base(system_desc_t *dp)
300 306 {
301 307 uintptr_t base;
302 308
303 309 base = (uintptr_t)dp->ssd_lobase |
304 310 (uintptr_t)dp->ssd_midbase << 16 |
305 311 (uintptr_t)dp->ssd_hibase << (16 + 8);
306 312 return ((void *)base);
307 313 }
308 314
309 315 #endif /* __i386 */
310 316
311 317 /*
312 318 * Install gate segment descriptor for interrupt, trap, call and task gates.
319 + *
320 + * For 64-bit native, if we have KPTI enabled, we use the IST stack mechanism on
321 + * all interrupts. We have different ISTs for each class of exceptions that are
322 + * most likely to occur while handling an existing exception; while many of
323 + * these are just going to panic, it's nice not to trample on the existing
324 + * exception state for debugging purposes.
325 + *
326 + * Normal interrupts are all redirected unconditionally to the KPTI trampoline
327 + * stack space. This unifies the trampoline handling between user and kernel
328 + * space (and avoids the need to touch %gs).
329 + *
330 + * The KDI IDT entries *all* use the DBG IST: consider single-stepping
331 + * through tr_pftrap when a read from KMDB causes another #PF. Without its
332 + * own IST, this would stomp on the kernel's mcpu_kpti_flt frame.
313 333 */
314 -
315 -#if defined(__amd64)
316 -
317 -/*ARGSUSED*/
318 -void
319 -set_gatesegd(gate_desc_t *dp, void (*func)(void), selector_t sel,
320 - uint_t type, uint_t dpl, uint_t vector)
334 +uint_t
335 +idt_vector_to_ist(uint_t vector)
321 336 {
322 - dp->sgd_looffset = (uintptr_t)func;
323 - dp->sgd_hioffset = (uintptr_t)func >> 16;
324 - dp->sgd_hi64offset = (uintptr_t)func >> (16 + 16);
337 +#if defined(__xpv)
338 + _NOTE(ARGUNUSED(vector));
339 + return (IST_NONE);
340 +#else
341 + switch (vector) {
342 + /* These should always use IST even without KPTI enabled. */
343 + case T_DBLFLT:
344 + return (IST_DF);
345 + case T_NMIFLT:
346 + return (IST_NMI);
347 + case T_MCE:
348 + return (IST_MCE);
325 349
326 - dp->sgd_selector = (uint16_t)sel;
327 -
328 - /*
329 - * For 64 bit native we use the IST stack mechanism
330 - * for double faults. All other traps use the CPL = 0
331 - * (tss_rsp0) stack.
332 - */
333 -#if !defined(__xpv)
334 - if (vector == T_DBLFLT)
335 - dp->sgd_ist = 1;
336 - else
350 + case T_BPTFLT:
351 + case T_SGLSTP:
352 + if (kpti_enable == 1) {
353 + return (IST_DBG);
354 + }
355 + return (IST_NONE);
356 + case T_STKFLT:
357 + case T_GPFLT:
358 + case T_PGFLT:
359 + if (kpti_enable == 1) {
360 + return (IST_NESTABLE);
361 + }
362 + return (IST_NONE);
363 + default:
364 + if (kpti_enable == 1) {
365 + return (IST_DEFAULT);
366 + }
367 + return (IST_NONE);
368 + }
337 369 #endif
338 - dp->sgd_ist = 0;
339 -
340 - dp->sgd_type = type;
341 - dp->sgd_dpl = dpl;
342 - dp->sgd_p = 1;
343 370 }
344 371
345 -#elif defined(__i386)
346 -
347 -/*ARGSUSED*/
348 372 void
349 373 set_gatesegd(gate_desc_t *dp, void (*func)(void), selector_t sel,
350 - uint_t type, uint_t dpl, uint_t unused)
374 + uint_t type, uint_t dpl, uint_t ist)
351 375 {
352 376 dp->sgd_looffset = (uintptr_t)func;
353 377 dp->sgd_hioffset = (uintptr_t)func >> 16;
354 -
378 + dp->sgd_hi64offset = (uintptr_t)func >> (16 + 16);
355 379 dp->sgd_selector = (uint16_t)sel;
356 - dp->sgd_stkcpy = 0; /* always zero bytes */
380 + dp->sgd_ist = ist;
357 381 dp->sgd_type = type;
358 382 dp->sgd_dpl = dpl;
359 383 dp->sgd_p = 1;
360 384 }
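A minimal sketch of what a caller looks like when installing a single vector with the new set_gatesegd() signature, mirroring the calls in init_idt_common() below. The wrapper function name is hypothetical; kpti_enable, tr_pftrap, pftrap and idt_vector_to_ist() come from this change:

	static void
	example_install_pf_gate(gate_desc_t *idt)
	{
		/* With KPTI on, route #PF through the trampoline entry point. */
		void (*handler)(void) = (kpti_enable == 1) ? &tr_pftrap : &pftrap;

		/* The IST slot is now chosen centrally by idt_vector_to_ist(). */
		set_gatesegd(&idt[T_PGFLT], handler, KCS_SEL, SDT_SYSIGT,
		    TRP_KPL, idt_vector_to_ist(T_PGFLT));
	}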
361 385
362 -#endif /* __i386 */
363 -
364 386 /*
365 387  * Updates a single user descriptor in the GDT of the current cpu.
366 388 * Caller is responsible for preventing cpu migration.
367 389 */
368 390
369 391 void
370 392 gdt_update_usegd(uint_t sidx, user_desc_t *udp)
371 393 {
372 394 #if defined(__xpv)
373 395
374 396 uint64_t dpa = CPU->cpu_m.mcpu_gdtpa + sizeof (*udp) * sidx;
375 397
376 398 if (HYPERVISOR_update_descriptor(pa_to_ma(dpa), *(uint64_t *)udp))
377 399 panic("gdt_update_usegd: HYPERVISOR_update_descriptor");
378 400
379 401 #else /* __xpv */
380 402
381 403 CPU->cpu_gdt[sidx] = *udp;
382 404
383 405 #endif /* __xpv */
384 406 }
385 407
386 408 /*
387 409  * Writes the single descriptor pointed to by udp into the process's
388 410  * LDT entry pointed to by ldp.
389 411 */
390 412 int
391 413 ldt_update_segd(user_desc_t *ldp, user_desc_t *udp)
392 414 {
393 415 #if defined(__xpv)
394 416
395 417 uint64_t dpa;
396 418
397 419 dpa = mmu_ptob(hat_getpfnum(kas.a_hat, (caddr_t)ldp)) |
398 420 ((uintptr_t)ldp & PAGEOFFSET);
399 421
400 422 /*
401 423 * The hypervisor is a little more restrictive about what it
402 424 * supports in the LDT.
403 425 */
404 426 if (HYPERVISOR_update_descriptor(pa_to_ma(dpa), *(uint64_t *)udp) != 0)
405 427 return (EINVAL);
406 428
407 429 #else /* __xpv */
408 430
409 431 *ldp = *udp;
410 432
411 433 #endif /* __xpv */
412 434 return (0);
413 435 }
414 436
415 437 #if defined(__xpv)
416 438
417 439 /*
418 440  * Converts a hw format gate descriptor into pseudo-IDT format for the hypervisor.
419 441 * Returns true if a valid entry was written.
420 442 */
421 443 int
422 444 xen_idt_to_trap_info(uint_t vec, gate_desc_t *sgd, void *ti_arg)
423 445 {
424 446 trap_info_t *ti = ti_arg; /* XXPV Aargh - segments.h comment */
425 447
426 448 /*
427 449 * skip holes in the IDT
428 450 */
429 451 if (GATESEG_GETOFFSET(sgd) == 0)
430 452 return (0);
431 453
432 454 ASSERT(sgd->sgd_type == SDT_SYSIGT);
433 455 ti->vector = vec;
434 456 TI_SET_DPL(ti, sgd->sgd_dpl);
435 457
436 458 /*
437 459 * Is this an interrupt gate?
438 460 */
439 461 if (sgd->sgd_type == SDT_SYSIGT) {
440 462 /* LINTED */
441 463 TI_SET_IF(ti, 1);
442 464 }
443 465 ti->cs = sgd->sgd_selector;
444 466 #if defined(__amd64)
445 467 ti->cs |= SEL_KPL; /* force into ring 3. see KCS_SEL */
446 468 #endif
447 469 ti->address = GATESEG_GETOFFSET(sgd);
448 470 return (1);
449 471 }
450 472
451 473 /*
452 474 * Convert a single hw format gate descriptor and write it into our virtual IDT.
453 475 */
454 476 void
455 477 xen_idt_write(gate_desc_t *sgd, uint_t vec)
456 478 {
457 479 trap_info_t trapinfo[2];
458 480
459 481 bzero(trapinfo, sizeof (trapinfo));
460 482 if (xen_idt_to_trap_info(vec, sgd, &trapinfo[0]) == 0)
461 483 return;
462 484 if (xen_set_trap_table(trapinfo) != 0)
463 485 panic("xen_idt_write: xen_set_trap_table() failed");
464 486 }
465 487
466 488 #endif /* __xpv */
467 489
468 490 #if defined(__amd64)
469 491
470 492 /*
471 493 * Build kernel GDT.
472 494 */
473 495
474 496 static void
475 497 init_gdt_common(user_desc_t *gdt)
476 498 {
477 499 int i;
478 500
479 501 /*
480 502 * 64-bit kernel code segment.
481 503 */
482 504 set_usegd(&gdt[GDT_KCODE], SDP_LONG, NULL, 0, SDT_MEMERA, SEL_KPL,
483 505 SDP_PAGES, SDP_OP32);
484 506
485 507 /*
486 508 * 64-bit kernel data segment. The limit attribute is ignored in 64-bit
487 509 * mode, but we set it here to 0xFFFF so that we can use the SYSRET
488 510 * instruction to return from system calls back to 32-bit applications.
489 511 * SYSRET doesn't update the base, limit, or attributes of %ss or %ds
490 512 * descriptors. We therefore must ensure that the kernel uses something,
491 513 * though it will be ignored by hardware, that is compatible with 32-bit
492 514 * apps. For the same reason we must set the default op size of this
493 515 * descriptor to 32-bit operands.
494 516 */
495 517 set_usegd(&gdt[GDT_KDATA], SDP_LONG, NULL, -1, SDT_MEMRWA,
496 518 SEL_KPL, SDP_PAGES, SDP_OP32);
497 519 gdt[GDT_KDATA].usd_def32 = 1;
498 520
499 521 /*
500 522 * 64-bit user code segment.
501 523 */
502 524 set_usegd(&gdt[GDT_UCODE], SDP_LONG, NULL, 0, SDT_MEMERA, SEL_UPL,
503 525 SDP_PAGES, SDP_OP32);
504 526
505 527 /*
506 528 * 32-bit user code segment.
507 529 */
508 530 set_usegd(&gdt[GDT_U32CODE], SDP_SHORT, NULL, -1, SDT_MEMERA,
509 531 SEL_UPL, SDP_PAGES, SDP_OP32);
510 532
511 533 /*
512 534 * See gdt_ucode32() and gdt_ucode_native().
513 535 */
514 536 ucs_on = ucs_off = gdt[GDT_UCODE];
515 537 ucs_off.usd_p = 0; /* forces #np fault */
516 538
517 539 ucs32_on = ucs32_off = gdt[GDT_U32CODE];
518 540 ucs32_off.usd_p = 0; /* forces #np fault */
519 541
520 542 /*
521 543 * 32 and 64 bit data segments can actually share the same descriptor.
522 544 * In long mode only the present bit is checked but all other fields
523 545 * are loaded. But in compatibility mode all fields are interpreted
524 546 * as in legacy mode so they must be set correctly for a 32-bit data
525 547 * segment.
526 548 */
527 549 set_usegd(&gdt[GDT_UDATA], SDP_SHORT, NULL, -1, SDT_MEMRWA, SEL_UPL,
528 550 SDP_PAGES, SDP_OP32);
529 551
530 552 #if !defined(__xpv)
531 553
532 554 /*
533 555 * The 64-bit kernel has no default LDT. By default, the LDT descriptor
534 556 * in the GDT is 0.
535 557 */
536 558
537 559 /*
538 560 * Kernel TSS
539 561 */
540 562 set_syssegd((system_desc_t *)&gdt[GDT_KTSS], ktss0,
541 563 sizeof (*ktss0) - 1, SDT_SYSTSS, SEL_KPL);
542 564
543 565 #endif /* !__xpv */
544 566
545 567 /*
546 568 * Initialize fs and gs descriptors for 32 bit processes.
547 569 * Only attributes and limits are initialized, the effective
548 570 * base address is programmed via fsbase/gsbase.
549 571 */
550 572 set_usegd(&gdt[GDT_LWPFS], SDP_SHORT, NULL, -1, SDT_MEMRWA,
551 573 SEL_UPL, SDP_PAGES, SDP_OP32);
552 574 set_usegd(&gdt[GDT_LWPGS], SDP_SHORT, NULL, -1, SDT_MEMRWA,
553 575 SEL_UPL, SDP_PAGES, SDP_OP32);
554 576
555 577 /*
556 578 * Initialize the descriptors set aside for brand usage.
557 579 * Only attributes and limits are initialized.
558 580 */
559 581 for (i = GDT_BRANDMIN; i <= GDT_BRANDMAX; i++)
560 582 set_usegd(&gdt0[i], SDP_SHORT, NULL, -1, SDT_MEMRWA,
561 583 SEL_UPL, SDP_PAGES, SDP_OP32);
562 584
563 585 /*
564 586 * Initialize convenient zero base user descriptors for clearing
565 587 * lwp private %fs and %gs descriptors in GDT. See setregs() for
566 588 * an example.
567 589 */
568 590 set_usegd(&zero_udesc, SDP_LONG, 0, 0, SDT_MEMRWA, SEL_UPL,
569 591 SDP_BYTES, SDP_OP32);
570 592 set_usegd(&zero_u32desc, SDP_SHORT, 0, -1, SDT_MEMRWA, SEL_UPL,
571 593 SDP_PAGES, SDP_OP32);
572 594 }
573 595
574 596 #if defined(__xpv)
575 597
576 598 static user_desc_t *
577 599 init_gdt(void)
578 600 {
579 601 uint64_t gdtpa;
580 602 ulong_t ma[1]; /* XXPV should be a memory_t */
581 603 ulong_t addr;
582 604
583 605 #if !defined(__lint)
584 606 /*
585 607 * Our gdt is never larger than a single page.
586 608 */
587 609 ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE);
588 610 #endif
589 611 gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA,
590 612 PAGESIZE, PAGESIZE);
591 613 bzero(gdt0, PAGESIZE);
592 614
593 615 init_gdt_common(gdt0);
594 616
595 617 /*
596 618 * XXX Since we never invoke kmdb until after the kernel takes
597 619 * over the descriptor tables why not have it use the kernel's
598 620 * selectors?
599 621 */
600 622 if (boothowto & RB_DEBUG) {
601 623 set_usegd(&gdt0[GDT_B32DATA], SDP_LONG, NULL, -1, SDT_MEMRWA,
602 624 SEL_KPL, SDP_PAGES, SDP_OP32);
603 625 set_usegd(&gdt0[GDT_B64CODE], SDP_LONG, NULL, -1, SDT_MEMERA,
604 626 SEL_KPL, SDP_PAGES, SDP_OP32);
605 627 }
606 628
607 629 /*
608 630 * Clear write permission for page containing the gdt and install it.
609 631 */
610 632 gdtpa = pfn_to_pa(va_to_pfn(gdt0));
611 633 ma[0] = (ulong_t)(pa_to_ma(gdtpa) >> PAGESHIFT);
612 634 kbm_read_only((uintptr_t)gdt0, gdtpa);
613 635 xen_set_gdt(ma, NGDT);
614 636
615 637 /*
616 638 * Reload the segment registers to use the new GDT.
617 639 * On 64-bit, fixup KCS_SEL to be in ring 3.
618 640 * See KCS_SEL in segments.h.
619 641 */
620 642 load_segment_registers((KCS_SEL | SEL_KPL), KFS_SEL, KGS_SEL, KDS_SEL);
621 643
622 644 /*
623 645 * setup %gs for kernel
624 646 */
625 647 xen_set_segment_base(SEGBASE_GS_KERNEL, (ulong_t)&cpus[0]);
626 648
627 649 /*
628 650 * XX64 We should never dereference off "other gsbase" or
629 651 * "fsbase". So, we should arrange to point FSBASE and
630 652 * KGSBASE somewhere truly awful e.g. point it at the last
631 653 * valid address below the hole so that any attempts to index
632 654 * off them cause an exception.
633 655 *
634 656 * For now, point it at 8G -- at least it should be unmapped
635 657 * until some 64-bit processes run.
636 658 */
637 659 addr = 0x200000000ul;
638 660 xen_set_segment_base(SEGBASE_FS, addr);
639 661 xen_set_segment_base(SEGBASE_GS_USER, addr);
640 662 xen_set_segment_base(SEGBASE_GS_USER_SEL, 0);
641 663
642 664 return (gdt0);
643 665 }
644 666
645 667 #else /* __xpv */
646 668
647 669 static user_desc_t *
648 670 init_gdt(void)
649 671 {
650 672 desctbr_t r_bgdt, r_gdt;
651 673 user_desc_t *bgdt;
652 674
653 675 #if !defined(__lint)
654 676 /*
655 677 * Our gdt is never larger than a single page.
656 678 */
657 679 ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE);
658 680 #endif
659 681 gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA,
660 682 PAGESIZE, PAGESIZE);
661 683 bzero(gdt0, PAGESIZE);
662 684
663 685 init_gdt_common(gdt0);
664 686
665 687 /*
666 688 * Copy in from boot's gdt to our gdt.
667 689 * Entry 0 is the null descriptor by definition.
668 690 */
669 691 rd_gdtr(&r_bgdt);
670 692 bgdt = (user_desc_t *)r_bgdt.dtr_base;
671 693 if (bgdt == NULL)
672 694 panic("null boot gdt");
673 695
674 696 gdt0[GDT_B32DATA] = bgdt[GDT_B32DATA];
675 697 gdt0[GDT_B32CODE] = bgdt[GDT_B32CODE];
676 698 gdt0[GDT_B16CODE] = bgdt[GDT_B16CODE];
677 699 gdt0[GDT_B16DATA] = bgdt[GDT_B16DATA];
678 700 gdt0[GDT_B64CODE] = bgdt[GDT_B64CODE];
679 701
680 702 /*
681 703 * Install our new GDT
682 704 */
683 705 r_gdt.dtr_limit = (sizeof (*gdt0) * NGDT) - 1;
684 706 r_gdt.dtr_base = (uintptr_t)gdt0;
685 707 wr_gdtr(&r_gdt);
686 708
687 709 /*
688 710 * Reload the segment registers to use the new GDT
689 711 */
690 712 load_segment_registers(KCS_SEL, KFS_SEL, KGS_SEL, KDS_SEL);
691 713
692 714 /*
693 715 * setup %gs for kernel
694 716 */
695 717 wrmsr(MSR_AMD_GSBASE, (uint64_t)&cpus[0]);
696 718
697 719 /*
698 720 * XX64 We should never dereference off "other gsbase" or
699 721 * "fsbase". So, we should arrange to point FSBASE and
700 722 * KGSBASE somewhere truly awful e.g. point it at the last
701 723 * valid address below the hole so that any attempts to index
702 724 * off them cause an exception.
703 725 *
704 726 * For now, point it at 8G -- at least it should be unmapped
705 727 * until some 64-bit processes run.
706 728 */
707 729 wrmsr(MSR_AMD_FSBASE, 0x200000000ul);
708 730 wrmsr(MSR_AMD_KGSBASE, 0x200000000ul);
709 731 return (gdt0);
710 732 }
711 733
712 734 #endif /* __xpv */
713 735
714 736 #elif defined(__i386)
715 737
716 738 static void
717 739 init_gdt_common(user_desc_t *gdt)
718 740 {
719 741 int i;
720 742
721 743 /*
722 744 * Text and data for both kernel and user span entire 32 bit
723 745 * address space.
724 746 */
725 747
726 748 /*
727 749 * kernel code segment.
728 750 */
729 751 set_usegd(&gdt[GDT_KCODE], NULL, -1, SDT_MEMERA, SEL_KPL, SDP_PAGES,
730 752 SDP_OP32);
731 753
732 754 /*
733 755 * kernel data segment.
734 756 */
735 757 set_usegd(&gdt[GDT_KDATA], NULL, -1, SDT_MEMRWA, SEL_KPL, SDP_PAGES,
736 758 SDP_OP32);
737 759
738 760 /*
739 761 * user code segment.
740 762 */
741 763 set_usegd(&gdt[GDT_UCODE], NULL, -1, SDT_MEMERA, SEL_UPL, SDP_PAGES,
742 764 SDP_OP32);
743 765
744 766 /*
745 767 * user data segment.
746 768 */
747 769 set_usegd(&gdt[GDT_UDATA], NULL, -1, SDT_MEMRWA, SEL_UPL, SDP_PAGES,
748 770 SDP_OP32);
749 771
750 772 #if !defined(__xpv)
751 773
752 774 /*
753 775 * TSS for T_DBLFLT (double fault) handler
754 776 */
755 777 set_syssegd((system_desc_t *)&gdt[GDT_DBFLT], dftss0,
756 778 sizeof (*dftss0) - 1, SDT_SYSTSS, SEL_KPL);
757 779
758 780 /*
759 781 * TSS for kernel
760 782 */
761 783 set_syssegd((system_desc_t *)&gdt[GDT_KTSS], ktss0,
762 784 sizeof (*ktss0) - 1, SDT_SYSTSS, SEL_KPL);
763 785
764 786 #endif /* !__xpv */
765 787
766 788 /*
767 789 * %gs selector for kernel
768 790 */
769 791 set_usegd(&gdt[GDT_GS], &cpus[0], sizeof (struct cpu) -1, SDT_MEMRWA,
770 792 SEL_KPL, SDP_BYTES, SDP_OP32);
771 793
772 794 /*
773 795 * Initialize lwp private descriptors.
774 796 * Only attributes and limits are initialized, the effective
775 797 * base address is programmed via fsbase/gsbase.
776 798 */
777 799 set_usegd(&gdt[GDT_LWPFS], NULL, (size_t)-1, SDT_MEMRWA, SEL_UPL,
778 800 SDP_PAGES, SDP_OP32);
779 801 set_usegd(&gdt[GDT_LWPGS], NULL, (size_t)-1, SDT_MEMRWA, SEL_UPL,
780 802 SDP_PAGES, SDP_OP32);
781 803
782 804 /*
783 805 * Initialize the descriptors set aside for brand usage.
784 806 * Only attributes and limits are initialized.
785 807 */
786 808 for (i = GDT_BRANDMIN; i <= GDT_BRANDMAX; i++)
787 809 set_usegd(&gdt0[i], NULL, (size_t)-1, SDT_MEMRWA, SEL_UPL,
788 810 SDP_PAGES, SDP_OP32);
789 811 /*
790 812 * Initialize convenient zero base user descriptor for clearing
791 813 * lwp private %fs and %gs descriptors in GDT. See setregs() for
792 814 * an example.
793 815 */
794 816 set_usegd(&zero_udesc, NULL, -1, SDT_MEMRWA, SEL_UPL,
795 817 SDP_BYTES, SDP_OP32);
796 818 }
797 819
798 820 #if defined(__xpv)
799 821
800 822 static user_desc_t *
801 823 init_gdt(void)
802 824 {
803 825 uint64_t gdtpa;
804 826 ulong_t ma[1]; /* XXPV should be a memory_t */
805 827
806 828 #if !defined(__lint)
807 829 /*
808 830 * Our gdt is never larger than a single page.
809 831 */
810 832 ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE);
811 833 #endif
812 834 gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA,
813 835 PAGESIZE, PAGESIZE);
814 836 bzero(gdt0, PAGESIZE);
815 837
816 838 init_gdt_common(gdt0);
817 839 gdtpa = pfn_to_pa(va_to_pfn(gdt0));
818 840
819 841 /*
820 842 * XXX Since we never invoke kmdb until after the kernel takes
821 843 * over the descriptor tables why not have it use the kernel's
822 844 * selectors?
823 845 */
824 846 if (boothowto & RB_DEBUG) {
825 847 set_usegd(&gdt0[GDT_B32DATA], NULL, -1, SDT_MEMRWA, SEL_KPL,
826 848 SDP_PAGES, SDP_OP32);
827 849 set_usegd(&gdt0[GDT_B32CODE], NULL, -1, SDT_MEMERA, SEL_KPL,
828 850 SDP_PAGES, SDP_OP32);
829 851 }
830 852
831 853 /*
832 854 * Clear write permission for page containing the gdt and install it.
833 855 */
834 856 ma[0] = (ulong_t)(pa_to_ma(gdtpa) >> PAGESHIFT);
835 857 kbm_read_only((uintptr_t)gdt0, gdtpa);
836 858 xen_set_gdt(ma, NGDT);
837 859
838 860 /*
839 861 * Reload the segment registers to use the new GDT
840 862 */
841 863 load_segment_registers(
842 864 KCS_SEL, KDS_SEL, KDS_SEL, KFS_SEL, KGS_SEL, KDS_SEL);
843 865
844 866 return (gdt0);
845 867 }
846 868
847 869 #else /* __xpv */
848 870
849 871 static user_desc_t *
850 872 init_gdt(void)
851 873 {
852 874 desctbr_t r_bgdt, r_gdt;
853 875 user_desc_t *bgdt;
854 876
855 877 #if !defined(__lint)
856 878 /*
857 879 * Our gdt is never larger than a single page.
858 880 */
859 881 ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE);
860 882 #endif
861 883 /*
862 884 * XXX this allocation belongs in our caller, not here.
863 885 */
864 886 gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA,
865 887 PAGESIZE, PAGESIZE);
866 888 bzero(gdt0, PAGESIZE);
867 889
868 890 init_gdt_common(gdt0);
869 891
870 892 /*
871 893 * Copy in from boot's gdt to our gdt entries.
872 894 * Entry 0 is null descriptor by definition.
873 895 */
874 896 rd_gdtr(&r_bgdt);
875 897 bgdt = (user_desc_t *)r_bgdt.dtr_base;
876 898 if (bgdt == NULL)
877 899 panic("null boot gdt");
878 900
879 901 gdt0[GDT_B32DATA] = bgdt[GDT_B32DATA];
880 902 gdt0[GDT_B32CODE] = bgdt[GDT_B32CODE];
881 903 gdt0[GDT_B16CODE] = bgdt[GDT_B16CODE];
882 904 gdt0[GDT_B16DATA] = bgdt[GDT_B16DATA];
883 905
884 906 /*
885 907 * Install our new GDT
886 908 */
887 909 r_gdt.dtr_limit = (sizeof (*gdt0) * NGDT) - 1;
888 910 r_gdt.dtr_base = (uintptr_t)gdt0;
889 911 wr_gdtr(&r_gdt);
890 912
891 913 /*
892 914 * Reload the segment registers to use the new GDT
893 915 */
894 916 load_segment_registers(
895 917 KCS_SEL, KDS_SEL, KDS_SEL, KFS_SEL, KGS_SEL, KDS_SEL);
896 918
897 919 return (gdt0);
898 920 }
899 921
900 922 #endif /* __xpv */
901 923 #endif /* __i386 */
902 924
903 925 /*
904 926 * Build kernel IDT.
905 927 *
906 928 * Note that for amd64 we pretty much require every gate to be an interrupt
907 929 * gate which blocks interrupts atomically on entry; that's because of our
908 930 * dependency on using 'swapgs' every time we come into the kernel to find
909 931 * the cpu structure. If we get interrupted just before doing that, %cs could
910 932 * be in kernel mode (so that the trap prolog doesn't do a swapgs), but
911 933 * %gsbase is really still pointing at something in userland. Bad things will
912 934  * ensue. We also use interrupt gates for i386, even though this is not
913 935  * required for some traps.
914 936 *
915 937 * Perhaps they should have invented a trap gate that does an atomic swapgs?
916 938 */
917 939 static void
918 940 init_idt_common(gate_desc_t *idt)
919 941 {
920 - set_gatesegd(&idt[T_ZERODIV], &div0trap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
921 - 0);
922 - set_gatesegd(&idt[T_SGLSTP], &dbgtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
923 - 0);
924 - set_gatesegd(&idt[T_NMIFLT], &nmiint, KCS_SEL, SDT_SYSIGT, TRP_KPL,
925 - 0);
926 - set_gatesegd(&idt[T_BPTFLT], &brktrap, KCS_SEL, SDT_SYSIGT, TRP_UPL,
927 - 0);
928 - set_gatesegd(&idt[T_OVFLW], &ovflotrap, KCS_SEL, SDT_SYSIGT, TRP_UPL,
929 - 0);
930 - set_gatesegd(&idt[T_BOUNDFLT], &boundstrap, KCS_SEL, SDT_SYSIGT,
931 - TRP_KPL, 0);
932 - set_gatesegd(&idt[T_ILLINST], &invoptrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
933 - 0);
934 - set_gatesegd(&idt[T_NOEXTFLT], &ndptrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
935 - 0);
942 + set_gatesegd(&idt[T_ZERODIV],
943 + (kpti_enable == 1) ? &tr_div0trap : &div0trap,
944 + KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_ZERODIV));
945 + set_gatesegd(&idt[T_SGLSTP],
946 + (kpti_enable == 1) ? &tr_dbgtrap : &dbgtrap,
947 + KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_SGLSTP));
948 + set_gatesegd(&idt[T_NMIFLT],
949 + (kpti_enable == 1) ? &tr_nmiint : &nmiint,
950 + KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_NMIFLT));
951 + set_gatesegd(&idt[T_BPTFLT],
952 + (kpti_enable == 1) ? &tr_brktrap : &brktrap,
953 + KCS_SEL, SDT_SYSIGT, TRP_UPL, idt_vector_to_ist(T_BPTFLT));
954 + set_gatesegd(&idt[T_OVFLW],
955 + (kpti_enable == 1) ? &tr_ovflotrap : &ovflotrap,
956 + KCS_SEL, SDT_SYSIGT, TRP_UPL, idt_vector_to_ist(T_OVFLW));
957 + set_gatesegd(&idt[T_BOUNDFLT],
958 + (kpti_enable == 1) ? &tr_boundstrap : &boundstrap,
959 + KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_BOUNDFLT));
960 + set_gatesegd(&idt[T_ILLINST],
961 + (kpti_enable == 1) ? &tr_invoptrap : &invoptrap,
962 + KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_ILLINST));
963 + set_gatesegd(&idt[T_NOEXTFLT],
964 + (kpti_enable == 1) ? &tr_ndptrap : &ndptrap,
965 + KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_NOEXTFLT));
936 966
937 967 /*
938 968 * double fault handler.
939 969 *
940 970 * Note that on the hypervisor a guest does not receive #df faults.
941 971 * Instead a failsafe event is injected into the guest if its selectors
942 972 * and/or stack is in a broken state. See xen_failsafe_callback.
943 973 */
944 974 #if !defined(__xpv)
945 -#if defined(__amd64)
946 -
947 - set_gatesegd(&idt[T_DBLFLT], &syserrtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
948 - T_DBLFLT);
949 -
950 -#elif defined(__i386)
951 -
952 - /*
953 - * task gate required.
954 - */
955 - set_gatesegd(&idt[T_DBLFLT], NULL, DFTSS_SEL, SDT_SYSTASKGT, TRP_KPL,
956 - 0);
957 -
958 -#endif /* __i386 */
975 + set_gatesegd(&idt[T_DBLFLT],
976 + (kpti_enable == 1) ? &tr_syserrtrap : &syserrtrap,
977 + KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_DBLFLT));
959 978 #endif /* !__xpv */
960 979
961 980 /*
962 981 * T_EXTOVRFLT coprocessor-segment-overrun not supported.
963 982 */
983 + set_gatesegd(&idt[T_TSSFLT],
984 + (kpti_enable == 1) ? &tr_invtsstrap : &invtsstrap,
985 + KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_TSSFLT));
986 + set_gatesegd(&idt[T_SEGFLT],
987 + (kpti_enable == 1) ? &tr_segnptrap : &segnptrap,
988 + KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_SEGFLT));
989 + set_gatesegd(&idt[T_STKFLT],
990 + (kpti_enable == 1) ? &tr_stktrap : &stktrap,
991 + KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_STKFLT));
992 + set_gatesegd(&idt[T_GPFLT],
993 + (kpti_enable == 1) ? &tr_gptrap : &gptrap,
994 + KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_GPFLT));
995 + set_gatesegd(&idt[T_PGFLT],
996 + (kpti_enable == 1) ? &tr_pftrap : &pftrap,
997 + KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_PGFLT));
998 + set_gatesegd(&idt[T_EXTERRFLT],
999 + (kpti_enable == 1) ? &tr_ndperr : &ndperr,
1000 + KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_EXTERRFLT));
1001 + set_gatesegd(&idt[T_ALIGNMENT],
1002 + (kpti_enable == 1) ? &tr_achktrap : &achktrap,
1003 + KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_ALIGNMENT));
1004 + set_gatesegd(&idt[T_MCE],
1005 + (kpti_enable == 1) ? &tr_mcetrap : &mcetrap,
1006 + KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_MCE));
1007 + set_gatesegd(&idt[T_SIMDFPE],
1008 + (kpti_enable == 1) ? &tr_xmtrap : &xmtrap,
1009 + KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_SIMDFPE));
964 1010
965 - set_gatesegd(&idt[T_TSSFLT], &invtsstrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
966 - 0);
967 - set_gatesegd(&idt[T_SEGFLT], &segnptrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
968 - 0);
969 - set_gatesegd(&idt[T_STKFLT], &stktrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
970 - set_gatesegd(&idt[T_GPFLT], &gptrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
971 - set_gatesegd(&idt[T_PGFLT], &pftrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
972 - set_gatesegd(&idt[T_EXTERRFLT], &ndperr, KCS_SEL, SDT_SYSIGT, TRP_KPL,
973 - 0);
974 - set_gatesegd(&idt[T_ALIGNMENT], &achktrap, KCS_SEL, SDT_SYSIGT,
975 - TRP_KPL, 0);
976 - set_gatesegd(&idt[T_MCE], &mcetrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
977 - set_gatesegd(&idt[T_SIMDFPE], &xmtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
978 -
979 1011 /*
980 1012 * install fast trap handler at 210.
981 1013 */
982 - set_gatesegd(&idt[T_FASTTRAP], &fasttrap, KCS_SEL, SDT_SYSIGT, TRP_UPL,
983 - 0);
1014 + set_gatesegd(&idt[T_FASTTRAP],
1015 + (kpti_enable == 1) ? &tr_fasttrap : &fasttrap,
1016 + KCS_SEL, SDT_SYSIGT, TRP_UPL, idt_vector_to_ist(T_FASTTRAP));
984 1017
985 1018 /*
986 1019 * System call handler.
987 1020 */
988 -#if defined(__amd64)
989 - set_gatesegd(&idt[T_SYSCALLINT], &sys_syscall_int, KCS_SEL, SDT_SYSIGT,
990 - TRP_UPL, 0);
1021 + set_gatesegd(&idt[T_SYSCALLINT],
1022 + (kpti_enable == 1) ? &tr_sys_syscall_int : &sys_syscall_int,
1023 + KCS_SEL, SDT_SYSIGT, TRP_UPL, idt_vector_to_ist(T_SYSCALLINT));
991 1024
992 -#elif defined(__i386)
993 - set_gatesegd(&idt[T_SYSCALLINT], &sys_call, KCS_SEL, SDT_SYSIGT,
994 - TRP_UPL, 0);
995 -#endif /* __i386 */
996 -
997 1025 /*
998 1026 * Install the DTrace interrupt handler for the pid provider.
999 1027 */
1000 - set_gatesegd(&idt[T_DTRACE_RET], &dtrace_ret, KCS_SEL,
1001 - SDT_SYSIGT, TRP_UPL, 0);
1028 + set_gatesegd(&idt[T_DTRACE_RET],
1029 + (kpti_enable == 1) ? &tr_dtrace_ret : &dtrace_ret,
1030 + KCS_SEL, SDT_SYSIGT, TRP_UPL, idt_vector_to_ist(T_DTRACE_RET));
1002 1031
1003 1032 /*
1004 1033 * Prepare interposing descriptor for the syscall handler
1005 1034 * and cache copy of the default descriptor.
1006 1035 */
1007 1036 brand_tbl[0].ih_inum = T_SYSCALLINT;
1008 1037 brand_tbl[0].ih_default_desc = idt0[T_SYSCALLINT];
1009 1038
1010 -#if defined(__amd64)
1011 - set_gatesegd(&(brand_tbl[0].ih_interp_desc), &brand_sys_syscall_int,
1012 - KCS_SEL, SDT_SYSIGT, TRP_UPL, 0);
1013 -#elif defined(__i386)
1014 - set_gatesegd(&(brand_tbl[0].ih_interp_desc), &brand_sys_call,
1015 - KCS_SEL, SDT_SYSIGT, TRP_UPL, 0);
1016 -#endif /* __i386 */
1039 + set_gatesegd(&(brand_tbl[0].ih_interp_desc),
1040 + (kpti_enable == 1) ? &tr_brand_sys_syscall_int :
1041 + &brand_sys_syscall_int, KCS_SEL, SDT_SYSIGT, TRP_UPL,
1042 + idt_vector_to_ist(T_SYSCALLINT));
1017 1043
1018 1044 brand_tbl[1].ih_inum = 0;
1019 1045 }
1020 1046
1021 1047 #if defined(__xpv)
1022 1048
1023 1049 static void
1024 1050 init_idt(gate_desc_t *idt)
1025 1051 {
1026 1052 init_idt_common(idt);
1027 1053 }
1028 1054
1029 1055 #else /* __xpv */
1030 1056
1031 1057 static void
1032 1058 init_idt(gate_desc_t *idt)
1033 1059 {
1034 1060 char ivctname[80];
1035 1061 void (*ivctptr)(void);
1036 1062 int i;
1037 1063
1038 1064 /*
1039 1065 * Initialize entire table with 'reserved' trap and then overwrite
1040 1066 * specific entries. T_EXTOVRFLT (9) is unsupported and reserved
1041 1067 * since it can only be generated on a 386 processor. 15 is also
1042 1068 * unsupported and reserved.
1043 1069 */
1044 - for (i = 0; i < NIDT; i++)
1070 +#if !defined(__xpv)
1071 + for (i = 0; i < NIDT; i++) {
1072 + set_gatesegd(&idt[i],
1073 + (kpti_enable == 1) ? &tr_resvtrap : &resvtrap,
1074 + KCS_SEL, SDT_SYSIGT, TRP_KPL,
1075 + idt_vector_to_ist(T_RESVTRAP));
1076 + }
1077 +#else
1078 + for (i = 0; i < NIDT; i++) {
1045 1079 set_gatesegd(&idt[i], &resvtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
1046 - 0);
1080 + IST_NONE);
1081 + }
1082 +#endif
1047 1083
1048 1084 /*
1049 1085 * 20-31 reserved
1050 1086 */
1051 - for (i = 20; i < 32; i++)
1087 +#if !defined(__xpv)
1088 + for (i = 20; i < 32; i++) {
1089 + set_gatesegd(&idt[i],
1090 + (kpti_enable == 1) ? &tr_invaltrap : &invaltrap,
1091 + KCS_SEL, SDT_SYSIGT, TRP_KPL,
1092 + idt_vector_to_ist(T_INVALTRAP));
1093 + }
1094 +#else
1095 + for (i = 20; i < 32; i++) {
1052 1096 set_gatesegd(&idt[i], &invaltrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
1053 - 0);
1097 + IST_NONE);
1098 + }
1099 +#endif
1054 1100
1055 1101 /*
1056 1102 * interrupts 32 - 255
1057 1103 */
1058 1104 for (i = 32; i < 256; i++) {
1105 +#if !defined(__xpv)
1106 + (void) snprintf(ivctname, sizeof (ivctname),
1107 + (kpti_enable == 1) ? "tr_ivct%d" : "ivct%d", i);
1108 +#else
1059 1109 (void) snprintf(ivctname, sizeof (ivctname), "ivct%d", i);
1110 +#endif
1060 1111 ivctptr = (void (*)(void))kobj_getsymvalue(ivctname, 0);
1061 1112 if (ivctptr == NULL)
1062 1113 panic("kobj_getsymvalue(%s) failed", ivctname);
1063 1114
1064 - set_gatesegd(&idt[i], ivctptr, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
1115 + set_gatesegd(&idt[i], ivctptr, KCS_SEL, SDT_SYSIGT, TRP_KPL,
1116 + idt_vector_to_ist(i));
1065 1117 }
1066 1118
1067 1119 /*
1068 1120 * Now install the common ones. Note that it will overlay some
1069 1121 * entries installed above like T_SYSCALLINT, T_FASTTRAP etc.
1070 1122 */
1071 1123 init_idt_common(idt);
1072 1124 }
1073 1125
1074 1126 #endif /* __xpv */
1075 1127
1076 1128 /*
1077 1129 * The kernel does not deal with LDTs unless a user explicitly creates
1078 1130 * one. Under normal circumstances, the LDTR contains 0. Any process attempting
1079 1131 * to reference the LDT will therefore cause a #gp. System calls made via the
1080 1132 * obsolete lcall mechanism are emulated by the #gp fault handler.
1081 1133 */
1082 1134 static void
1083 1135 init_ldt(void)
1084 1136 {
1085 1137 #if defined(__xpv)
1086 1138 xen_set_ldt(NULL, 0);
1087 1139 #else
1088 1140 wr_ldtr(0);
1089 1141 #endif
1090 1142 }
1091 1143
1092 1144 #if !defined(__xpv)
1093 -#if defined(__amd64)
1094 1145
1095 1146 static void
1096 1147 init_tss(void)
1097 1148 {
1098 - /*
1099 - * tss_rsp0 is dynamically filled in by resume() on each context switch.
1100 - * All exceptions but #DF will run on the thread stack.
1101 - * Set up the double fault stack here.
1102 - */
1103 - ktss0->tss_ist1 =
1104 - (uint64_t)&dblfault_stack0[sizeof (dblfault_stack0)];
1149 + extern struct cpu cpus[];
1105 1150
1106 1151 /*
1107 - * Set I/O bit map offset equal to size of TSS segment limit
1108 - * for no I/O permission map. This will force all user I/O
1109 - * instructions to generate #gp fault.
1152 + * tss_rsp0 is dynamically filled in by resume() (in swtch.s) on each
1153 + * context switch but it'll be overwritten with this same value anyway.
1110 1154 */
1111 - ktss0->tss_bitmapbase = sizeof (*ktss0);
1155 + if (kpti_enable == 1) {
1156 + ktss0->tss_rsp0 = (uint64_t)&cpus->cpu_m.mcpu_kpti.kf_tr_rsp;
1157 + }
1112 1158
1113 - /*
1114 - * Point %tr to descriptor for ktss0 in gdt.
1115 - */
1116 - wr_tsr(KTSS_SEL);
1117 -}
1159 + /* Set up the IST stacks for double fault, NMI, MCE. */
1160 + ktss0->tss_ist1 = (uintptr_t)&dblfault_stack0[sizeof (dblfault_stack0)];
1161 + ktss0->tss_ist2 = (uintptr_t)&nmi_stack0[sizeof (nmi_stack0)];
1162 + ktss0->tss_ist3 = (uintptr_t)&mce_stack0[sizeof (mce_stack0)];
1118 1163
1119 -#elif defined(__i386)
1120 -
1121 -static void
1122 -init_tss(void)
1123 -{
1124 1164 /*
1125 - * ktss0->tss_esp dynamically filled in by resume() on each
1126 - * context switch.
1165 + * This IST stack is used for #DB,#BP (debug) interrupts (when KPTI is
1166 + * enabled), and also for KDI (always).
1127 1167 */
1128 - ktss0->tss_ss0 = KDS_SEL;
1129 - ktss0->tss_eip = (uint32_t)_start;
1130 - ktss0->tss_ds = ktss0->tss_es = ktss0->tss_ss = KDS_SEL;
1131 - ktss0->tss_cs = KCS_SEL;
1132 - ktss0->tss_fs = KFS_SEL;
1133 - ktss0->tss_gs = KGS_SEL;
1134 - ktss0->tss_ldt = ULDT_SEL;
1168 + ktss0->tss_ist4 = (uint64_t)&cpus->cpu_m.mcpu_kpti_dbg.kf_tr_rsp;
1135 1169
1136 - /*
1137 - * Initialize double fault tss.
1138 - */
1139 - dftss0->tss_esp0 = (uint32_t)&dblfault_stack0[sizeof (dblfault_stack0)];
1140 - dftss0->tss_ss0 = KDS_SEL;
1170 + if (kpti_enable == 1) {
1171 + /* This IST stack is used for #GP,#PF,#SS (fault) interrupts. */
1172 + ktss0->tss_ist5 =
1173 + (uint64_t)&cpus->cpu_m.mcpu_kpti_flt.kf_tr_rsp;
1141 1174
1142 - /*
1143 - * tss_cr3 will get initialized in hat_kern_setup() once our page
1144 - * tables have been setup.
1145 - */
1146 - dftss0->tss_eip = (uint32_t)syserrtrap;
1147 - dftss0->tss_esp = (uint32_t)&dblfault_stack0[sizeof (dblfault_stack0)];
1148 - dftss0->tss_cs = KCS_SEL;
1149 - dftss0->tss_ds = KDS_SEL;
1150 - dftss0->tss_es = KDS_SEL;
1151 - dftss0->tss_ss = KDS_SEL;
1152 - dftss0->tss_fs = KFS_SEL;
1153 - dftss0->tss_gs = KGS_SEL;
1175 + /* This IST stack is used for all other intrs (for KPTI). */
1176 + ktss0->tss_ist6 = (uint64_t)&cpus->cpu_m.mcpu_kpti.kf_tr_rsp;
1177 + }
1154 1178
1155 1179 /*
1156 1180 * Set I/O bit map offset equal to size of TSS segment limit
1157 1181 * for no I/O permission map. This will force all user I/O
1158 1182 * instructions to generate #gp fault.
1159 1183 */
1160 1184 ktss0->tss_bitmapbase = sizeof (*ktss0);
1161 1185
1162 1186 /*
1163 1187 * Point %tr to descriptor for ktss0 in gdt.
1164 1188 */
1165 1189 wr_tsr(KTSS_SEL);
1166 1190 }
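To summarize the assignments above (a sketch inferred from this function and idt_vector_to_ist(); it assumes the IST_* constants number the TSS IST slots in this order):

	IST_DF        tss_ist1   dblfault_stack0       always
	IST_NMI       tss_ist2   nmi_stack0            always
	IST_MCE       tss_ist3   mce_stack0            always
	IST_DBG       tss_ist4   mcpu_kpti_dbg frame   KDI always; #DB/#BP with KPTI
	IST_NESTABLE  tss_ist5   mcpu_kpti_flt frame   #GP/#PF/#SS with KPTI
	IST_DEFAULT   tss_ist6   mcpu_kpti frame       all other vectors with KPTI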
1167 1191
1168 -#endif /* __i386 */
1169 1192 #endif /* !__xpv */
1170 1193
1171 1194 #if defined(__xpv)
1172 1195
1173 1196 void
1174 1197 init_desctbls(void)
1175 1198 {
1176 1199 uint_t vec;
1177 1200 user_desc_t *gdt;
1178 1201
1179 1202 /*
1180 1203 * Setup and install our GDT.
1181 1204 */
1182 1205 gdt = init_gdt();
1183 1206
1184 1207 /*
1185 1208 * Store static pa of gdt to speed up pa_to_ma() translations
1186 1209 * on lwp context switches.
1187 1210 */
1188 1211 ASSERT(IS_P2ALIGNED((uintptr_t)gdt, PAGESIZE));
1189 1212 CPU->cpu_gdt = gdt;
1190 1213 CPU->cpu_m.mcpu_gdtpa = pfn_to_pa(va_to_pfn(gdt));
1191 1214
1192 1215 /*
1193 1216 * Setup and install our IDT.
1194 1217 */
1195 1218 #if !defined(__lint)
1196 1219 ASSERT(NIDT * sizeof (*idt0) <= PAGESIZE);
1197 1220 #endif
1198 1221 idt0 = (gate_desc_t *)BOP_ALLOC(bootops, (caddr_t)IDT_VA,
1199 1222 PAGESIZE, PAGESIZE);
1200 1223 bzero(idt0, PAGESIZE);
1201 1224 init_idt(idt0);
1202 1225 for (vec = 0; vec < NIDT; vec++)
1203 1226 xen_idt_write(&idt0[vec], vec);
1204 1227
1205 1228 CPU->cpu_idt = idt0;
1206 1229
1207 1230 /*
1208 1231 * set default kernel stack
1209 1232 */
1210 1233 xen_stack_switch(KDS_SEL,
1211 1234 (ulong_t)&dblfault_stack0[sizeof (dblfault_stack0)]);
1212 1235
1213 1236 xen_init_callbacks();
1214 1237
1215 1238 init_ldt();
1216 1239 }
1217 1240
1218 1241 #else /* __xpv */
1219 1242
1220 1243 void
1221 1244 init_desctbls(void)
1222 1245 {
1223 1246 user_desc_t *gdt;
1224 1247 desctbr_t idtr;
1225 1248
1226 1249 /*
1227 1250 * Allocate IDT and TSS structures on unique pages for better
1228 1251 * performance in virtual machines.
1229 1252 */
1230 1253 #if !defined(__lint)
1231 1254 ASSERT(NIDT * sizeof (*idt0) <= PAGESIZE);
1232 1255 #endif
1233 1256 idt0 = (gate_desc_t *)BOP_ALLOC(bootops, (caddr_t)IDT_VA,
1234 1257 PAGESIZE, PAGESIZE);
1235 1258 bzero(idt0, PAGESIZE);
1236 1259 #if !defined(__lint)
1237 1260 ASSERT(sizeof (*ktss0) <= PAGESIZE);
1238 1261 #endif
1239 1262 ktss0 = (tss_t *)BOP_ALLOC(bootops, (caddr_t)KTSS_VA,
1240 1263 PAGESIZE, PAGESIZE);
1241 1264 bzero(ktss0, PAGESIZE);
1242 1265
1243 1266 #if defined(__i386)
1244 1267 #if !defined(__lint)
1245 1268 ASSERT(sizeof (*dftss0) <= PAGESIZE);
1246 1269 #endif
1247 1270 dftss0 = (tss_t *)BOP_ALLOC(bootops, (caddr_t)DFTSS_VA,
1248 1271 PAGESIZE, PAGESIZE);
1249 1272 bzero(dftss0, PAGESIZE);
1250 1273 #endif
1251 1274
1252 1275 /*
1253 1276 * Setup and install our GDT.
1254 1277 */
1255 1278 gdt = init_gdt();
1256 1279 ASSERT(IS_P2ALIGNED((uintptr_t)gdt, PAGESIZE));
1257 1280 CPU->cpu_gdt = gdt;
1258 1281
1259 1282 /*
1283 + * Initialize this CPU's LDT.
1284 + */
1285 + CPU->cpu_m.mcpu_ldt = BOP_ALLOC(bootops, (caddr_t)LDT_VA,
1286 + LDT_CPU_SIZE, PAGESIZE);
1287 + bzero(CPU->cpu_m.mcpu_ldt, LDT_CPU_SIZE);
1288 + CPU->cpu_m.mcpu_ldt_len = 0;
1289 +
1290 + /*
1260 1291 * Setup and install our IDT.
1261 1292 */
1262 1293 init_idt(idt0);
1263 1294
1264 1295 idtr.dtr_base = (uintptr_t)idt0;
1265 1296 idtr.dtr_limit = (NIDT * sizeof (*idt0)) - 1;
1266 1297 wr_idtr(&idtr);
1267 1298 CPU->cpu_idt = idt0;
1268 1299
1269 1300 #if defined(__i386)
1270 1301 /*
1271 1302 * We maintain a description of idt0 in convenient IDTR format
1272 1303 * for #pf's on some older pentium processors. See pentium_pftrap().
1273 1304 */
1274 1305 idt0_default_r = idtr;
1275 1306 #endif /* __i386 */
1276 1307
1277 1308 init_tss();
1278 1309 CPU->cpu_tss = ktss0;
1279 1310 init_ldt();
1311 +
1312 +	/* Stash this so that the NMI, MCE, #DF and KDI handlers can use it. */
1313 + kpti_safe_cr3 = (uint64_t)getcr3();
1280 1314 }
1281 1315
1282 1316 #endif /* __xpv */
1283 1317
1284 1318 /*
1285 1319 * In the early kernel, we need to set up a simple GDT to run on.
1286 1320 *
1287 1321 * XXPV Can dboot use this too? See dboot_gdt.s
1288 1322 */
1289 1323 void
1290 1324 init_boot_gdt(user_desc_t *bgdt)
1291 1325 {
1292 1326 #if defined(__amd64)
1293 1327 set_usegd(&bgdt[GDT_B32DATA], SDP_LONG, NULL, -1, SDT_MEMRWA, SEL_KPL,
1294 1328 SDP_PAGES, SDP_OP32);
1295 1329 set_usegd(&bgdt[GDT_B64CODE], SDP_LONG, NULL, -1, SDT_MEMERA, SEL_KPL,
1296 1330 SDP_PAGES, SDP_OP32);
1297 1331 #elif defined(__i386)
1298 1332 set_usegd(&bgdt[GDT_B32DATA], NULL, -1, SDT_MEMRWA, SEL_KPL,
1299 1333 SDP_PAGES, SDP_OP32);
1300 1334 set_usegd(&bgdt[GDT_B32CODE], NULL, -1, SDT_MEMERA, SEL_KPL,
1301 1335 SDP_PAGES, SDP_OP32);
1302 1336 #endif /* __i386 */
1303 1337 }
1304 1338
1305 1339 /*
1306 1340 * Enable interpositioning on the system call path by rewriting the
1307 1341 * sys{call|enter} MSRs and the syscall-related entries in the IDT to use
1308 1342 * the branded entry points.
1309 1343 */
1310 1344 void
1311 1345 brand_interpositioning_enable(void)
1312 1346 {
1313 1347 gate_desc_t *idt = CPU->cpu_idt;
1314 1348 int i;
1315 1349
1316 1350 ASSERT(curthread->t_preempt != 0 || getpil() >= DISP_LEVEL);
1317 1351
1318 1352 for (i = 0; brand_tbl[i].ih_inum; i++) {
1319 1353 idt[brand_tbl[i].ih_inum] = brand_tbl[i].ih_interp_desc;
1320 1354 #if defined(__xpv)
1321 1355 xen_idt_write(&idt[brand_tbl[i].ih_inum],
1322 1356 brand_tbl[i].ih_inum);
1323 1357 #endif
1324 1358 }
1325 1359
1326 1360 #if defined(__amd64)
1327 1361 #if defined(__xpv)
1328 1362
1329 1363 /*
1330 1364 * Currently the hypervisor only supports 64-bit syscalls via
1331 1365 * syscall instruction. The 32-bit syscalls are handled by
1332 1366 * interrupt gate above.
1333 1367 */
1334 1368 xen_set_callback(brand_sys_syscall, CALLBACKTYPE_syscall,
1335 1369 CALLBACKF_mask_events);
1336 1370
1337 1371 #else
1338 1372
1339 1373 if (is_x86_feature(x86_featureset, X86FSET_ASYSC)) {
1340 - wrmsr(MSR_AMD_LSTAR, (uintptr_t)brand_sys_syscall);
1341 - wrmsr(MSR_AMD_CSTAR, (uintptr_t)brand_sys_syscall32);
1374 + if (kpti_enable == 1) {
1375 + wrmsr(MSR_AMD_LSTAR, (uintptr_t)tr_brand_sys_syscall);
1376 + wrmsr(MSR_AMD_CSTAR, (uintptr_t)tr_brand_sys_syscall32);
1377 + } else {
1378 + wrmsr(MSR_AMD_LSTAR, (uintptr_t)brand_sys_syscall);
1379 + wrmsr(MSR_AMD_CSTAR, (uintptr_t)brand_sys_syscall32);
1380 + }
1342 1381 }
1343 1382
1344 1383 #endif
1345 1384 #endif /* __amd64 */
1346 1385
1347 - if (is_x86_feature(x86_featureset, X86FSET_SEP))
1348 - wrmsr(MSR_INTC_SEP_EIP, (uintptr_t)brand_sys_sysenter);
1386 + if (is_x86_feature(x86_featureset, X86FSET_SEP)) {
1387 + if (kpti_enable == 1) {
1388 + wrmsr(MSR_INTC_SEP_EIP,
1389 + (uintptr_t)tr_brand_sys_sysenter);
1390 + } else {
1391 + wrmsr(MSR_INTC_SEP_EIP, (uintptr_t)brand_sys_sysenter);
1392 + }
1393 + }
1349 1394 }
1350 1395
1351 1396 /*
1352 1397 * Disable interpositioning on the system call path by rewriting the
1353 1398 * sys{call|enter} MSRs and the syscall-related entries in the IDT to use
1354 1399 * the standard entry points, which bypass the interpositioning hooks.
1355 1400 */
1356 1401 void
1357 1402 brand_interpositioning_disable(void)
1358 1403 {
1359 1404 gate_desc_t *idt = CPU->cpu_idt;
1360 1405 int i;
1361 1406
1362 1407 ASSERT(curthread->t_preempt != 0 || getpil() >= DISP_LEVEL);
1363 1408
1364 1409 for (i = 0; brand_tbl[i].ih_inum; i++) {
1365 1410 idt[brand_tbl[i].ih_inum] = brand_tbl[i].ih_default_desc;
1366 1411 #if defined(__xpv)
1367 1412 xen_idt_write(&idt[brand_tbl[i].ih_inum],
1368 1413 brand_tbl[i].ih_inum);
1369 1414 #endif
1370 1415 }
1371 1416
1372 1417 #if defined(__amd64)
1373 1418 #if defined(__xpv)
1374 1419
1375 1420 /*
1376 1421 * See comment above in brand_interpositioning_enable.
1377 1422 */
1378 1423 xen_set_callback(sys_syscall, CALLBACKTYPE_syscall,
1379 1424 CALLBACKF_mask_events);
1380 1425
1381 1426 #else
1382 1427
1383 1428 if (is_x86_feature(x86_featureset, X86FSET_ASYSC)) {
1384 - wrmsr(MSR_AMD_LSTAR, (uintptr_t)sys_syscall);
1385 - wrmsr(MSR_AMD_CSTAR, (uintptr_t)sys_syscall32);
1429 + if (kpti_enable == 1) {
1430 + wrmsr(MSR_AMD_LSTAR, (uintptr_t)tr_sys_syscall);
1431 + wrmsr(MSR_AMD_CSTAR, (uintptr_t)tr_sys_syscall32);
1432 + } else {
1433 + wrmsr(MSR_AMD_LSTAR, (uintptr_t)sys_syscall);
1434 + wrmsr(MSR_AMD_CSTAR, (uintptr_t)sys_syscall32);
1435 + }
1386 1436 }
1387 1437
1388 1438 #endif
1389 1439 #endif /* __amd64 */
1390 1440
1391 - if (is_x86_feature(x86_featureset, X86FSET_SEP))
1392 - wrmsr(MSR_INTC_SEP_EIP, (uintptr_t)sys_sysenter);
1441 + if (is_x86_feature(x86_featureset, X86FSET_SEP)) {
1442 + if (kpti_enable == 1) {
1443 + wrmsr(MSR_INTC_SEP_EIP, (uintptr_t)tr_sys_sysenter);
1444 + } else {
1445 + wrmsr(MSR_INTC_SEP_EIP, (uintptr_t)sys_sysenter);
1446 + }
1447 + }
1393 1448 }