8956 Implement KPTI Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com> Reviewed by: Robert Mustacchi <rm@joyent.com>
1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. 24 */ 25 26 /* 27 * Copyright 2011 Joyent, Inc. All rights reserved. 28 */ 29 30 /* 31 * Copyright (c) 1992 Terrence R. Lambert. 32 * Copyright (c) 1990 The Regents of the University of California. 33 * All rights reserved. 34 * 35 * This code is derived from software contributed to Berkeley by 36 * William Jolitz. 37 * 38 * Redistribution and use in source and binary forms, with or without 39 * modification, are permitted provided that the following conditions 40 * are met: 41 * 1. Redistributions of source code must retain the above copyright 42 * notice, this list of conditions and the following disclaimer. 43 * 2. Redistributions in binary form must reproduce the above copyright 44 * notice, this list of conditions and the following disclaimer in the 45 * documentation and/or other materials provided with the distribution. 46 * 3. All advertising materials mentioning features or use of this software 47 * must display the following acknowledgement: 48 * This product includes software developed by the University of 49 * California, Berkeley and its contributors. 50 * 4. Neither the name of the University nor the names of its contributors 51 * may be used to endorse or promote products derived from this software 52 * without specific prior written permission. 53 * 54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 64 * SUCH DAMAGE. 65 * 66 * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 67 */ 68 69 #include <sys/types.h> 70 #include <sys/sysmacros.h> 71 #include <sys/tss.h> 72 #include <sys/segments.h> 73 #include <sys/trap.h> 74 #include <sys/cpuvar.h> 75 #include <sys/bootconf.h> 76 #include <sys/x86_archext.h> 77 #include <sys/controlregs.h> 78 #include <sys/archsystm.h> 79 #include <sys/machsystm.h> 80 #include <sys/kobj.h> 81 #include <sys/cmn_err.h> 82 #include <sys/reboot.h> 83 #include <sys/kdi.h> 84 #include <sys/mach_mmu.h> 85 #include <sys/systm.h> 86 87 #ifdef __xpv 88 #include <sys/hypervisor.h> 89 #include <vm/as.h> 90 #endif 91 92 #include <sys/promif.h> 93 #include <sys/bootinfo.h> 94 #include <vm/kboot_mmu.h> 95 #include <vm/hat_pte.h> 96 97 /* 98 * cpu0 and default tables and structures. 99 */ 100 user_desc_t *gdt0; 101 #if !defined(__xpv) 102 desctbr_t gdt0_default_r; 103 #endif 104 105 gate_desc_t *idt0; /* interrupt descriptor table */ 106 #if defined(__i386) 107 desctbr_t idt0_default_r; /* describes idt0 in IDTR format */ 108 #endif 109 110 tss_t *ktss0; /* kernel task state structure */ 111 112 #if defined(__i386) 113 tss_t *dftss0; /* #DF double-fault exception */ 114 #endif /* __i386 */ 115 116 user_desc_t zero_udesc; /* base zero user desc native procs */ 117 user_desc_t null_udesc; /* null user descriptor */ 118 system_desc_t null_sdesc; /* null system descriptor */ 119 120 #if defined(__amd64) 121 user_desc_t zero_u32desc; /* 32-bit compatibility procs */ 122 #endif /* __amd64 */ 123 124 #if defined(__amd64) 125 user_desc_t ucs_on; 126 user_desc_t ucs_off; 127 user_desc_t ucs32_on; 128 user_desc_t ucs32_off; 129 #endif /* __amd64 */ 130 131 #pragma align 16(dblfault_stack0) 132 char dblfault_stack0[DEFAULTSTKSZ]; 133 134 extern void fast_null(void); 135 extern hrtime_t get_hrtime(void); 136 extern hrtime_t gethrvtime(void); 137 extern hrtime_t get_hrestime(void); 138 extern uint64_t getlgrp(void); 139 140 void (*(fasttable[]))(void) = { 141 fast_null, /* T_FNULL routine */ 142 fast_null, /* T_FGETFP routine (initially null) */ 143 fast_null, /* T_FSETFP routine (initially null) */ 144 (void (*)())get_hrtime, /* T_GETHRTIME */ 145 (void (*)())gethrvtime, /* T_GETHRVTIME */ 146 (void (*)())get_hrestime, /* T_GETHRESTIME */ 147 (void (*)())getlgrp /* T_GETLGRP */ 148 }; 149 150 /* 151 * Structure containing pre-computed descriptors to allow us to temporarily 152 * interpose on a standard handler. 153 */ 154 struct interposing_handler { 155 int ih_inum; 156 gate_desc_t ih_interp_desc; 157 gate_desc_t ih_default_desc; 158 }; 159 160 /* 161 * The brand infrastructure interposes on two handlers, and we use one as a 162 * NULL signpost. 163 */ 164 static struct interposing_handler brand_tbl[2]; 165 166 /* 167 * software prototypes for default local descriptor table 168 */ 169 170 /* 171 * Routines for loading segment descriptors in format the hardware 172 * can understand. 173 */ 174 175 #if defined(__amd64) 176 177 /* 178 * In long mode we have the new L or long mode attribute bit 179 * for code segments. Only the conforming bit in type is used along 180 * with descriptor priority and present bits. Default operand size must 181 * be zero when in long mode. In 32-bit compatibility mode all fields 182 * are treated as in legacy mode. For data segments while in long mode 183 * only the present bit is loaded. 184 */ 185 void 186 set_usegd(user_desc_t *dp, uint_t lmode, void *base, size_t size, 187 uint_t type, uint_t dpl, uint_t gran, uint_t defopsz) 188 { 189 ASSERT(lmode == SDP_SHORT || lmode == SDP_LONG); 190 191 /* 192 * 64-bit long mode. 193 */ 194 if (lmode == SDP_LONG) 195 dp->usd_def32 = 0; /* 32-bit operands only */ 196 else 197 /* 198 * 32-bit compatibility mode. 199 */ 200 dp->usd_def32 = defopsz; /* 0 = 16, 1 = 32-bit ops */ 201 202 dp->usd_long = lmode; /* 64-bit mode */ 203 dp->usd_type = type; 204 dp->usd_dpl = dpl; 205 dp->usd_p = 1; 206 dp->usd_gran = gran; /* 0 = bytes, 1 = pages */ 207 208 dp->usd_lobase = (uintptr_t)base; 209 dp->usd_midbase = (uintptr_t)base >> 16; 210 dp->usd_hibase = (uintptr_t)base >> (16 + 8); 211 dp->usd_lolimit = size; 212 dp->usd_hilimit = (uintptr_t)size >> 16; 213 } 214 215 #elif defined(__i386) 216 217 /* 218 * Install user segment descriptor for code and data. 219 */ 220 void 221 set_usegd(user_desc_t *dp, void *base, size_t size, uint_t type, 222 uint_t dpl, uint_t gran, uint_t defopsz) 223 { 224 dp->usd_lolimit = size; 225 dp->usd_hilimit = (uintptr_t)size >> 16; 226 227 dp->usd_lobase = (uintptr_t)base; 228 dp->usd_midbase = (uintptr_t)base >> 16; 229 dp->usd_hibase = (uintptr_t)base >> (16 + 8); 230 231 dp->usd_type = type; 232 dp->usd_dpl = dpl; 233 dp->usd_p = 1; 234 dp->usd_def32 = defopsz; /* 0 = 16, 1 = 32 bit operands */ 235 dp->usd_gran = gran; /* 0 = bytes, 1 = pages */ 236 } 237 238 #endif /* __i386 */ 239 240 /* 241 * Install system segment descriptor for LDT and TSS segments. 242 */ 243 244 #if defined(__amd64) 245 246 void 247 set_syssegd(system_desc_t *dp, void *base, size_t size, uint_t type, 248 uint_t dpl) 249 { 250 dp->ssd_lolimit = size; 251 dp->ssd_hilimit = (uintptr_t)size >> 16; 252 253 dp->ssd_lobase = (uintptr_t)base; 254 dp->ssd_midbase = (uintptr_t)base >> 16; 255 dp->ssd_hibase = (uintptr_t)base >> (16 + 8); 256 dp->ssd_hi64base = (uintptr_t)base >> (16 + 8 + 8); 257 258 dp->ssd_type = type; 259 dp->ssd_zero1 = 0; /* must be zero */ 260 dp->ssd_zero2 = 0; 261 dp->ssd_dpl = dpl; 262 dp->ssd_p = 1; 263 dp->ssd_gran = 0; /* force byte units */ 264 } 265 266 void * 267 get_ssd_base(system_desc_t *dp) 268 { 269 uintptr_t base; 270 271 base = (uintptr_t)dp->ssd_lobase | 272 (uintptr_t)dp->ssd_midbase << 16 | 273 (uintptr_t)dp->ssd_hibase << (16 + 8) | 274 (uintptr_t)dp->ssd_hi64base << (16 + 8 + 8); 275 return ((void *)base); 276 } 277 278 #elif defined(__i386) 279 280 void 281 set_syssegd(system_desc_t *dp, void *base, size_t size, uint_t type, 282 uint_t dpl) 283 { 284 dp->ssd_lolimit = size; 285 dp->ssd_hilimit = (uintptr_t)size >> 16; 286 287 dp->ssd_lobase = (uintptr_t)base; 288 dp->ssd_midbase = (uintptr_t)base >> 16; 289 dp->ssd_hibase = (uintptr_t)base >> (16 + 8); 290 291 dp->ssd_type = type; 292 dp->ssd_zero = 0; /* must be zero */ 293 dp->ssd_dpl = dpl; 294 dp->ssd_p = 1; 295 dp->ssd_gran = 0; /* force byte units */ 296 } 297 298 void * 299 get_ssd_base(system_desc_t *dp) 300 { 301 uintptr_t base; 302 303 base = (uintptr_t)dp->ssd_lobase | 304 (uintptr_t)dp->ssd_midbase << 16 | 305 (uintptr_t)dp->ssd_hibase << (16 + 8); 306 return ((void *)base); 307 } 308 309 #endif /* __i386 */ 310 311 /* 312 * Install gate segment descriptor for interrupt, trap, call and task gates. 313 */ 314 315 #if defined(__amd64) 316 317 /*ARGSUSED*/ 318 void 319 set_gatesegd(gate_desc_t *dp, void (*func)(void), selector_t sel, 320 uint_t type, uint_t dpl, uint_t vector) 321 { 322 dp->sgd_looffset = (uintptr_t)func; 323 dp->sgd_hioffset = (uintptr_t)func >> 16; 324 dp->sgd_hi64offset = (uintptr_t)func >> (16 + 16); 325 326 dp->sgd_selector = (uint16_t)sel; 327 328 /* 329 * For 64 bit native we use the IST stack mechanism 330 * for double faults. All other traps use the CPL = 0 331 * (tss_rsp0) stack. 332 */ 333 #if !defined(__xpv) 334 if (vector == T_DBLFLT) 335 dp->sgd_ist = 1; 336 else 337 #endif 338 dp->sgd_ist = 0; 339 340 dp->sgd_type = type; 341 dp->sgd_dpl = dpl; 342 dp->sgd_p = 1; 343 } 344 345 #elif defined(__i386) 346 347 /*ARGSUSED*/ 348 void 349 set_gatesegd(gate_desc_t *dp, void (*func)(void), selector_t sel, 350 uint_t type, uint_t dpl, uint_t unused) 351 { 352 dp->sgd_looffset = (uintptr_t)func; 353 dp->sgd_hioffset = (uintptr_t)func >> 16; 354 355 dp->sgd_selector = (uint16_t)sel; 356 dp->sgd_stkcpy = 0; /* always zero bytes */ 357 dp->sgd_type = type; 358 dp->sgd_dpl = dpl; 359 dp->sgd_p = 1; 360 } 361 362 #endif /* __i386 */ 363 364 /* 365 * Updates a single user descriptor in the the GDT of the current cpu. 366 * Caller is responsible for preventing cpu migration. 367 */ 368 369 void 370 gdt_update_usegd(uint_t sidx, user_desc_t *udp) 371 { 372 #if defined(__xpv) 373 374 uint64_t dpa = CPU->cpu_m.mcpu_gdtpa + sizeof (*udp) * sidx; 375 376 if (HYPERVISOR_update_descriptor(pa_to_ma(dpa), *(uint64_t *)udp)) 377 panic("gdt_update_usegd: HYPERVISOR_update_descriptor"); 378 379 #else /* __xpv */ 380 381 CPU->cpu_gdt[sidx] = *udp; 382 383 #endif /* __xpv */ 384 } 385 386 /* 387 * Writes single descriptor pointed to by udp into a processes 388 * LDT entry pointed to by ldp. 389 */ 390 int 391 ldt_update_segd(user_desc_t *ldp, user_desc_t *udp) 392 { 393 #if defined(__xpv) 394 395 uint64_t dpa; 396 397 dpa = mmu_ptob(hat_getpfnum(kas.a_hat, (caddr_t)ldp)) | 398 ((uintptr_t)ldp & PAGEOFFSET); 399 400 /* 401 * The hypervisor is a little more restrictive about what it 402 * supports in the LDT. 403 */ 404 if (HYPERVISOR_update_descriptor(pa_to_ma(dpa), *(uint64_t *)udp) != 0) 405 return (EINVAL); 406 407 #else /* __xpv */ 408 409 *ldp = *udp; 410 411 #endif /* __xpv */ 412 return (0); 413 } 414 415 #if defined(__xpv) 416 417 /* 418 * Converts hw format gate descriptor into pseudo-IDT format for the hypervisor. 419 * Returns true if a valid entry was written. 420 */ 421 int 422 xen_idt_to_trap_info(uint_t vec, gate_desc_t *sgd, void *ti_arg) 423 { 424 trap_info_t *ti = ti_arg; /* XXPV Aargh - segments.h comment */ 425 426 /* 427 * skip holes in the IDT 428 */ 429 if (GATESEG_GETOFFSET(sgd) == 0) 430 return (0); 431 432 ASSERT(sgd->sgd_type == SDT_SYSIGT); 433 ti->vector = vec; 434 TI_SET_DPL(ti, sgd->sgd_dpl); 435 436 /* 437 * Is this an interrupt gate? 438 */ 439 if (sgd->sgd_type == SDT_SYSIGT) { 440 /* LINTED */ 441 TI_SET_IF(ti, 1); 442 } 443 ti->cs = sgd->sgd_selector; 444 #if defined(__amd64) 445 ti->cs |= SEL_KPL; /* force into ring 3. see KCS_SEL */ 446 #endif 447 ti->address = GATESEG_GETOFFSET(sgd); 448 return (1); 449 } 450 451 /* 452 * Convert a single hw format gate descriptor and write it into our virtual IDT. 453 */ 454 void 455 xen_idt_write(gate_desc_t *sgd, uint_t vec) 456 { 457 trap_info_t trapinfo[2]; 458 459 bzero(trapinfo, sizeof (trapinfo)); 460 if (xen_idt_to_trap_info(vec, sgd, &trapinfo[0]) == 0) 461 return; 462 if (xen_set_trap_table(trapinfo) != 0) 463 panic("xen_idt_write: xen_set_trap_table() failed"); 464 } 465 466 #endif /* __xpv */ 467 468 #if defined(__amd64) 469 470 /* 471 * Build kernel GDT. 472 */ 473 474 static void 475 init_gdt_common(user_desc_t *gdt) 476 { 477 int i; 478 479 /* 480 * 64-bit kernel code segment. 481 */ 482 set_usegd(&gdt[GDT_KCODE], SDP_LONG, NULL, 0, SDT_MEMERA, SEL_KPL, 483 SDP_PAGES, SDP_OP32); 484 485 /* 486 * 64-bit kernel data segment. The limit attribute is ignored in 64-bit 487 * mode, but we set it here to 0xFFFF so that we can use the SYSRET 488 * instruction to return from system calls back to 32-bit applications. 489 * SYSRET doesn't update the base, limit, or attributes of %ss or %ds 490 * descriptors. We therefore must ensure that the kernel uses something, 491 * though it will be ignored by hardware, that is compatible with 32-bit 492 * apps. For the same reason we must set the default op size of this 493 * descriptor to 32-bit operands. 494 */ 495 set_usegd(&gdt[GDT_KDATA], SDP_LONG, NULL, -1, SDT_MEMRWA, 496 SEL_KPL, SDP_PAGES, SDP_OP32); 497 gdt[GDT_KDATA].usd_def32 = 1; 498 499 /* 500 * 64-bit user code segment. 501 */ 502 set_usegd(&gdt[GDT_UCODE], SDP_LONG, NULL, 0, SDT_MEMERA, SEL_UPL, 503 SDP_PAGES, SDP_OP32); 504 505 /* 506 * 32-bit user code segment. 507 */ 508 set_usegd(&gdt[GDT_U32CODE], SDP_SHORT, NULL, -1, SDT_MEMERA, 509 SEL_UPL, SDP_PAGES, SDP_OP32); 510 511 /* 512 * See gdt_ucode32() and gdt_ucode_native(). 513 */ 514 ucs_on = ucs_off = gdt[GDT_UCODE]; 515 ucs_off.usd_p = 0; /* forces #np fault */ 516 517 ucs32_on = ucs32_off = gdt[GDT_U32CODE]; 518 ucs32_off.usd_p = 0; /* forces #np fault */ 519 520 /* 521 * 32 and 64 bit data segments can actually share the same descriptor. 522 * In long mode only the present bit is checked but all other fields 523 * are loaded. But in compatibility mode all fields are interpreted 524 * as in legacy mode so they must be set correctly for a 32-bit data 525 * segment. 526 */ 527 set_usegd(&gdt[GDT_UDATA], SDP_SHORT, NULL, -1, SDT_MEMRWA, SEL_UPL, 528 SDP_PAGES, SDP_OP32); 529 530 #if !defined(__xpv) 531 532 /* 533 * The 64-bit kernel has no default LDT. By default, the LDT descriptor 534 * in the GDT is 0. 535 */ 536 537 /* 538 * Kernel TSS 539 */ 540 set_syssegd((system_desc_t *)&gdt[GDT_KTSS], ktss0, 541 sizeof (*ktss0) - 1, SDT_SYSTSS, SEL_KPL); 542 543 #endif /* !__xpv */ 544 545 /* 546 * Initialize fs and gs descriptors for 32 bit processes. 547 * Only attributes and limits are initialized, the effective 548 * base address is programmed via fsbase/gsbase. 549 */ 550 set_usegd(&gdt[GDT_LWPFS], SDP_SHORT, NULL, -1, SDT_MEMRWA, 551 SEL_UPL, SDP_PAGES, SDP_OP32); 552 set_usegd(&gdt[GDT_LWPGS], SDP_SHORT, NULL, -1, SDT_MEMRWA, 553 SEL_UPL, SDP_PAGES, SDP_OP32); 554 555 /* 556 * Initialize the descriptors set aside for brand usage. 557 * Only attributes and limits are initialized. 558 */ 559 for (i = GDT_BRANDMIN; i <= GDT_BRANDMAX; i++) 560 set_usegd(&gdt0[i], SDP_SHORT, NULL, -1, SDT_MEMRWA, 561 SEL_UPL, SDP_PAGES, SDP_OP32); 562 563 /* 564 * Initialize convenient zero base user descriptors for clearing 565 * lwp private %fs and %gs descriptors in GDT. See setregs() for 566 * an example. 567 */ 568 set_usegd(&zero_udesc, SDP_LONG, 0, 0, SDT_MEMRWA, SEL_UPL, 569 SDP_BYTES, SDP_OP32); 570 set_usegd(&zero_u32desc, SDP_SHORT, 0, -1, SDT_MEMRWA, SEL_UPL, 571 SDP_PAGES, SDP_OP32); 572 } 573 574 #if defined(__xpv) 575 576 static user_desc_t * 577 init_gdt(void) 578 { 579 uint64_t gdtpa; 580 ulong_t ma[1]; /* XXPV should be a memory_t */ 581 ulong_t addr; 582 583 #if !defined(__lint) 584 /* 585 * Our gdt is never larger than a single page. 586 */ 587 ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE); 588 #endif 589 gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA, 590 PAGESIZE, PAGESIZE); 591 bzero(gdt0, PAGESIZE); 592 593 init_gdt_common(gdt0); 594 595 /* 596 * XXX Since we never invoke kmdb until after the kernel takes 597 * over the descriptor tables why not have it use the kernel's 598 * selectors? 599 */ 600 if (boothowto & RB_DEBUG) { 601 set_usegd(&gdt0[GDT_B32DATA], SDP_LONG, NULL, -1, SDT_MEMRWA, 602 SEL_KPL, SDP_PAGES, SDP_OP32); 603 set_usegd(&gdt0[GDT_B64CODE], SDP_LONG, NULL, -1, SDT_MEMERA, 604 SEL_KPL, SDP_PAGES, SDP_OP32); 605 } 606 607 /* 608 * Clear write permission for page containing the gdt and install it. 609 */ 610 gdtpa = pfn_to_pa(va_to_pfn(gdt0)); 611 ma[0] = (ulong_t)(pa_to_ma(gdtpa) >> PAGESHIFT); 612 kbm_read_only((uintptr_t)gdt0, gdtpa); 613 xen_set_gdt(ma, NGDT); 614 615 /* 616 * Reload the segment registers to use the new GDT. 617 * On 64-bit, fixup KCS_SEL to be in ring 3. 618 * See KCS_SEL in segments.h. 619 */ 620 load_segment_registers((KCS_SEL | SEL_KPL), KFS_SEL, KGS_SEL, KDS_SEL); 621 622 /* 623 * setup %gs for kernel 624 */ 625 xen_set_segment_base(SEGBASE_GS_KERNEL, (ulong_t)&cpus[0]); 626 627 /* 628 * XX64 We should never dereference off "other gsbase" or 629 * "fsbase". So, we should arrange to point FSBASE and 630 * KGSBASE somewhere truly awful e.g. point it at the last 631 * valid address below the hole so that any attempts to index 632 * off them cause an exception. 633 * 634 * For now, point it at 8G -- at least it should be unmapped 635 * until some 64-bit processes run. 636 */ 637 addr = 0x200000000ul; 638 xen_set_segment_base(SEGBASE_FS, addr); 639 xen_set_segment_base(SEGBASE_GS_USER, addr); 640 xen_set_segment_base(SEGBASE_GS_USER_SEL, 0); 641 642 return (gdt0); 643 } 644 645 #else /* __xpv */ 646 647 static user_desc_t * 648 init_gdt(void) 649 { 650 desctbr_t r_bgdt, r_gdt; 651 user_desc_t *bgdt; 652 653 #if !defined(__lint) 654 /* 655 * Our gdt is never larger than a single page. 656 */ 657 ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE); 658 #endif 659 gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA, 660 PAGESIZE, PAGESIZE); 661 bzero(gdt0, PAGESIZE); 662 663 init_gdt_common(gdt0); 664 665 /* 666 * Copy in from boot's gdt to our gdt. 667 * Entry 0 is the null descriptor by definition. 668 */ 669 rd_gdtr(&r_bgdt); 670 bgdt = (user_desc_t *)r_bgdt.dtr_base; 671 if (bgdt == NULL) 672 panic("null boot gdt"); 673 674 gdt0[GDT_B32DATA] = bgdt[GDT_B32DATA]; 675 gdt0[GDT_B32CODE] = bgdt[GDT_B32CODE]; 676 gdt0[GDT_B16CODE] = bgdt[GDT_B16CODE]; 677 gdt0[GDT_B16DATA] = bgdt[GDT_B16DATA]; 678 gdt0[GDT_B64CODE] = bgdt[GDT_B64CODE]; 679 680 /* 681 * Install our new GDT 682 */ 683 r_gdt.dtr_limit = (sizeof (*gdt0) * NGDT) - 1; 684 r_gdt.dtr_base = (uintptr_t)gdt0; 685 wr_gdtr(&r_gdt); 686 687 /* 688 * Reload the segment registers to use the new GDT 689 */ 690 load_segment_registers(KCS_SEL, KFS_SEL, KGS_SEL, KDS_SEL); 691 692 /* 693 * setup %gs for kernel 694 */ 695 wrmsr(MSR_AMD_GSBASE, (uint64_t)&cpus[0]); 696 697 /* 698 * XX64 We should never dereference off "other gsbase" or 699 * "fsbase". So, we should arrange to point FSBASE and 700 * KGSBASE somewhere truly awful e.g. point it at the last 701 * valid address below the hole so that any attempts to index 702 * off them cause an exception. 703 * 704 * For now, point it at 8G -- at least it should be unmapped 705 * until some 64-bit processes run. 706 */ 707 wrmsr(MSR_AMD_FSBASE, 0x200000000ul); 708 wrmsr(MSR_AMD_KGSBASE, 0x200000000ul); 709 return (gdt0); 710 } 711 712 #endif /* __xpv */ 713 714 #elif defined(__i386) 715 716 static void 717 init_gdt_common(user_desc_t *gdt) 718 { 719 int i; 720 721 /* 722 * Text and data for both kernel and user span entire 32 bit 723 * address space. 724 */ 725 726 /* 727 * kernel code segment. 728 */ 729 set_usegd(&gdt[GDT_KCODE], NULL, -1, SDT_MEMERA, SEL_KPL, SDP_PAGES, 730 SDP_OP32); 731 732 /* 733 * kernel data segment. 734 */ 735 set_usegd(&gdt[GDT_KDATA], NULL, -1, SDT_MEMRWA, SEL_KPL, SDP_PAGES, 736 SDP_OP32); 737 738 /* 739 * user code segment. 740 */ 741 set_usegd(&gdt[GDT_UCODE], NULL, -1, SDT_MEMERA, SEL_UPL, SDP_PAGES, 742 SDP_OP32); 743 744 /* 745 * user data segment. 746 */ 747 set_usegd(&gdt[GDT_UDATA], NULL, -1, SDT_MEMRWA, SEL_UPL, SDP_PAGES, 748 SDP_OP32); 749 750 #if !defined(__xpv) 751 752 /* 753 * TSS for T_DBLFLT (double fault) handler 754 */ 755 set_syssegd((system_desc_t *)&gdt[GDT_DBFLT], dftss0, 756 sizeof (*dftss0) - 1, SDT_SYSTSS, SEL_KPL); 757 758 /* 759 * TSS for kernel 760 */ 761 set_syssegd((system_desc_t *)&gdt[GDT_KTSS], ktss0, 762 sizeof (*ktss0) - 1, SDT_SYSTSS, SEL_KPL); 763 764 #endif /* !__xpv */ 765 766 /* 767 * %gs selector for kernel 768 */ 769 set_usegd(&gdt[GDT_GS], &cpus[0], sizeof (struct cpu) -1, SDT_MEMRWA, 770 SEL_KPL, SDP_BYTES, SDP_OP32); 771 772 /* 773 * Initialize lwp private descriptors. 774 * Only attributes and limits are initialized, the effective 775 * base address is programmed via fsbase/gsbase. 776 */ 777 set_usegd(&gdt[GDT_LWPFS], NULL, (size_t)-1, SDT_MEMRWA, SEL_UPL, 778 SDP_PAGES, SDP_OP32); 779 set_usegd(&gdt[GDT_LWPGS], NULL, (size_t)-1, SDT_MEMRWA, SEL_UPL, 780 SDP_PAGES, SDP_OP32); 781 782 /* 783 * Initialize the descriptors set aside for brand usage. 784 * Only attributes and limits are initialized. 785 */ 786 for (i = GDT_BRANDMIN; i <= GDT_BRANDMAX; i++) 787 set_usegd(&gdt0[i], NULL, (size_t)-1, SDT_MEMRWA, SEL_UPL, 788 SDP_PAGES, SDP_OP32); 789 /* 790 * Initialize convenient zero base user descriptor for clearing 791 * lwp private %fs and %gs descriptors in GDT. See setregs() for 792 * an example. 793 */ 794 set_usegd(&zero_udesc, NULL, -1, SDT_MEMRWA, SEL_UPL, 795 SDP_BYTES, SDP_OP32); 796 } 797 798 #if defined(__xpv) 799 800 static user_desc_t * 801 init_gdt(void) 802 { 803 uint64_t gdtpa; 804 ulong_t ma[1]; /* XXPV should be a memory_t */ 805 806 #if !defined(__lint) 807 /* 808 * Our gdt is never larger than a single page. 809 */ 810 ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE); 811 #endif 812 gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA, 813 PAGESIZE, PAGESIZE); 814 bzero(gdt0, PAGESIZE); 815 816 init_gdt_common(gdt0); 817 gdtpa = pfn_to_pa(va_to_pfn(gdt0)); 818 819 /* 820 * XXX Since we never invoke kmdb until after the kernel takes 821 * over the descriptor tables why not have it use the kernel's 822 * selectors? 823 */ 824 if (boothowto & RB_DEBUG) { 825 set_usegd(&gdt0[GDT_B32DATA], NULL, -1, SDT_MEMRWA, SEL_KPL, 826 SDP_PAGES, SDP_OP32); 827 set_usegd(&gdt0[GDT_B32CODE], NULL, -1, SDT_MEMERA, SEL_KPL, 828 SDP_PAGES, SDP_OP32); 829 } 830 831 /* 832 * Clear write permission for page containing the gdt and install it. 833 */ 834 ma[0] = (ulong_t)(pa_to_ma(gdtpa) >> PAGESHIFT); 835 kbm_read_only((uintptr_t)gdt0, gdtpa); 836 xen_set_gdt(ma, NGDT); 837 838 /* 839 * Reload the segment registers to use the new GDT 840 */ 841 load_segment_registers( 842 KCS_SEL, KDS_SEL, KDS_SEL, KFS_SEL, KGS_SEL, KDS_SEL); 843 844 return (gdt0); 845 } 846 847 #else /* __xpv */ 848 849 static user_desc_t * 850 init_gdt(void) 851 { 852 desctbr_t r_bgdt, r_gdt; 853 user_desc_t *bgdt; 854 855 #if !defined(__lint) 856 /* 857 * Our gdt is never larger than a single page. 858 */ 859 ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE); 860 #endif 861 /* 862 * XXX this allocation belongs in our caller, not here. 863 */ 864 gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA, 865 PAGESIZE, PAGESIZE); 866 bzero(gdt0, PAGESIZE); 867 868 init_gdt_common(gdt0); 869 870 /* 871 * Copy in from boot's gdt to our gdt entries. 872 * Entry 0 is null descriptor by definition. 873 */ 874 rd_gdtr(&r_bgdt); 875 bgdt = (user_desc_t *)r_bgdt.dtr_base; 876 if (bgdt == NULL) 877 panic("null boot gdt"); 878 879 gdt0[GDT_B32DATA] = bgdt[GDT_B32DATA]; 880 gdt0[GDT_B32CODE] = bgdt[GDT_B32CODE]; 881 gdt0[GDT_B16CODE] = bgdt[GDT_B16CODE]; 882 gdt0[GDT_B16DATA] = bgdt[GDT_B16DATA]; 883 884 /* 885 * Install our new GDT 886 */ 887 r_gdt.dtr_limit = (sizeof (*gdt0) * NGDT) - 1; 888 r_gdt.dtr_base = (uintptr_t)gdt0; 889 wr_gdtr(&r_gdt); 890 891 /* 892 * Reload the segment registers to use the new GDT 893 */ 894 load_segment_registers( 895 KCS_SEL, KDS_SEL, KDS_SEL, KFS_SEL, KGS_SEL, KDS_SEL); 896 897 return (gdt0); 898 } 899 900 #endif /* __xpv */ 901 #endif /* __i386 */ 902 903 /* 904 * Build kernel IDT. 905 * 906 * Note that for amd64 we pretty much require every gate to be an interrupt 907 * gate which blocks interrupts atomically on entry; that's because of our 908 * dependency on using 'swapgs' every time we come into the kernel to find 909 * the cpu structure. If we get interrupted just before doing that, %cs could 910 * be in kernel mode (so that the trap prolog doesn't do a swapgs), but 911 * %gsbase is really still pointing at something in userland. Bad things will 912 * ensue. We also use interrupt gates for i386 as well even though this is not 913 * required for some traps. 914 * 915 * Perhaps they should have invented a trap gate that does an atomic swapgs? 916 */ 917 static void 918 init_idt_common(gate_desc_t *idt) 919 { 920 set_gatesegd(&idt[T_ZERODIV], &div0trap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 921 0); 922 set_gatesegd(&idt[T_SGLSTP], &dbgtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 923 0); 924 set_gatesegd(&idt[T_NMIFLT], &nmiint, KCS_SEL, SDT_SYSIGT, TRP_KPL, 925 0); 926 set_gatesegd(&idt[T_BPTFLT], &brktrap, KCS_SEL, SDT_SYSIGT, TRP_UPL, 927 0); 928 set_gatesegd(&idt[T_OVFLW], &ovflotrap, KCS_SEL, SDT_SYSIGT, TRP_UPL, 929 0); 930 set_gatesegd(&idt[T_BOUNDFLT], &boundstrap, KCS_SEL, SDT_SYSIGT, 931 TRP_KPL, 0); 932 set_gatesegd(&idt[T_ILLINST], &invoptrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 933 0); 934 set_gatesegd(&idt[T_NOEXTFLT], &ndptrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 935 0); 936 937 /* 938 * double fault handler. 939 * 940 * Note that on the hypervisor a guest does not receive #df faults. 941 * Instead a failsafe event is injected into the guest if its selectors 942 * and/or stack is in a broken state. See xen_failsafe_callback. 943 */ 944 #if !defined(__xpv) 945 #if defined(__amd64) 946 947 set_gatesegd(&idt[T_DBLFLT], &syserrtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 948 T_DBLFLT); 949 950 #elif defined(__i386) 951 952 /* 953 * task gate required. 954 */ 955 set_gatesegd(&idt[T_DBLFLT], NULL, DFTSS_SEL, SDT_SYSTASKGT, TRP_KPL, 956 0); 957 958 #endif /* __i386 */ 959 #endif /* !__xpv */ 960 961 /* 962 * T_EXTOVRFLT coprocessor-segment-overrun not supported. 963 */ 964 965 set_gatesegd(&idt[T_TSSFLT], &invtsstrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 966 0); 967 set_gatesegd(&idt[T_SEGFLT], &segnptrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 968 0); 969 set_gatesegd(&idt[T_STKFLT], &stktrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0); 970 set_gatesegd(&idt[T_GPFLT], &gptrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0); 971 set_gatesegd(&idt[T_PGFLT], &pftrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0); 972 set_gatesegd(&idt[T_EXTERRFLT], &ndperr, KCS_SEL, SDT_SYSIGT, TRP_KPL, 973 0); 974 set_gatesegd(&idt[T_ALIGNMENT], &achktrap, KCS_SEL, SDT_SYSIGT, 975 TRP_KPL, 0); 976 set_gatesegd(&idt[T_MCE], &mcetrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0); 977 set_gatesegd(&idt[T_SIMDFPE], &xmtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0); 978 979 /* 980 * install fast trap handler at 210. 981 */ 982 set_gatesegd(&idt[T_FASTTRAP], &fasttrap, KCS_SEL, SDT_SYSIGT, TRP_UPL, 983 0); 984 985 /* 986 * System call handler. 987 */ 988 #if defined(__amd64) 989 set_gatesegd(&idt[T_SYSCALLINT], &sys_syscall_int, KCS_SEL, SDT_SYSIGT, 990 TRP_UPL, 0); 991 992 #elif defined(__i386) 993 set_gatesegd(&idt[T_SYSCALLINT], &sys_call, KCS_SEL, SDT_SYSIGT, 994 TRP_UPL, 0); 995 #endif /* __i386 */ 996 997 /* 998 * Install the DTrace interrupt handler for the pid provider. 999 */ 1000 set_gatesegd(&idt[T_DTRACE_RET], &dtrace_ret, KCS_SEL, 1001 SDT_SYSIGT, TRP_UPL, 0); 1002 1003 /* 1004 * Prepare interposing descriptor for the syscall handler 1005 * and cache copy of the default descriptor. 1006 */ 1007 brand_tbl[0].ih_inum = T_SYSCALLINT; 1008 brand_tbl[0].ih_default_desc = idt0[T_SYSCALLINT]; 1009 1010 #if defined(__amd64) 1011 set_gatesegd(&(brand_tbl[0].ih_interp_desc), &brand_sys_syscall_int, 1012 KCS_SEL, SDT_SYSIGT, TRP_UPL, 0); 1013 #elif defined(__i386) 1014 set_gatesegd(&(brand_tbl[0].ih_interp_desc), &brand_sys_call, 1015 KCS_SEL, SDT_SYSIGT, TRP_UPL, 0); 1016 #endif /* __i386 */ 1017 1018 brand_tbl[1].ih_inum = 0; 1019 } 1020 1021 #if defined(__xpv) 1022 1023 static void 1024 init_idt(gate_desc_t *idt) 1025 { 1026 init_idt_common(idt); 1027 } 1028 1029 #else /* __xpv */ 1030 1031 static void 1032 init_idt(gate_desc_t *idt) 1033 { 1034 char ivctname[80]; 1035 void (*ivctptr)(void); 1036 int i; 1037 1038 /* 1039 * Initialize entire table with 'reserved' trap and then overwrite 1040 * specific entries. T_EXTOVRFLT (9) is unsupported and reserved 1041 * since it can only be generated on a 386 processor. 15 is also 1042 * unsupported and reserved. 1043 */ 1044 for (i = 0; i < NIDT; i++) 1045 set_gatesegd(&idt[i], &resvtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 1046 0); 1047 1048 /* 1049 * 20-31 reserved 1050 */ 1051 for (i = 20; i < 32; i++) 1052 set_gatesegd(&idt[i], &invaltrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 1053 0); 1054 1055 /* 1056 * interrupts 32 - 255 1057 */ 1058 for (i = 32; i < 256; i++) { 1059 (void) snprintf(ivctname, sizeof (ivctname), "ivct%d", i); 1060 ivctptr = (void (*)(void))kobj_getsymvalue(ivctname, 0); 1061 if (ivctptr == NULL) 1062 panic("kobj_getsymvalue(%s) failed", ivctname); 1063 1064 set_gatesegd(&idt[i], ivctptr, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0); 1065 } 1066 1067 /* 1068 * Now install the common ones. Note that it will overlay some 1069 * entries installed above like T_SYSCALLINT, T_FASTTRAP etc. 1070 */ 1071 init_idt_common(idt); 1072 } 1073 1074 #endif /* __xpv */ 1075 1076 /* 1077 * The kernel does not deal with LDTs unless a user explicitly creates 1078 * one. Under normal circumstances, the LDTR contains 0. Any process attempting 1079 * to reference the LDT will therefore cause a #gp. System calls made via the 1080 * obsolete lcall mechanism are emulated by the #gp fault handler. 1081 */ 1082 static void 1083 init_ldt(void) 1084 { 1085 #if defined(__xpv) 1086 xen_set_ldt(NULL, 0); 1087 #else 1088 wr_ldtr(0); 1089 #endif 1090 } 1091 1092 #if !defined(__xpv) 1093 #if defined(__amd64) 1094 1095 static void 1096 init_tss(void) 1097 { 1098 /* 1099 * tss_rsp0 is dynamically filled in by resume() on each context switch. 1100 * All exceptions but #DF will run on the thread stack. 1101 * Set up the double fault stack here. 1102 */ 1103 ktss0->tss_ist1 = 1104 (uint64_t)&dblfault_stack0[sizeof (dblfault_stack0)]; 1105 1106 /* 1107 * Set I/O bit map offset equal to size of TSS segment limit 1108 * for no I/O permission map. This will force all user I/O 1109 * instructions to generate #gp fault. 1110 */ 1111 ktss0->tss_bitmapbase = sizeof (*ktss0); 1112 1113 /* 1114 * Point %tr to descriptor for ktss0 in gdt. 1115 */ 1116 wr_tsr(KTSS_SEL); 1117 } 1118 1119 #elif defined(__i386) 1120 1121 static void 1122 init_tss(void) 1123 { 1124 /* 1125 * ktss0->tss_esp dynamically filled in by resume() on each 1126 * context switch. 1127 */ 1128 ktss0->tss_ss0 = KDS_SEL; 1129 ktss0->tss_eip = (uint32_t)_start; 1130 ktss0->tss_ds = ktss0->tss_es = ktss0->tss_ss = KDS_SEL; 1131 ktss0->tss_cs = KCS_SEL; 1132 ktss0->tss_fs = KFS_SEL; 1133 ktss0->tss_gs = KGS_SEL; 1134 ktss0->tss_ldt = ULDT_SEL; 1135 1136 /* 1137 * Initialize double fault tss. 1138 */ 1139 dftss0->tss_esp0 = (uint32_t)&dblfault_stack0[sizeof (dblfault_stack0)]; 1140 dftss0->tss_ss0 = KDS_SEL; 1141 1142 /* 1143 * tss_cr3 will get initialized in hat_kern_setup() once our page 1144 * tables have been setup. 1145 */ 1146 dftss0->tss_eip = (uint32_t)syserrtrap; 1147 dftss0->tss_esp = (uint32_t)&dblfault_stack0[sizeof (dblfault_stack0)]; 1148 dftss0->tss_cs = KCS_SEL; 1149 dftss0->tss_ds = KDS_SEL; 1150 dftss0->tss_es = KDS_SEL; 1151 dftss0->tss_ss = KDS_SEL; 1152 dftss0->tss_fs = KFS_SEL; 1153 dftss0->tss_gs = KGS_SEL; 1154 1155 /* 1156 * Set I/O bit map offset equal to size of TSS segment limit 1157 * for no I/O permission map. This will force all user I/O 1158 * instructions to generate #gp fault. 1159 */ 1160 ktss0->tss_bitmapbase = sizeof (*ktss0); 1161 1162 /* 1163 * Point %tr to descriptor for ktss0 in gdt. 1164 */ 1165 wr_tsr(KTSS_SEL); 1166 } 1167 1168 #endif /* __i386 */ 1169 #endif /* !__xpv */ 1170 1171 #if defined(__xpv) 1172 1173 void 1174 init_desctbls(void) 1175 { 1176 uint_t vec; 1177 user_desc_t *gdt; 1178 1179 /* 1180 * Setup and install our GDT. 1181 */ 1182 gdt = init_gdt(); 1183 1184 /* 1185 * Store static pa of gdt to speed up pa_to_ma() translations 1186 * on lwp context switches. 1187 */ 1188 ASSERT(IS_P2ALIGNED((uintptr_t)gdt, PAGESIZE)); 1189 CPU->cpu_gdt = gdt; 1190 CPU->cpu_m.mcpu_gdtpa = pfn_to_pa(va_to_pfn(gdt)); 1191 1192 /* 1193 * Setup and install our IDT. 1194 */ 1195 #if !defined(__lint) 1196 ASSERT(NIDT * sizeof (*idt0) <= PAGESIZE); 1197 #endif 1198 idt0 = (gate_desc_t *)BOP_ALLOC(bootops, (caddr_t)IDT_VA, 1199 PAGESIZE, PAGESIZE); 1200 bzero(idt0, PAGESIZE); 1201 init_idt(idt0); 1202 for (vec = 0; vec < NIDT; vec++) 1203 xen_idt_write(&idt0[vec], vec); 1204 1205 CPU->cpu_idt = idt0; 1206 1207 /* 1208 * set default kernel stack 1209 */ 1210 xen_stack_switch(KDS_SEL, 1211 (ulong_t)&dblfault_stack0[sizeof (dblfault_stack0)]); 1212 1213 xen_init_callbacks(); 1214 1215 init_ldt(); 1216 } 1217 1218 #else /* __xpv */ 1219 1220 void 1221 init_desctbls(void) 1222 { 1223 user_desc_t *gdt; 1224 desctbr_t idtr; 1225 1226 /* 1227 * Allocate IDT and TSS structures on unique pages for better 1228 * performance in virtual machines. 1229 */ 1230 #if !defined(__lint) 1231 ASSERT(NIDT * sizeof (*idt0) <= PAGESIZE); 1232 #endif 1233 idt0 = (gate_desc_t *)BOP_ALLOC(bootops, (caddr_t)IDT_VA, 1234 PAGESIZE, PAGESIZE); 1235 bzero(idt0, PAGESIZE); 1236 #if !defined(__lint) 1237 ASSERT(sizeof (*ktss0) <= PAGESIZE); 1238 #endif 1239 ktss0 = (tss_t *)BOP_ALLOC(bootops, (caddr_t)KTSS_VA, 1240 PAGESIZE, PAGESIZE); 1241 bzero(ktss0, PAGESIZE); 1242 1243 #if defined(__i386) 1244 #if !defined(__lint) 1245 ASSERT(sizeof (*dftss0) <= PAGESIZE); 1246 #endif 1247 dftss0 = (tss_t *)BOP_ALLOC(bootops, (caddr_t)DFTSS_VA, 1248 PAGESIZE, PAGESIZE); 1249 bzero(dftss0, PAGESIZE); 1250 #endif 1251 1252 /* 1253 * Setup and install our GDT. 1254 */ 1255 gdt = init_gdt(); 1256 ASSERT(IS_P2ALIGNED((uintptr_t)gdt, PAGESIZE)); 1257 CPU->cpu_gdt = gdt; 1258 1259 /* 1260 * Setup and install our IDT. 1261 */ 1262 init_idt(idt0); 1263 1264 idtr.dtr_base = (uintptr_t)idt0; 1265 idtr.dtr_limit = (NIDT * sizeof (*idt0)) - 1; 1266 wr_idtr(&idtr); 1267 CPU->cpu_idt = idt0; 1268 1269 #if defined(__i386) 1270 /* 1271 * We maintain a description of idt0 in convenient IDTR format 1272 * for #pf's on some older pentium processors. See pentium_pftrap(). 1273 */ 1274 idt0_default_r = idtr; 1275 #endif /* __i386 */ 1276 1277 init_tss(); 1278 CPU->cpu_tss = ktss0; 1279 init_ldt(); 1280 } 1281 1282 #endif /* __xpv */ 1283 1284 /* 1285 * In the early kernel, we need to set up a simple GDT to run on. 1286 * 1287 * XXPV Can dboot use this too? See dboot_gdt.s 1288 */ 1289 void 1290 init_boot_gdt(user_desc_t *bgdt) 1291 { 1292 #if defined(__amd64) 1293 set_usegd(&bgdt[GDT_B32DATA], SDP_LONG, NULL, -1, SDT_MEMRWA, SEL_KPL, 1294 SDP_PAGES, SDP_OP32); 1295 set_usegd(&bgdt[GDT_B64CODE], SDP_LONG, NULL, -1, SDT_MEMERA, SEL_KPL, 1296 SDP_PAGES, SDP_OP32); 1297 #elif defined(__i386) 1298 set_usegd(&bgdt[GDT_B32DATA], NULL, -1, SDT_MEMRWA, SEL_KPL, 1299 SDP_PAGES, SDP_OP32); 1300 set_usegd(&bgdt[GDT_B32CODE], NULL, -1, SDT_MEMERA, SEL_KPL, 1301 SDP_PAGES, SDP_OP32); 1302 #endif /* __i386 */ 1303 } 1304 1305 /* 1306 * Enable interpositioning on the system call path by rewriting the 1307 * sys{call|enter} MSRs and the syscall-related entries in the IDT to use 1308 * the branded entry points. 1309 */ 1310 void 1311 brand_interpositioning_enable(void) 1312 { 1313 gate_desc_t *idt = CPU->cpu_idt; 1314 int i; 1315 1316 ASSERT(curthread->t_preempt != 0 || getpil() >= DISP_LEVEL); 1317 1318 for (i = 0; brand_tbl[i].ih_inum; i++) { 1319 idt[brand_tbl[i].ih_inum] = brand_tbl[i].ih_interp_desc; 1320 #if defined(__xpv) 1321 xen_idt_write(&idt[brand_tbl[i].ih_inum], 1322 brand_tbl[i].ih_inum); 1323 #endif 1324 } 1325 1326 #if defined(__amd64) 1327 #if defined(__xpv) 1328 1329 /* 1330 * Currently the hypervisor only supports 64-bit syscalls via 1331 * syscall instruction. The 32-bit syscalls are handled by 1332 * interrupt gate above. 1333 */ 1334 xen_set_callback(brand_sys_syscall, CALLBACKTYPE_syscall, 1335 CALLBACKF_mask_events); 1336 1337 #else 1338 1339 if (is_x86_feature(x86_featureset, X86FSET_ASYSC)) { 1340 wrmsr(MSR_AMD_LSTAR, (uintptr_t)brand_sys_syscall); 1341 wrmsr(MSR_AMD_CSTAR, (uintptr_t)brand_sys_syscall32); 1342 } 1343 1344 #endif 1345 #endif /* __amd64 */ 1346 1347 if (is_x86_feature(x86_featureset, X86FSET_SEP)) 1348 wrmsr(MSR_INTC_SEP_EIP, (uintptr_t)brand_sys_sysenter); 1349 } 1350 1351 /* 1352 * Disable interpositioning on the system call path by rewriting the 1353 * sys{call|enter} MSRs and the syscall-related entries in the IDT to use 1354 * the standard entry points, which bypass the interpositioning hooks. 1355 */ 1356 void 1357 brand_interpositioning_disable(void) 1358 { 1359 gate_desc_t *idt = CPU->cpu_idt; 1360 int i; 1361 1362 ASSERT(curthread->t_preempt != 0 || getpil() >= DISP_LEVEL); 1363 1364 for (i = 0; brand_tbl[i].ih_inum; i++) { 1365 idt[brand_tbl[i].ih_inum] = brand_tbl[i].ih_default_desc; 1366 #if defined(__xpv) 1367 xen_idt_write(&idt[brand_tbl[i].ih_inum], 1368 brand_tbl[i].ih_inum); 1369 #endif 1370 } 1371 1372 #if defined(__amd64) 1373 #if defined(__xpv) 1374 1375 /* 1376 * See comment above in brand_interpositioning_enable. 1377 */ 1378 xen_set_callback(sys_syscall, CALLBACKTYPE_syscall, 1379 CALLBACKF_mask_events); 1380 1381 #else 1382 1383 if (is_x86_feature(x86_featureset, X86FSET_ASYSC)) { 1384 wrmsr(MSR_AMD_LSTAR, (uintptr_t)sys_syscall); 1385 wrmsr(MSR_AMD_CSTAR, (uintptr_t)sys_syscall32); 1386 } 1387 1388 #endif 1389 #endif /* __amd64 */ 1390 1391 if (is_x86_feature(x86_featureset, X86FSET_SEP)) 1392 wrmsr(MSR_INTC_SEP_EIP, (uintptr_t)sys_sysenter); 1393 } --- EOF ---