/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * Copyright 2011 Joyent, Inc. All rights reserved.
 */

/*
 * Copyright (c) 1992 Terrence R. Lambert.
 * Copyright (c) 1990 The Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: @(#)machdep.c	7.4 (Berkeley) 6/3/91
 */

#include <sys/types.h>
#include <sys/sysmacros.h>
#include <sys/tss.h>
#include <sys/segments.h>
#include <sys/trap.h>
#include <sys/cpuvar.h>
#include <sys/bootconf.h>
#include <sys/x86_archext.h>
#include <sys/controlregs.h>
#include <sys/archsystm.h>
#include <sys/machsystm.h>
#include <sys/kobj.h>
#include <sys/cmn_err.h>
#include <sys/reboot.h>
#include <sys/kdi.h>
#include <sys/mach_mmu.h>
#include <sys/systm.h>

#ifdef __xpv
#include <sys/hypervisor.h>
#include <vm/as.h>
#endif

#include <sys/promif.h>
#include <sys/bootinfo.h>
#include <vm/kboot_mmu.h>
#include <vm/hat_pte.h>

/*
 * cpu0 and default tables and structures.
 */
user_desc_t	*gdt0;
#if !defined(__xpv)
desctbr_t	gdt0_default_r;
#endif

gate_desc_t	*idt0;		/* interrupt descriptor table */
#if defined(__i386)
desctbr_t	idt0_default_r;		/* describes idt0 in IDTR format */
#endif

tss_t		*ktss0;			/* kernel task state structure */

#if defined(__i386)
tss_t		*dftss0;		/* #DF double-fault exception */
#endif	/* __i386 */

user_desc_t	zero_udesc;		/* base zero user desc native procs */
user_desc_t	null_udesc;		/* null user descriptor */
system_desc_t	null_sdesc;		/* null system descriptor */

#if defined(__amd64)
user_desc_t	zero_u32desc;		/* 32-bit compatibility procs */
#endif	/* __amd64 */

#if defined(__amd64)
user_desc_t	ucs_on;
user_desc_t	ucs_off;
user_desc_t	ucs32_on;
user_desc_t	ucs32_off;
#endif	/* __amd64 */

#pragma	align	16(dblfault_stack0)
char	dblfault_stack0[DEFAULTSTKSZ];

extern void	fast_null(void);
extern hrtime_t	get_hrtime(void);
extern hrtime_t	gethrvtime(void);
extern hrtime_t	get_hrestime(void);
extern uint64_t	getlgrp(void);

void (*(fasttable[]))(void) = {
	fast_null,			/* T_FNULL routine */
	fast_null,			/* T_FGETFP routine (initially null) */
	fast_null,			/* T_FSETFP routine (initially null) */
	(void (*)())get_hrtime,		/* T_GETHRTIME */
	(void (*)())gethrvtime,		/* T_GETHRVTIME */
	(void (*)())get_hrestime,	/* T_GETHRESTIME */
	(void (*)())getlgrp		/* T_GETLGRP */
};

/*
 * Structure containing pre-computed descriptors to allow us to temporarily
 * interpose on a standard handler.
 */
struct interposing_handler {
	int ih_inum;
	gate_desc_t ih_interp_desc;
	gate_desc_t ih_default_desc;
};

/*
 * The brand infrastructure interposes on two handlers, and we use one as a
 * NULL signpost.
 */
static struct interposing_handler brand_tbl[3];

/*
 * software prototypes for default local descriptor table
 */

/*
 * Routines for loading segment descriptors in format the hardware
 * can understand.
 */

#if defined(__amd64)

/*
 * In long mode we have the new L or long mode attribute bit
 * for code segments. Only the conforming bit in type is used along
 * with the descriptor privilege level and present bits. Default operand
 * size must be zero when in long mode. In 32-bit compatibility mode all
 * fields are treated as in legacy mode. For data segments while in long
 * mode only the present bit is loaded.
 */
void
set_usegd(user_desc_t *dp, uint_t lmode, void *base, size_t size,
    uint_t type, uint_t dpl, uint_t gran, uint_t defopsz)
{
	ASSERT(lmode == SDP_SHORT || lmode == SDP_LONG);

	/*
	 * 64-bit long mode.
	 */
	if (lmode == SDP_LONG)
		dp->usd_def32 = 0;		/* 32-bit operands only */
	else
		/*
		 * 32-bit compatibility mode.
		 */
		dp->usd_def32 = defopsz;	/* 0 = 16, 1 = 32-bit ops */

	dp->usd_long = lmode;	/* 64-bit mode */
	dp->usd_type = type;
	dp->usd_dpl = dpl;
	dp->usd_p = 1;
	dp->usd_gran = gran;		/* 0 = bytes, 1 = pages */

	dp->usd_lobase = (uintptr_t)base;
	dp->usd_midbase = (uintptr_t)base >> 16;
	dp->usd_hibase = (uintptr_t)base >> (16 + 8);
	dp->usd_lolimit = size;
	dp->usd_hilimit = (uintptr_t)size >> 16;
}
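/*
 * As an illustration (not a call made here), a flat 32-bit data segment
 * covering the full 4G address space is built by passing a NULL base and
 * a size of -1 with page granularity; the split 20-bit limit field then
 * holds 0xFFFFF, i.e. (0xFFFFF + 1) * 4K = 4G:
 *
 *	set_usegd(&desc, SDP_SHORT, NULL, (size_t)-1, SDT_MEMRWA, SEL_UPL,
 *	    SDP_PAGES, SDP_OP32);
 *
 * This is exactly the pattern init_gdt_common() below uses for the user
 * data and %fs/%gs descriptors.
 */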
#elif defined(__i386)

/*
 * Install user segment descriptor for code and data.
 */
void
set_usegd(user_desc_t *dp, void *base, size_t size, uint_t type,
    uint_t dpl, uint_t gran, uint_t defopsz)
{
	dp->usd_lolimit = size;
	dp->usd_hilimit = (uintptr_t)size >> 16;

	dp->usd_lobase = (uintptr_t)base;
	dp->usd_midbase = (uintptr_t)base >> 16;
	dp->usd_hibase = (uintptr_t)base >> (16 + 8);

	dp->usd_type = type;
	dp->usd_dpl = dpl;
	dp->usd_p = 1;
	dp->usd_def32 = defopsz;	/* 0 = 16, 1 = 32 bit operands */
	dp->usd_gran = gran;		/* 0 = bytes, 1 = pages */
}

#endif	/* __i386 */

/*
 * Install system segment descriptor for LDT and TSS segments.
 */

#if defined(__amd64)

void
set_syssegd(system_desc_t *dp, void *base, size_t size, uint_t type,
    uint_t dpl)
{
	dp->ssd_lolimit = size;
	dp->ssd_hilimit = (uintptr_t)size >> 16;

	dp->ssd_lobase = (uintptr_t)base;
	dp->ssd_midbase = (uintptr_t)base >> 16;
	dp->ssd_hibase = (uintptr_t)base >> (16 + 8);
	dp->ssd_hi64base = (uintptr_t)base >> (16 + 8 + 8);

	dp->ssd_type = type;
	dp->ssd_zero1 = 0;	/* must be zero */
	dp->ssd_zero2 = 0;
	dp->ssd_dpl = dpl;
	dp->ssd_p = 1;
	dp->ssd_gran = 0;	/* force byte units */
}

void *
get_ssd_base(system_desc_t *dp)
{
	uintptr_t	base;

	base = (uintptr_t)dp->ssd_lobase |
	    (uintptr_t)dp->ssd_midbase << 16 |
	    (uintptr_t)dp->ssd_hibase << (16 + 8) |
	    (uintptr_t)dp->ssd_hi64base << (16 + 8 + 8);
	return ((void *)base);
}
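/*
 * In long mode a system segment descriptor such as the one built above
 * is 16 bytes rather than 8: ssd_hi64base carries bits 32-63 of the
 * base address, which is why a TSS or LDT descriptor consumes two
 * consecutive GDT slots on amd64.
 */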
#elif defined(__i386)

void
set_syssegd(system_desc_t *dp, void *base, size_t size, uint_t type,
    uint_t dpl)
{
	dp->ssd_lolimit = size;
	dp->ssd_hilimit = (uintptr_t)size >> 16;

	dp->ssd_lobase = (uintptr_t)base;
	dp->ssd_midbase = (uintptr_t)base >> 16;
	dp->ssd_hibase = (uintptr_t)base >> (16 + 8);

	dp->ssd_type = type;
	dp->ssd_zero = 0;	/* must be zero */
	dp->ssd_dpl = dpl;
	dp->ssd_p = 1;
	dp->ssd_gran = 0;	/* force byte units */
}

void *
get_ssd_base(system_desc_t *dp)
{
	uintptr_t	base;

	base = (uintptr_t)dp->ssd_lobase |
	    (uintptr_t)dp->ssd_midbase << 16 |
	    (uintptr_t)dp->ssd_hibase << (16 + 8);
	return ((void *)base);
}

#endif	/* __i386 */

/*
 * Install gate segment descriptor for interrupt, trap, call and task gates.
 */

#if defined(__amd64)

/*ARGSUSED*/
void
set_gatesegd(gate_desc_t *dp, void (*func)(void), selector_t sel,
    uint_t type, uint_t dpl, uint_t vector)
{
	dp->sgd_looffset = (uintptr_t)func;
	dp->sgd_hioffset = (uintptr_t)func >> 16;
	dp->sgd_hi64offset = (uintptr_t)func >> (16 + 16);

	dp->sgd_selector = (uint16_t)sel;

	/*
	 * For 64 bit native we use the IST stack mechanism
	 * for double faults. All other traps use the CPL = 0
	 * (tss_rsp0) stack.
	 */
#if !defined(__xpv)
	if (vector == T_DBLFLT)
		dp->sgd_ist = 1;
	else
#endif
		dp->sgd_ist = 0;

	dp->sgd_type = type;
	dp->sgd_dpl = dpl;
	dp->sgd_p = 1;
}

#elif defined(__i386)

/*ARGSUSED*/
void
set_gatesegd(gate_desc_t *dp, void (*func)(void), selector_t sel,
    uint_t type, uint_t dpl, uint_t unused)
{
	dp->sgd_looffset = (uintptr_t)func;
	dp->sgd_hioffset = (uintptr_t)func >> 16;

	dp->sgd_selector = (uint16_t)sel;
	dp->sgd_stkcpy = 0;	/* always zero bytes */
	dp->sgd_type = type;
	dp->sgd_dpl = dpl;
	dp->sgd_p = 1;
}

#endif	/* __i386 */

/*
 * Updates a single user descriptor in the GDT of the current cpu.
 * Caller is responsible for preventing cpu migration.
 */
void
gdt_update_usegd(uint_t sidx, user_desc_t *udp)
{
#if defined(__xpv)

	uint64_t dpa = CPU->cpu_m.mcpu_gdtpa + sizeof (*udp) * sidx;

	if (HYPERVISOR_update_descriptor(pa_to_ma(dpa), *(uint64_t *)udp))
		panic("gdt_update_usegd: HYPERVISOR_update_descriptor");

#else	/* __xpv */

	CPU->cpu_gdt[sidx] = *udp;

#endif	/* __xpv */
}

/*
 * Writes the single descriptor pointed to by udp into a process's
 * LDT entry pointed to by ldp.
 */
int
ldt_update_segd(user_desc_t *ldp, user_desc_t *udp)
{
#if defined(__xpv)

	uint64_t dpa;

	dpa = mmu_ptob(hat_getpfnum(kas.a_hat, (caddr_t)ldp)) |
	    ((uintptr_t)ldp & PAGEOFFSET);

	/*
	 * The hypervisor is a little more restrictive about what it
	 * supports in the LDT.
	 */
	if (HYPERVISOR_update_descriptor(pa_to_ma(dpa), *(uint64_t *)udp) != 0)
		return (EINVAL);

#else	/* __xpv */

	*ldp = *udp;

#endif	/* __xpv */
	return (0);
}

#if defined(__xpv)

/*
 * Converts hw format gate descriptor into pseudo-IDT format for the
 * hypervisor. Returns true if a valid entry was written.
 */
int
xen_idt_to_trap_info(uint_t vec, gate_desc_t *sgd, void *ti_arg)
{
	trap_info_t *ti = ti_arg;	/* XXPV	Aargh - segments.h comment */

	/*
	 * skip holes in the IDT
	 */
	if (GATESEG_GETOFFSET(sgd) == 0)
		return (0);

	ASSERT(sgd->sgd_type == SDT_SYSIGT);
	ti->vector = vec;
	TI_SET_DPL(ti, sgd->sgd_dpl);

	/*
	 * Is this an interrupt gate?
	 */
	if (sgd->sgd_type == SDT_SYSIGT) {
		/* LINTED */
		TI_SET_IF(ti, 1);
	}
	ti->cs = sgd->sgd_selector;
#if defined(__amd64)
	ti->cs |= SEL_KPL;	/* force into ring 3. see KCS_SEL  */
#endif
	ti->address = GATESEG_GETOFFSET(sgd);
	return (1);
}
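/*
 * A note on the hypercall used below: xen_set_trap_table() consumes a
 * zero-terminated array of trap_info_t entries, which is why
 * xen_idt_write() zeroes a two-entry array and fills in only the first
 * slot -- the second entry serves as the terminator.
 */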
/*
 * Convert a single hw format gate descriptor and write it into our virtual
 * IDT.
 */
void
xen_idt_write(gate_desc_t *sgd, uint_t vec)
{
	trap_info_t trapinfo[2];

	bzero(trapinfo, sizeof (trapinfo));
	if (xen_idt_to_trap_info(vec, sgd, &trapinfo[0]) == 0)
		return;
	if (xen_set_trap_table(trapinfo) != 0)
		panic("xen_idt_write: xen_set_trap_table() failed");
}

#endif	/* __xpv */

#if defined(__amd64)

/*
 * Build kernel GDT.
 */

static void
init_gdt_common(user_desc_t *gdt)
{
	int i;

	/*
	 * 64-bit kernel code segment.
	 */
	set_usegd(&gdt[GDT_KCODE], SDP_LONG, NULL, 0, SDT_MEMERA, SEL_KPL,
	    SDP_PAGES, SDP_OP32);

	/*
	 * 64-bit kernel data segment. The limit attribute is ignored in
	 * 64-bit mode, but we set it here to 0xFFFF so that we can use the
	 * SYSRET instruction to return from system calls back to 32-bit
	 * applications. SYSRET doesn't update the base, limit, or attributes
	 * of %ss or %ds descriptors. We therefore must ensure that the kernel
	 * uses something, though it will be ignored by hardware, that is
	 * compatible with 32-bit apps. For the same reason we must set the
	 * default op size of this descriptor to 32-bit operands.
	 */
	set_usegd(&gdt[GDT_KDATA], SDP_LONG, NULL, -1, SDT_MEMRWA,
	    SEL_KPL, SDP_PAGES, SDP_OP32);
	gdt[GDT_KDATA].usd_def32 = 1;

	/*
	 * 64-bit user code segment.
	 */
	set_usegd(&gdt[GDT_UCODE], SDP_LONG, NULL, 0, SDT_MEMERA, SEL_UPL,
	    SDP_PAGES, SDP_OP32);

	/*
	 * 32-bit user code segment.
	 */
	set_usegd(&gdt[GDT_U32CODE], SDP_SHORT, NULL, -1, SDT_MEMERA,
	    SEL_UPL, SDP_PAGES, SDP_OP32);

	/*
	 * See gdt_ucode32() and gdt_ucode_native().
	 */
	ucs_on = ucs_off = gdt[GDT_UCODE];
	ucs_off.usd_p = 0;	/* forces #np fault */

	ucs32_on = ucs32_off = gdt[GDT_U32CODE];
	ucs32_off.usd_p = 0;	/* forces #np fault */
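	/*
	 * The "off" copies above differ from the "on" copies only in the
	 * present bit; presumably the intent is that installing a ucs*_off
	 * descriptor makes any later use of the corresponding user %cs
	 * raise #np, letting the kernel intercept and fix up the segment
	 * (see gdt_ucode32() and gdt_ucode_native()).
	 */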
	/*
	 * 32 and 64 bit data segments can actually share the same descriptor.
	 * In long mode only the present bit is checked but all other fields
	 * are loaded. But in compatibility mode all fields are interpreted
	 * as in legacy mode so they must be set correctly for a 32-bit data
	 * segment.
	 */
	set_usegd(&gdt[GDT_UDATA], SDP_SHORT, NULL, -1, SDT_MEMRWA, SEL_UPL,
	    SDP_PAGES, SDP_OP32);

#if !defined(__xpv)

	/*
	 * The 64-bit kernel has no default LDT. By default, the LDT
	 * descriptor in the GDT is 0.
	 */

	/*
	 * Kernel TSS
	 */
	set_syssegd((system_desc_t *)&gdt[GDT_KTSS], ktss0,
	    sizeof (*ktss0) - 1, SDT_SYSTSS, SEL_KPL);

#endif	/* !__xpv */

	/*
	 * Initialize fs and gs descriptors for 32 bit processes.
	 * Only attributes and limits are initialized, the effective
	 * base address is programmed via fsbase/gsbase.
	 */
	set_usegd(&gdt[GDT_LWPFS], SDP_SHORT, NULL, -1, SDT_MEMRWA,
	    SEL_UPL, SDP_PAGES, SDP_OP32);
	set_usegd(&gdt[GDT_LWPGS], SDP_SHORT, NULL, -1, SDT_MEMRWA,
	    SEL_UPL, SDP_PAGES, SDP_OP32);

	/*
	 * Initialize the descriptors set aside for brand usage.
	 * Only attributes and limits are initialized.
	 */
	for (i = GDT_BRANDMIN; i <= GDT_BRANDMAX; i++)
		set_usegd(&gdt0[i], SDP_SHORT, NULL, -1, SDT_MEMRWA,
		    SEL_UPL, SDP_PAGES, SDP_OP32);

	/*
	 * Initialize convenient zero base user descriptors for clearing
	 * lwp private %fs and %gs descriptors in GDT. See setregs() for
	 * an example.
	 */
	set_usegd(&zero_udesc, SDP_LONG, 0, 0, SDT_MEMRWA, SEL_UPL,
	    SDP_BYTES, SDP_OP32);
	set_usegd(&zero_u32desc, SDP_SHORT, 0, -1, SDT_MEMRWA, SEL_UPL,
	    SDP_PAGES, SDP_OP32);
}

#if defined(__xpv)

static user_desc_t *
init_gdt(void)
{
	uint64_t gdtpa;
	ulong_t ma[1];		/* XXPV should be a memory_t */
	ulong_t addr;

#if !defined(__lint)
	/*
	 * Our gdt is never larger than a single page.
	 */
	ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE);
#endif
	gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA,
	    PAGESIZE, PAGESIZE);
	bzero(gdt0, PAGESIZE);

	init_gdt_common(gdt0);

	/*
	 * XXX Since we never invoke kmdb until after the kernel takes
	 * over the descriptor tables why not have it use the kernel's
	 * selectors?
	 */
	if (boothowto & RB_DEBUG) {
		set_usegd(&gdt0[GDT_B32DATA], SDP_LONG, NULL, -1, SDT_MEMRWA,
		    SEL_KPL, SDP_PAGES, SDP_OP32);
		set_usegd(&gdt0[GDT_B64CODE], SDP_LONG, NULL, -1, SDT_MEMERA,
		    SEL_KPL, SDP_PAGES, SDP_OP32);
	}

	/*
	 * Clear write permission for page containing the gdt and install it.
	 */
	gdtpa = pfn_to_pa(va_to_pfn(gdt0));
	ma[0] = (ulong_t)(pa_to_ma(gdtpa) >> PAGESHIFT);
	kbm_read_only((uintptr_t)gdt0, gdtpa);
	xen_set_gdt(ma, NGDT);

	/*
	 * Reload the segment registers to use the new GDT.
	 * On 64-bit, fixup KCS_SEL to be in ring 3.
	 * See KCS_SEL in segments.h.
	 */
	load_segment_registers((KCS_SEL | SEL_KPL), KFS_SEL, KGS_SEL, KDS_SEL);

	/*
	 * setup %gs for kernel
	 */
	xen_set_segment_base(SEGBASE_GS_KERNEL, (ulong_t)&cpus[0]);

	/*
	 * XX64 We should never dereference off "other gsbase" or
	 * "fsbase".  So, we should arrange to point FSBASE and
	 * KGSBASE somewhere truly awful e.g. point it at the last
	 * valid address below the hole so that any attempts to index
	 * off them cause an exception.
	 *
	 * For now, point it at 8G -- at least it should be unmapped
	 * until some 64-bit processes run.
	 */
	addr = 0x200000000ul;
	xen_set_segment_base(SEGBASE_FS, addr);
	xen_set_segment_base(SEGBASE_GS_USER, addr);
	xen_set_segment_base(SEGBASE_GS_USER_SEL, 0);

	return (gdt0);
}

#else	/* __xpv */

static user_desc_t *
init_gdt(void)
{
	desctbr_t	r_bgdt, r_gdt;
	user_desc_t	*bgdt;

#if !defined(__lint)
	/*
	 * Our gdt is never larger than a single page.
	 */
	ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE);
#endif
	gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA,
	    PAGESIZE, PAGESIZE);
	bzero(gdt0, PAGESIZE);

	init_gdt_common(gdt0);

	/*
	 * Copy in from boot's gdt to our gdt.
	 * Entry 0 is the null descriptor by definition.
	 */
	rd_gdtr(&r_bgdt);
	bgdt = (user_desc_t *)r_bgdt.dtr_base;
	if (bgdt == NULL)
		panic("null boot gdt");

	gdt0[GDT_B32DATA] = bgdt[GDT_B32DATA];
	gdt0[GDT_B32CODE] = bgdt[GDT_B32CODE];
	gdt0[GDT_B16CODE] = bgdt[GDT_B16CODE];
	gdt0[GDT_B16DATA] = bgdt[GDT_B16DATA];
	gdt0[GDT_B64CODE] = bgdt[GDT_B64CODE];

	/*
	 * Install our new GDT
	 */
	r_gdt.dtr_limit = (sizeof (*gdt0) * NGDT) - 1;
	r_gdt.dtr_base = (uintptr_t)gdt0;
	wr_gdtr(&r_gdt);
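	/*
	 * (By hardware definition the GDTR limit field, dtr_limit above,
	 * holds the table size in bytes minus one, hence the -1.)
	 */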
	/*
	 * Reload the segment registers to use the new GDT
	 */
	load_segment_registers(KCS_SEL, KFS_SEL, KGS_SEL, KDS_SEL);

	/*
	 * setup %gs for kernel
	 */
	wrmsr(MSR_AMD_GSBASE, (uint64_t)&cpus[0]);

	/*
	 * XX64 We should never dereference off "other gsbase" or
	 * "fsbase".  So, we should arrange to point FSBASE and
	 * KGSBASE somewhere truly awful e.g. point it at the last
	 * valid address below the hole so that any attempts to index
	 * off them cause an exception.
	 *
	 * For now, point it at 8G -- at least it should be unmapped
	 * until some 64-bit processes run.
	 */
	wrmsr(MSR_AMD_FSBASE, 0x200000000ul);
	wrmsr(MSR_AMD_KGSBASE, 0x200000000ul);
	return (gdt0);
}

#endif	/* __xpv */

#elif defined(__i386)

static void
init_gdt_common(user_desc_t *gdt)
{
	int i;

	/*
	 * Text and data for both kernel and user span entire 32 bit
	 * address space.
	 */

	/*
	 * kernel code segment.
	 */
	set_usegd(&gdt[GDT_KCODE], NULL, -1, SDT_MEMERA, SEL_KPL, SDP_PAGES,
	    SDP_OP32);

	/*
	 * kernel data segment.
	 */
	set_usegd(&gdt[GDT_KDATA], NULL, -1, SDT_MEMRWA, SEL_KPL, SDP_PAGES,
	    SDP_OP32);

	/*
	 * user code segment.
	 */
	set_usegd(&gdt[GDT_UCODE], NULL, -1, SDT_MEMERA, SEL_UPL, SDP_PAGES,
	    SDP_OP32);

	/*
	 * user data segment.
	 */
	set_usegd(&gdt[GDT_UDATA], NULL, -1, SDT_MEMRWA, SEL_UPL, SDP_PAGES,
	    SDP_OP32);

#if !defined(__xpv)

	/*
	 * TSS for T_DBLFLT (double fault) handler
	 */
	set_syssegd((system_desc_t *)&gdt[GDT_DBFLT], dftss0,
	    sizeof (*dftss0) - 1, SDT_SYSTSS, SEL_KPL);

	/*
	 * TSS for kernel
	 */
	set_syssegd((system_desc_t *)&gdt[GDT_KTSS], ktss0,
	    sizeof (*ktss0) - 1, SDT_SYSTSS, SEL_KPL);

#endif	/* !__xpv */

	/*
	 * %gs selector for kernel
	 */
	set_usegd(&gdt[GDT_GS], &cpus[0], sizeof (struct cpu) - 1, SDT_MEMRWA,
	    SEL_KPL, SDP_BYTES, SDP_OP32);
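	/*
	 * Unlike the 64-bit kernel, which reaches its cpu_t through the
	 * GSBASE MSR, the 32-bit kernel has no such MSR in legacy mode;
	 * here %gs refers to a real descriptor whose base is the cpu
	 * structure itself.
	 */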
	/*
	 * Initialize lwp private descriptors.
	 * Only attributes and limits are initialized, the effective
	 * base address is programmed via fsbase/gsbase.
	 */
	set_usegd(&gdt[GDT_LWPFS], NULL, (size_t)-1, SDT_MEMRWA, SEL_UPL,
	    SDP_PAGES, SDP_OP32);
	set_usegd(&gdt[GDT_LWPGS], NULL, (size_t)-1, SDT_MEMRWA, SEL_UPL,
	    SDP_PAGES, SDP_OP32);

	/*
	 * Initialize the descriptors set aside for brand usage.
	 * Only attributes and limits are initialized.
	 */
	for (i = GDT_BRANDMIN; i <= GDT_BRANDMAX; i++)
		set_usegd(&gdt0[i], NULL, (size_t)-1, SDT_MEMRWA, SEL_UPL,
		    SDP_PAGES, SDP_OP32);

	/*
	 * Initialize convenient zero base user descriptor for clearing
	 * lwp private %fs and %gs descriptors in GDT. See setregs() for
	 * an example.
	 */
	set_usegd(&zero_udesc, NULL, -1, SDT_MEMRWA, SEL_UPL,
	    SDP_BYTES, SDP_OP32);
}

#if defined(__xpv)

static user_desc_t *
init_gdt(void)
{
	uint64_t gdtpa;
	ulong_t ma[1];		/* XXPV should be a memory_t */

#if !defined(__lint)
	/*
	 * Our gdt is never larger than a single page.
	 */
	ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE);
#endif
	gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA,
	    PAGESIZE, PAGESIZE);
	bzero(gdt0, PAGESIZE);

	init_gdt_common(gdt0);
	gdtpa = pfn_to_pa(va_to_pfn(gdt0));

	/*
	 * XXX Since we never invoke kmdb until after the kernel takes
	 * over the descriptor tables why not have it use the kernel's
	 * selectors?
	 */
	if (boothowto & RB_DEBUG) {
		set_usegd(&gdt0[GDT_B32DATA], NULL, -1, SDT_MEMRWA, SEL_KPL,
		    SDP_PAGES, SDP_OP32);
		set_usegd(&gdt0[GDT_B32CODE], NULL, -1, SDT_MEMERA, SEL_KPL,
		    SDP_PAGES, SDP_OP32);
	}

	/*
	 * Clear write permission for page containing the gdt and install it.
	 */
	ma[0] = (ulong_t)(pa_to_ma(gdtpa) >> PAGESHIFT);
	kbm_read_only((uintptr_t)gdt0, gdtpa);
	xen_set_gdt(ma, NGDT);

	/*
	 * Reload the segment registers to use the new GDT
	 */
	load_segment_registers(
	    KCS_SEL, KDS_SEL, KDS_SEL, KFS_SEL, KGS_SEL, KDS_SEL);

	return (gdt0);
}

#else	/* __xpv */

static user_desc_t *
init_gdt(void)
{
	desctbr_t	r_bgdt, r_gdt;
	user_desc_t	*bgdt;

#if !defined(__lint)
	/*
	 * Our gdt is never larger than a single page.
	 */
	ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE);
#endif
	/*
	 * XXX this allocation belongs in our caller, not here.
	 */
	gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA,
	    PAGESIZE, PAGESIZE);
	bzero(gdt0, PAGESIZE);

	init_gdt_common(gdt0);

	/*
	 * Copy in from boot's gdt to our gdt entries.
	 * Entry 0 is null descriptor by definition.
	 */
	rd_gdtr(&r_bgdt);
	bgdt = (user_desc_t *)r_bgdt.dtr_base;
	if (bgdt == NULL)
		panic("null boot gdt");

	gdt0[GDT_B32DATA] = bgdt[GDT_B32DATA];
	gdt0[GDT_B32CODE] = bgdt[GDT_B32CODE];
	gdt0[GDT_B16CODE] = bgdt[GDT_B16CODE];
	gdt0[GDT_B16DATA] = bgdt[GDT_B16DATA];

	/*
	 * Install our new GDT
	 */
	r_gdt.dtr_limit = (sizeof (*gdt0) * NGDT) - 1;
	r_gdt.dtr_base = (uintptr_t)gdt0;
	wr_gdtr(&r_gdt);

	/*
	 * Reload the segment registers to use the new GDT
	 */
	load_segment_registers(
	    KCS_SEL, KDS_SEL, KDS_SEL, KFS_SEL, KGS_SEL, KDS_SEL);

	return (gdt0);
}

#endif	/* __xpv */
#endif	/* __i386 */
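/*
 * A note on the selector constants used throughout the IDT setup below:
 * an x86 selector encodes the descriptor's table index in its upper 13
 * bits, along with a table-indicator bit and a two-bit requested
 * privilege level, so values like KCS_SEL in segments.h are simply
 * (index << 3) | RPL.
 */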
/*
 * Build kernel IDT.
 *
 * Note that for amd64 we pretty much require every gate to be an interrupt
 * gate which blocks interrupts atomically on entry; that's because of our
 * dependency on using 'swapgs' every time we come into the kernel to find
 * the cpu structure. If we get interrupted just before doing that, %cs could
 * be in kernel mode (so that the trap prolog doesn't do a swapgs), but
 * %gsbase is really still pointing at something in userland. Bad things will
 * ensue. We use interrupt gates for i386 as well, even though they are not
 * required for some traps.
 *
 * Perhaps they should have invented a trap gate that does an atomic swapgs?
 */
static void
init_idt_common(gate_desc_t *idt)
{
	set_gatesegd(&idt[T_ZERODIV], &div0trap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
	    0);
	set_gatesegd(&idt[T_SGLSTP], &dbgtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
	    0);
	set_gatesegd(&idt[T_NMIFLT], &nmiint, KCS_SEL, SDT_SYSIGT, TRP_KPL,
	    0);
	set_gatesegd(&idt[T_BPTFLT], &brktrap, KCS_SEL, SDT_SYSIGT, TRP_UPL,
	    0);
	set_gatesegd(&idt[T_OVFLW], &ovflotrap, KCS_SEL, SDT_SYSIGT, TRP_UPL,
	    0);
	set_gatesegd(&idt[T_BOUNDFLT], &boundstrap, KCS_SEL, SDT_SYSIGT,
	    TRP_KPL, 0);
	set_gatesegd(&idt[T_ILLINST], &invoptrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
	    0);
	set_gatesegd(&idt[T_NOEXTFLT], &ndptrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
	    0);

	/*
	 * double fault handler.
	 *
	 * Note that on the hypervisor a guest does not receive #df faults.
	 * Instead a failsafe event is injected into the guest if its selectors
	 * and/or stack is in a broken state. See xen_failsafe_callback.
	 */
#if !defined(__xpv)
#if defined(__amd64)

	set_gatesegd(&idt[T_DBLFLT], &syserrtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
	    T_DBLFLT);

#elif defined(__i386)

	/*
	 * task gate required.
	 */
	set_gatesegd(&idt[T_DBLFLT], NULL, DFTSS_SEL, SDT_SYSTASKGT, TRP_KPL,
	    0);

#endif	/* __i386 */
#endif	/* !__xpv */

	/*
	 * T_EXTOVRFLT coprocessor-segment-overrun not supported.
	 */

	set_gatesegd(&idt[T_TSSFLT], &invtsstrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
	    0);
	set_gatesegd(&idt[T_SEGFLT], &segnptrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
	    0);
	set_gatesegd(&idt[T_STKFLT], &stktrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
	set_gatesegd(&idt[T_GPFLT], &gptrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
	set_gatesegd(&idt[T_PGFLT], &pftrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
	set_gatesegd(&idt[T_EXTERRFLT], &ndperr, KCS_SEL, SDT_SYSIGT, TRP_KPL,
	    0);
	set_gatesegd(&idt[T_ALIGNMENT], &achktrap, KCS_SEL, SDT_SYSIGT,
	    TRP_KPL, 0);
	set_gatesegd(&idt[T_MCE], &mcetrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
	set_gatesegd(&idt[T_SIMDFPE], &xmtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);

	/*
	 * install "int80" handler at, well, 0x80.
	 */
	set_gatesegd(&idt0[T_INT80], &sys_int80, KCS_SEL, SDT_SYSIGT, TRP_UPL,
	    0);

	/*
	 * install fast trap handler at 210.
	 */
	set_gatesegd(&idt[T_FASTTRAP], &fasttrap, KCS_SEL, SDT_SYSIGT, TRP_UPL,
	    0);

	/*
	 * System call handler.
	 */
#if defined(__amd64)
	set_gatesegd(&idt[T_SYSCALLINT], &sys_syscall_int, KCS_SEL, SDT_SYSIGT,
	    TRP_UPL, 0);

#elif defined(__i386)
	set_gatesegd(&idt[T_SYSCALLINT], &sys_call, KCS_SEL, SDT_SYSIGT,
	    TRP_UPL, 0);
#endif	/* __i386 */

	/*
	 * Install the DTrace interrupt handler for the pid provider.
	 */
	set_gatesegd(&idt[T_DTRACE_RET], &dtrace_ret, KCS_SEL,
	    SDT_SYSIGT, TRP_UPL, 0);
	/*
	 * Prepare interposing descriptors for the branded "int80"
	 * and syscall handlers and cache copies of the default
	 * descriptors.
	 */
	brand_tbl[0].ih_inum = T_INT80;
	brand_tbl[0].ih_default_desc = idt0[T_INT80];
	set_gatesegd(&(brand_tbl[0].ih_interp_desc), &brand_sys_int80, KCS_SEL,
	    SDT_SYSIGT, TRP_UPL, 0);

	brand_tbl[1].ih_inum = T_SYSCALLINT;
	brand_tbl[1].ih_default_desc = idt0[T_SYSCALLINT];

#if defined(__amd64)
	set_gatesegd(&(brand_tbl[1].ih_interp_desc), &brand_sys_syscall_int,
	    KCS_SEL, SDT_SYSIGT, TRP_UPL, 0);
#elif defined(__i386)
	set_gatesegd(&(brand_tbl[1].ih_interp_desc), &brand_sys_call,
	    KCS_SEL, SDT_SYSIGT, TRP_UPL, 0);
#endif	/* __i386 */

	brand_tbl[2].ih_inum = 0;
}

#if defined(__xpv)

static void
init_idt(gate_desc_t *idt)
{
	init_idt_common(idt);
}

#else	/* __xpv */

static void
init_idt(gate_desc_t *idt)
{
	char	ivctname[80];
	void	(*ivctptr)(void);
	int	i;

	/*
	 * Initialize entire table with 'reserved' trap and then overwrite
	 * specific entries. T_EXTOVRFLT (9) is unsupported and reserved
	 * since it can only be generated on a 386 processor. 15 is also
	 * unsupported and reserved.
	 */
	for (i = 0; i < NIDT; i++)
		set_gatesegd(&idt[i], &resvtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
		    0);

	/*
	 * 20-31 reserved
	 */
	for (i = 20; i < 32; i++)
		set_gatesegd(&idt[i], &invaltrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
		    0);

	/*
	 * interrupts 32 - 255
	 */
	for (i = 32; i < 256; i++) {
		(void) snprintf(ivctname, sizeof (ivctname), "ivct%d", i);
		ivctptr = (void (*)(void))kobj_getsymvalue(ivctname, 0);
		if (ivctptr == NULL)
			panic("kobj_getsymvalue(%s) failed", ivctname);

		set_gatesegd(&idt[i], ivctptr, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
	}

	/*
	 * Now install the common ones. Note that it will overlay some
	 * entries installed above like T_SYSCALLINT, T_FASTTRAP etc.
	 */
	init_idt_common(idt);
}

#endif	/* __xpv */

/*
 * The kernel does not deal with LDTs unless a user explicitly creates
 * one. Under normal circumstances, the LDTR contains 0. Any process attempting
 * to reference the LDT will therefore cause a #gp. System calls made via the
 * obsolete lcall mechanism are emulated by the #gp fault handler.
 */
static void
init_ldt(void)
{
#if defined(__xpv)
	xen_set_ldt(NULL, 0);
#else
	wr_ldtr(0);
#endif
}

#if !defined(__xpv)
#if defined(__amd64)

static void
init_tss(void)
{
	/*
	 * tss_rsp0 is dynamically filled in by resume() on each context
	 * switch. All exceptions but #DF will run on the thread stack.
	 * Set up the double fault stack here.
	 */
	ktss0->tss_ist1 =
	    (uint64_t)&dblfault_stack0[sizeof (dblfault_stack0)];

	/*
	 * Set I/O bit map offset equal to size of TSS segment limit
	 * for no I/O permission map. This will force all user I/O
	 * instructions to generate #gp fault.
	 */
	ktss0->tss_bitmapbase = sizeof (*ktss0);

	/*
	 * Point %tr to descriptor for ktss0 in gdt.
	 */
	wr_tsr(KTSS_SEL);
}
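/*
 * (The IST mechanism gives #DF a known-good stack regardless of the
 * state of the current stack; tss_ist1 above points one element past
 * the end of dblfault_stack0 because the stack grows downward.)
 */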
#elif defined(__i386)

static void
init_tss(void)
{
	/*
	 * ktss0->tss_esp dynamically filled in by resume() on each
	 * context switch.
	 */
	ktss0->tss_ss0	= KDS_SEL;
	ktss0->tss_eip	= (uint32_t)_start;
	ktss0->tss_ds	= ktss0->tss_es = ktss0->tss_ss = KDS_SEL;
	ktss0->tss_cs	= KCS_SEL;
	ktss0->tss_fs	= KFS_SEL;
	ktss0->tss_gs	= KGS_SEL;
	ktss0->tss_ldt	= ULDT_SEL;

	/*
	 * Initialize double fault tss.
	 */
	dftss0->tss_esp0 = (uint32_t)&dblfault_stack0[sizeof (dblfault_stack0)];
	dftss0->tss_ss0	= KDS_SEL;

	/*
	 * tss_cr3 will get initialized in hat_kern_setup() once our page
	 * tables have been setup.
	 */
	dftss0->tss_eip	= (uint32_t)syserrtrap;
	dftss0->tss_esp	= (uint32_t)&dblfault_stack0[sizeof (dblfault_stack0)];
	dftss0->tss_cs	= KCS_SEL;
	dftss0->tss_ds	= KDS_SEL;
	dftss0->tss_es	= KDS_SEL;
	dftss0->tss_ss	= KDS_SEL;
	dftss0->tss_fs	= KFS_SEL;
	dftss0->tss_gs	= KGS_SEL;

	/*
	 * Set I/O bit map offset equal to size of TSS segment limit
	 * for no I/O permission map. This will force all user I/O
	 * instructions to generate #gp fault.
	 */
	ktss0->tss_bitmapbase = sizeof (*ktss0);

	/*
	 * Point %tr to descriptor for ktss0 in gdt.
	 */
	wr_tsr(KTSS_SEL);
}

#endif	/* __i386 */
#endif	/* !__xpv */

#if defined(__xpv)

void
init_desctbls(void)
{
	uint_t vec;
	user_desc_t *gdt;

	/*
	 * Setup and install our GDT.
	 */
	gdt = init_gdt();

	/*
	 * Store static pa of gdt to speed up pa_to_ma() translations
	 * on lwp context switches.
	 */
	ASSERT(IS_P2ALIGNED((uintptr_t)gdt, PAGESIZE));
	CPU->cpu_gdt = gdt;
	CPU->cpu_m.mcpu_gdtpa = pfn_to_pa(va_to_pfn(gdt));

	/*
	 * Setup and install our IDT.
	 */
#if !defined(__lint)
	ASSERT(NIDT * sizeof (*idt0) <= PAGESIZE);
#endif
	idt0 = (gate_desc_t *)BOP_ALLOC(bootops, (caddr_t)IDT_VA,
	    PAGESIZE, PAGESIZE);
	bzero(idt0, PAGESIZE);
	init_idt(idt0);
	for (vec = 0; vec < NIDT; vec++)
		xen_idt_write(&idt0[vec], vec);

	CPU->cpu_idt = idt0;

	/*
	 * set default kernel stack
	 */
	xen_stack_switch(KDS_SEL,
	    (ulong_t)&dblfault_stack0[sizeof (dblfault_stack0)]);

	xen_init_callbacks();

	init_ldt();
}

#else	/* __xpv */

void
init_desctbls(void)
{
	user_desc_t *gdt;
	desctbr_t idtr;

	/*
	 * Allocate IDT and TSS structures on unique pages for better
	 * performance in virtual machines.
	 */
#if !defined(__lint)
	ASSERT(NIDT * sizeof (*idt0) <= PAGESIZE);
#endif
	idt0 = (gate_desc_t *)BOP_ALLOC(bootops, (caddr_t)IDT_VA,
	    PAGESIZE, PAGESIZE);
	bzero(idt0, PAGESIZE);
#if !defined(__lint)
	ASSERT(sizeof (*ktss0) <= PAGESIZE);
#endif
	ktss0 = (tss_t *)BOP_ALLOC(bootops, (caddr_t)KTSS_VA,
	    PAGESIZE, PAGESIZE);
	bzero(ktss0, PAGESIZE);

#if defined(__i386)
#if !defined(__lint)
	ASSERT(sizeof (*dftss0) <= PAGESIZE);
#endif
	dftss0 = (tss_t *)BOP_ALLOC(bootops, (caddr_t)DFTSS_VA,
	    PAGESIZE, PAGESIZE);
	bzero(dftss0, PAGESIZE);
#endif

	/*
	 * Setup and install our GDT.
	 */
	gdt = init_gdt();
	ASSERT(IS_P2ALIGNED((uintptr_t)gdt, PAGESIZE));
	CPU->cpu_gdt = gdt;

	/*
	 * Setup and install our IDT.
	 */
	init_idt(idt0);

	idtr.dtr_base = (uintptr_t)idt0;
	idtr.dtr_limit = (NIDT * sizeof (*idt0)) - 1;
	wr_idtr(&idtr);
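	/*
	 * (As with the GDT, the dtr_limit programmed above is the table
	 * size in bytes minus one.)
	 */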
	CPU->cpu_idt = idt0;

#if defined(__i386)
	/*
	 * We maintain a description of idt0 in convenient IDTR format
	 * for #pf's on some older pentium processors. See pentium_pftrap().
	 */
	idt0_default_r = idtr;
#endif	/* __i386 */

	init_tss();
	CPU->cpu_tss = ktss0;
	init_ldt();
}

#endif	/* __xpv */

/*
 * In the early kernel, we need to set up a simple GDT to run on.
 *
 * XXPV	Can dboot use this too?  See dboot_gdt.s
 */
void
init_boot_gdt(user_desc_t *bgdt)
{
#if defined(__amd64)
	set_usegd(&bgdt[GDT_B32DATA], SDP_LONG, NULL, -1, SDT_MEMRWA, SEL_KPL,
	    SDP_PAGES, SDP_OP32);
	set_usegd(&bgdt[GDT_B64CODE], SDP_LONG, NULL, -1, SDT_MEMERA, SEL_KPL,
	    SDP_PAGES, SDP_OP32);
#elif defined(__i386)
	set_usegd(&bgdt[GDT_B32DATA], NULL, -1, SDT_MEMRWA, SEL_KPL,
	    SDP_PAGES, SDP_OP32);
	set_usegd(&bgdt[GDT_B32CODE], NULL, -1, SDT_MEMERA, SEL_KPL,
	    SDP_PAGES, SDP_OP32);
#endif	/* __i386 */
}

/*
 * Enable interpositioning on the system call path by rewriting the
 * sys{call|enter} MSRs and the syscall-related entries in the IDT to use
 * the branded entry points.
 */
void
brand_interpositioning_enable(void)
{
	gate_desc_t	*idt = CPU->cpu_idt;
	int		i;

	ASSERT(curthread->t_preempt != 0 || getpil() >= DISP_LEVEL);

	for (i = 0; brand_tbl[i].ih_inum; i++) {
		idt[brand_tbl[i].ih_inum] = brand_tbl[i].ih_interp_desc;
#if defined(__xpv)
		xen_idt_write(&idt[brand_tbl[i].ih_inum],
		    brand_tbl[i].ih_inum);
#endif
	}

#if defined(__amd64)
#if defined(__xpv)

	/*
	 * Currently the hypervisor only supports 64-bit syscalls via
	 * syscall instruction. The 32-bit syscalls are handled by
	 * the interrupt gate above.
	 */
	xen_set_callback(brand_sys_syscall, CALLBACKTYPE_syscall,
	    CALLBACKF_mask_events);

#else

	if (is_x86_feature(x86_featureset, X86FSET_ASYSC)) {
		wrmsr(MSR_AMD_LSTAR, (uintptr_t)brand_sys_syscall);
		wrmsr(MSR_AMD_CSTAR, (uintptr_t)brand_sys_syscall32);
	}

#endif
#endif	/* __amd64 */

	if (is_x86_feature(x86_featureset, X86FSET_SEP))
		wrmsr(MSR_INTC_SEP_EIP, (uintptr_t)brand_sys_sysenter);
}
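/*
 * For reference: LSTAR holds the target of the syscall instruction
 * executed in 64-bit mode and CSTAR the target for 32-bit compatibility
 * mode, while the sysenter MSR covers the Intel fast-path, which is why
 * enable/disable must rewrite all three alongside the IDT entries.
 */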
/*
 * Disable interpositioning on the system call path by rewriting the
 * sys{call|enter} MSRs and the syscall-related entries in the IDT to use
 * the standard entry points, which bypass the interpositioning hooks.
 */
void
brand_interpositioning_disable(void)
{
	gate_desc_t	*idt = CPU->cpu_idt;
	int		i;

	ASSERT(curthread->t_preempt != 0 || getpil() >= DISP_LEVEL);

	for (i = 0; brand_tbl[i].ih_inum; i++) {
		idt[brand_tbl[i].ih_inum] = brand_tbl[i].ih_default_desc;
#if defined(__xpv)
		xen_idt_write(&idt[brand_tbl[i].ih_inum],
		    brand_tbl[i].ih_inum);
#endif
	}

#if defined(__amd64)
#if defined(__xpv)

	/*
	 * See comment above in brand_interpositioning_enable.
	 */
	xen_set_callback(sys_syscall, CALLBACKTYPE_syscall,
	    CALLBACKF_mask_events);

#else

	if (is_x86_feature(x86_featureset, X86FSET_ASYSC)) {
		wrmsr(MSR_AMD_LSTAR, (uintptr_t)sys_syscall);
		wrmsr(MSR_AMD_CSTAR, (uintptr_t)sys_syscall32);
	}

#endif
#endif	/* __amd64 */

	if (is_x86_feature(x86_featureset, X86FSET_SEP))
		wrmsr(MSR_INTC_SEP_EIP, (uintptr_t)sys_sysenter);
}