/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * Copyright 2011 Joyent, Inc. All rights reserved.
 */

/*
 * Copyright (c) 1992 Terrence R. Lambert.
 * Copyright (c) 1990 The Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by the University of
 *      California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *      from: @(#)machdep.c     7.4 (Berkeley) 6/3/91
 */

#include <sys/types.h>
#include <sys/sysmacros.h>
#include <sys/tss.h>
#include <sys/segments.h>
#include <sys/trap.h>
#include <sys/cpuvar.h>
#include <sys/bootconf.h>
#include <sys/x86_archext.h>
#include <sys/controlregs.h>
#include <sys/archsystm.h>
#include <sys/machsystm.h>
#include <sys/kobj.h>
#include <sys/cmn_err.h>
#include <sys/reboot.h>
#include <sys/kdi.h>
#include <sys/mach_mmu.h>
#include <sys/systm.h>

#ifdef __xpv
#include <sys/hypervisor.h>
#include <vm/as.h>
#endif

#include <sys/promif.h>
#include <sys/bootinfo.h>
#include <vm/kboot_mmu.h>
#include <vm/hat_pte.h>

/*
 * cpu0 and default tables and structures.
 */
user_desc_t     *gdt0;
#if !defined(__xpv)
desctbr_t       gdt0_default_r;
#endif

gate_desc_t     *idt0;          /* interrupt descriptor table */
#if defined(__i386)
desctbr_t       idt0_default_r;         /* describes idt0 in IDTR format */
#endif

tss_t           *ktss0;                 /* kernel task state structure */

#if defined(__i386)
tss_t           *dftss0;                /* #DF double-fault exception */
#endif  /* __i386 */

user_desc_t     zero_udesc;             /* base zero user desc native procs */
user_desc_t     null_udesc;             /* null user descriptor */
system_desc_t   null_sdesc;             /* null system descriptor */

#if defined(__amd64)
user_desc_t     zero_u32desc;           /* 32-bit compatibility procs */
#endif  /* __amd64 */

#if defined(__amd64)
user_desc_t     ucs_on;
user_desc_t     ucs_off;
user_desc_t     ucs32_on;
user_desc_t     ucs32_off;
#endif  /* __amd64 */

#pragma align   16(dblfault_stack0)
char            dblfault_stack0[DEFAULTSTKSZ];

extern void     fast_null(void);
extern hrtime_t get_hrtime(void);
extern hrtime_t gethrvtime(void);
extern hrtime_t get_hrestime(void);
extern uint64_t getlgrp(void);

void (*(fasttable[]))(void) = {
        fast_null,                      /* T_FNULL routine */
        fast_null,                      /* T_FGETFP routine (initially null) */
        fast_null,                      /* T_FSETFP routine (initially null) */
        (void (*)())get_hrtime,         /* T_GETHRTIME */
        (void (*)())gethrvtime,         /* T_GETHRVTIME */
        (void (*)())get_hrestime,       /* T_GETHRESTIME */
        (void (*)())getlgrp             /* T_GETLGRP */
};
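
/*
 * Illustrative sketch (not part of the build): user code reaches one of the
 * handlers above by issuing the fast-trap software interrupt with the table
 * index in %eax, e.g. to fetch the high-resolution time:
 *
 *	movl	$T_GETHRTIME, %eax
 *	int	$T_FASTTRAP
 *
 * The fast-trap entry point indexes fasttable[] with %eax and jumps to the
 * selected routine.
 */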

/*
 * Structure containing pre-computed descriptors to allow us to temporarily
 * interpose on a standard handler.
 */
struct interposing_handler {
        int ih_inum;
        gate_desc_t ih_interp_desc;
        gate_desc_t ih_default_desc;
};

/*
 * The brand infrastructure interposes on two handlers, and we use one as a
 * NULL signpost.
 */
static struct interposing_handler brand_tbl[3];

/*
 * software prototypes for default local descriptor table
 */

/*
 * Routines for loading segment descriptors in a format the hardware
 * can understand.
 */

#if defined(__amd64)

/*
 * In long mode we have the new L or long mode attribute bit
 * for code segments. Only the conforming bit in type is used along
 * with the descriptor privilege level (DPL) and present bits. The default
 * operand size must be zero when in long mode. In 32-bit compatibility
 * mode all fields are treated as in legacy mode. For data segments while
 * in long mode only the present bit is loaded.
 */
void
set_usegd(user_desc_t *dp, uint_t lmode, void *base, size_t size,
    uint_t type, uint_t dpl, uint_t gran, uint_t defopsz)
{
        ASSERT(lmode == SDP_SHORT || lmode == SDP_LONG);

        /*
         * 64-bit long mode.
         */
        if (lmode == SDP_LONG)
                dp->usd_def32 = 0;           /* D bit must be 0 when L = 1 */
        else
                /*
                 * 32-bit compatibility mode.
                 */
                dp->usd_def32 = defopsz;     /* 0 = 16, 1 = 32-bit ops */

        dp->usd_long = lmode;        /* 64-bit mode */
        dp->usd_type = type;
        dp->usd_dpl = dpl;
        dp->usd_p = 1;
        dp->usd_gran = gran;         /* 0 = bytes, 1 = pages */

        dp->usd_lobase = (uintptr_t)base;
        dp->usd_midbase = (uintptr_t)base >> 16;
        dp->usd_hibase = (uintptr_t)base >> (16 + 8);
        dp->usd_lolimit = size;
        dp->usd_hilimit = (uintptr_t)size >> 16;
}
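
/*
 * Illustrative sketch, not compiled: constructing a 64-bit user code
 * descriptor equivalent to the GDT_UCODE entry built in init_gdt_common()
 * below (d is a hypothetical local):
 *
 *	user_desc_t d;
 *
 *	set_usegd(&d, SDP_LONG, NULL, 0, SDT_MEMERA, SEL_UPL,
 *	    SDP_PAGES, SDP_OP32);
 *
 * With SDP_LONG the base, limit and default-operand-size arguments are
 * largely ignored by hardware; they only matter for SDP_SHORT
 * (compatibility mode) descriptors.
 */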

#elif defined(__i386)

/*
 * Install user segment descriptor for code and data.
 */
void
set_usegd(user_desc_t *dp, void *base, size_t size, uint_t type,
    uint_t dpl, uint_t gran, uint_t defopsz)
{
        dp->usd_lolimit = size;
        dp->usd_hilimit = (uintptr_t)size >> 16;

        dp->usd_lobase = (uintptr_t)base;
        dp->usd_midbase = (uintptr_t)base >> 16;
        dp->usd_hibase = (uintptr_t)base >> (16 + 8);

        dp->usd_type = type;
        dp->usd_dpl = dpl;
        dp->usd_p = 1;
        dp->usd_def32 = defopsz;     /* 0 = 16, 1 = 32 bit operands */
        dp->usd_gran = gran;         /* 0 = bytes, 1 = pages */
}

#endif  /* __i386 */

/*
 * Install system segment descriptor for LDT and TSS segments.
 */

#if defined(__amd64)

void
set_syssegd(system_desc_t *dp, void *base, size_t size, uint_t type,
    uint_t dpl)
{
        dp->ssd_lolimit = size;
        dp->ssd_hilimit = (uintptr_t)size >> 16;

        dp->ssd_lobase = (uintptr_t)base;
        dp->ssd_midbase = (uintptr_t)base >> 16;
        dp->ssd_hibase = (uintptr_t)base >> (16 + 8);
        dp->ssd_hi64base = (uintptr_t)base >> (16 + 8 + 8);

        dp->ssd_type = type;
        dp->ssd_zero1 = 0;   /* must be zero */
        dp->ssd_zero2 = 0;
        dp->ssd_dpl = dpl;
        dp->ssd_p = 1;
        dp->ssd_gran = 0;    /* force byte units */
}

void *
get_ssd_base(system_desc_t *dp)
{
        uintptr_t       base;

        base = (uintptr_t)dp->ssd_lobase |
            (uintptr_t)dp->ssd_midbase << 16 |
            (uintptr_t)dp->ssd_hibase << (16 + 8) |
            (uintptr_t)dp->ssd_hi64base << (16 + 8 + 8);
        return ((void *)base);
}
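
/*
 * Illustrative sketch: set_syssegd() and get_ssd_base() are inverses for
 * the base address, which is useful when a TSS must be located from its
 * descriptor after the fact (sd is a hypothetical local):
 *
 *	system_desc_t sd;
 *
 *	set_syssegd(&sd, ktss0, sizeof (*ktss0) - 1, SDT_SYSTSS, SEL_KPL);
 *	ASSERT(get_ssd_base(&sd) == (void *)ktss0);
 */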

#elif defined(__i386)

void
set_syssegd(system_desc_t *dp, void *base, size_t size, uint_t type,
    uint_t dpl)
{
        dp->ssd_lolimit = size;
        dp->ssd_hilimit = (uintptr_t)size >> 16;

        dp->ssd_lobase = (uintptr_t)base;
        dp->ssd_midbase = (uintptr_t)base >> 16;
        dp->ssd_hibase = (uintptr_t)base >> (16 + 8);

        dp->ssd_type = type;
        dp->ssd_zero = 0;    /* must be zero */
        dp->ssd_dpl = dpl;
        dp->ssd_p = 1;
        dp->ssd_gran = 0;    /* force byte units */
}

void *
get_ssd_base(system_desc_t *dp)
{
        uintptr_t       base;

        base = (uintptr_t)dp->ssd_lobase |
            (uintptr_t)dp->ssd_midbase << 16 |
            (uintptr_t)dp->ssd_hibase << (16 + 8);
        return ((void *)base);
}

#endif  /* __i386 */

/*
 * Install gate segment descriptor for interrupt, trap, call and task gates.
 */

#if defined(__amd64)

/*ARGSUSED*/
void
set_gatesegd(gate_desc_t *dp, void (*func)(void), selector_t sel,
    uint_t type, uint_t dpl, uint_t vector)
{
        dp->sgd_looffset = (uintptr_t)func;
        dp->sgd_hioffset = (uintptr_t)func >> 16;
        dp->sgd_hi64offset = (uintptr_t)func >> (16 + 16);

        dp->sgd_selector = (uint16_t)sel;

        /*
         * For 64-bit native we use the IST stack mechanism
         * for double faults. All other traps use the CPL = 0
         * (tss_rsp0) stack.
         */
#if !defined(__xpv)
        if (vector == T_DBLFLT)
                dp->sgd_ist = 1;
        else
#endif
                dp->sgd_ist = 0;

        dp->sgd_type = type;
        dp->sgd_dpl = dpl;
        dp->sgd_p = 1;
}
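
/*
 * Illustrative sketch: installing a kernel-only interrupt gate, as
 * init_idt_common() does for most vectors. Passing the vector number lets
 * set_gatesegd() select the IST stack for #DF:
 *
 *	set_gatesegd(&idt0[T_PGFLT], &pftrap, KCS_SEL, SDT_SYSIGT,
 *	    TRP_KPL, 0);
 */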

#elif defined(__i386)

/*ARGSUSED*/
void
set_gatesegd(gate_desc_t *dp, void (*func)(void), selector_t sel,
    uint_t type, uint_t dpl, uint_t unused)
{
        dp->sgd_looffset = (uintptr_t)func;
        dp->sgd_hioffset = (uintptr_t)func >> 16;

        dp->sgd_selector = (uint16_t)sel;
        dp->sgd_stkcpy = 0;  /* always zero bytes */
        dp->sgd_type = type;
        dp->sgd_dpl = dpl;
        dp->sgd_p = 1;
}

#endif  /* __i386 */

/*
 * Updates a single user descriptor in the GDT of the current cpu.
 * Caller is responsible for preventing cpu migration.
 */

void
gdt_update_usegd(uint_t sidx, user_desc_t *udp)
{
#if defined(__xpv)

        uint64_t dpa = CPU->cpu_m.mcpu_gdtpa + sizeof (*udp) * sidx;

        if (HYPERVISOR_update_descriptor(pa_to_ma(dpa), *(uint64_t *)udp))
                panic("gdt_update_usegd: HYPERVISOR_update_descriptor");

#else   /* __xpv */

        CPU->cpu_gdt[sidx] = *udp;

#endif  /* __xpv */
}
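
/*
 * Illustrative sketch of the calling convention: the caller pins the thread
 * to the current cpu for the duration of the update, e.g.
 *
 *	kpreempt_disable();
 *	gdt_update_usegd(GDT_LWPFS, &udesc);
 *	kpreempt_enable();
 *
 * where udesc is a previously constructed user_desc_t (hypothetical local).
 */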

/*
 * Writes the single descriptor pointed to by udp into the process's
 * LDT entry pointed to by ldp.
 */
int
ldt_update_segd(user_desc_t *ldp, user_desc_t *udp)
{
#if defined(__xpv)

        uint64_t dpa;

        dpa = mmu_ptob(hat_getpfnum(kas.a_hat, (caddr_t)ldp)) |
            ((uintptr_t)ldp & PAGEOFFSET);

        /*
         * The hypervisor is a little more restrictive about what it
         * supports in the LDT.
         */
        if (HYPERVISOR_update_descriptor(pa_to_ma(dpa), *(uint64_t *)udp) != 0)
                return (EINVAL);

#else   /* __xpv */

        *ldp = *udp;

#endif  /* __xpv */
        return (0);
}
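
/*
 * Illustrative sketch: on the hypervisor the update can be rejected, so
 * callers must check the return value rather than assume success:
 *
 *	if (ldt_update_segd(&ldt[seli], &ndesc) != 0)
 *		return (EINVAL);
 *
 * (ldt, seli and ndesc are hypothetical locals of the caller.)
 */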

#if defined(__xpv)

/*
 * Converts hw format gate descriptor into pseudo-IDT format for the
 * hypervisor. Returns true if a valid entry was written.
 */
int
xen_idt_to_trap_info(uint_t vec, gate_desc_t *sgd, void *ti_arg)
{
        trap_info_t *ti = ti_arg;       /* XXPV Aargh - segments.h comment */

        /*
         * skip holes in the IDT
         */
        if (GATESEG_GETOFFSET(sgd) == 0)
                return (0);

        ASSERT(sgd->sgd_type == SDT_SYSIGT);
        ti->vector = vec;
        TI_SET_DPL(ti, sgd->sgd_dpl);

        /*
         * Is this an interrupt gate?
         */
        if (sgd->sgd_type == SDT_SYSIGT) {
                /* LINTED */
                TI_SET_IF(ti, 1);
        }
        ti->cs = sgd->sgd_selector;
#if defined(__amd64)
        ti->cs |= SEL_KPL;   /* force into ring 3. see KCS_SEL  */
#endif
        ti->address = GATESEG_GETOFFSET(sgd);
        return (1);
}

/*
 * Convert a single hw format gate descriptor and write it into our virtual
 * IDT.
 */
void
xen_idt_write(gate_desc_t *sgd, uint_t vec)
{
        trap_info_t trapinfo[2];

        bzero(trapinfo, sizeof (trapinfo));
        if (xen_idt_to_trap_info(vec, sgd, &trapinfo[0]) == 0)
                return;
        if (xen_set_trap_table(trapinfo) != 0)
                panic("xen_idt_write: xen_set_trap_table() failed");
}

#endif  /* __xpv */

#if defined(__amd64)

/*
 * Build kernel GDT.
 */

static void
init_gdt_common(user_desc_t *gdt)
{
        int i;

        /*
         * 64-bit kernel code segment.
         */
        set_usegd(&gdt[GDT_KCODE], SDP_LONG, NULL, 0, SDT_MEMERA, SEL_KPL,
            SDP_PAGES, SDP_OP32);

        /*
         * 64-bit kernel data segment. The limit attribute is ignored in 64-bit
         * mode, but we set it here to 0xFFFF so that we can use the SYSRET
         * instruction to return from system calls back to 32-bit applications.
         * SYSRET doesn't update the base, limit, or attributes of %ss or %ds
         * descriptors. We therefore must ensure that the kernel uses something,
         * though it will be ignored by hardware, that is compatible with 32-bit
         * apps. For the same reason we must set the default op size of this
         * descriptor to 32-bit operands.
         */
        set_usegd(&gdt[GDT_KDATA], SDP_LONG, NULL, -1, SDT_MEMRWA,
            SEL_KPL, SDP_PAGES, SDP_OP32);
        gdt[GDT_KDATA].usd_def32 = 1;

        /*
         * 64-bit user code segment.
         */
        set_usegd(&gdt[GDT_UCODE], SDP_LONG, NULL, 0, SDT_MEMERA, SEL_UPL,
            SDP_PAGES, SDP_OP32);

        /*
         * 32-bit user code segment.
         */
        set_usegd(&gdt[GDT_U32CODE], SDP_SHORT, NULL, -1, SDT_MEMERA,
            SEL_UPL, SDP_PAGES, SDP_OP32);

        /*
         * See gdt_ucode32() and gdt_ucode_native().
         */
        ucs_on = ucs_off = gdt[GDT_UCODE];
        ucs_off.usd_p = 0;      /* forces #np fault */

        ucs32_on = ucs32_off = gdt[GDT_U32CODE];
        ucs32_off.usd_p = 0;    /* forces #np fault */

        /*
         * 32 and 64 bit data segments can actually share the same descriptor.
         * In long mode only the present bit is checked but all other fields
         * are loaded. But in compatibility mode all fields are interpreted
         * as in legacy mode so they must be set correctly for a 32-bit data
         * segment.
         */
        set_usegd(&gdt[GDT_UDATA], SDP_SHORT, NULL, -1, SDT_MEMRWA, SEL_UPL,
            SDP_PAGES, SDP_OP32);

#if !defined(__xpv)

        /*
         * The 64-bit kernel has no default LDT. By default, the LDT descriptor
         * in the GDT is 0.
         */

        /*
         * Kernel TSS
         */
        set_syssegd((system_desc_t *)&gdt[GDT_KTSS], ktss0,
            sizeof (*ktss0) - 1, SDT_SYSTSS, SEL_KPL);

#endif  /* !__xpv */

        /*
         * Initialize fs and gs descriptors for 32 bit processes.
         * Only attributes and limits are initialized, the effective
         * base address is programmed via fsbase/gsbase.
         */
        set_usegd(&gdt[GDT_LWPFS], SDP_SHORT, NULL, -1, SDT_MEMRWA,
            SEL_UPL, SDP_PAGES, SDP_OP32);
        set_usegd(&gdt[GDT_LWPGS], SDP_SHORT, NULL, -1, SDT_MEMRWA,
            SEL_UPL, SDP_PAGES, SDP_OP32);

        /*
         * Initialize the descriptors set aside for brand usage.
         * Only attributes and limits are initialized.
         */
        for (i = GDT_BRANDMIN; i <= GDT_BRANDMAX; i++)
                set_usegd(&gdt[i], SDP_SHORT, NULL, -1, SDT_MEMRWA,
                    SEL_UPL, SDP_PAGES, SDP_OP32);

        /*
         * Initialize convenient zero base user descriptors for clearing
         * lwp private %fs and %gs descriptors in GDT. See setregs() for
         * an example.
         */
        set_usegd(&zero_udesc, SDP_LONG, 0, 0, SDT_MEMRWA, SEL_UPL,
            SDP_BYTES, SDP_OP32);
        set_usegd(&zero_u32desc, SDP_SHORT, 0, -1, SDT_MEMRWA, SEL_UPL,
            SDP_PAGES, SDP_OP32);
}

#if defined(__xpv)

static user_desc_t *
init_gdt(void)
{
        uint64_t gdtpa;
        ulong_t ma[1];          /* XXPV should be a memory_t */
        ulong_t addr;

#if !defined(__lint)
        /*
         * Our gdt is never larger than a single page.
         */
        ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE);
#endif
        gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA,
            PAGESIZE, PAGESIZE);
        bzero(gdt0, PAGESIZE);

        init_gdt_common(gdt0);

        /*
         * XXX Since we never invoke kmdb until after the kernel takes
         * over the descriptor tables why not have it use the kernel's
         * selectors?
         */
        if (boothowto & RB_DEBUG) {
                set_usegd(&gdt0[GDT_B32DATA], SDP_LONG, NULL, -1, SDT_MEMRWA,
                    SEL_KPL, SDP_PAGES, SDP_OP32);
                set_usegd(&gdt0[GDT_B64CODE], SDP_LONG, NULL, -1, SDT_MEMERA,
                    SEL_KPL, SDP_PAGES, SDP_OP32);
        }

        /*
         * Clear write permission for page containing the gdt and install it.
         */
        gdtpa = pfn_to_pa(va_to_pfn(gdt0));
        ma[0] = (ulong_t)(pa_to_ma(gdtpa) >> PAGESHIFT);
        kbm_read_only((uintptr_t)gdt0, gdtpa);
        xen_set_gdt(ma, NGDT);

        /*
         * Reload the segment registers to use the new GDT.
         * On 64-bit, fixup KCS_SEL to be in ring 3.
         * See KCS_SEL in segments.h.
         */
        load_segment_registers((KCS_SEL | SEL_KPL), KFS_SEL, KGS_SEL, KDS_SEL);

        /*
         *  setup %gs for kernel
         */
        xen_set_segment_base(SEGBASE_GS_KERNEL, (ulong_t)&cpus[0]);

        /*
         * XX64 We should never dereference off "other gsbase" or
         * "fsbase".  So, we should arrange to point FSBASE and
         * KGSBASE somewhere truly awful e.g. point it at the last
         * valid address below the hole so that any attempts to index
         * off them cause an exception.
         *
         * For now, point it at 8G -- at least it should be unmapped
         * until some 64-bit processes run.
         */
        addr = 0x200000000ul;
        xen_set_segment_base(SEGBASE_FS, addr);
        xen_set_segment_base(SEGBASE_GS_USER, addr);
        xen_set_segment_base(SEGBASE_GS_USER_SEL, 0);

        return (gdt0);
}

#else   /* __xpv */

static user_desc_t *
init_gdt(void)
{
        desctbr_t       r_bgdt, r_gdt;
        user_desc_t     *bgdt;

#if !defined(__lint)
        /*
         * Our gdt is never larger than a single page.
         */
        ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE);
#endif
        gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA,
            PAGESIZE, PAGESIZE);
        bzero(gdt0, PAGESIZE);

        init_gdt_common(gdt0);

        /*
         * Copy in from boot's gdt to our gdt.
         * Entry 0 is the null descriptor by definition.
         */
        rd_gdtr(&r_bgdt);
        bgdt = (user_desc_t *)r_bgdt.dtr_base;
        if (bgdt == NULL)
                panic("null boot gdt");

        gdt0[GDT_B32DATA] = bgdt[GDT_B32DATA];
        gdt0[GDT_B32CODE] = bgdt[GDT_B32CODE];
        gdt0[GDT_B16CODE] = bgdt[GDT_B16CODE];
        gdt0[GDT_B16DATA] = bgdt[GDT_B16DATA];
        gdt0[GDT_B64CODE] = bgdt[GDT_B64CODE];

        /*
         * Install our new GDT
         */
        r_gdt.dtr_limit = (sizeof (*gdt0) * NGDT) - 1;
        r_gdt.dtr_base = (uintptr_t)gdt0;
        wr_gdtr(&r_gdt);

        /*
         * Reload the segment registers to use the new GDT
         */
        load_segment_registers(KCS_SEL, KFS_SEL, KGS_SEL, KDS_SEL);

        /*
         *  setup %gs for kernel
         */
        wrmsr(MSR_AMD_GSBASE, (uint64_t)&cpus[0]);

        /*
         * XX64 We should never dereference off "other gsbase" or
         * "fsbase".  So, we should arrange to point FSBASE and
         * KGSBASE somewhere truly awful e.g. point it at the last
         * valid address below the hole so that any attempts to index
         * off them cause an exception.
         *
         * For now, point it at 8G -- at least it should be unmapped
         * until some 64-bit processes run.
         */
        wrmsr(MSR_AMD_FSBASE, 0x200000000ul);
        wrmsr(MSR_AMD_KGSBASE, 0x200000000ul);
        return (gdt0);
}

#endif  /* __xpv */

#elif defined(__i386)

static void
init_gdt_common(user_desc_t *gdt)
{
        int i;

        /*
         * Text and data for both kernel and user span entire 32 bit
         * address space.
         */

        /*
         * kernel code segment.
         */
        set_usegd(&gdt[GDT_KCODE], NULL, -1, SDT_MEMERA, SEL_KPL, SDP_PAGES,
            SDP_OP32);

        /*
         * kernel data segment.
         */
        set_usegd(&gdt[GDT_KDATA], NULL, -1, SDT_MEMRWA, SEL_KPL, SDP_PAGES,
            SDP_OP32);

        /*
         * user code segment.
         */
        set_usegd(&gdt[GDT_UCODE], NULL, -1, SDT_MEMERA, SEL_UPL, SDP_PAGES,
            SDP_OP32);

        /*
         * user data segment.
         */
        set_usegd(&gdt[GDT_UDATA], NULL, -1, SDT_MEMRWA, SEL_UPL, SDP_PAGES,
            SDP_OP32);

#if !defined(__xpv)

        /*
         * TSS for T_DBLFLT (double fault) handler
         */
        set_syssegd((system_desc_t *)&gdt[GDT_DBFLT], dftss0,
            sizeof (*dftss0) - 1, SDT_SYSTSS, SEL_KPL);

        /*
         * TSS for kernel
         */
        set_syssegd((system_desc_t *)&gdt[GDT_KTSS], ktss0,
            sizeof (*ktss0) - 1, SDT_SYSTSS, SEL_KPL);

#endif  /* !__xpv */

        /*
         * %gs selector for kernel
         */
        set_usegd(&gdt[GDT_GS], &cpus[0], sizeof (struct cpu) - 1, SDT_MEMRWA,
            SEL_KPL, SDP_BYTES, SDP_OP32);

        /*
         * Initialize lwp private descriptors.
         * Only attributes and limits are initialized, the effective
         * base address is programmed via fsbase/gsbase.
         */
        set_usegd(&gdt[GDT_LWPFS], NULL, (size_t)-1, SDT_MEMRWA, SEL_UPL,
            SDP_PAGES, SDP_OP32);
        set_usegd(&gdt[GDT_LWPGS], NULL, (size_t)-1, SDT_MEMRWA, SEL_UPL,
            SDP_PAGES, SDP_OP32);

        /*
         * Initialize the descriptors set aside for brand usage.
         * Only attributes and limits are initialized.
         */
        for (i = GDT_BRANDMIN; i <= GDT_BRANDMAX; i++)
                set_usegd(&gdt[i], NULL, (size_t)-1, SDT_MEMRWA, SEL_UPL,
                    SDP_PAGES, SDP_OP32);
        /*
         * Initialize convenient zero base user descriptor for clearing
         * lwp private %fs and %gs descriptors in GDT. See setregs() for
         * an example.
         */
        set_usegd(&zero_udesc, NULL, -1, SDT_MEMRWA, SEL_UPL,
            SDP_BYTES, SDP_OP32);
}

#if defined(__xpv)

static user_desc_t *
init_gdt(void)
{
        uint64_t gdtpa;
        ulong_t ma[1];          /* XXPV should be a memory_t */

#if !defined(__lint)
        /*
         * Our gdt is never larger than a single page.
         */
        ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE);
#endif
        gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA,
            PAGESIZE, PAGESIZE);
        bzero(gdt0, PAGESIZE);

        init_gdt_common(gdt0);
        gdtpa = pfn_to_pa(va_to_pfn(gdt0));

        /*
         * XXX Since we never invoke kmdb until after the kernel takes
         * over the descriptor tables why not have it use the kernel's
         * selectors?
         */
        if (boothowto & RB_DEBUG) {
                set_usegd(&gdt0[GDT_B32DATA], NULL, -1, SDT_MEMRWA, SEL_KPL,
                    SDP_PAGES, SDP_OP32);
                set_usegd(&gdt0[GDT_B32CODE], NULL, -1, SDT_MEMERA, SEL_KPL,
                    SDP_PAGES, SDP_OP32);
        }

        /*
         * Clear write permission for page containing the gdt and install it.
         */
        ma[0] = (ulong_t)(pa_to_ma(gdtpa) >> PAGESHIFT);
        kbm_read_only((uintptr_t)gdt0, gdtpa);
        xen_set_gdt(ma, NGDT);

        /*
         * Reload the segment registers to use the new GDT
         */
        load_segment_registers(
            KCS_SEL, KDS_SEL, KDS_SEL, KFS_SEL, KGS_SEL, KDS_SEL);

        return (gdt0);
}

#else   /* __xpv */

static user_desc_t *
init_gdt(void)
{
        desctbr_t       r_bgdt, r_gdt;
        user_desc_t     *bgdt;

#if !defined(__lint)
        /*
         * Our gdt is never larger than a single page.
         */
        ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE);
#endif
        /*
         * XXX this allocation belongs in our caller, not here.
         */
        gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA,
            PAGESIZE, PAGESIZE);
        bzero(gdt0, PAGESIZE);

        init_gdt_common(gdt0);

        /*
         * Copy in from boot's gdt to our gdt entries.
         * Entry 0 is the null descriptor by definition.
         */
        rd_gdtr(&r_bgdt);
        bgdt = (user_desc_t *)r_bgdt.dtr_base;
        if (bgdt == NULL)
                panic("null boot gdt");

        gdt0[GDT_B32DATA] = bgdt[GDT_B32DATA];
        gdt0[GDT_B32CODE] = bgdt[GDT_B32CODE];
        gdt0[GDT_B16CODE] = bgdt[GDT_B16CODE];
        gdt0[GDT_B16DATA] = bgdt[GDT_B16DATA];

        /*
         * Install our new GDT
         */
        r_gdt.dtr_limit = (sizeof (*gdt0) * NGDT) - 1;
        r_gdt.dtr_base = (uintptr_t)gdt0;
        wr_gdtr(&r_gdt);

        /*
         * Reload the segment registers to use the new GDT
         */
        load_segment_registers(
            KCS_SEL, KDS_SEL, KDS_SEL, KFS_SEL, KGS_SEL, KDS_SEL);

        return (gdt0);
}

#endif  /* __xpv */
#endif  /* __i386 */

/*
 * Build kernel IDT.
 *
 * Note that for amd64 we pretty much require every gate to be an interrupt
 * gate which blocks interrupts atomically on entry; that's because of our
 * dependency on using 'swapgs' every time we come into the kernel to find
 * the cpu structure. If we get interrupted just before doing that, %cs could
 * be in kernel mode (so that the trap prolog doesn't do a swapgs), but
 * %gsbase is really still pointing at something in userland. Bad things will
 * ensue. We use interrupt gates for i386 as well, even though this is not
 * required for some traps.
 *
 * Perhaps they should have invented a trap gate that does an atomic swapgs?
 */
static void
init_idt_common(gate_desc_t *idt)
{
        set_gatesegd(&idt[T_ZERODIV], &div0trap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
            0);
        set_gatesegd(&idt[T_SGLSTP], &dbgtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
            0);
        set_gatesegd(&idt[T_NMIFLT], &nmiint, KCS_SEL, SDT_SYSIGT, TRP_KPL,
            0);
        set_gatesegd(&idt[T_BPTFLT], &brktrap, KCS_SEL, SDT_SYSIGT, TRP_UPL,
            0);
        set_gatesegd(&idt[T_OVFLW], &ovflotrap, KCS_SEL, SDT_SYSIGT, TRP_UPL,
            0);
        set_gatesegd(&idt[T_BOUNDFLT], &boundstrap, KCS_SEL, SDT_SYSIGT,
            TRP_KPL, 0);
        set_gatesegd(&idt[T_ILLINST], &invoptrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
            0);
        set_gatesegd(&idt[T_NOEXTFLT], &ndptrap,  KCS_SEL, SDT_SYSIGT, TRP_KPL,
            0);

        /*
         * double fault handler.
         *
         * Note that on the hypervisor a guest does not receive #df faults.
         * Instead a failsafe event is injected into the guest if its selectors
         * and/or stack is in a broken state. See xen_failsafe_callback.
         */
#if !defined(__xpv)
#if defined(__amd64)

        set_gatesegd(&idt[T_DBLFLT], &syserrtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
            T_DBLFLT);

#elif defined(__i386)

        /*
         * task gate required.
         */
        set_gatesegd(&idt[T_DBLFLT], NULL, DFTSS_SEL, SDT_SYSTASKGT, TRP_KPL,
            0);

#endif  /* __i386 */
#endif  /* !__xpv */

        /*
         * T_EXTOVRFLT coprocessor-segment-overrun not supported.
         */

        set_gatesegd(&idt[T_TSSFLT], &invtsstrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
            0);
        set_gatesegd(&idt[T_SEGFLT], &segnptrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
            0);
        set_gatesegd(&idt[T_STKFLT], &stktrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
        set_gatesegd(&idt[T_GPFLT], &gptrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
        set_gatesegd(&idt[T_PGFLT], &pftrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
        set_gatesegd(&idt[T_EXTERRFLT], &ndperr, KCS_SEL, SDT_SYSIGT, TRP_KPL,
            0);
        set_gatesegd(&idt[T_ALIGNMENT], &achktrap, KCS_SEL, SDT_SYSIGT,
            TRP_KPL, 0);
        set_gatesegd(&idt[T_MCE], &mcetrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
        set_gatesegd(&idt[T_SIMDFPE], &xmtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);

        /*
         * install "int80" handler at, well, 0x80.
         */
        set_gatesegd(&idt[T_INT80], &sys_int80, KCS_SEL, SDT_SYSIGT, TRP_UPL,
            0);

        /*
         * install fast trap handler at 210.
         */
        set_gatesegd(&idt[T_FASTTRAP], &fasttrap, KCS_SEL, SDT_SYSIGT, TRP_UPL,
            0);

        /*
         * System call handler.
         */
#if defined(__amd64)
        set_gatesegd(&idt[T_SYSCALLINT], &sys_syscall_int, KCS_SEL, SDT_SYSIGT,
            TRP_UPL, 0);

#elif defined(__i386)
        set_gatesegd(&idt[T_SYSCALLINT], &sys_call, KCS_SEL, SDT_SYSIGT,
            TRP_UPL, 0);
#endif  /* __i386 */

        /*
         * Install the DTrace interrupt handler for the pid provider.
         */
        set_gatesegd(&idt[T_DTRACE_RET], &dtrace_ret, KCS_SEL,
            SDT_SYSIGT, TRP_UPL, 0);

        /*
         * Prepare interposing descriptors for the branded "int80"
         * and syscall handlers and cache copies of the default
         * descriptors.
         */
        brand_tbl[0].ih_inum = T_INT80;
        brand_tbl[0].ih_default_desc = idt[T_INT80];
        set_gatesegd(&(brand_tbl[0].ih_interp_desc), &brand_sys_int80, KCS_SEL,
            SDT_SYSIGT, TRP_UPL, 0);

        brand_tbl[1].ih_inum = T_SYSCALLINT;
        brand_tbl[1].ih_default_desc = idt[T_SYSCALLINT];

#if defined(__amd64)
        set_gatesegd(&(brand_tbl[1].ih_interp_desc), &brand_sys_syscall_int,
            KCS_SEL, SDT_SYSIGT, TRP_UPL, 0);
#elif defined(__i386)
        set_gatesegd(&(brand_tbl[1].ih_interp_desc), &brand_sys_call,
            KCS_SEL, SDT_SYSIGT, TRP_UPL, 0);
#endif  /* __i386 */

        brand_tbl[2].ih_inum = 0;
}

#if defined(__xpv)

static void
init_idt(gate_desc_t *idt)
{
        init_idt_common(idt);
}

#else   /* __xpv */

static void
init_idt(gate_desc_t *idt)
{
        char    ivctname[80];
        void    (*ivctptr)(void);
        int     i;

        /*
         * Initialize entire table with 'reserved' trap and then overwrite
         * specific entries. T_EXTOVRFLT (9) is unsupported and reserved
         * since it can only be generated on a 386 processor. 15 is also
         * unsupported and reserved.
         */
        for (i = 0; i < NIDT; i++)
                set_gatesegd(&idt[i], &resvtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
                    0);

        /*
         * 20-31 reserved
         */
        for (i = 20; i < 32; i++)
                set_gatesegd(&idt[i], &invaltrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
                    0);

        /*
         * interrupts 32 - 255
         */
        for (i = 32; i < 256; i++) {
                (void) snprintf(ivctname, sizeof (ivctname), "ivct%d", i);
                ivctptr = (void (*)(void))kobj_getsymvalue(ivctname, 0);
                if (ivctptr == NULL)
                        panic("kobj_getsymvalue(%s) failed", ivctname);

                set_gatesegd(&idt[i], ivctptr, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
        }

        /*
         * Now install the common ones. Note that it will overlay some
         * entries installed above like T_SYSCALLINT, T_FASTTRAP etc.
         */
        init_idt_common(idt);
}

#endif  /* __xpv */

/*
 * The kernel does not deal with LDTs unless a user explicitly creates
 * one. Under normal circumstances, the LDTR contains 0. Any process attempting
 * to reference the LDT will therefore cause a #gp. System calls made via the
 * obsolete lcall mechanism are emulated by the #gp fault handler.
 */
static void
init_ldt(void)
{
#if defined(__xpv)
        xen_set_ldt(NULL, 0);
#else
        wr_ldtr(0);
#endif
}

#if !defined(__xpv)
#if defined(__amd64)

static void
init_tss(void)
{
        /*
         * tss_rsp0 is dynamically filled in by resume() on each context switch.
         * All exceptions but #DF will run on the thread stack.
         * Set up the double fault stack here.
         */
        ktss0->tss_ist1 =
            (uint64_t)&dblfault_stack0[sizeof (dblfault_stack0)];

        /*
         * Set I/O bit map offset equal to size of TSS segment limit
         * for no I/O permission map. This will force all user I/O
         * instructions to generate #gp fault.
         */
        ktss0->tss_bitmapbase = sizeof (*ktss0);

        /*
         * Point %tr to descriptor for ktss0 in gdt.
         */
        wr_tsr(KTSS_SEL);
}
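
/*
 * Illustrative note: tss_ist1 set above pairs with the #DF gate built in
 * init_idt_common(), where set_gatesegd() sets sgd_ist = 1 for T_DBLFLT.
 * On a double fault the processor therefore switches to dblfault_stack0
 * unconditionally, regardless of the state of the interrupted context's
 * stack.
 */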

#elif defined(__i386)

static void
init_tss(void)
{
        /*
         * ktss0->tss_esp0 is dynamically filled in by resume() on each
         * context switch.
         */
        ktss0->tss_ss0       = KDS_SEL;
        ktss0->tss_eip       = (uint32_t)_start;
        ktss0->tss_ds        = ktss0->tss_es = ktss0->tss_ss = KDS_SEL;
        ktss0->tss_cs        = KCS_SEL;
        ktss0->tss_fs        = KFS_SEL;
        ktss0->tss_gs        = KGS_SEL;
        ktss0->tss_ldt       = ULDT_SEL;

        /*
         * Initialize double fault tss.
         */
        dftss0->tss_esp0 = (uint32_t)&dblfault_stack0[sizeof (dblfault_stack0)];
        dftss0->tss_ss0      = KDS_SEL;

        /*
         * tss_cr3 will get initialized in hat_kern_setup() once our page
         * tables have been setup.
         */
        dftss0->tss_eip      = (uint32_t)syserrtrap;
        dftss0->tss_esp = (uint32_t)&dblfault_stack0[sizeof (dblfault_stack0)];
        dftss0->tss_cs       = KCS_SEL;
        dftss0->tss_ds       = KDS_SEL;
        dftss0->tss_es       = KDS_SEL;
        dftss0->tss_ss       = KDS_SEL;
        dftss0->tss_fs       = KFS_SEL;
        dftss0->tss_gs       = KGS_SEL;

        /*
         * Set I/O bit map offset equal to size of TSS segment limit
         * for no I/O permission map. This will force all user I/O
         * instructions to generate #gp fault.
         */
        ktss0->tss_bitmapbase = sizeof (*ktss0);

        /*
         * Point %tr to descriptor for ktss0 in gdt.
         */
        wr_tsr(KTSS_SEL);
}

#endif  /* __i386 */
#endif  /* !__xpv */

#if defined(__xpv)

void
init_desctbls(void)
{
        uint_t vec;
        user_desc_t *gdt;

        /*
         * Setup and install our GDT.
         */
        gdt = init_gdt();

        /*
         * Store static pa of gdt to speed up pa_to_ma() translations
         * on lwp context switches.
         */
        ASSERT(IS_P2ALIGNED((uintptr_t)gdt, PAGESIZE));
        CPU->cpu_gdt = gdt;
        CPU->cpu_m.mcpu_gdtpa = pfn_to_pa(va_to_pfn(gdt));

        /*
         * Setup and install our IDT.
         */
#if !defined(__lint)
        ASSERT(NIDT * sizeof (*idt0) <= PAGESIZE);
#endif
        idt0 = (gate_desc_t *)BOP_ALLOC(bootops, (caddr_t)IDT_VA,
            PAGESIZE, PAGESIZE);
        bzero(idt0, PAGESIZE);
        init_idt(idt0);
        for (vec = 0; vec < NIDT; vec++)
                xen_idt_write(&idt0[vec], vec);

        CPU->cpu_idt = idt0;

        /*
         * set default kernel stack
         */
        xen_stack_switch(KDS_SEL,
            (ulong_t)&dblfault_stack0[sizeof (dblfault_stack0)]);

        xen_init_callbacks();

        init_ldt();
}

#else   /* __xpv */

void
init_desctbls(void)
{
        user_desc_t *gdt;
        desctbr_t idtr;

        /*
         * Allocate IDT and TSS structures on unique pages for better
         * performance in virtual machines.
         */
#if !defined(__lint)
        ASSERT(NIDT * sizeof (*idt0) <= PAGESIZE);
#endif
        idt0 = (gate_desc_t *)BOP_ALLOC(bootops, (caddr_t)IDT_VA,
            PAGESIZE, PAGESIZE);
        bzero(idt0, PAGESIZE);
#if !defined(__lint)
        ASSERT(sizeof (*ktss0) <= PAGESIZE);
#endif
        ktss0 = (tss_t *)BOP_ALLOC(bootops, (caddr_t)KTSS_VA,
            PAGESIZE, PAGESIZE);
        bzero(ktss0, PAGESIZE);

#if defined(__i386)
#if !defined(__lint)
        ASSERT(sizeof (*dftss0) <= PAGESIZE);
#endif
        dftss0 = (tss_t *)BOP_ALLOC(bootops, (caddr_t)DFTSS_VA,
            PAGESIZE, PAGESIZE);
        bzero(dftss0, PAGESIZE);
#endif

        /*
         * Setup and install our GDT.
         */
        gdt = init_gdt();
        ASSERT(IS_P2ALIGNED((uintptr_t)gdt, PAGESIZE));
        CPU->cpu_gdt = gdt;

        /*
         * Setup and install our IDT.
         */
        init_idt(idt0);

        idtr.dtr_base = (uintptr_t)idt0;
        idtr.dtr_limit = (NIDT * sizeof (*idt0)) - 1;
        wr_idtr(&idtr);
        CPU->cpu_idt = idt0;

#if defined(__i386)
        /*
         * We maintain a description of idt0 in convenient IDTR format
         * for #pf's on some older pentium processors. See pentium_pftrap().
         */
        idt0_default_r = idtr;
#endif  /* __i386 */

        init_tss();
        CPU->cpu_tss = ktss0;
        init_ldt();
}

#endif  /* __xpv */

/*
 * In the early kernel, we need to set up a simple GDT to run on.
 *
 * XXPV Can dboot use this too?  See dboot_gdt.s
 */
void
init_boot_gdt(user_desc_t *bgdt)
{
#if defined(__amd64)
        set_usegd(&bgdt[GDT_B32DATA], SDP_LONG, NULL, -1, SDT_MEMRWA, SEL_KPL,
            SDP_PAGES, SDP_OP32);
        set_usegd(&bgdt[GDT_B64CODE], SDP_LONG, NULL, -1, SDT_MEMERA, SEL_KPL,
            SDP_PAGES, SDP_OP32);
#elif defined(__i386)
        set_usegd(&bgdt[GDT_B32DATA], NULL, -1, SDT_MEMRWA, SEL_KPL,
            SDP_PAGES, SDP_OP32);
        set_usegd(&bgdt[GDT_B32CODE], NULL, -1, SDT_MEMERA, SEL_KPL,
            SDP_PAGES, SDP_OP32);
#endif  /* __i386 */
}
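
/*
 * Illustrative sketch, under the assumption that early boot fills a
 * statically allocated table and loads it with wr_gdtr():
 *
 *	user_desc_t bgdt[NGDT];
 *	desctbr_t r;
 *
 *	init_boot_gdt(bgdt);
 *	r.dtr_limit = sizeof (bgdt) - 1;
 *	r.dtr_base = (uintptr_t)bgdt;
 *	wr_gdtr(&r);
 */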

/*
 * Enable interpositioning on the system call path by rewriting the
 * sys{call|enter} MSRs and the syscall-related entries in the IDT to use
 * the branded entry points.
 */
void
brand_interpositioning_enable(void)
{
        gate_desc_t     *idt = CPU->cpu_idt;
        int             i;

        ASSERT(curthread->t_preempt != 0 || getpil() >= DISP_LEVEL);

        for (i = 0; brand_tbl[i].ih_inum; i++) {
                idt[brand_tbl[i].ih_inum] = brand_tbl[i].ih_interp_desc;
#if defined(__xpv)
                xen_idt_write(&idt[brand_tbl[i].ih_inum],
                    brand_tbl[i].ih_inum);
#endif
        }
#if defined(__amd64)
#if defined(__xpv)

        /*
         * Currently the hypervisor only supports 64-bit syscalls via the
         * syscall instruction. The 32-bit syscalls are handled by the
         * interrupt gates installed above.
         */
        xen_set_callback(brand_sys_syscall, CALLBACKTYPE_syscall,
            CALLBACKF_mask_events);

#else

        if (is_x86_feature(x86_featureset, X86FSET_ASYSC)) {
                wrmsr(MSR_AMD_LSTAR, (uintptr_t)brand_sys_syscall);
                wrmsr(MSR_AMD_CSTAR, (uintptr_t)brand_sys_syscall32);
        }

#endif
#endif  /* __amd64 */

        if (is_x86_feature(x86_featureset, X86FSET_SEP))
                wrmsr(MSR_INTC_SEP_EIP, (uintptr_t)brand_sys_sysenter);
}
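
/*
 * Illustrative sketch: callers satisfy the ASSERT above by disabling
 * preemption around the switch, always pairing enable with a later disable:
 *
 *	kpreempt_disable();
 *	brand_interpositioning_enable();
 *	kpreempt_enable();
 *
 * The matching brand_interpositioning_disable() below restores the cached
 * default descriptors and MSR values.
 */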

/*
 * Disable interpositioning on the system call path by rewriting the
 * sys{call|enter} MSRs and the syscall-related entries in the IDT to use
 * the standard entry points, which bypass the interpositioning hooks.
 */
void
brand_interpositioning_disable(void)
{
        gate_desc_t     *idt = CPU->cpu_idt;
        int i;

        ASSERT(curthread->t_preempt != 0 || getpil() >= DISP_LEVEL);

        for (i = 0; brand_tbl[i].ih_inum; i++) {
                idt[brand_tbl[i].ih_inum] = brand_tbl[i].ih_default_desc;
#if defined(__xpv)
                xen_idt_write(&idt[brand_tbl[i].ih_inum],
                    brand_tbl[i].ih_inum);
#endif
        }

#if defined(__amd64)
#if defined(__xpv)

        /*
         * See comment above in brand_interpositioning_enable.
         */
        xen_set_callback(sys_syscall, CALLBACKTYPE_syscall,
            CALLBACKF_mask_events);

#else

        if (is_x86_feature(x86_featureset, X86FSET_ASYSC)) {
                wrmsr(MSR_AMD_LSTAR, (uintptr_t)sys_syscall);
                wrmsr(MSR_AMD_CSTAR, (uintptr_t)sys_syscall32);
        }

#endif
#endif  /* __amd64 */

        if (is_x86_feature(x86_featureset, X86FSET_SEP))
                wrmsr(MSR_INTC_SEP_EIP, (uintptr_t)sys_sysenter);
}