8956 Implement KPTI
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>

   1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
  24  */
  25 
  26 /*
  27  * Copyright 2011 Joyent, Inc. All rights reserved.
  28  */
  29 
  30 /*
  31  * Copyright (c) 1992 Terrence R. Lambert.
  32  * Copyright (c) 1990 The Regents of the University of California.
  33  * All rights reserved.
  34  *
  35  * This code is derived from software contributed to Berkeley by
  36  * William Jolitz.
  37  *
  38  * Redistribution and use in source and binary forms, with or without
  39  * modification, are permitted provided that the following conditions
  40  * are met:
  41  * 1. Redistributions of source code must retain the above copyright
  42  *    notice, this list of conditions and the following disclaimer.
  43  * 2. Redistributions in binary form must reproduce the above copyright
  44  *    notice, this list of conditions and the following disclaimer in the
  45  *    documentation and/or other materials provided with the distribution.
  46  * 3. All advertising materials mentioning features or use of this software
  47  *    must display the following acknowledgement:
  48  *      This product includes software developed by the University of
  49  *      California, Berkeley and its contributors.
  50  * 4. Neither the name of the University nor the names of its contributors
  51  *    may be used to endorse or promote products derived from this software
  52  *    without specific prior written permission.
  53  *
  54  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  55  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  56  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  57  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  58  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  59  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  60  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  61  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  62  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  63  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  64  * SUCH DAMAGE.
  65  *
  66  *      from: @(#)machdep.c     7.4 (Berkeley) 6/3/91
  67  */
  68 
  69 #include <sys/types.h>
  70 #include <sys/sysmacros.h>
  71 #include <sys/tss.h>
  72 #include <sys/segments.h>
  73 #include <sys/trap.h>
  74 #include <sys/cpuvar.h>
  75 #include <sys/bootconf.h>
  76 #include <sys/x86_archext.h>
  77 #include <sys/controlregs.h>
  78 #include <sys/archsystm.h>
  79 #include <sys/machsystm.h>
  80 #include <sys/kobj.h>
  81 #include <sys/cmn_err.h>
  82 #include <sys/reboot.h>
  83 #include <sys/kdi.h>
  84 #include <sys/mach_mmu.h>
  85 #include <sys/systm.h>

  86 
  87 #ifdef __xpv
  88 #include <sys/hypervisor.h>
  89 #include <vm/as.h>
  90 #endif
  91 
  92 #include <sys/promif.h>
  93 #include <sys/bootinfo.h>
  94 #include <vm/kboot_mmu.h>
  95 #include <vm/hat_pte.h>
  96 
  97 /*
  98  * cpu0 and default tables and structures.
  99  */
 100 user_desc_t     *gdt0;
 101 #if !defined(__xpv)
 102 desctbr_t       gdt0_default_r;
 103 #endif
 104 
 105 gate_desc_t     *idt0;          /* interrupt descriptor table */
 106 #if defined(__i386)
 107 desctbr_t       idt0_default_r;         /* describes idt0 in IDTR format */
 108 #endif
 109 
 110 tss_t           *ktss0;                 /* kernel task state structure */
 111 
 112 #if defined(__i386)
 113 tss_t           *dftss0;                /* #DF double-fault exception */
 114 #endif  /* __i386 */
 115 
 116 user_desc_t     zero_udesc;             /* base zero user desc native procs */
 117 user_desc_t     null_udesc;             /* null user descriptor */
 118 system_desc_t   null_sdesc;             /* null system descriptor */
 119 
 120 #if defined(__amd64)
 121 user_desc_t     zero_u32desc;           /* 32-bit compatibility procs */
 122 #endif  /* __amd64 */
 123 
 124 #if defined(__amd64)
 125 user_desc_t     ucs_on;
 126 user_desc_t     ucs_off;
 127 user_desc_t     ucs32_on;
 128 user_desc_t     ucs32_off;
 129 #endif  /* __amd64 */
 130 
 131 #pragma align   16(dblfault_stack0)
 132 char            dblfault_stack0[DEFAULTSTKSZ];





 133 
 134 extern void     fast_null(void);
 135 extern hrtime_t get_hrtime(void);
 136 extern hrtime_t gethrvtime(void);
 137 extern hrtime_t get_hrestime(void);
 138 extern uint64_t getlgrp(void);
 139 
 140 void (*(fasttable[]))(void) = {
 141         fast_null,                      /* T_FNULL routine */
 142         fast_null,                      /* T_FGETFP routine (initially null) */
 143         fast_null,                      /* T_FSETFP routine (initially null) */
 144         (void (*)())get_hrtime,         /* T_GETHRTIME */
 145         (void (*)())gethrvtime,         /* T_GETHRVTIME */
 146         (void (*)())get_hrestime,       /* T_GETHRESTIME */
 147         (void (*)())getlgrp             /* T_GETLGRP */
 148 };
 149 
 150 /*
 151  * Structure containing pre-computed descriptors to allow us to temporarily
 152  * interpose on a standard handler.
 153  */
 154 struct interposing_handler {
 155         int ih_inum;
 156         gate_desc_t ih_interp_desc;
 157         gate_desc_t ih_default_desc;
 158 };
 159 
 160 /*
 161  * The brand infrastructure interposes on two handlers, and we use one as a
 162  * NULL signpost.
 163  */
 164 static struct interposing_handler brand_tbl[2];
 165 
 166 /*
 167  * software prototypes for default local descriptor table
 168  */
 169 
 170 /*
 171  * Routines for loading segment descriptors in format the hardware
 172  * can understand.
 173  */
 174 
 175 #if defined(__amd64)
 176 
 177 /*
 178  * In long mode we have the new L or long mode attribute bit
 179  * for code segments. Only the conforming bit in type is used along
 180  * with descriptor priority and present bits. Default operand size must
 181  * be zero when in long mode. In 32-bit compatibility mode all fields
 182  * are treated as in legacy mode. For data segments while in long mode
 183  * only the present bit is loaded.
 184  */
 185 void
 186 set_usegd(user_desc_t *dp, uint_t lmode, void *base, size_t size,
 187     uint_t type, uint_t dpl, uint_t gran, uint_t defopsz)
 188 {
 189         ASSERT(lmode == SDP_SHORT || lmode == SDP_LONG);
 190 
 191         /*
 192          * 64-bit long mode.
 193          */
 194         if (lmode == SDP_LONG)
 195                 dp->usd_def32 = 0;           /* 32-bit operands only */
 196         else
 197                 /*
 198                  * 32-bit compatibility mode.
 199                  */
 200                 dp->usd_def32 = defopsz;     /* 0 = 16, 1 = 32-bit ops */
 201 
 202         dp->usd_long = lmode;        /* 64-bit mode */
 203         dp->usd_type = type;
 204         dp->usd_dpl = dpl;
 205         dp->usd_p = 1;
 206         dp->usd_gran = gran;         /* 0 = bytes, 1 = pages */
 207 
 208         dp->usd_lobase = (uintptr_t)base;
 209         dp->usd_midbase = (uintptr_t)base >> 16;
 210         dp->usd_hibase = (uintptr_t)base >> (16 + 8);
 211         dp->usd_lolimit = size;
 212         dp->usd_hilimit = (uintptr_t)size >> 16;
 213 }
 214 
 215 #elif defined(__i386)
 216 
 217 /*
 218  * Install user segment descriptor for code and data.
 219  */
 220 void
 221 set_usegd(user_desc_t *dp, void *base, size_t size, uint_t type,
 222     uint_t dpl, uint_t gran, uint_t defopsz)
 223 {
 224         dp->usd_lolimit = size;
 225         dp->usd_hilimit = (uintptr_t)size >> 16;
 226 
 227         dp->usd_lobase = (uintptr_t)base;
 228         dp->usd_midbase = (uintptr_t)base >> 16;
 229         dp->usd_hibase = (uintptr_t)base >> (16 + 8);
 230 
 231         dp->usd_type = type;
 232         dp->usd_dpl = dpl;
 233         dp->usd_p = 1;
 234         dp->usd_def32 = defopsz;     /* 0 = 16, 1 = 32 bit operands */
 235         dp->usd_gran = gran;         /* 0 = bytes, 1 = pages */
 236 }
 237 
 238 #endif  /* __i386 */
 239 
 240 /*
 241  * Install system segment descriptor for LDT and TSS segments.
 242  */
 243 
 244 #if defined(__amd64)
 245 
 246 void
 247 set_syssegd(system_desc_t *dp, void *base, size_t size, uint_t type,
 248     uint_t dpl)
 249 {
 250         dp->ssd_lolimit = size;
 251         dp->ssd_hilimit = (uintptr_t)size >> 16;
 252 
 253         dp->ssd_lobase = (uintptr_t)base;
 254         dp->ssd_midbase = (uintptr_t)base >> 16;
 255         dp->ssd_hibase = (uintptr_t)base >> (16 + 8);
 256         dp->ssd_hi64base = (uintptr_t)base >> (16 + 8 + 8);
 257 
 258         dp->ssd_type = type;
 259         dp->ssd_zero1 = 0;   /* must be zero */
 260         dp->ssd_zero2 = 0;
 261         dp->ssd_dpl = dpl;
 262         dp->ssd_p = 1;
 263         dp->ssd_gran = 0;    /* force byte units */
 264 }
 265 
 266 void *
 267 get_ssd_base(system_desc_t *dp)
 268 {
 269         uintptr_t       base;
 270 
 271         base = (uintptr_t)dp->ssd_lobase |
 272             (uintptr_t)dp->ssd_midbase << 16 |
 273             (uintptr_t)dp->ssd_hibase << (16 + 8) |
 274             (uintptr_t)dp->ssd_hi64base << (16 + 8 + 8);
 275         return ((void *)base);
 276 }
 277 
 278 #elif defined(__i386)
 279 
 280 void
 281 set_syssegd(system_desc_t *dp, void *base, size_t size, uint_t type,
 282     uint_t dpl)
 283 {
 284         dp->ssd_lolimit = size;
 285         dp->ssd_hilimit = (uintptr_t)size >> 16;
 286 
 287         dp->ssd_lobase = (uintptr_t)base;
 288         dp->ssd_midbase = (uintptr_t)base >> 16;
 289         dp->ssd_hibase = (uintptr_t)base >> (16 + 8);
 290 
 291         dp->ssd_type = type;
 292         dp->ssd_zero = 0;    /* must be zero */
 293         dp->ssd_dpl = dpl;
 294         dp->ssd_p = 1;
 295         dp->ssd_gran = 0;    /* force byte units */
 296 }
 297 
 298 void *
 299 get_ssd_base(system_desc_t *dp)
 300 {
 301         uintptr_t       base;
 302 
 303         base = (uintptr_t)dp->ssd_lobase |
 304             (uintptr_t)dp->ssd_midbase << 16 |
 305             (uintptr_t)dp->ssd_hibase << (16 + 8);
 306         return ((void *)base);
 307 }
 308 
 309 #endif  /* __i386 */
 310 
 311 /*
 312  * Install gate segment descriptor for interrupt, trap, call and task gates.














 313  */
 314 
 315 #if defined(__amd64)
 316 
 317 /*ARGSUSED*/
 318 void
 319 set_gatesegd(gate_desc_t *dp, void (*func)(void), selector_t sel,
 320     uint_t type, uint_t dpl, uint_t vector)
 321 {
 322         dp->sgd_looffset = (uintptr_t)func;
 323         dp->sgd_hioffset = (uintptr_t)func >> 16;
 324         dp->sgd_hi64offset = (uintptr_t)func >> (16 + 16);









 325 
 326         dp->sgd_selector =  (uint16_t)sel;
 327 
 328         /*
 329          * For 64 bit native we use the IST stack mechanism
 330          * for double faults. All other traps use the CPL = 0
 331          * (tss_rsp0) stack.
 332          */
 333 #if !defined(__xpv)
 334         if (vector == T_DBLFLT)
 335                 dp->sgd_ist = 1;
 336         else








 337 #endif
 338                 dp->sgd_ist = 0;
 339 
 340         dp->sgd_type = type;
 341         dp->sgd_dpl = dpl;
 342         dp->sgd_p = 1;
 343 }
 344 
 345 #elif defined(__i386)
 346 
 347 /*ARGSUSED*/
 348 void
 349 set_gatesegd(gate_desc_t *dp, void (*func)(void), selector_t sel,
 350     uint_t type, uint_t dpl, uint_t unused)
 351 {
 352         dp->sgd_looffset = (uintptr_t)func;
 353         dp->sgd_hioffset = (uintptr_t)func >> 16;
 354 
 355         dp->sgd_selector =  (uint16_t)sel;
 356         dp->sgd_stkcpy = 0;  /* always zero bytes */
 357         dp->sgd_type = type;
 358         dp->sgd_dpl = dpl;
 359         dp->sgd_p = 1;
 360 }
 361 
 362 #endif  /* __i386 */
 363 
 364 /*
 365  * Updates a single user descriptor in the the GDT of the current cpu.
 366  * Caller is responsible for preventing cpu migration.
 367  */
 368 
 369 void
 370 gdt_update_usegd(uint_t sidx, user_desc_t *udp)
 371 {
 372 #if defined(__xpv)
 373 
 374         uint64_t dpa = CPU->cpu_m.mcpu_gdtpa + sizeof (*udp) * sidx;
 375 
 376         if (HYPERVISOR_update_descriptor(pa_to_ma(dpa), *(uint64_t *)udp))
 377                 panic("gdt_update_usegd: HYPERVISOR_update_descriptor");
 378 
 379 #else   /* __xpv */
 380 
 381         CPU->cpu_gdt[sidx] = *udp;
 382 
 383 #endif  /* __xpv */
 384 }
 385 
 386 /*
 387  * Writes single descriptor pointed to by udp into a processes
 388  * LDT entry pointed to by ldp.
 389  */
 390 int
 391 ldt_update_segd(user_desc_t *ldp, user_desc_t *udp)
 392 {
 393 #if defined(__xpv)
 394 
 395         uint64_t dpa;
 396 
 397         dpa = mmu_ptob(hat_getpfnum(kas.a_hat, (caddr_t)ldp)) |
 398             ((uintptr_t)ldp & PAGEOFFSET);
 399 
 400         /*
 401          * The hypervisor is a little more restrictive about what it
 402          * supports in the LDT.
 403          */
 404         if (HYPERVISOR_update_descriptor(pa_to_ma(dpa), *(uint64_t *)udp) != 0)
 405                 return (EINVAL);
 406 
 407 #else   /* __xpv */
 408 
 409         *ldp = *udp;
 410 
 411 #endif  /* __xpv */
 412         return (0);
 413 }
 414 
 415 #if defined(__xpv)
 416 
 417 /*
 418  * Converts hw format gate descriptor into pseudo-IDT format for the hypervisor.
 419  * Returns true if a valid entry was written.
 420  */
 421 int
 422 xen_idt_to_trap_info(uint_t vec, gate_desc_t *sgd, void *ti_arg)
 423 {
 424         trap_info_t *ti = ti_arg;       /* XXPV Aargh - segments.h comment */
 425 
 426         /*
 427          * skip holes in the IDT
 428          */
 429         if (GATESEG_GETOFFSET(sgd) == 0)
 430                 return (0);
 431 
 432         ASSERT(sgd->sgd_type == SDT_SYSIGT);
 433         ti->vector = vec;
 434         TI_SET_DPL(ti, sgd->sgd_dpl);
 435 
 436         /*
 437          * Is this an interrupt gate?
 438          */
 439         if (sgd->sgd_type == SDT_SYSIGT) {
 440                 /* LINTED */
 441                 TI_SET_IF(ti, 1);
 442         }
 443         ti->cs = sgd->sgd_selector;
 444 #if defined(__amd64)
 445         ti->cs |= SEL_KPL;   /* force into ring 3. see KCS_SEL  */
 446 #endif
 447         ti->address = GATESEG_GETOFFSET(sgd);
 448         return (1);
 449 }
 450 
 451 /*
 452  * Convert a single hw format gate descriptor and write it into our virtual IDT.
 453  */
 454 void
 455 xen_idt_write(gate_desc_t *sgd, uint_t vec)
 456 {
 457         trap_info_t trapinfo[2];
 458 
 459         bzero(trapinfo, sizeof (trapinfo));
 460         if (xen_idt_to_trap_info(vec, sgd, &trapinfo[0]) == 0)
 461                 return;
 462         if (xen_set_trap_table(trapinfo) != 0)
 463                 panic("xen_idt_write: xen_set_trap_table() failed");
 464 }
 465 
 466 #endif  /* __xpv */
 467 
 468 #if defined(__amd64)
 469 
 470 /*
 471  * Build kernel GDT.
 472  */
 473 
 474 static void
 475 init_gdt_common(user_desc_t *gdt)
 476 {
 477         int i;
 478 
 479         /*
 480          * 64-bit kernel code segment.
 481          */
 482         set_usegd(&gdt[GDT_KCODE], SDP_LONG, NULL, 0, SDT_MEMERA, SEL_KPL,
 483             SDP_PAGES, SDP_OP32);
 484 
 485         /*
 486          * 64-bit kernel data segment. The limit attribute is ignored in 64-bit
 487          * mode, but we set it here to 0xFFFF so that we can use the SYSRET
 488          * instruction to return from system calls back to 32-bit applications.
 489          * SYSRET doesn't update the base, limit, or attributes of %ss or %ds
 490          * descriptors. We therefore must ensure that the kernel uses something,
 491          * though it will be ignored by hardware, that is compatible with 32-bit
 492          * apps. For the same reason we must set the default op size of this
 493          * descriptor to 32-bit operands.
 494          */
 495         set_usegd(&gdt[GDT_KDATA], SDP_LONG, NULL, -1, SDT_MEMRWA,
 496             SEL_KPL, SDP_PAGES, SDP_OP32);
 497         gdt[GDT_KDATA].usd_def32 = 1;
 498 
 499         /*
 500          * 64-bit user code segment.
 501          */
 502         set_usegd(&gdt[GDT_UCODE], SDP_LONG, NULL, 0, SDT_MEMERA, SEL_UPL,
 503             SDP_PAGES, SDP_OP32);
 504 
 505         /*
 506          * 32-bit user code segment.
 507          */
 508         set_usegd(&gdt[GDT_U32CODE], SDP_SHORT, NULL, -1, SDT_MEMERA,
 509             SEL_UPL, SDP_PAGES, SDP_OP32);
 510 
 511         /*
 512          * See gdt_ucode32() and gdt_ucode_native().
 513          */
 514         ucs_on = ucs_off = gdt[GDT_UCODE];
 515         ucs_off.usd_p = 0;      /* forces #np fault */
 516 
 517         ucs32_on = ucs32_off = gdt[GDT_U32CODE];
 518         ucs32_off.usd_p = 0;    /* forces #np fault */
 519 
 520         /*
 521          * 32 and 64 bit data segments can actually share the same descriptor.
 522          * In long mode only the present bit is checked but all other fields
 523          * are loaded. But in compatibility mode all fields are interpreted
 524          * as in legacy mode so they must be set correctly for a 32-bit data
 525          * segment.
 526          */
 527         set_usegd(&gdt[GDT_UDATA], SDP_SHORT, NULL, -1, SDT_MEMRWA, SEL_UPL,
 528             SDP_PAGES, SDP_OP32);
 529 
 530 #if !defined(__xpv)
 531 
 532         /*
 533          * The 64-bit kernel has no default LDT. By default, the LDT descriptor
 534          * in the GDT is 0.
 535          */
 536 
 537         /*
 538          * Kernel TSS
 539          */
 540         set_syssegd((system_desc_t *)&gdt[GDT_KTSS], ktss0,
 541             sizeof (*ktss0) - 1, SDT_SYSTSS, SEL_KPL);
 542 
 543 #endif  /* !__xpv */
 544 
 545         /*
 546          * Initialize fs and gs descriptors for 32 bit processes.
 547          * Only attributes and limits are initialized, the effective
 548          * base address is programmed via fsbase/gsbase.
 549          */
 550         set_usegd(&gdt[GDT_LWPFS], SDP_SHORT, NULL, -1, SDT_MEMRWA,
 551             SEL_UPL, SDP_PAGES, SDP_OP32);
 552         set_usegd(&gdt[GDT_LWPGS], SDP_SHORT, NULL, -1, SDT_MEMRWA,
 553             SEL_UPL, SDP_PAGES, SDP_OP32);
 554 
 555         /*
 556          * Initialize the descriptors set aside for brand usage.
 557          * Only attributes and limits are initialized.
 558          */
 559         for (i = GDT_BRANDMIN; i <= GDT_BRANDMAX; i++)
 560                 set_usegd(&gdt0[i], SDP_SHORT, NULL, -1, SDT_MEMRWA,
 561                     SEL_UPL, SDP_PAGES, SDP_OP32);
 562 
 563         /*
 564          * Initialize convenient zero base user descriptors for clearing
 565          * lwp private %fs and %gs descriptors in GDT. See setregs() for
 566          * an example.
 567          */
 568         set_usegd(&zero_udesc, SDP_LONG, 0, 0, SDT_MEMRWA, SEL_UPL,
 569             SDP_BYTES, SDP_OP32);
 570         set_usegd(&zero_u32desc, SDP_SHORT, 0, -1, SDT_MEMRWA, SEL_UPL,
 571             SDP_PAGES, SDP_OP32);
 572 }
 573 
 574 #if defined(__xpv)
 575 
 576 static user_desc_t *
 577 init_gdt(void)
 578 {
 579         uint64_t gdtpa;
 580         ulong_t ma[1];          /* XXPV should be a memory_t */
 581         ulong_t addr;
 582 
 583 #if !defined(__lint)
 584         /*
 585          * Our gdt is never larger than a single page.
 586          */
 587         ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE);
 588 #endif
 589         gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA,
 590             PAGESIZE, PAGESIZE);
 591         bzero(gdt0, PAGESIZE);
 592 
 593         init_gdt_common(gdt0);
 594 
 595         /*
 596          * XXX Since we never invoke kmdb until after the kernel takes
 597          * over the descriptor tables why not have it use the kernel's
 598          * selectors?
 599          */
 600         if (boothowto & RB_DEBUG) {
 601                 set_usegd(&gdt0[GDT_B32DATA], SDP_LONG, NULL, -1, SDT_MEMRWA,
 602                     SEL_KPL, SDP_PAGES, SDP_OP32);
 603                 set_usegd(&gdt0[GDT_B64CODE], SDP_LONG, NULL, -1, SDT_MEMERA,
 604                     SEL_KPL, SDP_PAGES, SDP_OP32);
 605         }
 606 
 607         /*
 608          * Clear write permission for page containing the gdt and install it.
 609          */
 610         gdtpa = pfn_to_pa(va_to_pfn(gdt0));
 611         ma[0] = (ulong_t)(pa_to_ma(gdtpa) >> PAGESHIFT);
 612         kbm_read_only((uintptr_t)gdt0, gdtpa);
 613         xen_set_gdt(ma, NGDT);
 614 
 615         /*
 616          * Reload the segment registers to use the new GDT.
 617          * On 64-bit, fixup KCS_SEL to be in ring 3.
 618          * See KCS_SEL in segments.h.
 619          */
 620         load_segment_registers((KCS_SEL | SEL_KPL), KFS_SEL, KGS_SEL, KDS_SEL);
 621 
 622         /*
 623          *  setup %gs for kernel
 624          */
 625         xen_set_segment_base(SEGBASE_GS_KERNEL, (ulong_t)&cpus[0]);
 626 
 627         /*
 628          * XX64 We should never dereference off "other gsbase" or
 629          * "fsbase".  So, we should arrange to point FSBASE and
 630          * KGSBASE somewhere truly awful e.g. point it at the last
 631          * valid address below the hole so that any attempts to index
 632          * off them cause an exception.
 633          *
 634          * For now, point it at 8G -- at least it should be unmapped
 635          * until some 64-bit processes run.
 636          */
 637         addr = 0x200000000ul;
 638         xen_set_segment_base(SEGBASE_FS, addr);
 639         xen_set_segment_base(SEGBASE_GS_USER, addr);
 640         xen_set_segment_base(SEGBASE_GS_USER_SEL, 0);
 641 
 642         return (gdt0);
 643 }
 644 
 645 #else   /* __xpv */
 646 
 647 static user_desc_t *
 648 init_gdt(void)
 649 {
 650         desctbr_t       r_bgdt, r_gdt;
 651         user_desc_t     *bgdt;
 652 
 653 #if !defined(__lint)
 654         /*
 655          * Our gdt is never larger than a single page.
 656          */
 657         ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE);
 658 #endif
 659         gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA,
 660             PAGESIZE, PAGESIZE);
 661         bzero(gdt0, PAGESIZE);
 662 
 663         init_gdt_common(gdt0);
 664 
 665         /*
 666          * Copy in from boot's gdt to our gdt.
 667          * Entry 0 is the null descriptor by definition.
 668          */
 669         rd_gdtr(&r_bgdt);
 670         bgdt = (user_desc_t *)r_bgdt.dtr_base;
 671         if (bgdt == NULL)
 672                 panic("null boot gdt");
 673 
 674         gdt0[GDT_B32DATA] = bgdt[GDT_B32DATA];
 675         gdt0[GDT_B32CODE] = bgdt[GDT_B32CODE];
 676         gdt0[GDT_B16CODE] = bgdt[GDT_B16CODE];
 677         gdt0[GDT_B16DATA] = bgdt[GDT_B16DATA];
 678         gdt0[GDT_B64CODE] = bgdt[GDT_B64CODE];
 679 
 680         /*
 681          * Install our new GDT
 682          */
 683         r_gdt.dtr_limit = (sizeof (*gdt0) * NGDT) - 1;
 684         r_gdt.dtr_base = (uintptr_t)gdt0;
 685         wr_gdtr(&r_gdt);
 686 
 687         /*
 688          * Reload the segment registers to use the new GDT
 689          */
 690         load_segment_registers(KCS_SEL, KFS_SEL, KGS_SEL, KDS_SEL);
 691 
 692         /*
 693          *  setup %gs for kernel
 694          */
 695         wrmsr(MSR_AMD_GSBASE, (uint64_t)&cpus[0]);
 696 
 697         /*
 698          * XX64 We should never dereference off "other gsbase" or
 699          * "fsbase".  So, we should arrange to point FSBASE and
 700          * KGSBASE somewhere truly awful e.g. point it at the last
 701          * valid address below the hole so that any attempts to index
 702          * off them cause an exception.
 703          *
 704          * For now, point it at 8G -- at least it should be unmapped
 705          * until some 64-bit processes run.
 706          */
 707         wrmsr(MSR_AMD_FSBASE, 0x200000000ul);
 708         wrmsr(MSR_AMD_KGSBASE, 0x200000000ul);
 709         return (gdt0);
 710 }
 711 
 712 #endif  /* __xpv */
 713 
 714 #elif defined(__i386)
 715 
 716 static void
 717 init_gdt_common(user_desc_t *gdt)
 718 {
 719         int i;
 720 
 721         /*
 722          * Text and data for both kernel and user span entire 32 bit
 723          * address space.
 724          */
 725 
 726         /*
 727          * kernel code segment.
 728          */
 729         set_usegd(&gdt[GDT_KCODE], NULL, -1, SDT_MEMERA, SEL_KPL, SDP_PAGES,
 730             SDP_OP32);
 731 
 732         /*
 733          * kernel data segment.
 734          */
 735         set_usegd(&gdt[GDT_KDATA], NULL, -1, SDT_MEMRWA, SEL_KPL, SDP_PAGES,
 736             SDP_OP32);
 737 
 738         /*
 739          * user code segment.
 740          */
 741         set_usegd(&gdt[GDT_UCODE], NULL, -1, SDT_MEMERA, SEL_UPL, SDP_PAGES,
 742             SDP_OP32);
 743 
 744         /*
 745          * user data segment.
 746          */
 747         set_usegd(&gdt[GDT_UDATA], NULL, -1, SDT_MEMRWA, SEL_UPL, SDP_PAGES,
 748             SDP_OP32);
 749 
 750 #if !defined(__xpv)
 751 
 752         /*
 753          * TSS for T_DBLFLT (double fault) handler
 754          */
 755         set_syssegd((system_desc_t *)&gdt[GDT_DBFLT], dftss0,
 756             sizeof (*dftss0) - 1, SDT_SYSTSS, SEL_KPL);
 757 
 758         /*
 759          * TSS for kernel
 760          */
 761         set_syssegd((system_desc_t *)&gdt[GDT_KTSS], ktss0,
 762             sizeof (*ktss0) - 1, SDT_SYSTSS, SEL_KPL);
 763 
 764 #endif  /* !__xpv */
 765 
 766         /*
 767          * %gs selector for kernel
 768          */
 769         set_usegd(&gdt[GDT_GS], &cpus[0], sizeof (struct cpu) -1, SDT_MEMRWA,
 770             SEL_KPL, SDP_BYTES, SDP_OP32);
 771 
 772         /*
 773          * Initialize lwp private descriptors.
 774          * Only attributes and limits are initialized, the effective
 775          * base address is programmed via fsbase/gsbase.
 776          */
 777         set_usegd(&gdt[GDT_LWPFS], NULL, (size_t)-1, SDT_MEMRWA, SEL_UPL,
 778             SDP_PAGES, SDP_OP32);
 779         set_usegd(&gdt[GDT_LWPGS], NULL, (size_t)-1, SDT_MEMRWA, SEL_UPL,
 780             SDP_PAGES, SDP_OP32);
 781 
 782         /*
 783          * Initialize the descriptors set aside for brand usage.
 784          * Only attributes and limits are initialized.
 785          */
 786         for (i = GDT_BRANDMIN; i <= GDT_BRANDMAX; i++)
 787                 set_usegd(&gdt0[i], NULL, (size_t)-1, SDT_MEMRWA, SEL_UPL,
 788                     SDP_PAGES, SDP_OP32);
 789         /*
 790          * Initialize convenient zero base user descriptor for clearing
 791          * lwp  private %fs and %gs descriptors in GDT. See setregs() for
 792          * an example.
 793          */
 794         set_usegd(&zero_udesc, NULL, -1, SDT_MEMRWA, SEL_UPL,
 795             SDP_BYTES, SDP_OP32);
 796 }
 797 
 798 #if defined(__xpv)
 799 
 800 static user_desc_t *
 801 init_gdt(void)
 802 {
 803         uint64_t gdtpa;
 804         ulong_t ma[1];          /* XXPV should be a memory_t */
 805 
 806 #if !defined(__lint)
 807         /*
 808          * Our gdt is never larger than a single page.
 809          */
 810         ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE);
 811 #endif
 812         gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA,
 813             PAGESIZE, PAGESIZE);
 814         bzero(gdt0, PAGESIZE);
 815 
 816         init_gdt_common(gdt0);
 817         gdtpa = pfn_to_pa(va_to_pfn(gdt0));
 818 
 819         /*
 820          * XXX Since we never invoke kmdb until after the kernel takes
 821          * over the descriptor tables why not have it use the kernel's
 822          * selectors?
 823          */
 824         if (boothowto & RB_DEBUG) {
 825                 set_usegd(&gdt0[GDT_B32DATA], NULL, -1, SDT_MEMRWA, SEL_KPL,
 826                     SDP_PAGES, SDP_OP32);
 827                 set_usegd(&gdt0[GDT_B32CODE], NULL, -1, SDT_MEMERA, SEL_KPL,
 828                     SDP_PAGES, SDP_OP32);
 829         }
 830 
 831         /*
 832          * Clear write permission for page containing the gdt and install it.
 833          */
 834         ma[0] = (ulong_t)(pa_to_ma(gdtpa) >> PAGESHIFT);
 835         kbm_read_only((uintptr_t)gdt0, gdtpa);
 836         xen_set_gdt(ma, NGDT);
 837 
 838         /*
 839          * Reload the segment registers to use the new GDT
 840          */
 841         load_segment_registers(
 842             KCS_SEL, KDS_SEL, KDS_SEL, KFS_SEL, KGS_SEL, KDS_SEL);
 843 
 844         return (gdt0);
 845 }
 846 
 847 #else   /* __xpv */
 848 
 849 static user_desc_t *
 850 init_gdt(void)
 851 {
 852         desctbr_t       r_bgdt, r_gdt;
 853         user_desc_t     *bgdt;
 854 
 855 #if !defined(__lint)
 856         /*
 857          * Our gdt is never larger than a single page.
 858          */
 859         ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE);
 860 #endif
 861         /*
 862          * XXX this allocation belongs in our caller, not here.
 863          */
 864         gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA,
 865             PAGESIZE, PAGESIZE);
 866         bzero(gdt0, PAGESIZE);
 867 
 868         init_gdt_common(gdt0);
 869 
 870         /*
 871          * Copy in from boot's gdt to our gdt entries.
 872          * Entry 0 is null descriptor by definition.
 873          */
 874         rd_gdtr(&r_bgdt);
 875         bgdt = (user_desc_t *)r_bgdt.dtr_base;
 876         if (bgdt == NULL)
 877                 panic("null boot gdt");
 878 
 879         gdt0[GDT_B32DATA] = bgdt[GDT_B32DATA];
 880         gdt0[GDT_B32CODE] = bgdt[GDT_B32CODE];
 881         gdt0[GDT_B16CODE] = bgdt[GDT_B16CODE];
 882         gdt0[GDT_B16DATA] = bgdt[GDT_B16DATA];
 883 
 884         /*
 885          * Install our new GDT
 886          */
 887         r_gdt.dtr_limit = (sizeof (*gdt0) * NGDT) - 1;
 888         r_gdt.dtr_base = (uintptr_t)gdt0;
 889         wr_gdtr(&r_gdt);
 890 
 891         /*
 892          * Reload the segment registers to use the new GDT
 893          */
 894         load_segment_registers(
 895             KCS_SEL, KDS_SEL, KDS_SEL, KFS_SEL, KGS_SEL, KDS_SEL);
 896 
 897         return (gdt0);
 898 }
 899 
 900 #endif  /* __xpv */
 901 #endif  /* __i386 */
 902 
 903 /*
 904  * Build kernel IDT.
 905  *
 906  * Note that for amd64 we pretty much require every gate to be an interrupt
 907  * gate which blocks interrupts atomically on entry; that's because of our
 908  * dependency on using 'swapgs' every time we come into the kernel to find
 909  * the cpu structure. If we get interrupted just before doing that, %cs could
 910  * be in kernel mode (so that the trap prolog doesn't do a swapgs), but
 911  * %gsbase is really still pointing at something in userland. Bad things will
 912  * ensue. We use interrupt gates for i386 as well, even though this is not
 913  * required for some traps.
 914  *
 915  * Perhaps they should have invented a trap gate that does an atomic swapgs?
 916  */
 917 static void
 918 init_idt_common(gate_desc_t *idt)
 919 {
 920         set_gatesegd(&idt[T_ZERODIV], &div0trap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
 921             0);
 922         set_gatesegd(&idt[T_SGLSTP], &dbgtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
 923             0);
 924         set_gatesegd(&idt[T_NMIFLT], &nmiint, KCS_SEL, SDT_SYSIGT, TRP_KPL,
 925             0);
 926         set_gatesegd(&idt[T_BPTFLT], &brktrap, KCS_SEL, SDT_SYSIGT, TRP_UPL,
 927             0);
 928         set_gatesegd(&idt[T_OVFLW], &ovflotrap, KCS_SEL, SDT_SYSIGT, TRP_UPL,
 929             0);
 930         set_gatesegd(&idt[T_BOUNDFLT], &boundstrap, KCS_SEL, SDT_SYSIGT,
 931             TRP_KPL, 0);
 932         set_gatesegd(&idt[T_ILLINST], &invoptrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
 933             0);
 934         set_gatesegd(&idt[T_NOEXTFLT], &ndptrap,  KCS_SEL, SDT_SYSIGT, TRP_KPL,
 935             0);








 936 
 937         /*
 938          * double fault handler.
 939          *
 940          * Note that on the hypervisor a guest does not receive #df faults.
 941          * Instead a failsafe event is injected into the guest if its selectors
 942          * and/or stack is in a broken state. See xen_failsafe_callback.
 943          */
 944 #if !defined(__xpv)
 945 #if defined(__amd64)
 946 
 947         set_gatesegd(&idt[T_DBLFLT], &syserrtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
 948             T_DBLFLT);
 949 
 950 #elif defined(__i386)
 951 
 952         /*
 953          * task gate required.
 954          */
 955         set_gatesegd(&idt[T_DBLFLT], NULL, DFTSS_SEL, SDT_SYSTASKGT, TRP_KPL,
 956             0);
 957 
 958 #endif  /* __i386 */
 959 #endif  /* !__xpv */
 960 
 961         /*
 962          * T_EXTOVRFLT coprocessor-segment-overrun not supported.
 963          */



























 964 
 965         set_gatesegd(&idt[T_TSSFLT], &invtsstrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
 966             0);
 967         set_gatesegd(&idt[T_SEGFLT], &segnptrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
 968             0);
 969         set_gatesegd(&idt[T_STKFLT], &stktrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
 970         set_gatesegd(&idt[T_GPFLT], &gptrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
 971         set_gatesegd(&idt[T_PGFLT], &pftrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
 972         set_gatesegd(&idt[T_EXTERRFLT], &ndperr, KCS_SEL, SDT_SYSIGT, TRP_KPL,
 973             0);
 974         set_gatesegd(&idt[T_ALIGNMENT], &achktrap, KCS_SEL, SDT_SYSIGT,
 975             TRP_KPL, 0);
 976         set_gatesegd(&idt[T_MCE], &mcetrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
 977         set_gatesegd(&idt[T_SIMDFPE], &xmtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
 978 
 979         /*
 980          * install fast trap handler at 210.
 981          */
 982         set_gatesegd(&idt[T_FASTTRAP], &fasttrap, KCS_SEL, SDT_SYSIGT, TRP_UPL,
 983             0);

 984 
 985         /*
 986          * System call handler.
 987          */
 988 #if defined(__amd64)
 989         set_gatesegd(&idt[T_SYSCALLINT], &sys_syscall_int, KCS_SEL, SDT_SYSIGT,
 990             TRP_UPL, 0);
 991 
 992 #elif defined(__i386)
 993         set_gatesegd(&idt[T_SYSCALLINT], &sys_call, KCS_SEL, SDT_SYSIGT,
 994             TRP_UPL, 0);
 995 #endif  /* __i386 */
 996 
 997         /*
 998          * Install the DTrace interrupt handler for the pid provider.
 999          */
1000         set_gatesegd(&idt[T_DTRACE_RET], &dtrace_ret, KCS_SEL,
1001             SDT_SYSIGT, TRP_UPL, 0);

1002 
1003         /*
1004          * Prepare interposing descriptor for the syscall handler
1005          * and cache copy of the default descriptor.
1006          */
1007         brand_tbl[0].ih_inum = T_SYSCALLINT;
1008         brand_tbl[0].ih_default_desc = idt0[T_SYSCALLINT];
1009 
1010 #if defined(__amd64)
1011         set_gatesegd(&(brand_tbl[0].ih_interp_desc), &brand_sys_syscall_int,
1012             KCS_SEL, SDT_SYSIGT, TRP_UPL, 0);
1013 #elif defined(__i386)
1014         set_gatesegd(&(brand_tbl[0].ih_interp_desc), &brand_sys_call,
1015             KCS_SEL, SDT_SYSIGT, TRP_UPL, 0);
1016 #endif  /* __i386 */
1017 
1018         brand_tbl[1].ih_inum = 0;
1019 }
1020 
1021 #if defined(__xpv)
1022 
1023 static void
1024 init_idt(gate_desc_t *idt)
1025 {
1026         init_idt_common(idt);
1027 }
1028 
1029 #else   /* __xpv */
1030 
1031 static void
1032 init_idt(gate_desc_t *idt)
1033 {
1034         char    ivctname[80];
1035         void    (*ivctptr)(void);
1036         int     i;
1037 
1038         /*
1039          * Initialize entire table with 'reserved' trap and then overwrite
1040          * specific entries. T_EXTOVRFLT (9) is unsupported and reserved
1041          * since it can only be generated on a 386 processor. 15 is also
1042          * unsupported and reserved.
1043          */
1044         for (i = 0; i < NIDT; i++)








1045                 set_gatesegd(&idt[i], &resvtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
1046                     0);


1047 
1048         /*
1049          * 20-31 reserved
1050          */
1051         for (i = 20; i < 32; i++)








1052                 set_gatesegd(&idt[i], &invaltrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
1053                     0);


1054 
1055         /*
1056          * interrupts 32 - 255
1057          */
1058         for (i = 32; i < 256; i++) {




1059                 (void) snprintf(ivctname, sizeof (ivctname), "ivct%d", i);

1060                 ivctptr = (void (*)(void))kobj_getsymvalue(ivctname, 0);
1061                 if (ivctptr == NULL)
1062                         panic("kobj_getsymvalue(%s) failed", ivctname);
1063 
1064                 set_gatesegd(&idt[i], ivctptr, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);

1065         }
1066 
1067         /*
1068          * Now install the common ones. Note that it will overlay some
1069          * entries installed above like T_SYSCALLINT, T_FASTTRAP etc.
1070          */
1071         init_idt_common(idt);
1072 }
1073 
1074 #endif  /* __xpv */
1075 
/*
 * Start out with no LDT at all.
 *
 * The kernel does not deal with LDTs unless a user explicitly creates
 * one. Under normal circumstances, the LDTR contains 0. Any process attempting
 * to reference the LDT will therefore cause a #gp. System calls made via the
 * obsolete lcall mechanism are emulated by the #gp fault handler.
 */
static void
init_ldt(void)
{
#if !defined(__xpv)
	wr_ldtr(0);
#else
	xen_set_ldt(NULL, 0);
#endif
}
1091 
1092 #if !defined(__xpv)
1093 #if defined(__amd64)
1094 
1095 static void
1096 init_tss(void)
1097 {
1098         /*
1099          * tss_rsp0 is dynamically filled in by resume() on each context switch.
1100          * All exceptions but #DF will run on the thread stack.
1101          * Set up the double fault stack here.
1102          */
1103         ktss0->tss_ist1 =
1104             (uint64_t)&dblfault_stack0[sizeof (dblfault_stack0)];
1105 
1106         /*
1107          * Set I/O bit map offset equal to size of TSS segment limit
1108          * for no I/O permission map. This will force all user I/O
1109          * instructions to generate #gp fault.
1110          */
1111         ktss0->tss_bitmapbase = sizeof (*ktss0);


1112 
1113         /*
1114          * Point %tr to descriptor for ktss0 in gdt.
1115          */
1116         wr_tsr(KTSS_SEL);
1117 }
1118 
1119 #elif defined(__i386)
1120 
1121 static void
1122 init_tss(void)
1123 {
1124         /*
1125          * ktss0->tss_esp dynamically filled in by resume() on each
1126          * context switch.
1127          */
1128         ktss0->tss_ss0       = KDS_SEL;
1129         ktss0->tss_eip       = (uint32_t)_start;
1130         ktss0->tss_ds        = ktss0->tss_es = ktss0->tss_ss = KDS_SEL;
1131         ktss0->tss_cs        = KCS_SEL;
1132         ktss0->tss_fs        = KFS_SEL;
1133         ktss0->tss_gs        = KGS_SEL;
1134         ktss0->tss_ldt       = ULDT_SEL;
1135 
1136         /*
1137          * Initialize double fault tss.
1138          */
1139         dftss0->tss_esp0 = (uint32_t)&dblfault_stack0[sizeof (dblfault_stack0)];
1140         dftss0->tss_ss0      = KDS_SEL;
1141 
1142         /*
1143          * tss_cr3 will get initialized in hat_kern_setup() once our page
1144          * tables have been setup.
1145          */
1146         dftss0->tss_eip      = (uint32_t)syserrtrap;
1147         dftss0->tss_esp      = (uint32_t)&dblfault_stack0[sizeof (dblfault_stack0)];
1148         dftss0->tss_cs       = KCS_SEL;
1149         dftss0->tss_ds       = KDS_SEL;
1150         dftss0->tss_es       = KDS_SEL;
1151         dftss0->tss_ss       = KDS_SEL;
1152         dftss0->tss_fs       = KFS_SEL;
1153         dftss0->tss_gs       = KGS_SEL;
1154 
1155         /*
1156          * Set I/O bit map offset equal to size of TSS segment limit
1157          * for no I/O permission map. This will force all user I/O
1158          * instructions to generate #gp fault.
1159          */
1160         ktss0->tss_bitmapbase = sizeof (*ktss0);
1161 
1162         /*
1163          * Point %tr to descriptor for ktss0 in gdt.
1164          */
1165         wr_tsr(KTSS_SEL);
1166 }
1167 
1168 #endif  /* __i386 */
1169 #endif  /* !__xpv */
1170 
1171 #if defined(__xpv)
1172 
1173 void
1174 init_desctbls(void)
1175 {
1176         uint_t vec;
1177         user_desc_t *gdt;
1178 
1179         /*
1180          * Setup and install our GDT.
1181          */
1182         gdt = init_gdt();
1183 
1184         /*
1185          * Store static pa of gdt to speed up pa_to_ma() translations
1186          * on lwp context switches.
1187          */
1188         ASSERT(IS_P2ALIGNED((uintptr_t)gdt, PAGESIZE));
1189         CPU->cpu_gdt = gdt;
1190         CPU->cpu_m.mcpu_gdtpa = pfn_to_pa(va_to_pfn(gdt));
1191 
1192         /*
1193          * Setup and install our IDT.
1194          */
1195 #if !defined(__lint)
1196         ASSERT(NIDT * sizeof (*idt0) <= PAGESIZE);
1197 #endif
1198         idt0 = (gate_desc_t *)BOP_ALLOC(bootops, (caddr_t)IDT_VA,
1199             PAGESIZE, PAGESIZE);
1200         bzero(idt0, PAGESIZE);
1201         init_idt(idt0);
1202         for (vec = 0; vec < NIDT; vec++)
1203                 xen_idt_write(&idt0[vec], vec);
1204 
1205         CPU->cpu_idt = idt0;
1206 
1207         /*
1208          * set default kernel stack
1209          */
1210         xen_stack_switch(KDS_SEL,
1211             (ulong_t)&dblfault_stack0[sizeof (dblfault_stack0)]);
1212 
1213         xen_init_callbacks();
1214 
1215         init_ldt();
1216 }
1217 
1218 #else   /* __xpv */
1219 
1220 void
1221 init_desctbls(void)
1222 {
1223         user_desc_t *gdt;
1224         desctbr_t idtr;
1225 
1226         /*
1227          * Allocate IDT and TSS structures on unique pages for better
1228          * performance in virtual machines.
1229          */
1230 #if !defined(__lint)
1231         ASSERT(NIDT * sizeof (*idt0) <= PAGESIZE);
1232 #endif
1233         idt0 = (gate_desc_t *)BOP_ALLOC(bootops, (caddr_t)IDT_VA,
1234             PAGESIZE, PAGESIZE);
1235         bzero(idt0, PAGESIZE);
1236 #if !defined(__lint)
1237         ASSERT(sizeof (*ktss0) <= PAGESIZE);
1238 #endif
1239         ktss0 = (tss_t *)BOP_ALLOC(bootops, (caddr_t)KTSS_VA,
1240             PAGESIZE, PAGESIZE);
1241         bzero(ktss0, PAGESIZE);
1242 
1243 #if defined(__i386)
1244 #if !defined(__lint)
1245         ASSERT(sizeof (*dftss0) <= PAGESIZE);
1246 #endif
1247         dftss0 = (tss_t *)BOP_ALLOC(bootops, (caddr_t)DFTSS_VA,
1248             PAGESIZE, PAGESIZE);
1249         bzero(dftss0, PAGESIZE);
1250 #endif
1251 
1252         /*
1253          * Setup and install our GDT.
1254          */
1255         gdt = init_gdt();
1256         ASSERT(IS_P2ALIGNED((uintptr_t)gdt, PAGESIZE));
1257         CPU->cpu_gdt = gdt;
1258 
1259         /*








1260          * Setup and install our IDT.
1261          */
1262         init_idt(idt0);
1263 
1264         idtr.dtr_base = (uintptr_t)idt0;
1265         idtr.dtr_limit = (NIDT * sizeof (*idt0)) - 1;
1266         wr_idtr(&idtr);
1267         CPU->cpu_idt = idt0;
1268 
1269 #if defined(__i386)
1270         /*
1271          * We maintain a description of idt0 in convenient IDTR format
1272          * for #pf's on some older pentium processors. See pentium_pftrap().
1273          */
1274         idt0_default_r = idtr;
1275 #endif  /* __i386 */
1276 
1277         init_tss();
1278         CPU->cpu_tss = ktss0;
1279         init_ldt();



1280 }
1281 
1282 #endif  /* __xpv */
1283 
1284 /*
1285  * In the early kernel, we need to set up a simple GDT to run on.
1286  *
1287  * XXPV Can dboot use this too?  See dboot_gdt.s
1288  */
1289 void
1290 init_boot_gdt(user_desc_t *bgdt)
1291 {
1292 #if defined(__amd64)
1293         set_usegd(&bgdt[GDT_B32DATA], SDP_LONG, NULL, -1, SDT_MEMRWA, SEL_KPL,
1294             SDP_PAGES, SDP_OP32);
1295         set_usegd(&bgdt[GDT_B64CODE], SDP_LONG, NULL, -1, SDT_MEMERA, SEL_KPL,
1296             SDP_PAGES, SDP_OP32);
1297 #elif defined(__i386)
1298         set_usegd(&bgdt[GDT_B32DATA], NULL, -1, SDT_MEMRWA, SEL_KPL,
1299             SDP_PAGES, SDP_OP32);
1300         set_usegd(&bgdt[GDT_B32CODE], NULL, -1, SDT_MEMERA, SEL_KPL,
1301             SDP_PAGES, SDP_OP32);
1302 #endif  /* __i386 */
1303 }
1304 
1305 /*
1306  * Enable interpositioning on the system call path by rewriting the
1307  * sys{call|enter} MSRs and the syscall-related entries in the IDT to use
1308  * the branded entry points.
1309  */
1310 void
1311 brand_interpositioning_enable(void)
1312 {
1313         gate_desc_t     *idt = CPU->cpu_idt;
1314         int             i;
1315 
1316         ASSERT(curthread->t_preempt != 0 || getpil() >= DISP_LEVEL);
1317 
1318         for (i = 0; brand_tbl[i].ih_inum; i++) {
1319                 idt[brand_tbl[i].ih_inum] = brand_tbl[i].ih_interp_desc;
1320 #if defined(__xpv)
1321                 xen_idt_write(&idt[brand_tbl[i].ih_inum],
1322                     brand_tbl[i].ih_inum);
1323 #endif
1324         }
1325 
1326 #if defined(__amd64)
1327 #if defined(__xpv)
1328 
1329         /*
1330          * Currently the hypervisor only supports 64-bit syscalls via
1331          * syscall instruction. The 32-bit syscalls are handled by
1332          * interrupt gate above.
1333          */
1334         xen_set_callback(brand_sys_syscall, CALLBACKTYPE_syscall,
1335             CALLBACKF_mask_events);
1336 
1337 #else
1338 
1339         if (is_x86_feature(x86_featureset, X86FSET_ASYSC)) {




1340                 wrmsr(MSR_AMD_LSTAR, (uintptr_t)brand_sys_syscall);
1341                 wrmsr(MSR_AMD_CSTAR, (uintptr_t)brand_sys_syscall32);
1342         }

1343 
1344 #endif
1345 #endif  /* __amd64 */
1346 
1347         if (is_x86_feature(x86_featureset, X86FSET_SEP))




1348                 wrmsr(MSR_INTC_SEP_EIP, (uintptr_t)brand_sys_sysenter);


1349 }
1350 
1351 /*
1352  * Disable interpositioning on the system call path by rewriting the
1353  * sys{call|enter} MSRs and the syscall-related entries in the IDT to use
1354  * the standard entry points, which bypass the interpositioning hooks.
1355  */
1356 void
1357 brand_interpositioning_disable(void)
1358 {
1359         gate_desc_t     *idt = CPU->cpu_idt;
1360         int i;
1361 
1362         ASSERT(curthread->t_preempt != 0 || getpil() >= DISP_LEVEL);
1363 
1364         for (i = 0; brand_tbl[i].ih_inum; i++) {
1365                 idt[brand_tbl[i].ih_inum] = brand_tbl[i].ih_default_desc;
1366 #if defined(__xpv)
1367                 xen_idt_write(&idt[brand_tbl[i].ih_inum],
1368                     brand_tbl[i].ih_inum);
1369 #endif
1370         }
1371 
1372 #if defined(__amd64)
1373 #if defined(__xpv)
1374 
1375         /*
1376          * See comment above in brand_interpositioning_enable.
1377          */
1378         xen_set_callback(sys_syscall, CALLBACKTYPE_syscall,
1379             CALLBACKF_mask_events);
1380 
1381 #else
1382 
1383         if (is_x86_feature(x86_featureset, X86FSET_ASYSC)) {




1384                 wrmsr(MSR_AMD_LSTAR, (uintptr_t)sys_syscall);
1385                 wrmsr(MSR_AMD_CSTAR, (uintptr_t)sys_syscall32);
1386         }

1387 
1388 #endif
1389 #endif  /* __amd64 */
1390 
1391         if (is_x86_feature(x86_featureset, X86FSET_SEP))



1392                 wrmsr(MSR_INTC_SEP_EIP, (uintptr_t)sys_sysenter);


1393 }
--- EOF ---