8956 Implement KPTI
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>


   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
  24  */
  25 
  26 /*
  27  * Copyright 2011 Joyent, Inc. All rights reserved.
  28  */
  29 
  30 /*
  31  * Copyright (c) 1992 Terrence R. Lambert.
  32  * Copyright (c) 1990 The Regents of the University of California.
  33  * All rights reserved.
  34  *
  35  * This code is derived from software contributed to Berkeley by
  36  * William Jolitz.
  37  *
  38  * Redistribution and use in source and binary forms, with or without
  39  * modification, are permitted provided that the following conditions
  40  * are met:
  41  * 1. Redistributions of source code must retain the above copyright
  42  *    notice, this list of conditions and the following disclaimer.
  43  * 2. Redistributions in binary form must reproduce the above copyright
  44  *    notice, this list of conditions and the following disclaimer in the
  45  *    documentation and/or other materials provided with the distribution.
  46  * 3. All advertising materials mentioning features or use of this software
  47  *    must display the following acknowledgement:


  66  *      from: @(#)machdep.c     7.4 (Berkeley) 6/3/91
  67  */
  68 
  69 #include <sys/types.h>
  70 #include <sys/sysmacros.h>
  71 #include <sys/tss.h>
  72 #include <sys/segments.h>
  73 #include <sys/trap.h>
  74 #include <sys/cpuvar.h>
  75 #include <sys/bootconf.h>
  76 #include <sys/x86_archext.h>
  77 #include <sys/controlregs.h>
  78 #include <sys/archsystm.h>
  79 #include <sys/machsystm.h>
  80 #include <sys/kobj.h>
  81 #include <sys/cmn_err.h>
  82 #include <sys/reboot.h>
  83 #include <sys/kdi.h>
  84 #include <sys/mach_mmu.h>
  85 #include <sys/systm.h>

  86 
  87 #ifdef __xpv
  88 #include <sys/hypervisor.h>
  89 #include <vm/as.h>
  90 #endif
  91 
  92 #include <sys/promif.h>
  93 #include <sys/bootinfo.h>
  94 #include <vm/kboot_mmu.h>
  95 #include <vm/hat_pte.h>
  96 
  97 /*
  98  * cpu0 and default tables and structures.
  99  */
 100 user_desc_t     *gdt0;
 101 #if !defined(__xpv)
 102 desctbr_t       gdt0_default_r;
 103 #endif
 104 
 105 gate_desc_t     *idt0;          /* interrupt descriptor table */


 111 
 112 #if defined(__i386)
 113 tss_t           *dftss0;                /* #DF double-fault exception */
 114 #endif  /* __i386 */
 115 
 116 user_desc_t     zero_udesc;             /* base zero user desc native procs */
 117 user_desc_t     null_udesc;             /* null user descriptor */
 118 system_desc_t   null_sdesc;             /* null system descriptor */
 119 
 120 #if defined(__amd64)
 121 user_desc_t     zero_u32desc;           /* 32-bit compatibility procs */
 122 #endif  /* __amd64 */
 123 
 124 #if defined(__amd64)
 125 user_desc_t     ucs_on;
 126 user_desc_t     ucs_off;
 127 user_desc_t     ucs32_on;
 128 user_desc_t     ucs32_off;
 129 #endif  /* __amd64 */
 130 
 131 #pragma align   16(dblfault_stack0)
 132 char            dblfault_stack0[DEFAULTSTKSZ];
 133 
 134 extern void     fast_null(void);
 135 extern hrtime_t get_hrtime(void);
 136 extern hrtime_t gethrvtime(void);
 137 extern hrtime_t get_hrestime(void);
 138 extern uint64_t getlgrp(void);
 139 
 140 void (*(fasttable[]))(void) = {
 141         fast_null,                      /* T_FNULL routine */
 142         fast_null,                      /* T_FGETFP routine (initially null) */
 143         fast_null,                      /* T_FSETFP routine (initially null) */
 144         (void (*)())get_hrtime,         /* T_GETHRTIME */
 145         (void (*)())gethrvtime,         /* T_GETHRVTIME */
 146         (void (*)())get_hrestime,       /* T_GETHRESTIME */
 147         (void (*)())getlgrp             /* T_GETLGRP */
 148 };
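
The fast-trap table above is dispatched through the T_FASTTRAP gate (vector 210, installed later in init_idt_common()) with the table index in %eax. A minimal user-space sketch of that calling convention; the %eax-index and %edx:%eax return conventions are assumptions for illustration:

	/*
	 * Hedged sketch: invoke the T_GETHRTIME fast trap (fasttable[] slot 3)
	 * via "int $0xd2" (T_FASTTRAP == 210), assuming the 64-bit result
	 * comes back in %edx:%eax for 32-bit callers.
	 */
	#include <stdint.h>

	static inline uint64_t
	fast_gethrtime(void)
	{
		uint32_t lo, hi;

		__asm__ volatile("int $0xd2" : "=a" (lo), "=d" (hi) : "a" (3));
		return ((uint64_t)hi << 32 | lo);
	}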
 149 
 150 /*
 151  * Structure containing pre-computed descriptors to allow us to temporarily
 152  * interpose on a standard handler.


 293         dp->ssd_dpl = dpl;
 294         dp->ssd_p = 1;
 295         dp->ssd_gran = 0;    /* force byte units */
 296 }
 297 
 298 void *
 299 get_ssd_base(system_desc_t *dp)
 300 {
 301         uintptr_t       base;
 302 
 303         base = (uintptr_t)dp->ssd_lobase |
 304             (uintptr_t)dp->ssd_midbase << 16 |
 305             (uintptr_t)dp->ssd_hibase << (16 + 8);
 306         return ((void *)base);
 307 }
 308 
 309 #endif  /* __i386 */
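
get_ssd_base() above undoes the three-way split of a descriptor base address. A hedged stand-alone check of the same bit arithmetic, with illustrative field values:

	#include <stdio.h>
	#include <stdint.h>

	int
	main(void)
	{
		/* Illustrative values for the three base fields. */
		uint16_t lobase = 0x5678;
		uint8_t midbase = 0x34, hibase = 0x12;
		uintptr_t base;

		base = (uintptr_t)lobase |
		    (uintptr_t)midbase << 16 |
		    (uintptr_t)hibase << (16 + 8);
		printf("base = 0x%lx\n", (unsigned long)base);	/* 0x12345678 */
		return (0);
	}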
 310 
 311 /*
 312  * Install gate segment descriptor for interrupt, trap, call and task gates.
 313  */
 314 
 315 #if defined(__amd64)
 316 
 317 /*ARGSUSED*/
 318 void
 319 set_gatesegd(gate_desc_t *dp, void (*func)(void), selector_t sel,
 320     uint_t type, uint_t dpl, uint_t vector)
 321 {
 322         dp->sgd_looffset = (uintptr_t)func;
 323         dp->sgd_hioffset = (uintptr_t)func >> 16;
 324         dp->sgd_hi64offset = (uintptr_t)func >> (16 + 16);
 325 
 326         dp->sgd_selector =  (uint16_t)sel;
 327 
 328         /*
 329          * For 64 bit native we use the IST stack mechanism
 330          * for double faults. All other traps use the CPL = 0
 331          * (tss_rsp0) stack.
 332          */
 333 #if !defined(__xpv)
 334         if (vector == T_DBLFLT)
 335                 dp->sgd_ist = 1;
 336         else
 337 #endif
 338                 dp->sgd_ist = 0;
 339 
 340         dp->sgd_type = type;
 341         dp->sgd_dpl = dpl;
 342         dp->sgd_p = 1;
 343 }
 344 
 345 #elif defined(__i386)
 346 
 347 /*ARGSUSED*/
 348 void
 349 set_gatesegd(gate_desc_t *dp, void (*func)(void), selector_t sel,
 350     uint_t type, uint_t dpl, uint_t unused)
 351 {
 352         dp->sgd_looffset = (uintptr_t)func;
 353         dp->sgd_hioffset = (uintptr_t)func >> 16;
 354 
 355         dp->sgd_selector =  (uint16_t)sel;
 356         dp->sgd_stkcpy = 0;  /* always zero bytes */
 357         dp->sgd_type = type;
 358         dp->sgd_dpl = dpl;
 359         dp->sgd_p = 1;
 360 }
 361 
 362 #endif  /* __i386 */
 363 
 364 /*
 365  * Updates a single user descriptor in the GDT of the current cpu.
 366  * Caller is responsible for preventing cpu migration.
 367  */
 368 
 369 void
 370 gdt_update_usegd(uint_t sidx, user_desc_t *udp)
 371 {
 372 #if defined(__xpv)
 373 
 374         uint64_t dpa = CPU->cpu_m.mcpu_gdtpa + sizeof (*udp) * sidx;
 375 
 376         if (HYPERVISOR_update_descriptor(pa_to_ma(dpa), *(uint64_t *)udp))
 377                 panic("gdt_update_usegd: HYPERVISOR_update_descriptor");
 378 
 379 #else   /* __xpv */
 380 
 381         CPU->cpu_gdt[sidx] = *udp;
 382 
 383 #endif  /* __xpv */
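
A hedged usage sketch for gdt_update_usegd(): since "current cpu" must stay stable across the write, callers disable preemption first. The selector index GDT_LWPFS and the wrapper are illustrative.

	/*
	 * Hedged kernel-context sketch; GDT_LWPFS and set_lwp_fs_desc()
	 * are illustrative, not taken from this file.
	 */
	static void
	set_lwp_fs_desc(user_desc_t *udesc)
	{
		kpreempt_disable();	/* prevent migration off this cpu */
		gdt_update_usegd(GDT_LWPFS, udesc);
		kpreempt_enable();
	}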


 900 #endif  /* __xpv */
 901 #endif  /* __i386 */
 902 
 903 /*
 904  * Build kernel IDT.
 905  *
 906  * Note that for amd64 we pretty much require every gate to be an interrupt
 907  * gate which blocks interrupts atomically on entry; that's because of our
 908  * dependency on using 'swapgs' every time we come into the kernel to find
 909  * the cpu structure. If we get interrupted just before doing that, %cs could
 910  * be in kernel mode (so that the trap prolog doesn't do a swapgs), but
 911  * %gsbase is really still pointing at something in userland. Bad things will
 912  * ensue. We also use interrupt gates for i386 even though this is not
 913  * required for some traps.
 914  *
 915  * Perhaps they should have invented a trap gate that does an atomic swapgs?
 916  */
 917 static void
 918 init_idt_common(gate_desc_t *idt)
 919 {
 920         set_gatesegd(&idt[T_ZERODIV], &div0trap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
 921             0);
 922         set_gatesegd(&idt[T_SGLSTP], &dbgtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
 923             0);
 924         set_gatesegd(&idt[T_NMIFLT], &nmiint, KCS_SEL, SDT_SYSIGT, TRP_KPL,
 925             0);
 926         set_gatesegd(&idt[T_BPTFLT], &brktrap, KCS_SEL, SDT_SYSIGT, TRP_UPL,
 927             0);
 928         set_gatesegd(&idt[T_OVFLW], &ovflotrap, KCS_SEL, SDT_SYSIGT, TRP_UPL,
 929             0);
 930         set_gatesegd(&idt[T_BOUNDFLT], &boundstrap, KCS_SEL, SDT_SYSIGT,
 931             TRP_KPL, 0);
 932         set_gatesegd(&idt[T_ILLINST], &invoptrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
 933             0);
 934         set_gatesegd(&idt[T_NOEXTFLT], &ndptrap,  KCS_SEL, SDT_SYSIGT, TRP_KPL,
 935             0);
 936 
 937         /*
 938          * double fault handler.
 939          *
 940          * Note that on the hypervisor a guest does not receive #df faults.
 941          * Instead a failsafe event is injected into the guest if its selectors
 942          * and/or stack is in a broken state. See xen_failsafe_callback.
 943          */
 944 #if !defined(__xpv)
 945 #if defined(__amd64)
 946 
 947         set_gatesegd(&idt[T_DBLFLT], &syserrtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
 948             T_DBLFLT);
 949 
 950 #elif defined(__i386)
 951 
 952         /*
 953          * task gate required.
 954          */
 955         set_gatesegd(&idt[T_DBLFLT], NULL, DFTSS_SEL, SDT_SYSTASKGT, TRP_KPL,
 956             0);
 957 
 958 #endif  /* __i386 */
 959 #endif  /* !__xpv */
 960 
 961         /*
 962          * T_EXTOVRFLT coprocessor-segment-overrun not supported.
 963          */
 964 
 965         set_gatesegd(&idt[T_TSSFLT], &invtsstrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
 966             0);
 967         set_gatesegd(&idt[T_SEGFLT], &segnptrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
 968             0);
 969         set_gatesegd(&idt[T_STKFLT], &stktrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
 970         set_gatesegd(&idt[T_GPFLT], &gptrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
 971         set_gatesegd(&idt[T_PGFLT], &pftrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
 972         set_gatesegd(&idt[T_EXTERRFLT], &ndperr, KCS_SEL, SDT_SYSIGT, TRP_KPL,
 973             0);
 974         set_gatesegd(&idt[T_ALIGNMENT], &achktrap, KCS_SEL, SDT_SYSIGT,
 975             TRP_KPL, 0);
 976         set_gatesegd(&idt[T_MCE], &mcetrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
 977         set_gatesegd(&idt[T_SIMDFPE], &xmtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
 978 
 979         /*
 980          * install fast trap handler at 210.
 981          */
 982         set_gatesegd(&idt[T_FASTTRAP], &fasttrap, KCS_SEL, SDT_SYSIGT, TRP_UPL,
 983             0);

 984 
 985         /*
 986          * System call handler.
 987          */
 988 #if defined(__amd64)
 989         set_gatesegd(&idt[T_SYSCALLINT], &sys_syscall_int, KCS_SEL, SDT_SYSIGT,
 990             TRP_UPL, 0);
 991 
 992 #elif defined(__i386)
 993         set_gatesegd(&idt[T_SYSCALLINT], &sys_call, KCS_SEL, SDT_SYSIGT,
 994             TRP_UPL, 0);
 995 #endif  /* __i386 */
 996 
 997         /*
 998          * Install the DTrace interrupt handler for the pid provider.
 999          */
1000         set_gatesegd(&idt[T_DTRACE_RET], &dtrace_ret, KCS_SEL,
1001             SDT_SYSIGT, TRP_UPL, 0);

1002 
1003         /*
1004          * Prepare interposing descriptor for the syscall handler
1005          * and cache copy of the default descriptor.
1006          */
1007         brand_tbl[0].ih_inum = T_SYSCALLINT;
1008         brand_tbl[0].ih_default_desc = idt0[T_SYSCALLINT];
1009 
1010 #if defined(__amd64)
1011         set_gatesegd(&(brand_tbl[0].ih_interp_desc), &brand_sys_syscall_int,
1012             KCS_SEL, SDT_SYSIGT, TRP_UPL, 0);
1013 #elif defined(__i386)
1014         set_gatesegd(&(brand_tbl[0].ih_interp_desc), &brand_sys_call,
1015             KCS_SEL, SDT_SYSIGT, TRP_UPL, 0);
1016 #endif  /* __i386 */
1017 
1018         brand_tbl[1].ih_inum = 0;
1019 }
1020 
1021 #if defined(__xpv)
1022 
1023 static void
1024 init_idt(gate_desc_t *idt)
1025 {
1026         init_idt_common(idt);
1027 }
1028 
1029 #else   /* __xpv */
1030 
1031 static void
1032 init_idt(gate_desc_t *idt)
1033 {
1034         char    ivctname[80];
1035         void    (*ivctptr)(void);
1036         int     i;
1037 
1038         /*
1039          * Initialize entire table with 'reserved' trap and then overwrite
1040          * specific entries. T_EXTOVRFLT (9) is unsupported and reserved
1041          * since it can only be generated on a 386 processor. 15 is also
1042          * unsupported and reserved.
1043          */
1044         for (i = 0; i < NIDT; i++)
1045                 set_gatesegd(&idt[i], &resvtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
1046                     0);


1047 
1048         /*
1049          * 20-31 reserved
1050          */
1051         for (i = 20; i < 32; i++)
1052                 set_gatesegd(&idt[i], &invaltrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
1053                     0);


1054 
1055         /*
1056          * interrupts 32 - 255
1057          */
1058         for (i = 32; i < 256; i++) {
1059                 (void) snprintf(ivctname, sizeof (ivctname), "ivct%d", i);

1060                 ivctptr = (void (*)(void))kobj_getsymvalue(ivctname, 0);
1061                 if (ivctptr == NULL)
1062                         panic("kobj_getsymvalue(%s) failed", ivctname);
1063 
1064                 set_gatesegd(&idt[i], ivctptr, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);

1065         }
1066 
1067         /*
1068          * Now install the common ones. Note that it will overlay some
1069          * entries installed above like T_SYSCALLINT, T_FASTTRAP etc.
1070          */
1071         init_idt_common(idt);
1072 }
1073 
1074 #endif  /* __xpv */
1075 
1076 /*
1077  * The kernel does not deal with LDTs unless a user explicitly creates
1078  * one. Under normal circumstances, the LDTR contains 0. Any process attempting
1079  * to reference the LDT will therefore cause a #gp. System calls made via the
1080  * obsolete lcall mechanism are emulated by the #gp fault handler.
1081  */
1082 static void
1083 init_ldt(void)
1084 {
1085 #if defined(__xpv)
1086         xen_set_ldt(NULL, 0);
1087 #else
1088         wr_ldtr(0);
1089 #endif
1090 }
1091 
1092 #if !defined(__xpv)
1093 #if defined(__amd64)
1094 
1095 static void
1096 init_tss(void)
1097 {
1098         /*
1099          * tss_rsp0 is dynamically filled in by resume() on each context switch.
1100          * All exceptions but #DF will run on the thread stack.
1101          * Set up the double fault stack here.
1102          */
1103         ktss0->tss_ist1 =
1104             (uint64_t)&dblfault_stack0[sizeof (dblfault_stack0)];
1105 
1106         /*
1107          * Set I/O bit map offset equal to size of TSS segment limit
1108          * for no I/O permission map. This will force all user I/O
1109          * instructions to generate #gp fault.
1110          */
1111         ktss0->tss_bitmapbase = sizeof (*ktss0);


1112 
1113         /*
1114          * Point %tr to descriptor for ktss0 in gdt.
1115          */
1116         wr_tsr(KTSS_SEL);
1117 }
1118 
1119 #elif defined(__i386)
1120 
1121 static void
1122 init_tss(void)
1123 {
1124         /*
1125          * ktss0->tss_esp dynamically filled in by resume() on each
1126          * context switch.
1127          */
1128         ktss0->tss_ss0       = KDS_SEL;
1129         ktss0->tss_eip       = (uint32_t)_start;
1130         ktss0->tss_ds        = ktss0->tss_es = ktss0->tss_ss = KDS_SEL;
1131         ktss0->tss_cs        = KCS_SEL;
1132         ktss0->tss_fs        = KFS_SEL;
1133         ktss0->tss_gs        = KGS_SEL;
1134         ktss0->tss_ldt       = ULDT_SEL;
1135 
1136         /*
1137          * Initialize double fault tss.
1138          */
1139         dftss0->tss_esp0 = (uint32_t)&dblfault_stack0[sizeof (dblfault_stack0)];
1140         dftss0->tss_ss0      = KDS_SEL;
1141 
1142         /*
1143          * tss_cr3 will get initialized in hat_kern_setup() once our page
1144          * tables have been setup.
1145          */
1146         dftss0->tss_eip      = (uint32_t)syserrtrap;
1147         dftss0->tss_esp      = (uint32_t)&dblfault_stack0[sizeof (dblfault_stack0)];
1148         dftss0->tss_cs       = KCS_SEL;
1149         dftss0->tss_ds       = KDS_SEL;
1150         dftss0->tss_es       = KDS_SEL;
1151         dftss0->tss_ss       = KDS_SEL;
1152         dftss0->tss_fs       = KFS_SEL;
1153         dftss0->tss_gs       = KGS_SEL;
1154 
1155         /*
1156          * Set I/O bit map offset equal to size of TSS segment limit
1157          * for no I/O permission map. This will force all user I/O
1158          * instructions to generate #gp fault.
1159          */
1160         ktss0->tss_bitmapbase = sizeof (*ktss0);
1161 
1162         /*
1163          * Point %tr to descriptor for ktss0 in gdt.
1164          */
1165         wr_tsr(KTSS_SEL);
1166 }
1167 
1168 #endif  /* __i386 */
1169 #endif  /* !__xpv */
1170 
1171 #if defined(__xpv)
1172 
1173 void
1174 init_desctbls(void)
1175 {
1176         uint_t vec;
1177         user_desc_t *gdt;
1178 
1179         /*
1180          * Setup and install our GDT.
1181          */
1182         gdt = init_gdt();
1183 
1184         /*
1185          * Store static pa of gdt to speed up pa_to_ma() translations
1186          * on lwp context switches.
1187          */
1188         ASSERT(IS_P2ALIGNED((uintptr_t)gdt, PAGESIZE));


1240             PAGESIZE, PAGESIZE);
1241         bzero(ktss0, PAGESIZE);
1242 
1243 #if defined(__i386)
1244 #if !defined(__lint)
1245         ASSERT(sizeof (*dftss0) <= PAGESIZE);
1246 #endif
1247         dftss0 = (tss_t *)BOP_ALLOC(bootops, (caddr_t)DFTSS_VA,
1248             PAGESIZE, PAGESIZE);
1249         bzero(dftss0, PAGESIZE);
1250 #endif
1251 
1252         /*
1253          * Setup and install our GDT.
1254          */
1255         gdt = init_gdt();
1256         ASSERT(IS_P2ALIGNED((uintptr_t)gdt, PAGESIZE));
1257         CPU->cpu_gdt = gdt;
1258 
1259         /*
1260          * Setup and install our IDT.
1261          */
1262         init_idt(idt0);
1263 
1264         idtr.dtr_base = (uintptr_t)idt0;
1265         idtr.dtr_limit = (NIDT * sizeof (*idt0)) - 1;
1266         wr_idtr(&idtr);
1267         CPU->cpu_idt = idt0;
1268 
1269 #if defined(__i386)
1270         /*
1271          * We maintain a description of idt0 in convenient IDTR format
1272          * for #pf's on some older pentium processors. See pentium_pftrap().
1273          */
1274         idt0_default_r = idtr;
1275 #endif  /* __i386 */
1276 
1277         init_tss();
1278         CPU->cpu_tss = ktss0;
1279         init_ldt();
1280 }
1281 
1282 #endif  /* __xpv */
1283 
1284 /*
1285  * In the early kernel, we need to set up a simple GDT to run on.
1286  *
1287  * XXPV Can dboot use this too?  See dboot_gdt.s
1288  */
1289 void
1290 init_boot_gdt(user_desc_t *bgdt)
1291 {
1292 #if defined(__amd64)
1293         set_usegd(&bgdt[GDT_B32DATA], SDP_LONG, NULL, -1, SDT_MEMRWA, SEL_KPL,
1294             SDP_PAGES, SDP_OP32);
1295         set_usegd(&bgdt[GDT_B64CODE], SDP_LONG, NULL, -1, SDT_MEMERA, SEL_KPL,
1296             SDP_PAGES, SDP_OP32);
1297 #elif defined(__i386)
1298         set_usegd(&bgdt[GDT_B32DATA], NULL, -1, SDT_MEMRWA, SEL_KPL,
1299             SDP_PAGES, SDP_OP32);


1320 #if defined(__xpv)
1321                 xen_idt_write(&idt[brand_tbl[i].ih_inum],
1322                     brand_tbl[i].ih_inum);
1323 #endif
1324         }
1325 
1326 #if defined(__amd64)
1327 #if defined(__xpv)
1328 
1329         /*
1330          * Currently the hypervisor only supports 64-bit syscalls via
1331          * syscall instruction. The 32-bit syscalls are handled by
1332          * interrupt gate above.
1333          */
1334         xen_set_callback(brand_sys_syscall, CALLBACKTYPE_syscall,
1335             CALLBACKF_mask_events);
1336 
1337 #else
1338 
1339         if (is_x86_feature(x86_featureset, X86FSET_ASYSC)) {
1340                 wrmsr(MSR_AMD_LSTAR, (uintptr_t)brand_sys_syscall);
1341                 wrmsr(MSR_AMD_CSTAR, (uintptr_t)brand_sys_syscall32);
1342         }

1343 
1344 #endif
1345 #endif  /* __amd64 */
1346 
1347         if (is_x86_feature(x86_featureset, X86FSET_SEP))
1348                 wrmsr(MSR_INTC_SEP_EIP, (uintptr_t)brand_sys_sysenter);


1349 }
1350 
1351 /*
1352  * Disable interpositioning on the system call path by rewriting the
1353  * sys{call|enter} MSRs and the syscall-related entries in the IDT to use
1354  * the standard entry points, which bypass the interpositioning hooks.
1355  */
1356 void
1357 brand_interpositioning_disable(void)
1358 {
1359         gate_desc_t     *idt = CPU->cpu_idt;
1360         int i;
1361 
1362         ASSERT(curthread->t_preempt != 0 || getpil() >= DISP_LEVEL);
1363 
1364         for (i = 0; brand_tbl[i].ih_inum; i++) {
1365                 idt[brand_tbl[i].ih_inum] = brand_tbl[i].ih_default_desc;
1366 #if defined(__xpv)
1367                 xen_idt_write(&idt[brand_tbl[i].ih_inum],
1368                     brand_tbl[i].ih_inum);
1369 #endif
1370         }
1371 
1372 #if defined(__amd64)
1373 #if defined(__xpv)
1374 
1375         /*
1376          * See comment above in brand_interpositioning_enable.
1377          */
1378         xen_set_callback(sys_syscall, CALLBACKTYPE_syscall,
1379             CALLBACKF_mask_events);
1380 
1381 #else
1382 
1383         if (is_x86_feature(x86_featureset, X86FSET_ASYSC)) {
1384                 wrmsr(MSR_AMD_LSTAR, (uintptr_t)sys_syscall);
1385                 wrmsr(MSR_AMD_CSTAR, (uintptr_t)sys_syscall32);
1386         }

1387 
1388 #endif
1389 #endif  /* __amd64 */
1390 
1391         if (is_x86_feature(x86_featureset, X86FSET_SEP))
1392                 wrmsr(MSR_INTC_SEP_EIP, (uintptr_t)sys_sysenter);


1393 }


   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
  24  */
  25 
  26 /*
  27  * Copyright 2018 Joyent, Inc. All rights reserved.
  28  */
  29 
  30 /*
  31  * Copyright (c) 1992 Terrence R. Lambert.
  32  * Copyright (c) 1990 The Regents of the University of California.
  33  * All rights reserved.
  34  *
  35  * This code is derived from software contributed to Berkeley by
  36  * William Jolitz.
  37  *
  38  * Redistribution and use in source and binary forms, with or without
  39  * modification, are permitted provided that the following conditions
  40  * are met:
  41  * 1. Redistributions of source code must retain the above copyright
  42  *    notice, this list of conditions and the following disclaimer.
  43  * 2. Redistributions in binary form must reproduce the above copyright
  44  *    notice, this list of conditions and the following disclaimer in the
  45  *    documentation and/or other materials provided with the distribution.
  46  * 3. All advertising materials mentioning features or use of this software
  47  *    must display the following acknowledgement:


  66  *      from: @(#)machdep.c     7.4 (Berkeley) 6/3/91
  67  */
  68 
  69 #include <sys/types.h>
  70 #include <sys/sysmacros.h>
  71 #include <sys/tss.h>
  72 #include <sys/segments.h>
  73 #include <sys/trap.h>
  74 #include <sys/cpuvar.h>
  75 #include <sys/bootconf.h>
  76 #include <sys/x86_archext.h>
  77 #include <sys/controlregs.h>
  78 #include <sys/archsystm.h>
  79 #include <sys/machsystm.h>
  80 #include <sys/kobj.h>
  81 #include <sys/cmn_err.h>
  82 #include <sys/reboot.h>
  83 #include <sys/kdi.h>
  84 #include <sys/mach_mmu.h>
  85 #include <sys/systm.h>
  86 #include <sys/note.h>
  87 
  88 #ifdef __xpv
  89 #include <sys/hypervisor.h>
  90 #include <vm/as.h>
  91 #endif
  92 
  93 #include <sys/promif.h>
  94 #include <sys/bootinfo.h>
  95 #include <vm/kboot_mmu.h>
  96 #include <vm/hat_pte.h>
  97 
  98 /*
  99  * cpu0 and default tables and structures.
 100  */
 101 user_desc_t     *gdt0;
 102 #if !defined(__xpv)
 103 desctbr_t       gdt0_default_r;
 104 #endif
 105 
 106 gate_desc_t     *idt0;          /* interrupt descriptor table */


 112 
 113 #if defined(__i386)
 114 tss_t           *dftss0;                /* #DF double-fault exception */
 115 #endif  /* __i386 */
 116 
 117 user_desc_t     zero_udesc;             /* base zero user desc native procs */
 118 user_desc_t     null_udesc;             /* null user descriptor */
 119 system_desc_t   null_sdesc;             /* null system descriptor */
 120 
 121 #if defined(__amd64)
 122 user_desc_t     zero_u32desc;           /* 32-bit compatibility procs */
 123 #endif  /* __amd64 */
 124 
 125 #if defined(__amd64)
 126 user_desc_t     ucs_on;
 127 user_desc_t     ucs_off;
 128 user_desc_t     ucs32_on;
 129 user_desc_t     ucs32_off;
 130 #endif  /* __amd64 */
 131 
 132 /*
 133  * If the size of this is changed, you must update hat_pcp_setup() and the
 134  * definitions in exception.s
 135  */
 136 extern char dblfault_stack0[DEFAULTSTKSZ];
 137 extern char nmi_stack0[DEFAULTSTKSZ];
 138 extern char mce_stack0[DEFAULTSTKSZ];
 139 
 140 extern void     fast_null(void);
 141 extern hrtime_t get_hrtime(void);
 142 extern hrtime_t gethrvtime(void);
 143 extern hrtime_t get_hrestime(void);
 144 extern uint64_t getlgrp(void);
 145 
 146 void (*(fasttable[]))(void) = {
 147         fast_null,                      /* T_FNULL routine */
 148         fast_null,                      /* T_FGETFP routine (initially null) */
 149         fast_null,                      /* T_FSETFP routine (initially null) */
 150         (void (*)())get_hrtime,         /* T_GETHRTIME */
 151         (void (*)())gethrvtime,         /* T_GETHRVTIME */
 152         (void (*)())get_hrestime,       /* T_GETHRESTIME */
 153         (void (*)())getlgrp             /* T_GETLGRP */
 154 };
 155 
 156 /*
 157  * Structure containing pre-computed descriptors to allow us to temporarily
 158  * interpose on a standard handler.


 299         dp->ssd_dpl = dpl;
 300         dp->ssd_p = 1;
 301         dp->ssd_gran = 0;    /* force byte units */
 302 }
 303 
 304 void *
 305 get_ssd_base(system_desc_t *dp)
 306 {
 307         uintptr_t       base;
 308 
 309         base = (uintptr_t)dp->ssd_lobase |
 310             (uintptr_t)dp->ssd_midbase << 16 |
 311             (uintptr_t)dp->ssd_hibase << (16 + 8);
 312         return ((void *)base);
 313 }
 314 
 315 #endif  /* __i386 */
 316 
 317 /*
 318  * Install gate segment descriptor for interrupt, trap, call and task gates.
 319  *
 320  * For 64 bit native if we have KPTI enabled, we use the IST stack mechanism on
 321  * all interrupts.  We have different ISTs for each class of exceptions that are
 322  * most likely to occur while handling an existing exception; while many of
 323  * these are just going to panic, it's nice not to trample on the existing
 324  * exception state for debugging purposes.
 325  *
 326  * Normal interrupts are all redirected unconditionally to the KPTI trampoline
 327  * stack space. This unifies the trampoline handling between user and kernel
 328  * space (and avoids the need to touch %gs).
 329  *
 330  * The KDI IDT *all* uses the DBG IST: consider single stepping tr_pftrap, when
 331  * we do a read from KMDB that causes another #PF.  Without its own IST, this
 332  * would stomp on the kernel's mcpu_kpti_flt frame.
 333  */
 334 uint_t
 335 idt_vector_to_ist(uint_t vector)
 336 {
 337 #if defined(__xpv)
 338         _NOTE(ARGUNUSED(vector));
 339         return (IST_NONE);
 340 #else
 341         switch (vector) {
 342         /* These should always use IST even without KPTI enabled. */
 343         case T_DBLFLT:
 344                 return (IST_DF);
 345         case T_NMIFLT:
 346                 return (IST_NMI);
 347         case T_MCE:
 348                 return (IST_MCE);
 349 
 350         case T_BPTFLT:
 351         case T_SGLSTP:
 352                 if (kpti_enable == 1) {
 353                         return (IST_DBG);
 354                 }
 355                 return (IST_NONE);
 356         case T_STKFLT:
 357         case T_GPFLT:
 358         case T_PGFLT:
 359                 if (kpti_enable == 1) {
 360                         return (IST_NESTABLE);
 361                 }
 362                 return (IST_NONE);
 363         default:
 364                 if (kpti_enable == 1) {
 365                         return (IST_DEFAULT);
 366                 }
 367                 return (IST_NONE);
 368         }
 369 #endif
 370 }
 371 
 372 void
 373 set_gatesegd(gate_desc_t *dp, void (*func)(void), selector_t sel,
 374     uint_t type, uint_t dpl, uint_t ist)
 375 {
 376         dp->sgd_looffset = (uintptr_t)func;
 377         dp->sgd_hioffset = (uintptr_t)func >> 16;
 378         dp->sgd_hi64offset = (uintptr_t)func >> (16 + 16);
 379         dp->sgd_selector =  (uint16_t)sel;
 380         dp->sgd_ist = ist;
 381         dp->sgd_type = type;
 382         dp->sgd_dpl = dpl;
 383         dp->sgd_p = 1;
 384 }
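
For reference, the 16-byte long-mode gate these assignments populate can be modelled in user space. A hedged sketch; the field layout follows the Intel SDM 64-bit IDT gate format, and the selector, IST slot and handler address are illustrative:

	#include <stdio.h>
	#include <stdint.h>

	typedef union {
		struct {
			uint64_t looffset:16;
			uint64_t selector:16;
			uint64_t ist:3;
			uint64_t resv0:5;
			uint64_t type:5;	/* includes the S bit, as sgd_type does */
			uint64_t dpl:2;
			uint64_t p:1;
			uint64_t hioffset:16;
			uint64_t hi64offset:32;
			uint64_t resv1:32;
		} g;
		uint64_t raw[2];
	} gate64_t;

	int
	main(void)
	{
		uintptr_t func = 0xfffffffffb84d000UL;	/* illustrative handler */
		gate64_t d = { 0 };

		d.g.looffset = func;
		d.g.hioffset = func >> 16;
		d.g.hi64offset = func >> (16 + 16);
		d.g.selector = 0x28;	/* kernel %cs; value illustrative */
		d.g.ist = 1;		/* e.g. the #DF stack slot */
		d.g.type = 0xe;		/* SDT_SYSIGT: 64-bit interrupt gate */
		d.g.dpl = 0;		/* TRP_KPL */
		d.g.p = 1;
		printf("%016llx %016llx\n",
		    (unsigned long long)d.raw[0], (unsigned long long)d.raw[1]);
		return (0);
	}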
 385 


 386 /*
 387  * Updates a single user descriptor in the GDT of the current cpu.
 388  * Caller is responsible for preventing cpu migration.
 389  */
 390 
 391 void
 392 gdt_update_usegd(uint_t sidx, user_desc_t *udp)
 393 {
 394 #if defined(__xpv)
 395 
 396         uint64_t dpa = CPU->cpu_m.mcpu_gdtpa + sizeof (*udp) * sidx;
 397 
 398         if (HYPERVISOR_update_descriptor(pa_to_ma(dpa), *(uint64_t *)udp))
 399                 panic("gdt_update_usegd: HYPERVISOR_update_descriptor");
 400 
 401 #else   /* __xpv */
 402 
 403         CPU->cpu_gdt[sidx] = *udp;
 404 
 405 #endif  /* __xpv */


 922 #endif  /* __xpv */
 923 #endif  /* __i386 */
 924 
 925 /*
 926  * Build kernel IDT.
 927  *
 928  * Note that for amd64 we pretty much require every gate to be an interrupt
 929  * gate which blocks interrupts atomically on entry; that's because of our
 930  * dependency on using 'swapgs' every time we come into the kernel to find
 931  * the cpu structure. If we get interrupted just before doing that, %cs could
 932  * be in kernel mode (so that the trap prolog doesn't do a swapgs), but
 933  * %gsbase is really still pointing at something in userland. Bad things will
 934  * ensue. We also use interrupt gates for i386 even though this is not
 935  * required for some traps.
 936  *
 937  * Perhaps they should have invented a trap gate that does an atomic swapgs?
 938  */
 939 static void
 940 init_idt_common(gate_desc_t *idt)
 941 {
 942         set_gatesegd(&idt[T_ZERODIV],
 943             (kpti_enable == 1) ? &tr_div0trap : &div0trap,
 944             KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_ZERODIV));
 945         set_gatesegd(&idt[T_SGLSTP],
 946             (kpti_enable == 1) ? &tr_dbgtrap : &dbgtrap,
 947             KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_SGLSTP));
 948         set_gatesegd(&idt[T_NMIFLT],
 949             (kpti_enable == 1) ? &tr_nmiint : &nmiint,
 950             KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_NMIFLT));
 951         set_gatesegd(&idt[T_BPTFLT],
 952             (kpti_enable == 1) ? &tr_brktrap : &brktrap,
 953             KCS_SEL, SDT_SYSIGT, TRP_UPL, idt_vector_to_ist(T_BPTFLT));
 954         set_gatesegd(&idt[T_OVFLW],
 955             (kpti_enable == 1) ? &tr_ovflotrap : &ovflotrap,
 956             KCS_SEL, SDT_SYSIGT, TRP_UPL, idt_vector_to_ist(T_OVFLW));
 957         set_gatesegd(&idt[T_BOUNDFLT],
 958             (kpti_enable == 1) ? &tr_boundstrap : &boundstrap,
 959             KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_BOUNDFLT));
 960         set_gatesegd(&idt[T_ILLINST],
 961             (kpti_enable == 1) ? &tr_invoptrap : &invoptrap,
 962             KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_ILLINST));
 963         set_gatesegd(&idt[T_NOEXTFLT],
 964             (kpti_enable == 1) ? &tr_ndptrap : &ndptrap,
 965             KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_NOEXTFLT));
 966 
 967         /*
 968          * double fault handler.
 969          *
 970          * Note that on the hypervisor a guest does not receive #df faults.
 971          * Instead a failsafe event is injected into the guest if its selectors
 972          * and/or stack is in a broken state. See xen_failsafe_callback.
 973          */
 974 #if !defined(__xpv)
 975         set_gatesegd(&idt[T_DBLFLT],
 976             (kpti_enable == 1) ? &tr_syserrtrap : &syserrtrap,
 977             KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_DBLFLT));
 978 #endif  /* !__xpv */
 979 
 980         /*
 981          * T_EXTOVRFLT coprocessor-segment-overrun not supported.
 982          */
 983         set_gatesegd(&idt[T_TSSFLT],
 984             (kpti_enable == 1) ? &tr_invtsstrap : &invtsstrap,
 985             KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_TSSFLT));
 986         set_gatesegd(&idt[T_SEGFLT],
 987             (kpti_enable == 1) ? &tr_segnptrap : &segnptrap,
 988             KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_SEGFLT));
 989         set_gatesegd(&idt[T_STKFLT],
 990             (kpti_enable == 1) ? &tr_stktrap : &stktrap,
 991             KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_STKFLT));
 992         set_gatesegd(&idt[T_GPFLT],
 993             (kpti_enable == 1) ? &tr_gptrap : &gptrap,
 994             KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_GPFLT));
 995         set_gatesegd(&idt[T_PGFLT],
 996             (kpti_enable == 1) ? &tr_pftrap : &pftrap,
 997             KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_PGFLT));
 998         set_gatesegd(&idt[T_EXTERRFLT],
 999             (kpti_enable == 1) ? &tr_ndperr : &ndperr,
1000             KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_EXTERRFLT));
1001         set_gatesegd(&idt[T_ALIGNMENT],
1002             (kpti_enable == 1) ? &tr_achktrap : &achktrap,
1003             KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_ALIGNMENT));
1004         set_gatesegd(&idt[T_MCE],
1005             (kpti_enable == 1) ? &tr_mcetrap : &mcetrap,
1006             KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_MCE));
1007         set_gatesegd(&idt[T_SIMDFPE],
1008             (kpti_enable == 1) ? &tr_xmtrap : &xmtrap,
1009             KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_SIMDFPE));
1010 
1011         /*
1012          * install fast trap handler at 210.
1013          */
1014         set_gatesegd(&idt[T_FASTTRAP],
1015             (kpti_enable == 1) ? &tr_fasttrap : &fasttrap,
1016             KCS_SEL, SDT_SYSIGT, TRP_UPL, idt_vector_to_ist(T_FASTTRAP));
1017 
1018         /*
1019          * System call handler.
1020          */
1021         set_gatesegd(&idt[T_SYSCALLINT],
1022             (kpti_enable == 1) ? &tr_sys_syscall_int : &sys_syscall_int,
1023             KCS_SEL, SDT_SYSIGT, TRP_UPL, idt_vector_to_ist(T_SYSCALLINT));
1024 
1025         /*
1026          * Install the DTrace interrupt handler for the pid provider.
1027          */
1028         set_gatesegd(&idt[T_DTRACE_RET],
1029             (kpti_enable == 1) ? &tr_dtrace_ret : &dtrace_ret,
1030             KCS_SEL, SDT_SYSIGT, TRP_UPL, idt_vector_to_ist(T_DTRACE_RET));
1031 
1032         /*
1033          * Prepare interposing descriptor for the syscall handler
1034          * and cache copy of the default descriptor.
1035          */
1036         brand_tbl[0].ih_inum = T_SYSCALLINT;
1037         brand_tbl[0].ih_default_desc = idt0[T_SYSCALLINT];
1038 
1039         set_gatesegd(&(brand_tbl[0].ih_interp_desc),
1040             (kpti_enable == 1) ? &tr_brand_sys_syscall_int :
1041             &brand_sys_syscall_int, KCS_SEL, SDT_SYSIGT, TRP_UPL,
1042             idt_vector_to_ist(T_SYSCALLINT));
1043 
1044         brand_tbl[1].ih_inum = 0;
1045 }
1046 
1047 #if defined(__xpv)
1048 
1049 static void
1050 init_idt(gate_desc_t *idt)
1051 {
1052         init_idt_common(idt);
1053 }
1054 
1055 #else   /* __xpv */
1056 
1057 static void
1058 init_idt(gate_desc_t *idt)
1059 {
1060         char    ivctname[80];
1061         void    (*ivctptr)(void);
1062         int     i;
1063 
1064         /*
1065          * Initialize entire table with 'reserved' trap and then overwrite
1066          * specific entries. T_EXTOVRFLT (9) is unsupported and reserved
1067          * since it can only be generated on a 386 processor. 15 is also
1068          * unsupported and reserved.
1069          */
1070 #if !defined(__xpv)
1071         for (i = 0; i < NIDT; i++) {
1072                 set_gatesegd(&idt[i],
1073                     (kpti_enable == 1) ? &tr_resvtrap : &resvtrap,
1074                     KCS_SEL, SDT_SYSIGT, TRP_KPL,
1075                     idt_vector_to_ist(T_RESVTRAP));
1076         }
1077 #else
1078         for (i = 0; i < NIDT; i++) {
1079                 set_gatesegd(&idt[i], &resvtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
1080                     IST_NONE);
1081         }
1082 #endif
1083 
1084         /*
1085          * 20-31 reserved
1086          */
1087 #if !defined(__xpv)
1088         for (i = 20; i < 32; i++) {
1089                 set_gatesegd(&idt[i],
1090                     (kpti_enable == 1) ? &tr_invaltrap : &invaltrap,
1091                     KCS_SEL, SDT_SYSIGT, TRP_KPL,
1092                     idt_vector_to_ist(T_INVALTRAP));
1093         }
1094 #else
1095         for (i = 20; i < 32; i++) {
1096                 set_gatesegd(&idt[i], &invaltrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
1097                     IST_NONE);
1098         }
1099 #endif
1100 
1101         /*
1102          * interrupts 32 - 255
1103          */
1104         for (i = 32; i < 256; i++) {
1105 #if !defined(__xpv)
1106                 (void) snprintf(ivctname, sizeof (ivctname),
1107                     (kpti_enable == 1) ? "tr_ivct%d" : "ivct%d", i);
1108 #else
1109                 (void) snprintf(ivctname, sizeof (ivctname), "ivct%d", i);
1110 #endif
1111                 ivctptr = (void (*)(void))kobj_getsymvalue(ivctname, 0);
1112                 if (ivctptr == NULL)
1113                         panic("kobj_getsymvalue(%s) failed", ivctname);
1114 
1115                 set_gatesegd(&idt[i], ivctptr, KCS_SEL, SDT_SYSIGT, TRP_KPL,
1116                     idt_vector_to_ist(i));
1117         }
1118 
1119         /*
1120          * Now install the common ones. Note that it will overlay some
1121          * entries installed above like T_SYSCALLINT, T_FASTTRAP etc.
1122          */
1123         init_idt_common(idt);
1124 }
1125 
1126 #endif  /* __xpv */
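
The per-vector stubs looked up here are generated symbols, so the handler names are built at runtime. kobj_getsymvalue() has no user-space twin, but dlsym() gives a hedged analog of the same snprintf-then-resolve pattern:

	#include <stdio.h>
	#include <dlfcn.h>

	int
	main(void)
	{
		char name[80];
		void *sym;

		(void) snprintf(name, sizeof (name), "tr_ivct%d", 32);
		sym = dlsym(RTLD_DEFAULT, name);	/* NULL unless such a symbol exists */
		printf("%s -> %p\n", name, sym);
		return (0);
	}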
1127 
1128 /*
1129  * The kernel does not deal with LDTs unless a user explicitly creates
1130  * one. Under normal circumstances, the LDTR contains 0. Any process attempting
1131  * to reference the LDT will therefore cause a #gp. System calls made via the
1132  * obsolete lcall mechanism are emulated by the #gp fault handler.
1133  */
1134 static void
1135 init_ldt(void)
1136 {
1137 #if defined(__xpv)
1138         xen_set_ldt(NULL, 0);
1139 #else
1140         wr_ldtr(0);
1141 #endif
1142 }
1143 
1144 #if !defined(__xpv)

1145 
1146 static void
1147 init_tss(void)
1148 {
1149         extern struct cpu cpus[];
1150 
1151         /*
1152          * tss_rsp0 is dynamically filled in by resume() (in swtch.s) on each
 1153          * context switch, but it'll be overwritten with this same value anyway.
1154          */
1155         if (kpti_enable == 1) {
1156                 ktss0->tss_rsp0 = (uint64_t)&cpus->cpu_m.mcpu_kpti.kf_tr_rsp;
1157         }
1158 
1159         /* Set up the IST stacks for double fault, NMI, MCE. */
1160         ktss0->tss_ist1 = (uintptr_t)&dblfault_stack0[sizeof (dblfault_stack0)];
1161         ktss0->tss_ist2 = (uintptr_t)&nmi_stack0[sizeof (nmi_stack0)];
1162         ktss0->tss_ist3 = (uintptr_t)&mce_stack0[sizeof (mce_stack0)];

1163 
1164         /*
1165          * This IST stack is used for #DB,#BP (debug) interrupts (when KPTI is
1166          * enabled), and also for KDI (always).
1167          */
1168         ktss0->tss_ist4 = (uint64_t)&cpus->cpu_m.mcpu_kpti_dbg.kf_tr_rsp;
1169 
1170         if (kpti_enable == 1) {
1171                 /* This IST stack is used for #GP,#PF,#SS (fault) interrupts. */
1172                 ktss0->tss_ist5 =
1173                     (uint64_t)&cpus->cpu_m.mcpu_kpti_flt.kf_tr_rsp;

1174 
1175                 /* This IST stack is used for all other intrs (for KPTI). */
1176                 ktss0->tss_ist6 = (uint64_t)&cpus->cpu_m.mcpu_kpti.kf_tr_rsp;
1177         }
1178 
1179         /*
1180          * Set I/O bit map offset equal to size of TSS segment limit
1181          * for no I/O permission map. This will force all user I/O
1182          * instructions to generate #gp fault.
1183          */
1184         ktss0->tss_bitmapbase = sizeof (*ktss0);
1185 
1186         /*
1187          * Point %tr to descriptor for ktss0 in gdt.
1188          */
1189         wr_tsr(KTSS_SEL);
1190 }
1191 

1192 #endif  /* !__xpv */
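
The &stack[sizeof (stack)] expressions above load each IST slot with the address one past the end of its backing array, since x86 stacks grow downward. A hedged stand-alone illustration of the idiom:

	#include <stdio.h>

	static char demo_stack[16384];	/* stands in for an IST stack */

	int
	main(void)
	{
		void *top = &demo_stack[sizeof (demo_stack)];

		printf("base %p, initial stack pointer %p\n",
		    (void *)demo_stack, top);
		return (0);
	}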
1193 
1194 #if defined(__xpv)
1195 
1196 void
1197 init_desctbls(void)
1198 {
1199         uint_t vec;
1200         user_desc_t *gdt;
1201 
1202         /*
1203          * Setup and install our GDT.
1204          */
1205         gdt = init_gdt();
1206 
1207         /*
1208          * Store static pa of gdt to speed up pa_to_ma() translations
1209          * on lwp context switches.
1210          */
1211         ASSERT(IS_P2ALIGNED((uintptr_t)gdt, PAGESIZE));


1263             PAGESIZE, PAGESIZE);
1264         bzero(ktss0, PAGESIZE);
1265 
1266 #if defined(__i386)
1267 #if !defined(__lint)
1268         ASSERT(sizeof (*dftss0) <= PAGESIZE);
1269 #endif
1270         dftss0 = (tss_t *)BOP_ALLOC(bootops, (caddr_t)DFTSS_VA,
1271             PAGESIZE, PAGESIZE);
1272         bzero(dftss0, PAGESIZE);
1273 #endif
1274 
1275         /*
1276          * Setup and install our GDT.
1277          */
1278         gdt = init_gdt();
1279         ASSERT(IS_P2ALIGNED((uintptr_t)gdt, PAGESIZE));
1280         CPU->cpu_gdt = gdt;
1281 
1282         /*
1283          * Initialize this CPU's LDT.
1284          */
1285         CPU->cpu_m.mcpu_ldt = BOP_ALLOC(bootops, (caddr_t)LDT_VA,
1286             LDT_CPU_SIZE, PAGESIZE);
1287         bzero(CPU->cpu_m.mcpu_ldt, LDT_CPU_SIZE);
1288         CPU->cpu_m.mcpu_ldt_len = 0;
1289 
1290         /*
1291          * Setup and install our IDT.
1292          */
1293         init_idt(idt0);
1294 
1295         idtr.dtr_base = (uintptr_t)idt0;
1296         idtr.dtr_limit = (NIDT * sizeof (*idt0)) - 1;
1297         wr_idtr(&idtr);
1298         CPU->cpu_idt = idt0;
1299 
1300 #if defined(__i386)
1301         /*
1302          * We maintain a description of idt0 in convenient IDTR format
1303          * for #pf's on some older pentium processors. See pentium_pftrap().
1304          */
1305         idt0_default_r = idtr;
1306 #endif  /* __i386 */
1307 
1308         init_tss();
1309         CPU->cpu_tss = ktss0;
1310         init_ldt();
1311 
 1312         /* Stash this so that the NMI, MCE, #DF and KDI handlers can use it. */
1313         kpti_safe_cr3 = (uint64_t)getcr3();
1314 }
1315 
1316 #endif  /* __xpv */
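
A hedged arithmetic check for the dtr_limit computed in init_desctbls() above, assuming NIDT == 256 and 16-byte long-mode gate descriptors; the limit field is inclusive, so it ends up one short of the table size:

	#include <stdio.h>

	int
	main(void)
	{
		const unsigned nidt = 256;	/* NIDT, assumed */
		const unsigned gatesz = 16;	/* sizeof (gate_desc_t) on amd64, assumed */

		printf("dtr_limit = %u\n", nidt * gatesz - 1);	/* 4095 */
		return (0);
	}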
1317 
1318 /*
1319  * In the early kernel, we need to set up a simple GDT to run on.
1320  *
1321  * XXPV Can dboot use this too?  See dboot_gdt.s
1322  */
1323 void
1324 init_boot_gdt(user_desc_t *bgdt)
1325 {
1326 #if defined(__amd64)
1327         set_usegd(&bgdt[GDT_B32DATA], SDP_LONG, NULL, -1, SDT_MEMRWA, SEL_KPL,
1328             SDP_PAGES, SDP_OP32);
1329         set_usegd(&bgdt[GDT_B64CODE], SDP_LONG, NULL, -1, SDT_MEMERA, SEL_KPL,
1330             SDP_PAGES, SDP_OP32);
1331 #elif defined(__i386)
1332         set_usegd(&bgdt[GDT_B32DATA], NULL, -1, SDT_MEMRWA, SEL_KPL,
1333             SDP_PAGES, SDP_OP32);


1354 #if defined(__xpv)
1355                 xen_idt_write(&idt[brand_tbl[i].ih_inum],
1356                     brand_tbl[i].ih_inum);
1357 #endif
1358         }
1359 
1360 #if defined(__amd64)
1361 #if defined(__xpv)
1362 
1363         /*
1364          * Currently the hypervisor only supports 64-bit syscalls via
1365          * syscall instruction. The 32-bit syscalls are handled by
1366          * interrupt gate above.
1367          */
1368         xen_set_callback(brand_sys_syscall, CALLBACKTYPE_syscall,
1369             CALLBACKF_mask_events);
1370 
1371 #else
1372 
1373         if (is_x86_feature(x86_featureset, X86FSET_ASYSC)) {
1374                 if (kpti_enable == 1) {
1375                         wrmsr(MSR_AMD_LSTAR, (uintptr_t)tr_brand_sys_syscall);
1376                         wrmsr(MSR_AMD_CSTAR, (uintptr_t)tr_brand_sys_syscall32);
1377                 } else {
1378                         wrmsr(MSR_AMD_LSTAR, (uintptr_t)brand_sys_syscall);
1379                         wrmsr(MSR_AMD_CSTAR, (uintptr_t)brand_sys_syscall32);
1380                 }
1381         }
1382 
1383 #endif
1384 #endif  /* __amd64 */
1385 
1386         if (is_x86_feature(x86_featureset, X86FSET_SEP)) {
1387                 if (kpti_enable == 1) {
1388                         wrmsr(MSR_INTC_SEP_EIP,
1389                             (uintptr_t)tr_brand_sys_sysenter);
1390                 } else {
1391                         wrmsr(MSR_INTC_SEP_EIP, (uintptr_t)brand_sys_sysenter);
1392                 }
1393         }
1394 }
1395 
1396 /*
1397  * Disable interpositioning on the system call path by rewriting the
1398  * sys{call|enter} MSRs and the syscall-related entries in the IDT to use
1399  * the standard entry points, which bypass the interpositioning hooks.
1400  */
1401 void
1402 brand_interpositioning_disable(void)
1403 {
1404         gate_desc_t     *idt = CPU->cpu_idt;
1405         int i;
1406 
1407         ASSERT(curthread->t_preempt != 0 || getpil() >= DISP_LEVEL);
1408 
1409         for (i = 0; brand_tbl[i].ih_inum; i++) {
1410                 idt[brand_tbl[i].ih_inum] = brand_tbl[i].ih_default_desc;
1411 #if defined(__xpv)
1412                 xen_idt_write(&idt[brand_tbl[i].ih_inum],
1413                     brand_tbl[i].ih_inum);
1414 #endif
1415         }
1416 
1417 #if defined(__amd64)
1418 #if defined(__xpv)
1419 
1420         /*
1421          * See comment above in brand_interpositioning_enable.
1422          */
1423         xen_set_callback(sys_syscall, CALLBACKTYPE_syscall,
1424             CALLBACKF_mask_events);
1425 
1426 #else
1427 
1428         if (is_x86_feature(x86_featureset, X86FSET_ASYSC)) {
1429                 if (kpti_enable == 1) {
1430                         wrmsr(MSR_AMD_LSTAR, (uintptr_t)tr_sys_syscall);
1431                         wrmsr(MSR_AMD_CSTAR, (uintptr_t)tr_sys_syscall32);
1432                 } else {
1433                         wrmsr(MSR_AMD_LSTAR, (uintptr_t)sys_syscall);
1434                         wrmsr(MSR_AMD_CSTAR, (uintptr_t)sys_syscall32);
1435                 }
1436         }
1437 
1438 #endif
1439 #endif  /* __amd64 */
1440 
1441         if (is_x86_feature(x86_featureset, X86FSET_SEP)) {
1442                 if (kpti_enable == 1) {
1443                         wrmsr(MSR_INTC_SEP_EIP, (uintptr_t)tr_sys_sysenter);
1444                 } else {
1445                         wrmsr(MSR_INTC_SEP_EIP, (uintptr_t)sys_sysenter);
1446                 }
1447         }
1448 }
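
A hedged summary of the syscall-entry MSRs that the two interpositioning routines rewrite; the indices are the architectural values from the Intel/AMD manuals:

	#include <stdio.h>

	int
	main(void)
	{
		printf("MSR_AMD_LSTAR    0x%08x  64-bit syscall entry\n", 0xc0000082);
		printf("MSR_AMD_CSTAR    0x%08x  32-bit compat syscall entry\n",
		    0xc0000083);
		printf("MSR_INTC_SEP_EIP 0x%08x  sysenter entry (%%eip)\n", 0x176);
		return (0);
	}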