 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * Copyright 2018 Joyent, Inc. All rights reserved.
 */

/*
 * Copyright (c) 1992 Terrence R. Lambert.
 * Copyright (c) 1990 The Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91
 */

#include <sys/types.h>
#include <sys/sysmacros.h>
#include <sys/tss.h>
#include <sys/segments.h>
#include <sys/trap.h>
#include <sys/cpuvar.h>
#include <sys/bootconf.h>
#include <sys/x86_archext.h>
#include <sys/controlregs.h>
#include <sys/archsystm.h>
#include <sys/machsystm.h>
#include <sys/kobj.h>
#include <sys/cmn_err.h>
#include <sys/reboot.h>
#include <sys/kdi.h>
#include <sys/mach_mmu.h>
#include <sys/systm.h>
#include <sys/note.h>

#ifdef __xpv
#include <sys/hypervisor.h>
#include <vm/as.h>
#endif

#include <sys/promif.h>
#include <sys/bootinfo.h>
#include <vm/kboot_mmu.h>
#include <vm/hat_pte.h>

/*
 * cpu0 and default tables and structures.
 */
user_desc_t *gdt0;
#if !defined(__xpv)
desctbr_t gdt0_default_r;
#endif

gate_desc_t *idt0;              /* interrupt descriptor table */

#if defined(__i386)
tss_t *dftss0;                  /* #DF double-fault exception */
#endif  /* __i386 */

user_desc_t zero_udesc;         /* base zero user desc native procs */
user_desc_t null_udesc;         /* null user descriptor */
system_desc_t null_sdesc;       /* null system descriptor */

#if defined(__amd64)
user_desc_t zero_u32desc;       /* 32-bit compatibility procs */
#endif  /* __amd64 */

#if defined(__amd64)
user_desc_t ucs_on;
user_desc_t ucs_off;
user_desc_t ucs32_on;
user_desc_t ucs32_off;
#endif  /* __amd64 */

/*
 * If the size of this is changed, you must update hat_pcp_setup() and the
 * definitions in exception.s
 */
extern char dblfault_stack0[DEFAULTSTKSZ];
extern char nmi_stack0[DEFAULTSTKSZ];
extern char mce_stack0[DEFAULTSTKSZ];

extern void fast_null(void);
extern hrtime_t get_hrtime(void);
extern hrtime_t gethrvtime(void);
extern hrtime_t get_hrestime(void);
extern uint64_t getlgrp(void);

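/*
 * Fast system call dispatch table, indexed by fast trap number; entries
 * that are not currently implemented point at fast_null.
 */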
void (*(fasttable[]))(void) = {
        fast_null,                      /* T_FNULL routine */
        fast_null,                      /* T_FGETFP routine (initially null) */
        fast_null,                      /* T_FSETFP routine (initially null) */
        (void (*)())get_hrtime,         /* T_GETHRTIME */
        (void (*)())gethrvtime,         /* T_GETHRVTIME */
        (void (*)())get_hrestime,       /* T_GETHRESTIME */
        (void (*)())getlgrp             /* T_GETLGRP */
};

/*
 * Structure containing pre-computed descriptors to allow us to temporarily
 * interpose on a standard handler.
        dp->ssd_dpl = dpl;
        dp->ssd_p = 1;
        dp->ssd_gran = 0;       /* force byte units */
}

void *
get_ssd_base(system_desc_t *dp)
{
        uintptr_t base;

        base = (uintptr_t)dp->ssd_lobase |
            (uintptr_t)dp->ssd_midbase << 16 |
            (uintptr_t)dp->ssd_hibase << (16 + 8);
        return ((void *)base);
}

#endif  /* __i386 */

/*
 * Install gate segment descriptor for interrupt, trap, call and task gates.
 *
 * For 64-bit native, if we have KPTI enabled, we use the IST stack mechanism
 * on all interrupts. We have different ISTs for each class of exceptions that
 * are most likely to occur while handling an existing exception; while many of
 * these are just going to panic, it's nice not to trample on the existing
 * exception state for debugging purposes.
 *
 * Normal interrupts are all redirected unconditionally to the KPTI trampoline
 * stack space. This unifies the trampoline handling between user and kernel
 * space (and avoids the need to touch %gs).
 *
 * The KDI IDT uses the DBG IST for *all* of its entries: consider single
 * stepping tr_pftrap, when we do a read from KMDB that causes another #PF.
 * Without its own IST, this would stomp on the kernel's mcpu_kpti_flt frame.
 */
uint_t
idt_vector_to_ist(uint_t vector)
{
#if defined(__xpv)
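        /*
         * PV guests hand their trap table to the hypervisor instead of
         * loading a hardware IDT directly, so the IST mechanism does not
         * apply here.
         */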
        _NOTE(ARGUNUSED(vector));
        return (IST_NONE);
#else
        switch (vector) {
        /* These should always use IST even without KPTI enabled. */
        case T_DBLFLT:
                return (IST_DF);
        case T_NMIFLT:
                return (IST_NMI);
        case T_MCE:
                return (IST_MCE);

        case T_BPTFLT:
        case T_SGLSTP:
                if (kpti_enable == 1) {
                        return (IST_DBG);
                }
                return (IST_NONE);
        case T_STKFLT:
        case T_GPFLT:
        case T_PGFLT:
                if (kpti_enable == 1) {
                        return (IST_NESTABLE);
                }
                return (IST_NONE);
        default:
                if (kpti_enable == 1) {
                        return (IST_DEFAULT);
                }
                return (IST_NONE);
        }
#endif
}
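
/*
 * For example: with KPTI enabled, #GP, #PF and #SS (T_STKFLT) land on the
 * nestable IST so that a fault taken while handling another fault does not
 * clobber the first frame, while ordinary interrupt vectors all share
 * IST_DEFAULT. Without KPTI, only #DF, NMI and #MC get a dedicated IST.
 */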

void
set_gatesegd(gate_desc_t *dp, void (*func)(void), selector_t sel,
    uint_t type, uint_t dpl, uint_t ist)
{
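        /*
         * A long-mode gate descriptor carries the 64-bit handler address
         * split across three fields: offset bits 0-15, 16-31 and 32-63.
         */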
        dp->sgd_looffset = (uintptr_t)func;
        dp->sgd_hioffset = (uintptr_t)func >> 16;
        dp->sgd_hi64offset = (uintptr_t)func >> (16 + 16);
        dp->sgd_selector = (uint16_t)sel;
        dp->sgd_ist = ist;
        dp->sgd_type = type;
        dp->sgd_dpl = dpl;
        dp->sgd_p = 1;
}

/*
 * Updates a single user descriptor in the GDT of the current cpu.
 * Caller is responsible for preventing cpu migration.
 */

void
gdt_update_usegd(uint_t sidx, user_desc_t *udp)
{
#if defined(__xpv)

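        /*
         * The hypervisor maps the GDT read-only in the guest, so the
         * descriptor must be updated with a hypercall on its machine
         * address rather than written directly.
         */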
        uint64_t dpa = CPU->cpu_m.mcpu_gdtpa + sizeof (*udp) * sidx;

        if (HYPERVISOR_update_descriptor(pa_to_ma(dpa), *(uint64_t *)udp))
                panic("gdt_update_usegd: HYPERVISOR_update_descriptor");

#else   /* __xpv */

        CPU->cpu_gdt[sidx] = *udp;

#endif  /* __xpv */
#endif  /* __xpv */
#endif  /* __i386 */

/*
 * Build kernel IDT.
 *
 * Note that for amd64 we pretty much require every gate to be an interrupt
 * gate which blocks interrupts atomically on entry; that's because of our
 * dependency on using 'swapgs' every time we come into the kernel to find
 * the cpu structure. If we get interrupted just before doing that, %cs could
 * be in kernel mode (so that the trap prolog doesn't do a swapgs), but
 * %gsbase is really still pointing at something in userland. Bad things will
 * ensue. We use interrupt gates for i386 as well, even though this is not
 * required for some traps there.
 *
 * Perhaps they should have invented a trap gate that does an atomic swapgs?
 */
static void
init_idt_common(gate_desc_t *idt)
{
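        /*
         * With KPTI enabled, each vector is aimed at its tr_* trampoline
         * entry point, which switches onto the kernel's page tables before
         * dispatching to the real handler.
         */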
        set_gatesegd(&idt[T_ZERODIV],
            (kpti_enable == 1) ? &tr_div0trap : &div0trap,
            KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_ZERODIV));
        set_gatesegd(&idt[T_SGLSTP],
            (kpti_enable == 1) ? &tr_dbgtrap : &dbgtrap,
            KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_SGLSTP));
        set_gatesegd(&idt[T_NMIFLT],
            (kpti_enable == 1) ? &tr_nmiint : &nmiint,
            KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_NMIFLT));
        set_gatesegd(&idt[T_BPTFLT],
            (kpti_enable == 1) ? &tr_brktrap : &brktrap,
            KCS_SEL, SDT_SYSIGT, TRP_UPL, idt_vector_to_ist(T_BPTFLT));
        set_gatesegd(&idt[T_OVFLW],
            (kpti_enable == 1) ? &tr_ovflotrap : &ovflotrap,
            KCS_SEL, SDT_SYSIGT, TRP_UPL, idt_vector_to_ist(T_OVFLW));
        set_gatesegd(&idt[T_BOUNDFLT],
            (kpti_enable == 1) ? &tr_boundstrap : &boundstrap,
            KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_BOUNDFLT));
        set_gatesegd(&idt[T_ILLINST],
            (kpti_enable == 1) ? &tr_invoptrap : &invoptrap,
            KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_ILLINST));
        set_gatesegd(&idt[T_NOEXTFLT],
            (kpti_enable == 1) ? &tr_ndptrap : &ndptrap,
            KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_NOEXTFLT));

        /*
         * Double fault handler.
         *
         * Note that on the hypervisor a guest does not receive #df faults.
         * Instead a failsafe event is injected into the guest if its selectors
         * and/or stack is in a broken state. See xen_failsafe_callback.
         */
#if !defined(__xpv)
        set_gatesegd(&idt[T_DBLFLT],
            (kpti_enable == 1) ? &tr_syserrtrap : &syserrtrap,
            KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_DBLFLT));
#endif  /* !__xpv */

        /*
         * T_EXTOVRFLT (coprocessor-segment-overrun) is not supported.
         */
        set_gatesegd(&idt[T_TSSFLT],
            (kpti_enable == 1) ? &tr_invtsstrap : &invtsstrap,
            KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_TSSFLT));
        set_gatesegd(&idt[T_SEGFLT],
            (kpti_enable == 1) ? &tr_segnptrap : &segnptrap,
            KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_SEGFLT));
        set_gatesegd(&idt[T_STKFLT],
            (kpti_enable == 1) ? &tr_stktrap : &stktrap,
            KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_STKFLT));
        set_gatesegd(&idt[T_GPFLT],
            (kpti_enable == 1) ? &tr_gptrap : &gptrap,
            KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_GPFLT));
        set_gatesegd(&idt[T_PGFLT],
            (kpti_enable == 1) ? &tr_pftrap : &pftrap,
            KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_PGFLT));
        set_gatesegd(&idt[T_EXTERRFLT],
            (kpti_enable == 1) ? &tr_ndperr : &ndperr,
            KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_EXTERRFLT));
        set_gatesegd(&idt[T_ALIGNMENT],
            (kpti_enable == 1) ? &tr_achktrap : &achktrap,
            KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_ALIGNMENT));
        set_gatesegd(&idt[T_MCE],
            (kpti_enable == 1) ? &tr_mcetrap : &mcetrap,
            KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_MCE));
        set_gatesegd(&idt[T_SIMDFPE],
            (kpti_enable == 1) ? &tr_xmtrap : &xmtrap,
            KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_SIMDFPE));

        /*
         * Install fast trap handler at 210.
         */
        set_gatesegd(&idt[T_FASTTRAP],
            (kpti_enable == 1) ? &tr_fasttrap : &fasttrap,
            KCS_SEL, SDT_SYSIGT, TRP_UPL, idt_vector_to_ist(T_FASTTRAP));

        /*
         * System call handler.
         */
        set_gatesegd(&idt[T_SYSCALLINT],
            (kpti_enable == 1) ? &tr_sys_syscall_int : &sys_syscall_int,
            KCS_SEL, SDT_SYSIGT, TRP_UPL, idt_vector_to_ist(T_SYSCALLINT));

        /*
         * Install the DTrace interrupt handler for the pid provider.
         */
        set_gatesegd(&idt[T_DTRACE_RET],
            (kpti_enable == 1) ? &tr_dtrace_ret : &dtrace_ret,
            KCS_SEL, SDT_SYSIGT, TRP_UPL, idt_vector_to_ist(T_DTRACE_RET));

        /*
         * Prepare the interposing descriptor for the syscall handler
         * and cache a copy of the default descriptor.
         */
        brand_tbl[0].ih_inum = T_SYSCALLINT;
        brand_tbl[0].ih_default_desc = idt0[T_SYSCALLINT];

        set_gatesegd(&(brand_tbl[0].ih_interp_desc),
            (kpti_enable == 1) ? &tr_brand_sys_syscall_int :
            &brand_sys_syscall_int, KCS_SEL, SDT_SYSIGT, TRP_UPL,
            idt_vector_to_ist(T_SYSCALLINT));

        brand_tbl[1].ih_inum = 0;
}

#if defined(__xpv)

static void
init_idt(gate_desc_t *idt)
{
        init_idt_common(idt);
}

#else   /* __xpv */

static void
init_idt(gate_desc_t *idt)
{
        char ivctname[80];
        void (*ivctptr)(void);
        int i;

        /*
         * Initialize the entire table with the 'reserved' trap and then
         * overwrite specific entries. T_EXTOVRFLT (9) is unsupported and
         * reserved since it can only be generated on a 386 processor.
         * Vector 15 is also unsupported and reserved.
         */
#if !defined(__xpv)
        for (i = 0; i < NIDT; i++) {
                set_gatesegd(&idt[i],
                    (kpti_enable == 1) ? &tr_resvtrap : &resvtrap,
                    KCS_SEL, SDT_SYSIGT, TRP_KPL,
                    idt_vector_to_ist(T_RESVTRAP));
        }
#else
        for (i = 0; i < NIDT; i++) {
                set_gatesegd(&idt[i], &resvtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
                    IST_NONE);
        }
#endif

        /*
         * Vectors 20-31 are reserved.
         */
#if !defined(__xpv)
        for (i = 20; i < 32; i++) {
                set_gatesegd(&idt[i],
                    (kpti_enable == 1) ? &tr_invaltrap : &invaltrap,
                    KCS_SEL, SDT_SYSIGT, TRP_KPL,
                    idt_vector_to_ist(T_INVALTRAP));
        }
#else
        for (i = 20; i < 32; i++) {
                set_gatesegd(&idt[i], &invaltrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
                    IST_NONE);
        }
#endif

        /*
         * Interrupts 32 - 255.
         */
        for (i = 32; i < 256; i++) {
#if !defined(__xpv)
                (void) snprintf(ivctname, sizeof (ivctname),
                    (kpti_enable == 1) ? "tr_ivct%d" : "ivct%d", i);
#else
                (void) snprintf(ivctname, sizeof (ivctname), "ivct%d", i);
#endif
                ivctptr = (void (*)(void))kobj_getsymvalue(ivctname, 0);
                if (ivctptr == NULL)
                        panic("kobj_getsymvalue(%s) failed", ivctname);

                set_gatesegd(&idt[i], ivctptr, KCS_SEL, SDT_SYSIGT, TRP_KPL,
                    idt_vector_to_ist(i));
        }

        /*
         * Now install the common ones. Note that this will overlay some
         * entries installed above, such as T_SYSCALLINT and T_FASTTRAP.
         */
        init_idt_common(idt);
}

#endif  /* __xpv */

/*
 * The kernel does not deal with LDTs unless a user explicitly creates
 * one. Under normal circumstances, the LDTR contains 0. Any process attempting
 * to reference the LDT will therefore cause a #gp. System calls made via the
 * obsolete lcall mechanism are emulated by the #gp fault handler.
 */
static void
init_ldt(void)
{
#if defined(__xpv)
        xen_set_ldt(NULL, 0);
#else
        wr_ldtr(0);
#endif
}

#if !defined(__xpv)

static void
init_tss(void)
{
        extern struct cpu cpus[];

        /*
         * tss_rsp0 is dynamically filled in by resume() (in swtch.s) on each
         * context switch, but it would only ever be overwritten with this
         * same value anyway.
         */
        if (kpti_enable == 1) {
                ktss0->tss_rsp0 = (uint64_t)&cpus->cpu_m.mcpu_kpti.kf_tr_rsp;
        }

        /* Set up the IST stacks for double fault, NMI, MCE. */
        ktss0->tss_ist1 = (uintptr_t)&dblfault_stack0[sizeof (dblfault_stack0)];
        ktss0->tss_ist2 = (uintptr_t)&nmi_stack0[sizeof (nmi_stack0)];
        ktss0->tss_ist3 = (uintptr_t)&mce_stack0[sizeof (mce_stack0)];

        /*
         * This IST stack is used for #DB, #BP (debug) interrupts (when KPTI
         * is enabled), and also for KDI (always).
         */
        ktss0->tss_ist4 = (uint64_t)&cpus->cpu_m.mcpu_kpti_dbg.kf_tr_rsp;

        if (kpti_enable == 1) {
                /* This IST stack is used for #GP, #PF, #SS (fault) intrs. */
                ktss0->tss_ist5 =
                    (uint64_t)&cpus->cpu_m.mcpu_kpti_flt.kf_tr_rsp;

                /* This IST stack is used for all other intrs (for KPTI). */
                ktss0->tss_ist6 = (uint64_t)&cpus->cpu_m.mcpu_kpti.kf_tr_rsp;
        }

        /*
         * Set the I/O bit map offset equal to the TSS segment limit to
         * indicate that there is no I/O permission map; this forces all
         * user I/O instructions to generate a #gp fault.
         */
        ktss0->tss_bitmapbase = sizeof (*ktss0);

        /*
         * Point %tr to the descriptor for ktss0 in the gdt.
         */
        wr_tsr(KTSS_SEL);
}

#endif  /* !__xpv */

#if defined(__xpv)

void
init_desctbls(void)
{
        uint_t vec;
        user_desc_t *gdt;

        /*
         * Set up and install our GDT.
         */
        gdt = init_gdt();

        /*
         * Store the static pa of the gdt to speed up pa_to_ma() translations
         * on lwp context switches.
         */
        ASSERT(IS_P2ALIGNED((uintptr_t)gdt, PAGESIZE));
            PAGESIZE, PAGESIZE);
        bzero(ktss0, PAGESIZE);

#if defined(__i386)
#if !defined(__lint)
        ASSERT(sizeof (*dftss0) <= PAGESIZE);
#endif
        dftss0 = (tss_t *)BOP_ALLOC(bootops, (caddr_t)DFTSS_VA,
            PAGESIZE, PAGESIZE);
        bzero(dftss0, PAGESIZE);
#endif

        /*
         * Set up and install our GDT.
         */
        gdt = init_gdt();
        ASSERT(IS_P2ALIGNED((uintptr_t)gdt, PAGESIZE));
        CPU->cpu_gdt = gdt;

        /*
         * Initialize this CPU's LDT.
         */
        CPU->cpu_m.mcpu_ldt = BOP_ALLOC(bootops, (caddr_t)LDT_VA,
            LDT_CPU_SIZE, PAGESIZE);
        bzero(CPU->cpu_m.mcpu_ldt, LDT_CPU_SIZE);
        CPU->cpu_m.mcpu_ldt_len = 0;

        /*
         * Set up and install our IDT.
         */
        init_idt(idt0);

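        /* The IDTR limit is the size of the IDT in bytes, minus one. */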
        idtr.dtr_base = (uintptr_t)idt0;
        idtr.dtr_limit = (NIDT * sizeof (*idt0)) - 1;
        wr_idtr(&idtr);
        CPU->cpu_idt = idt0;

#if defined(__i386)
        /*
         * We maintain a description of idt0 in convenient IDTR format
         * for #pf's on some older pentium processors. See pentium_pftrap().
         */
        idt0_default_r = idtr;
#endif  /* __i386 */

        init_tss();
        CPU->cpu_tss = ktss0;
        init_ldt();

        /* Stash this so that the NMI, MCE, #DF and KDI handlers can use it. */
        kpti_safe_cr3 = (uint64_t)getcr3();
}

#endif  /* __xpv */

/*
 * In the early kernel, we need to set up a simple GDT to run on.
 *
 * XXPV Can dboot use this too? See dboot_gdt.s
 */
void
init_boot_gdt(user_desc_t *bgdt)
{
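        /*
         * Flat boot segments: NULL base, maximal limit scaled in page
         * units, kernel privilege.
         */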
#if defined(__amd64)
        set_usegd(&bgdt[GDT_B32DATA], SDP_LONG, NULL, -1, SDT_MEMRWA, SEL_KPL,
            SDP_PAGES, SDP_OP32);
        set_usegd(&bgdt[GDT_B64CODE], SDP_LONG, NULL, -1, SDT_MEMERA, SEL_KPL,
            SDP_PAGES, SDP_OP32);
#elif defined(__i386)
        set_usegd(&bgdt[GDT_B32DATA], NULL, -1, SDT_MEMRWA, SEL_KPL,
            SDP_PAGES, SDP_OP32);
#if defined(__xpv)
                xen_idt_write(&idt[brand_tbl[i].ih_inum],
                    brand_tbl[i].ih_inum);
#endif
        }

#if defined(__amd64)
#if defined(__xpv)

        /*
         * Currently the hypervisor only supports 64-bit syscalls via the
         * syscall instruction. The 32-bit syscalls are handled by the
         * interrupt gate above.
         */
        xen_set_callback(brand_sys_syscall, CALLBACKTYPE_syscall,
            CALLBACKF_mask_events);

#else

        if (is_x86_feature(x86_featureset, X86FSET_ASYSC)) {
                if (kpti_enable == 1) {
                        wrmsr(MSR_AMD_LSTAR, (uintptr_t)tr_brand_sys_syscall);
                        wrmsr(MSR_AMD_CSTAR, (uintptr_t)tr_brand_sys_syscall32);
                } else {
                        wrmsr(MSR_AMD_LSTAR, (uintptr_t)brand_sys_syscall);
                        wrmsr(MSR_AMD_CSTAR, (uintptr_t)brand_sys_syscall32);
                }
        }

#endif
#endif  /* __amd64 */

        if (is_x86_feature(x86_featureset, X86FSET_SEP)) {
                if (kpti_enable == 1) {
                        wrmsr(MSR_INTC_SEP_EIP,
                            (uintptr_t)tr_brand_sys_sysenter);
                } else {
                        wrmsr(MSR_INTC_SEP_EIP, (uintptr_t)brand_sys_sysenter);
                }
        }
}

/*
 * Disable interpositioning on the system call path by rewriting the
 * sys{call|enter} MSRs and the syscall-related entries in the IDT to use
 * the standard entry points, which bypass the interpositioning hooks.
 */
void
brand_interpositioning_disable(void)
{
        gate_desc_t *idt = CPU->cpu_idt;
        int i;

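        /*
         * The caller must keep us on this CPU (preemption disabled or PIL
         * at or above DISP_LEVEL) since we rewrite this CPU's IDT and MSRs.
         */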
        ASSERT(curthread->t_preempt != 0 || getpil() >= DISP_LEVEL);

        for (i = 0; brand_tbl[i].ih_inum; i++) {
                idt[brand_tbl[i].ih_inum] = brand_tbl[i].ih_default_desc;
#if defined(__xpv)
                xen_idt_write(&idt[brand_tbl[i].ih_inum],
                    brand_tbl[i].ih_inum);
#endif
        }

#if defined(__amd64)
#if defined(__xpv)

        /*
         * See comment above in brand_interpositioning_enable.
         */
        xen_set_callback(sys_syscall, CALLBACKTYPE_syscall,
            CALLBACKF_mask_events);

#else

        if (is_x86_feature(x86_featureset, X86FSET_ASYSC)) {
                if (kpti_enable == 1) {
                        wrmsr(MSR_AMD_LSTAR, (uintptr_t)tr_sys_syscall);
                        wrmsr(MSR_AMD_CSTAR, (uintptr_t)tr_sys_syscall32);
                } else {
                        wrmsr(MSR_AMD_LSTAR, (uintptr_t)sys_syscall);
                        wrmsr(MSR_AMD_CSTAR, (uintptr_t)sys_syscall32);
                }
        }

#endif
#endif  /* __amd64 */

        if (is_x86_feature(x86_featureset, X86FSET_SEP)) {
                if (kpti_enable == 1) {
                        wrmsr(MSR_INTC_SEP_EIP, (uintptr_t)tr_sys_sysenter);
                } else {
                        wrmsr(MSR_INTC_SEP_EIP, (uintptr_t)sys_sysenter);
                }
        }
}