1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25
26 /*
27 * Copyright 2011 Joyent, Inc. All rights reserved.
28 */
29
30 /*
31 * Copyright (c) 1992 Terrence R. Lambert.
32 * Copyright (c) 1990 The Regents of the University of California.
33 * All rights reserved.
34 *
35 * This code is derived from software contributed to Berkeley by
36 * William Jolitz.
37 *
38 * Redistribution and use in source and binary forms, with or without
39 * modification, are permitted provided that the following conditions
40 * are met:
41 * 1. Redistributions of source code must retain the above copyright
42 * notice, this list of conditions and the following disclaimer.
43 * 2. Redistributions in binary form must reproduce the above copyright
44 * notice, this list of conditions and the following disclaimer in the
45 * documentation and/or other materials provided with the distribution.
46 * 3. All advertising materials mentioning features or use of this software
47 * must display the following acknowledgement:
48 * This product includes software developed by the University of
49 * California, Berkeley and its contributors.
50 * 4. Neither the name of the University nor the names of its contributors
51 * may be used to endorse or promote products derived from this software
52 * without specific prior written permission.
53 *
54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64 * SUCH DAMAGE.
65 *
66 * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91
67 */
68
69 #include <sys/types.h>
70 #include <sys/sysmacros.h>
71 #include <sys/tss.h>
72 #include <sys/segments.h>
73 #include <sys/trap.h>
74 #include <sys/cpuvar.h>
75 #include <sys/bootconf.h>
76 #include <sys/x86_archext.h>
77 #include <sys/controlregs.h>
78 #include <sys/archsystm.h>
79 #include <sys/machsystm.h>
80 #include <sys/kobj.h>
81 #include <sys/cmn_err.h>
82 #include <sys/reboot.h>
83 #include <sys/kdi.h>
84 #include <sys/mach_mmu.h>
85 #include <sys/systm.h>
86
87 #ifdef __xpv
88 #include <sys/hypervisor.h>
89 #include <vm/as.h>
90 #endif
91
92 #include <sys/promif.h>
93 #include <sys/bootinfo.h>
94 #include <vm/kboot_mmu.h>
95 #include <vm/hat_pte.h>
96
97 /*
98 * cpu0 and default tables and structures.
99 */
100 user_desc_t *gdt0;
101 #if !defined(__xpv)
102 desctbr_t gdt0_default_r;
103 #endif
104
105 gate_desc_t *idt0; /* interrupt descriptor table */
106 #if defined(__i386)
107 desctbr_t idt0_default_r; /* describes idt0 in IDTR format */
108 #endif
109
110 tss_t *ktss0; /* kernel task state structure */
111
112 #if defined(__i386)
113 tss_t *dftss0; /* #DF double-fault exception */
114 #endif /* __i386 */
115
116 user_desc_t zero_udesc; /* base zero user desc native procs */
117 user_desc_t null_udesc; /* null user descriptor */
118 system_desc_t null_sdesc; /* null system descriptor */
119
120 #if defined(__amd64)
121 user_desc_t zero_u32desc; /* 32-bit compatibility procs */
122 #endif /* __amd64 */
123
124 #if defined(__amd64)
125 user_desc_t ucs_on;
126 user_desc_t ucs_off;
127 user_desc_t ucs32_on;
128 user_desc_t ucs32_off;
129 #endif /* __amd64 */
130
131 #pragma align 16(dblfault_stack0)
132 char dblfault_stack0[DEFAULTSTKSZ];
133
134 extern void fast_null(void);
135 extern hrtime_t get_hrtime(void);
136 extern hrtime_t gethrvtime(void);
137 extern hrtime_t get_hrestime(void);
138 extern uint64_t getlgrp(void);
139
140 void (*(fasttable[]))(void) = {
141 fast_null, /* T_FNULL routine */
142 fast_null, /* T_FGETFP routine (initially null) */
143 fast_null, /* T_FSETFP routine (initially null) */
144 (void (*)())get_hrtime, /* T_GETHRTIME */
145 (void (*)())gethrvtime, /* T_GETHRVTIME */
146 (void (*)())get_hrestime, /* T_GETHRESTIME */
147 (void (*)())getlgrp /* T_GETLGRP */
148 };
149
150 /*
151 * Structure containing pre-computed descriptors to allow us to temporarily
152 * interpose on a standard handler.
153 */
struct interposing_handler {
	/* IDT vector being interposed on; 0 marks the end of a table */
	int ih_inum;
	/* gate that routes the vector to the interposing (brand) handler */
	gate_desc_t ih_interp_desc;
	/* saved copy of the standard gate for the same vector */
	gate_desc_t ih_default_desc;
};

/*
 * The brand infrastructure interposes on two handlers, and we use one as a
 * NULL signpost.  The entries are filled in by init_idt_common(): slot 0 is
 * T_INT80, slot 1 is T_SYSCALLINT and slot 2 is the terminator.
 */
static struct interposing_handler brand_tbl[3];
165
166 /*
167 * software prototypes for default local descriptor table
168 */
169
170 /*
171 * Routines for loading segment descriptors in format the hardware
172 * can understand.
173 */
174
175 #if defined(__amd64)
176
177 /*
178 * In long mode we have the new L or long mode attribute bit
179 * for code segments. Only the conforming bit in type is used along
180 * with descriptor priority and present bits. Default operand size must
181 * be zero when in long mode. In 32-bit compatibility mode all fields
182 * are treated as in legacy mode. For data segments while in long mode
183 * only the present bit is loaded.
184 */
185 void
186 set_usegd(user_desc_t *dp, uint_t lmode, void *base, size_t size,
187 uint_t type, uint_t dpl, uint_t gran, uint_t defopsz)
188 {
189 ASSERT(lmode == SDP_SHORT || lmode == SDP_LONG);
190
191 /*
192 * 64-bit long mode.
193 */
194 if (lmode == SDP_LONG)
195 dp->usd_def32 = 0; /* 32-bit operands only */
196 else
197 /*
198 * 32-bit compatibility mode.
199 */
200 dp->usd_def32 = defopsz; /* 0 = 16, 1 = 32-bit ops */
201
202 dp->usd_long = lmode; /* 64-bit mode */
203 dp->usd_type = type;
204 dp->usd_dpl = dpl;
205 dp->usd_p = 1;
206 dp->usd_gran = gran; /* 0 = bytes, 1 = pages */
207
208 dp->usd_lobase = (uintptr_t)base;
209 dp->usd_midbase = (uintptr_t)base >> 16;
210 dp->usd_hibase = (uintptr_t)base >> (16 + 8);
211 dp->usd_lolimit = size;
212 dp->usd_hilimit = (uintptr_t)size >> 16;
213 }
214
215 #elif defined(__i386)
216
217 /*
218 * Install user segment descriptor for code and data.
219 */
220 void
221 set_usegd(user_desc_t *dp, void *base, size_t size, uint_t type,
222 uint_t dpl, uint_t gran, uint_t defopsz)
223 {
224 dp->usd_lolimit = size;
225 dp->usd_hilimit = (uintptr_t)size >> 16;
226
227 dp->usd_lobase = (uintptr_t)base;
228 dp->usd_midbase = (uintptr_t)base >> 16;
229 dp->usd_hibase = (uintptr_t)base >> (16 + 8);
230
231 dp->usd_type = type;
232 dp->usd_dpl = dpl;
233 dp->usd_p = 1;
234 dp->usd_def32 = defopsz; /* 0 = 16, 1 = 32 bit operands */
235 dp->usd_gran = gran; /* 0 = bytes, 1 = pages */
236 }
237
238 #endif /* __i386 */
239
240 /*
241 * Install system segment descriptor for LDT and TSS segments.
242 */
243
244 #if defined(__amd64)
245
246 void
247 set_syssegd(system_desc_t *dp, void *base, size_t size, uint_t type,
248 uint_t dpl)
249 {
250 dp->ssd_lolimit = size;
251 dp->ssd_hilimit = (uintptr_t)size >> 16;
252
253 dp->ssd_lobase = (uintptr_t)base;
254 dp->ssd_midbase = (uintptr_t)base >> 16;
255 dp->ssd_hibase = (uintptr_t)base >> (16 + 8);
256 dp->ssd_hi64base = (uintptr_t)base >> (16 + 8 + 8);
257
258 dp->ssd_type = type;
259 dp->ssd_zero1 = 0; /* must be zero */
260 dp->ssd_zero2 = 0;
261 dp->ssd_dpl = dpl;
262 dp->ssd_p = 1;
263 dp->ssd_gran = 0; /* force byte units */
264 }
265
266 void *
267 get_ssd_base(system_desc_t *dp)
268 {
269 uintptr_t base;
270
271 base = (uintptr_t)dp->ssd_lobase |
272 (uintptr_t)dp->ssd_midbase << 16 |
273 (uintptr_t)dp->ssd_hibase << (16 + 8) |
274 (uintptr_t)dp->ssd_hi64base << (16 + 8 + 8);
275 return ((void *)base);
276 }
277
278 #elif defined(__i386)
279
280 void
281 set_syssegd(system_desc_t *dp, void *base, size_t size, uint_t type,
282 uint_t dpl)
283 {
284 dp->ssd_lolimit = size;
285 dp->ssd_hilimit = (uintptr_t)size >> 16;
286
287 dp->ssd_lobase = (uintptr_t)base;
288 dp->ssd_midbase = (uintptr_t)base >> 16;
289 dp->ssd_hibase = (uintptr_t)base >> (16 + 8);
290
291 dp->ssd_type = type;
292 dp->ssd_zero = 0; /* must be zero */
293 dp->ssd_dpl = dpl;
294 dp->ssd_p = 1;
295 dp->ssd_gran = 0; /* force byte units */
296 }
297
298 void *
299 get_ssd_base(system_desc_t *dp)
300 {
301 uintptr_t base;
302
303 base = (uintptr_t)dp->ssd_lobase |
304 (uintptr_t)dp->ssd_midbase << 16 |
305 (uintptr_t)dp->ssd_hibase << (16 + 8);
306 return ((void *)base);
307 }
308
309 #endif /* __i386 */
310
311 /*
312 * Install gate segment descriptor for interrupt, trap, call and task gates.
313 */
314
315 #if defined(__amd64)
316
317 /*ARGSUSED*/
318 void
319 set_gatesegd(gate_desc_t *dp, void (*func)(void), selector_t sel,
320 uint_t type, uint_t dpl, uint_t vector)
321 {
322 dp->sgd_looffset = (uintptr_t)func;
323 dp->sgd_hioffset = (uintptr_t)func >> 16;
324 dp->sgd_hi64offset = (uintptr_t)func >> (16 + 16);
325
326 dp->sgd_selector = (uint16_t)sel;
327
328 /*
329 * For 64 bit native we use the IST stack mechanism
330 * for double faults. All other traps use the CPL = 0
331 * (tss_rsp0) stack.
332 */
333 #if !defined(__xpv)
334 if (vector == T_DBLFLT)
335 dp->sgd_ist = 1;
336 else
337 #endif
338 dp->sgd_ist = 0;
339
340 dp->sgd_type = type;
341 dp->sgd_dpl = dpl;
342 dp->sgd_p = 1;
343 }
344
345 #elif defined(__i386)
346
347 /*ARGSUSED*/
348 void
349 set_gatesegd(gate_desc_t *dp, void (*func)(void), selector_t sel,
350 uint_t type, uint_t dpl, uint_t unused)
351 {
352 dp->sgd_looffset = (uintptr_t)func;
353 dp->sgd_hioffset = (uintptr_t)func >> 16;
354
355 dp->sgd_selector = (uint16_t)sel;
356 dp->sgd_stkcpy = 0; /* always zero bytes */
357 dp->sgd_type = type;
358 dp->sgd_dpl = dpl;
359 dp->sgd_p = 1;
360 }
361
362 #endif /* __i386 */
363
364 /*
365 * Updates a single user descriptor in the the GDT of the current cpu.
366 * Caller is responsible for preventing cpu migration.
367 */
368
369 void
370 gdt_update_usegd(uint_t sidx, user_desc_t *udp)
371 {
372 #if defined(__xpv)
373
374 uint64_t dpa = CPU->cpu_m.mcpu_gdtpa + sizeof (*udp) * sidx;
375
376 if (HYPERVISOR_update_descriptor(pa_to_ma(dpa), *(uint64_t *)udp))
377 panic("gdt_update_usegd: HYPERVISOR_update_descriptor");
378
379 #else /* __xpv */
380
381 CPU->cpu_gdt[sidx] = *udp;
382
383 #endif /* __xpv */
384 }
385
386 /*
387 * Writes single descriptor pointed to by udp into a processes
388 * LDT entry pointed to by ldp.
389 */
390 int
391 ldt_update_segd(user_desc_t *ldp, user_desc_t *udp)
392 {
393 #if defined(__xpv)
394
395 uint64_t dpa;
396
397 dpa = mmu_ptob(hat_getpfnum(kas.a_hat, (caddr_t)ldp)) |
398 ((uintptr_t)ldp & PAGEOFFSET);
399
400 /*
401 * The hypervisor is a little more restrictive about what it
402 * supports in the LDT.
403 */
404 if (HYPERVISOR_update_descriptor(pa_to_ma(dpa), *(uint64_t *)udp) != 0)
405 return (EINVAL);
406
407 #else /* __xpv */
408
409 *ldp = *udp;
410
411 #endif /* __xpv */
412 return (0);
413 }
414
415 #if defined(__xpv)
416
417 /*
418 * Converts hw format gate descriptor into pseudo-IDT format for the hypervisor.
419 * Returns true if a valid entry was written.
420 */
421 int
422 xen_idt_to_trap_info(uint_t vec, gate_desc_t *sgd, void *ti_arg)
423 {
424 trap_info_t *ti = ti_arg; /* XXPV Aargh - segments.h comment */
425
426 /*
427 * skip holes in the IDT
428 */
429 if (GATESEG_GETOFFSET(sgd) == 0)
430 return (0);
431
432 ASSERT(sgd->sgd_type == SDT_SYSIGT);
433 ti->vector = vec;
434 TI_SET_DPL(ti, sgd->sgd_dpl);
435
436 /*
437 * Is this an interrupt gate?
438 */
439 if (sgd->sgd_type == SDT_SYSIGT) {
440 /* LINTED */
441 TI_SET_IF(ti, 1);
442 }
443 ti->cs = sgd->sgd_selector;
444 #if defined(__amd64)
445 ti->cs |= SEL_KPL; /* force into ring 3. see KCS_SEL */
446 #endif
447 ti->address = GATESEG_GETOFFSET(sgd);
448 return (1);
449 }
450
451 /*
452 * Convert a single hw format gate descriptor and write it into our virtual IDT.
453 */
454 void
455 xen_idt_write(gate_desc_t *sgd, uint_t vec)
456 {
457 trap_info_t trapinfo[2];
458
459 bzero(trapinfo, sizeof (trapinfo));
460 if (xen_idt_to_trap_info(vec, sgd, &trapinfo[0]) == 0)
461 return;
462 if (xen_set_trap_table(trapinfo) != 0)
463 panic("xen_idt_write: xen_set_trap_table() failed");
464 }
465
466 #endif /* __xpv */
467
468 #if defined(__amd64)
469
470 /*
471 * Build kernel GDT.
472 */
473
474 static void
475 init_gdt_common(user_desc_t *gdt)
476 {
477 int i;
478
479 /*
480 * 64-bit kernel code segment.
481 */
482 set_usegd(&gdt[GDT_KCODE], SDP_LONG, NULL, 0, SDT_MEMERA, SEL_KPL,
483 SDP_PAGES, SDP_OP32);
484
485 /*
486 * 64-bit kernel data segment. The limit attribute is ignored in 64-bit
487 * mode, but we set it here to 0xFFFF so that we can use the SYSRET
488 * instruction to return from system calls back to 32-bit applications.
489 * SYSRET doesn't update the base, limit, or attributes of %ss or %ds
490 * descriptors. We therefore must ensure that the kernel uses something,
491 * though it will be ignored by hardware, that is compatible with 32-bit
492 * apps. For the same reason we must set the default op size of this
493 * descriptor to 32-bit operands.
494 */
495 set_usegd(&gdt[GDT_KDATA], SDP_LONG, NULL, -1, SDT_MEMRWA,
496 SEL_KPL, SDP_PAGES, SDP_OP32);
497 gdt[GDT_KDATA].usd_def32 = 1;
498
499 /*
500 * 64-bit user code segment.
501 */
502 set_usegd(&gdt[GDT_UCODE], SDP_LONG, NULL, 0, SDT_MEMERA, SEL_UPL,
503 SDP_PAGES, SDP_OP32);
504
505 /*
506 * 32-bit user code segment.
507 */
508 set_usegd(&gdt[GDT_U32CODE], SDP_SHORT, NULL, -1, SDT_MEMERA,
509 SEL_UPL, SDP_PAGES, SDP_OP32);
510
511 /*
512 * See gdt_ucode32() and gdt_ucode_native().
513 */
514 ucs_on = ucs_off = gdt[GDT_UCODE];
515 ucs_off.usd_p = 0; /* forces #np fault */
516
517 ucs32_on = ucs32_off = gdt[GDT_U32CODE];
518 ucs32_off.usd_p = 0; /* forces #np fault */
519
520 /*
521 * 32 and 64 bit data segments can actually share the same descriptor.
522 * In long mode only the present bit is checked but all other fields
523 * are loaded. But in compatibility mode all fields are interpreted
524 * as in legacy mode so they must be set correctly for a 32-bit data
525 * segment.
526 */
527 set_usegd(&gdt[GDT_UDATA], SDP_SHORT, NULL, -1, SDT_MEMRWA, SEL_UPL,
528 SDP_PAGES, SDP_OP32);
529
530 #if !defined(__xpv)
531
532 /*
533 * The 64-bit kernel has no default LDT. By default, the LDT descriptor
534 * in the GDT is 0.
535 */
536
537 /*
538 * Kernel TSS
539 */
540 set_syssegd((system_desc_t *)&gdt[GDT_KTSS], ktss0,
541 sizeof (*ktss0) - 1, SDT_SYSTSS, SEL_KPL);
542
543 #endif /* !__xpv */
544
545 /*
546 * Initialize fs and gs descriptors for 32 bit processes.
547 * Only attributes and limits are initialized, the effective
548 * base address is programmed via fsbase/gsbase.
549 */
550 set_usegd(&gdt[GDT_LWPFS], SDP_SHORT, NULL, -1, SDT_MEMRWA,
551 SEL_UPL, SDP_PAGES, SDP_OP32);
552 set_usegd(&gdt[GDT_LWPGS], SDP_SHORT, NULL, -1, SDT_MEMRWA,
553 SEL_UPL, SDP_PAGES, SDP_OP32);
554
555 /*
556 * Initialize the descriptors set aside for brand usage.
557 * Only attributes and limits are initialized.
558 */
559 for (i = GDT_BRANDMIN; i <= GDT_BRANDMAX; i++)
560 set_usegd(&gdt0[i], SDP_SHORT, NULL, -1, SDT_MEMRWA,
561 SEL_UPL, SDP_PAGES, SDP_OP32);
562
563 /*
564 * Initialize convenient zero base user descriptors for clearing
565 * lwp private %fs and %gs descriptors in GDT. See setregs() for
566 * an example.
567 */
568 set_usegd(&zero_udesc, SDP_LONG, 0, 0, SDT_MEMRWA, SEL_UPL,
569 SDP_BYTES, SDP_OP32);
570 set_usegd(&zero_u32desc, SDP_SHORT, 0, -1, SDT_MEMRWA, SEL_UPL,
571 SDP_PAGES, SDP_OP32);
572 }
573
574 #if defined(__xpv)
575
576 static user_desc_t *
577 init_gdt(void)
578 {
579 uint64_t gdtpa;
580 ulong_t ma[1]; /* XXPV should be a memory_t */
581 ulong_t addr;
582
583 #if !defined(__lint)
584 /*
585 * Our gdt is never larger than a single page.
586 */
587 ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE);
588 #endif
589 gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA,
590 PAGESIZE, PAGESIZE);
591 bzero(gdt0, PAGESIZE);
592
593 init_gdt_common(gdt0);
594
595 /*
596 * XXX Since we never invoke kmdb until after the kernel takes
597 * over the descriptor tables why not have it use the kernel's
598 * selectors?
599 */
600 if (boothowto & RB_DEBUG) {
601 set_usegd(&gdt0[GDT_B32DATA], SDP_LONG, NULL, -1, SDT_MEMRWA,
602 SEL_KPL, SDP_PAGES, SDP_OP32);
603 set_usegd(&gdt0[GDT_B64CODE], SDP_LONG, NULL, -1, SDT_MEMERA,
604 SEL_KPL, SDP_PAGES, SDP_OP32);
605 }
606
607 /*
608 * Clear write permission for page containing the gdt and install it.
609 */
610 gdtpa = pfn_to_pa(va_to_pfn(gdt0));
611 ma[0] = (ulong_t)(pa_to_ma(gdtpa) >> PAGESHIFT);
612 kbm_read_only((uintptr_t)gdt0, gdtpa);
613 xen_set_gdt(ma, NGDT);
614
615 /*
616 * Reload the segment registers to use the new GDT.
617 * On 64-bit, fixup KCS_SEL to be in ring 3.
618 * See KCS_SEL in segments.h.
619 */
620 load_segment_registers((KCS_SEL | SEL_KPL), KFS_SEL, KGS_SEL, KDS_SEL);
621
622 /*
623 * setup %gs for kernel
624 */
625 xen_set_segment_base(SEGBASE_GS_KERNEL, (ulong_t)&cpus[0]);
626
627 /*
628 * XX64 We should never dereference off "other gsbase" or
629 * "fsbase". So, we should arrange to point FSBASE and
630 * KGSBASE somewhere truly awful e.g. point it at the last
631 * valid address below the hole so that any attempts to index
632 * off them cause an exception.
633 *
634 * For now, point it at 8G -- at least it should be unmapped
635 * until some 64-bit processes run.
636 */
637 addr = 0x200000000ul;
638 xen_set_segment_base(SEGBASE_FS, addr);
639 xen_set_segment_base(SEGBASE_GS_USER, addr);
640 xen_set_segment_base(SEGBASE_GS_USER_SEL, 0);
641
642 return (gdt0);
643 }
644
645 #else /* __xpv */
646
647 static user_desc_t *
648 init_gdt(void)
649 {
650 desctbr_t r_bgdt, r_gdt;
651 user_desc_t *bgdt;
652
653 #if !defined(__lint)
654 /*
655 * Our gdt is never larger than a single page.
656 */
657 ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE);
658 #endif
659 gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA,
660 PAGESIZE, PAGESIZE);
661 bzero(gdt0, PAGESIZE);
662
663 init_gdt_common(gdt0);
664
665 /*
666 * Copy in from boot's gdt to our gdt.
667 * Entry 0 is the null descriptor by definition.
668 */
669 rd_gdtr(&r_bgdt);
670 bgdt = (user_desc_t *)r_bgdt.dtr_base;
671 if (bgdt == NULL)
672 panic("null boot gdt");
673
674 gdt0[GDT_B32DATA] = bgdt[GDT_B32DATA];
675 gdt0[GDT_B32CODE] = bgdt[GDT_B32CODE];
676 gdt0[GDT_B16CODE] = bgdt[GDT_B16CODE];
677 gdt0[GDT_B16DATA] = bgdt[GDT_B16DATA];
678 gdt0[GDT_B64CODE] = bgdt[GDT_B64CODE];
679
680 /*
681 * Install our new GDT
682 */
683 r_gdt.dtr_limit = (sizeof (*gdt0) * NGDT) - 1;
684 r_gdt.dtr_base = (uintptr_t)gdt0;
685 wr_gdtr(&r_gdt);
686
687 /*
688 * Reload the segment registers to use the new GDT
689 */
690 load_segment_registers(KCS_SEL, KFS_SEL, KGS_SEL, KDS_SEL);
691
692 /*
693 * setup %gs for kernel
694 */
695 wrmsr(MSR_AMD_GSBASE, (uint64_t)&cpus[0]);
696
697 /*
698 * XX64 We should never dereference off "other gsbase" or
699 * "fsbase". So, we should arrange to point FSBASE and
700 * KGSBASE somewhere truly awful e.g. point it at the last
701 * valid address below the hole so that any attempts to index
702 * off them cause an exception.
703 *
704 * For now, point it at 8G -- at least it should be unmapped
705 * until some 64-bit processes run.
706 */
707 wrmsr(MSR_AMD_FSBASE, 0x200000000ul);
708 wrmsr(MSR_AMD_KGSBASE, 0x200000000ul);
709 return (gdt0);
710 }
711
712 #endif /* __xpv */
713
714 #elif defined(__i386)
715
716 static void
717 init_gdt_common(user_desc_t *gdt)
718 {
719 int i;
720
721 /*
722 * Text and data for both kernel and user span entire 32 bit
723 * address space.
724 */
725
726 /*
727 * kernel code segment.
728 */
729 set_usegd(&gdt[GDT_KCODE], NULL, -1, SDT_MEMERA, SEL_KPL, SDP_PAGES,
730 SDP_OP32);
731
732 /*
733 * kernel data segment.
734 */
735 set_usegd(&gdt[GDT_KDATA], NULL, -1, SDT_MEMRWA, SEL_KPL, SDP_PAGES,
736 SDP_OP32);
737
738 /*
739 * user code segment.
740 */
741 set_usegd(&gdt[GDT_UCODE], NULL, -1, SDT_MEMERA, SEL_UPL, SDP_PAGES,
742 SDP_OP32);
743
744 /*
745 * user data segment.
746 */
747 set_usegd(&gdt[GDT_UDATA], NULL, -1, SDT_MEMRWA, SEL_UPL, SDP_PAGES,
748 SDP_OP32);
749
750 #if !defined(__xpv)
751
752 /*
753 * TSS for T_DBLFLT (double fault) handler
754 */
755 set_syssegd((system_desc_t *)&gdt[GDT_DBFLT], dftss0,
756 sizeof (*dftss0) - 1, SDT_SYSTSS, SEL_KPL);
757
758 /*
759 * TSS for kernel
760 */
761 set_syssegd((system_desc_t *)&gdt[GDT_KTSS], ktss0,
762 sizeof (*ktss0) - 1, SDT_SYSTSS, SEL_KPL);
763
764 #endif /* !__xpv */
765
766 /*
767 * %gs selector for kernel
768 */
769 set_usegd(&gdt[GDT_GS], &cpus[0], sizeof (struct cpu) -1, SDT_MEMRWA,
770 SEL_KPL, SDP_BYTES, SDP_OP32);
771
772 /*
773 * Initialize lwp private descriptors.
774 * Only attributes and limits are initialized, the effective
775 * base address is programmed via fsbase/gsbase.
776 */
777 set_usegd(&gdt[GDT_LWPFS], NULL, (size_t)-1, SDT_MEMRWA, SEL_UPL,
778 SDP_PAGES, SDP_OP32);
779 set_usegd(&gdt[GDT_LWPGS], NULL, (size_t)-1, SDT_MEMRWA, SEL_UPL,
780 SDP_PAGES, SDP_OP32);
781
782 /*
783 * Initialize the descriptors set aside for brand usage.
784 * Only attributes and limits are initialized.
785 */
786 for (i = GDT_BRANDMIN; i <= GDT_BRANDMAX; i++)
787 set_usegd(&gdt0[i], NULL, (size_t)-1, SDT_MEMRWA, SEL_UPL,
788 SDP_PAGES, SDP_OP32);
789 /*
790 * Initialize convenient zero base user descriptor for clearing
791 * lwp private %fs and %gs descriptors in GDT. See setregs() for
792 * an example.
793 */
794 set_usegd(&zero_udesc, NULL, -1, SDT_MEMRWA, SEL_UPL,
795 SDP_BYTES, SDP_OP32);
796 }
797
798 #if defined(__xpv)
799
800 static user_desc_t *
801 init_gdt(void)
802 {
803 uint64_t gdtpa;
804 ulong_t ma[1]; /* XXPV should be a memory_t */
805
806 #if !defined(__lint)
807 /*
808 * Our gdt is never larger than a single page.
809 */
810 ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE);
811 #endif
812 gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA,
813 PAGESIZE, PAGESIZE);
814 bzero(gdt0, PAGESIZE);
815
816 init_gdt_common(gdt0);
817 gdtpa = pfn_to_pa(va_to_pfn(gdt0));
818
819 /*
820 * XXX Since we never invoke kmdb until after the kernel takes
821 * over the descriptor tables why not have it use the kernel's
822 * selectors?
823 */
824 if (boothowto & RB_DEBUG) {
825 set_usegd(&gdt0[GDT_B32DATA], NULL, -1, SDT_MEMRWA, SEL_KPL,
826 SDP_PAGES, SDP_OP32);
827 set_usegd(&gdt0[GDT_B32CODE], NULL, -1, SDT_MEMERA, SEL_KPL,
828 SDP_PAGES, SDP_OP32);
829 }
830
831 /*
832 * Clear write permission for page containing the gdt and install it.
833 */
834 ma[0] = (ulong_t)(pa_to_ma(gdtpa) >> PAGESHIFT);
835 kbm_read_only((uintptr_t)gdt0, gdtpa);
836 xen_set_gdt(ma, NGDT);
837
838 /*
839 * Reload the segment registers to use the new GDT
840 */
841 load_segment_registers(
842 KCS_SEL, KDS_SEL, KDS_SEL, KFS_SEL, KGS_SEL, KDS_SEL);
843
844 return (gdt0);
845 }
846
847 #else /* __xpv */
848
849 static user_desc_t *
850 init_gdt(void)
851 {
852 desctbr_t r_bgdt, r_gdt;
853 user_desc_t *bgdt;
854
855 #if !defined(__lint)
856 /*
857 * Our gdt is never larger than a single page.
858 */
859 ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE);
860 #endif
861 /*
862 * XXX this allocation belongs in our caller, not here.
863 */
864 gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA,
865 PAGESIZE, PAGESIZE);
866 bzero(gdt0, PAGESIZE);
867
868 init_gdt_common(gdt0);
869
870 /*
871 * Copy in from boot's gdt to our gdt entries.
872 * Entry 0 is null descriptor by definition.
873 */
874 rd_gdtr(&r_bgdt);
875 bgdt = (user_desc_t *)r_bgdt.dtr_base;
876 if (bgdt == NULL)
877 panic("null boot gdt");
878
879 gdt0[GDT_B32DATA] = bgdt[GDT_B32DATA];
880 gdt0[GDT_B32CODE] = bgdt[GDT_B32CODE];
881 gdt0[GDT_B16CODE] = bgdt[GDT_B16CODE];
882 gdt0[GDT_B16DATA] = bgdt[GDT_B16DATA];
883
884 /*
885 * Install our new GDT
886 */
887 r_gdt.dtr_limit = (sizeof (*gdt0) * NGDT) - 1;
888 r_gdt.dtr_base = (uintptr_t)gdt0;
889 wr_gdtr(&r_gdt);
890
891 /*
892 * Reload the segment registers to use the new GDT
893 */
894 load_segment_registers(
895 KCS_SEL, KDS_SEL, KDS_SEL, KFS_SEL, KGS_SEL, KDS_SEL);
896
897 return (gdt0);
898 }
899
900 #endif /* __xpv */
901 #endif /* __i386 */
902
903 /*
904 * Build kernel IDT.
905 *
906 * Note that for amd64 we pretty much require every gate to be an interrupt
907 * gate which blocks interrupts atomically on entry; that's because of our
908 * dependency on using 'swapgs' every time we come into the kernel to find
909 * the cpu structure. If we get interrupted just before doing that, %cs could
910 * be in kernel mode (so that the trap prolog doesn't do a swapgs), but
911 * %gsbase is really still pointing at something in userland. Bad things will
912 * ensue. We also use interrupt gates for i386 as well even though this is not
913 * required for some traps.
914 *
915 * Perhaps they should have invented a trap gate that does an atomic swapgs?
916 */
917 static void
918 init_idt_common(gate_desc_t *idt)
919 {
920 set_gatesegd(&idt[T_ZERODIV], &div0trap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
921 0);
922 set_gatesegd(&idt[T_SGLSTP], &dbgtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
923 0);
924 set_gatesegd(&idt[T_NMIFLT], &nmiint, KCS_SEL, SDT_SYSIGT, TRP_KPL,
925 0);
926 set_gatesegd(&idt[T_BPTFLT], &brktrap, KCS_SEL, SDT_SYSIGT, TRP_UPL,
927 0);
928 set_gatesegd(&idt[T_OVFLW], &ovflotrap, KCS_SEL, SDT_SYSIGT, TRP_UPL,
929 0);
930 set_gatesegd(&idt[T_BOUNDFLT], &boundstrap, KCS_SEL, SDT_SYSIGT,
931 TRP_KPL, 0);
932 set_gatesegd(&idt[T_ILLINST], &invoptrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
933 0);
934 set_gatesegd(&idt[T_NOEXTFLT], &ndptrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
935 0);
936
937 /*
938 * double fault handler.
939 *
940 * Note that on the hypervisor a guest does not receive #df faults.
941 * Instead a failsafe event is injected into the guest if its selectors
942 * and/or stack is in a broken state. See xen_failsafe_callback.
943 */
944 #if !defined(__xpv)
945 #if defined(__amd64)
946
947 set_gatesegd(&idt[T_DBLFLT], &syserrtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
948 T_DBLFLT);
949
950 #elif defined(__i386)
951
952 /*
953 * task gate required.
954 */
955 set_gatesegd(&idt[T_DBLFLT], NULL, DFTSS_SEL, SDT_SYSTASKGT, TRP_KPL,
956 0);
957
958 #endif /* __i386 */
959 #endif /* !__xpv */
960
961 /*
962 * T_EXTOVRFLT coprocessor-segment-overrun not supported.
963 */
964
965 set_gatesegd(&idt[T_TSSFLT], &invtsstrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
966 0);
967 set_gatesegd(&idt[T_SEGFLT], &segnptrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
968 0);
969 set_gatesegd(&idt[T_STKFLT], &stktrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
970 set_gatesegd(&idt[T_GPFLT], &gptrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
971 set_gatesegd(&idt[T_PGFLT], &pftrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
972 set_gatesegd(&idt[T_EXTERRFLT], &ndperr, KCS_SEL, SDT_SYSIGT, TRP_KPL,
973 0);
974 set_gatesegd(&idt[T_ALIGNMENT], &achktrap, KCS_SEL, SDT_SYSIGT,
975 TRP_KPL, 0);
976 set_gatesegd(&idt[T_MCE], &mcetrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
977 set_gatesegd(&idt[T_SIMDFPE], &xmtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
978
979 /*
980 * install "int80" handler at, well, 0x80.
981 */
982 set_gatesegd(&idt0[T_INT80], &sys_int80, KCS_SEL, SDT_SYSIGT, TRP_UPL,
983 0);
984
985 /*
986 * install fast trap handler at 210.
987 */
988 set_gatesegd(&idt[T_FASTTRAP], &fasttrap, KCS_SEL, SDT_SYSIGT, TRP_UPL,
989 0);
990
991 /*
992 * System call handler.
993 */
994 #if defined(__amd64)
995 set_gatesegd(&idt[T_SYSCALLINT], &sys_syscall_int, KCS_SEL, SDT_SYSIGT,
996 TRP_UPL, 0);
997
998 #elif defined(__i386)
999 set_gatesegd(&idt[T_SYSCALLINT], &sys_call, KCS_SEL, SDT_SYSIGT,
1000 TRP_UPL, 0);
1001 #endif /* __i386 */
1002
1003 /*
1004 * Install the DTrace interrupt handler for the pid provider.
1005 */
1006 set_gatesegd(&idt[T_DTRACE_RET], &dtrace_ret, KCS_SEL,
1007 SDT_SYSIGT, TRP_UPL, 0);
1008
1009 /*
1010 - * Prepare interposing descriptors for the branded "int80"
1011 - * and syscall handlers and cache copies of the default
1012 - * descriptors.
1013 */
1014 brand_tbl[0].ih_inum = T_INT80;
1015 brand_tbl[0].ih_default_desc = idt0[T_INT80];
1016 set_gatesegd(&(brand_tbl[0].ih_interp_desc), &brand_sys_int80, KCS_SEL,
1017 SDT_SYSIGT, TRP_UPL, 0);
1018
1019 brand_tbl[1].ih_inum = T_SYSCALLINT;
1020 brand_tbl[1].ih_default_desc = idt0[T_SYSCALLINT];
1021
1022 #if defined(__amd64)
1023 set_gatesegd(&(brand_tbl[1].ih_interp_desc), &brand_sys_syscall_int,
1024 KCS_SEL, SDT_SYSIGT, TRP_UPL, 0);
1025 #elif defined(__i386)
1026 set_gatesegd(&(brand_tbl[1].ih_interp_desc), &brand_sys_call,
1027 KCS_SEL, SDT_SYSIGT, TRP_UPL, 0);
1028 #endif /* __i386 */
1029
1030 brand_tbl[2].ih_inum = 0;
1031 }
1032
1033 #if defined(__xpv)
1034
/*
 * Hypervisor build: populate idt with only the common gates installed by
 * init_idt_common().  Unlike the native variant, no table-wide default
 * handlers are installed first.
 */
static void
init_idt(gate_desc_t *idt)
{
	init_idt_common(idt);
}
1040
1041 #else /* __xpv */
1042
1043 static void
1044 init_idt(gate_desc_t *idt)
1045 {
1046 char ivctname[80];
1047 void (*ivctptr)(void);
1048 int i;
1049
1050 /*
1051 * Initialize entire table with 'reserved' trap and then overwrite
1052 * specific entries. T_EXTOVRFLT (9) is unsupported and reserved
1053 * since it can only be generated on a 386 processor. 15 is also
1054 * unsupported and reserved.
1055 */
1056 for (i = 0; i < NIDT; i++)
1057 set_gatesegd(&idt[i], &resvtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
1058 0);
1059
1060 /*
1061 * 20-31 reserved
1062 */
1063 for (i = 20; i < 32; i++)
1064 set_gatesegd(&idt[i], &invaltrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
1065 0);
1066
1067 /*
1068 * interrupts 32 - 255
1069 */
1070 for (i = 32; i < 256; i++) {
1071 (void) snprintf(ivctname, sizeof (ivctname), "ivct%d", i);
1072 ivctptr = (void (*)(void))kobj_getsymvalue(ivctname, 0);
1073 if (ivctptr == NULL)
1074 panic("kobj_getsymvalue(%s) failed", ivctname);
1075
1076 set_gatesegd(&idt[i], ivctptr, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
1077 }
1078
1079 /*
1080 * Now install the common ones. Note that it will overlay some
1081 * entries installed above like T_SYSCALLINT, T_FASTTRAP etc.
1082 */
1083 init_idt_common(idt);
1084 }
1085
1086 #endif /* __xpv */
1087
/*
 * Start with no LDT loaded.
 *
 * The kernel only deals with an LDT when a user explicitly creates one,
 * so under normal circumstances the LDTR contains 0 and any process that
 * references the LDT takes a #gp.  System calls made via the obsolete
 * lcall mechanism are emulated by the #gp fault handler.
 */
static void
init_ldt(void)
{
#if !defined(__xpv)
	wr_ldtr(0);
#else
	xen_set_ldt(NULL, 0);
#endif
}
1103
1104 #if !defined(__xpv)
1105 #if defined(__amd64)
1106
1107 static void
1108 init_tss(void)
1109 {
1110 /*
1111 * tss_rsp0 is dynamically filled in by resume() on each context switch.
1112 * All exceptions but #DF will run on the thread stack.
1113 * Set up the double fault stack here.
1114 */
1115 ktss0->tss_ist1 =
1116 (uint64_t)&dblfault_stack0[sizeof (dblfault_stack0)];
1117
1118 /*
1119 * Set I/O bit map offset equal to size of TSS segment limit
1120 * for no I/O permission map. This will force all user I/O
1121 * instructions to generate #gp fault.
1122 */
1123 ktss0->tss_bitmapbase = sizeof (*ktss0);
1124
1125 /*
1126 * Point %tr to descriptor for ktss0 in gdt.
1127 */
1128 wr_tsr(KTSS_SEL);
1129 }
1130
1131 #elif defined(__i386)
1132
1133 static void
1134 init_tss(void)
1135 {
1136 /*
1137 * ktss0->tss_esp dynamically filled in by resume() on each
1138 * context switch.
1139 */
1140 ktss0->tss_ss0 = KDS_SEL;
1141 ktss0->tss_eip = (uint32_t)_start;
1142 ktss0->tss_ds = ktss0->tss_es = ktss0->tss_ss = KDS_SEL;
1143 ktss0->tss_cs = KCS_SEL;
1144 ktss0->tss_fs = KFS_SEL;
1145 ktss0->tss_gs = KGS_SEL;
1146 ktss0->tss_ldt = ULDT_SEL;
1147
1148 /*
1149 * Initialize double fault tss.
1150 */
1151 dftss0->tss_esp0 = (uint32_t)&dblfault_stack0[sizeof (dblfault_stack0)];
1152 dftss0->tss_ss0 = KDS_SEL;
1153
1154 /*
1155 * tss_cr3 will get initialized in hat_kern_setup() once our page
1156 * tables have been setup.
1157 */
1158 dftss0->tss_eip = (uint32_t)syserrtrap;
1159 dftss0->tss_esp = (uint32_t)&dblfault_stack0[sizeof (dblfault_stack0)];
1160 dftss0->tss_cs = KCS_SEL;
1161 dftss0->tss_ds = KDS_SEL;
1162 dftss0->tss_es = KDS_SEL;
1163 dftss0->tss_ss = KDS_SEL;
1164 dftss0->tss_fs = KFS_SEL;
1165 dftss0->tss_gs = KGS_SEL;
1166
1167 /*
1168 * Set I/O bit map offset equal to size of TSS segment limit
1169 * for no I/O permission map. This will force all user I/O
1170 * instructions to generate #gp fault.
1171 */
1172 ktss0->tss_bitmapbase = sizeof (*ktss0);
1173
1174 /*
1175 * Point %tr to descriptor for ktss0 in gdt.
1176 */
1177 wr_tsr(KTSS_SEL);
1178 }
1179
1180 #endif /* __i386 */
1181 #endif /* !__xpv */
1182
1183 #if defined(__xpv)
1184
1185 void
1186 init_desctbls(void)
1187 {
1188 uint_t vec;
1189 user_desc_t *gdt;
1190
1191 /*
1192 * Setup and install our GDT.
1193 */
1194 gdt = init_gdt();
1195
1196 /*
1197 * Store static pa of gdt to speed up pa_to_ma() translations
1198 * on lwp context switches.
1199 */
1200 ASSERT(IS_P2ALIGNED((uintptr_t)gdt, PAGESIZE));
1201 CPU->cpu_gdt = gdt;
1202 CPU->cpu_m.mcpu_gdtpa = pfn_to_pa(va_to_pfn(gdt));
1203
1204 /*
1205 * Setup and install our IDT.
1206 */
1207 #if !defined(__lint)
1208 ASSERT(NIDT * sizeof (*idt0) <= PAGESIZE);
1209 #endif
1210 idt0 = (gate_desc_t *)BOP_ALLOC(bootops, (caddr_t)IDT_VA,
1211 PAGESIZE, PAGESIZE);
1212 bzero(idt0, PAGESIZE);
1213 init_idt(idt0);
1214 for (vec = 0; vec < NIDT; vec++)
1215 xen_idt_write(&idt0[vec], vec);
1216
1217 CPU->cpu_idt = idt0;
1218
1219 /*
1220 * set default kernel stack
1221 */
1222 xen_stack_switch(KDS_SEL,
1223 (ulong_t)&dblfault_stack0[sizeof (dblfault_stack0)]);
1224
1225 xen_init_callbacks();
1226
1227 init_ldt();
1228 }
1229
1230 #else /* __xpv */
1231
1232 void
1233 init_desctbls(void)
1234 {
1235 user_desc_t *gdt;
1236 desctbr_t idtr;
1237
1238 /*
1239 * Allocate IDT and TSS structures on unique pages for better
1240 * performance in virtual machines.
1241 */
1242 #if !defined(__lint)
1243 ASSERT(NIDT * sizeof (*idt0) <= PAGESIZE);
1244 #endif
1245 idt0 = (gate_desc_t *)BOP_ALLOC(bootops, (caddr_t)IDT_VA,
1246 PAGESIZE, PAGESIZE);
1247 bzero(idt0, PAGESIZE);
1248 #if !defined(__lint)
1249 ASSERT(sizeof (*ktss0) <= PAGESIZE);
1250 #endif
1251 ktss0 = (tss_t *)BOP_ALLOC(bootops, (caddr_t)KTSS_VA,
1252 PAGESIZE, PAGESIZE);
1253 bzero(ktss0, PAGESIZE);
1254
1255 #if defined(__i386)
1256 #if !defined(__lint)
1257 ASSERT(sizeof (*dftss0) <= PAGESIZE);
1258 #endif
1259 dftss0 = (tss_t *)BOP_ALLOC(bootops, (caddr_t)DFTSS_VA,
1260 PAGESIZE, PAGESIZE);
1261 bzero(dftss0, PAGESIZE);
1262 #endif
1263
1264 /*
1265 * Setup and install our GDT.
1266 */
1267 gdt = init_gdt();
1268 ASSERT(IS_P2ALIGNED((uintptr_t)gdt, PAGESIZE));
1269 CPU->cpu_gdt = gdt;
1270
1271 /*
1272 * Setup and install our IDT.
1273 */
1274 init_idt(idt0);
1275
1276 idtr.dtr_base = (uintptr_t)idt0;
1277 idtr.dtr_limit = (NIDT * sizeof (*idt0)) - 1;
1278 wr_idtr(&idtr);
1279 CPU->cpu_idt = idt0;
1280
1281 #if defined(__i386)
1282 /*
1283 * We maintain a description of idt0 in convenient IDTR format
1284 * for #pf's on some older pentium processors. See pentium_pftrap().
1285 */
1286 idt0_default_r = idtr;
1287 #endif /* __i386 */
1288
1289 init_tss();
1290 CPU->cpu_tss = ktss0;
1291 init_ldt();
1292 }
1293
1294 #endif /* __xpv */
1295
1296 /*
1297 * In the early kernel, we need to set up a simple GDT to run on.
1298 *
1299 * XXPV Can dboot use this too? See dboot_gdt.s
1300 */
1301 void
1302 init_boot_gdt(user_desc_t *bgdt)
1303 {
1304 #if defined(__amd64)
1305 set_usegd(&bgdt[GDT_B32DATA], SDP_LONG, NULL, -1, SDT_MEMRWA, SEL_KPL,
1306 SDP_PAGES, SDP_OP32);
1307 set_usegd(&bgdt[GDT_B64CODE], SDP_LONG, NULL, -1, SDT_MEMERA, SEL_KPL,
1308 SDP_PAGES, SDP_OP32);
1309 #elif defined(__i386)
1310 set_usegd(&bgdt[GDT_B32DATA], NULL, -1, SDT_MEMRWA, SEL_KPL,
1311 SDP_PAGES, SDP_OP32);
1312 set_usegd(&bgdt[GDT_B32CODE], NULL, -1, SDT_MEMERA, SEL_KPL,
1313 SDP_PAGES, SDP_OP32);
1314 #endif /* __i386 */
1315 }
1316
1317 /*
1318 * Enable interpositioning on the system call path by rewriting the
1319 * sys{call|enter} MSRs and the syscall-related entries in the IDT to use
1320 * the branded entry points.
1321 */
1322 void
1323 brand_interpositioning_enable(void)
1324 {
1325 gate_desc_t *idt = CPU->cpu_idt;
1326 int i;
1327
1328 ASSERT(curthread->t_preempt != 0 || getpil() >= DISP_LEVEL);
1329
1330 for (i = 0; brand_tbl[i].ih_inum; i++) {
1331 idt[brand_tbl[i].ih_inum] = brand_tbl[i].ih_interp_desc;
1332 #if defined(__xpv)
1333 xen_idt_write(&idt[brand_tbl[i].ih_inum],
1334 brand_tbl[i].ih_inum);
1335 #endif
1336 }
1337
1338 #if defined(__amd64)
1339 #if defined(__xpv)
1340
1341 /*
1342 * Currently the hypervisor only supports 64-bit syscalls via
1343 * syscall instruction. The 32-bit syscalls are handled by
1344 * interrupt gate above.
1345 */
1346 xen_set_callback(brand_sys_syscall, CALLBACKTYPE_syscall,
1347 CALLBACKF_mask_events);
1348
1349 #else
1350
1351 if (is_x86_feature(x86_featureset, X86FSET_ASYSC)) {
1352 wrmsr(MSR_AMD_LSTAR, (uintptr_t)brand_sys_syscall);
1353 wrmsr(MSR_AMD_CSTAR, (uintptr_t)brand_sys_syscall32);
1354 }
1355
1356 #endif
1357 #endif /* __amd64 */
1358
1359 if (is_x86_feature(x86_featureset, X86FSET_SEP))
1360 wrmsr(MSR_INTC_SEP_EIP, (uintptr_t)brand_sys_sysenter);
1361 }
1362
1363 /*
1364 * Disable interpositioning on the system call path by rewriting the
1365 * sys{call|enter} MSRs and the syscall-related entries in the IDT to use
1366 * the standard entry points, which bypass the interpositioning hooks.
1367 */
1368 void
1369 brand_interpositioning_disable(void)
1370 {
1371 gate_desc_t *idt = CPU->cpu_idt;
1372 int i;
1373
1374 ASSERT(curthread->t_preempt != 0 || getpil() >= DISP_LEVEL);
1375
1376 for (i = 0; brand_tbl[i].ih_inum; i++) {
1377 idt[brand_tbl[i].ih_inum] = brand_tbl[i].ih_default_desc;
1378 #if defined(__xpv)
1379 xen_idt_write(&idt[brand_tbl[i].ih_inum],
1380 brand_tbl[i].ih_inum);
1381 #endif
1382 }
1383
1384 #if defined(__amd64)
1385 #if defined(__xpv)
1386
1387 /*
1388 * See comment above in brand_interpositioning_enable.
1389 */
1390 xen_set_callback(sys_syscall, CALLBACKTYPE_syscall,
1391 CALLBACKF_mask_events);
1392
1393 #else
1394
1395 if (is_x86_feature(x86_featureset, X86FSET_ASYSC)) {
1396 wrmsr(MSR_AMD_LSTAR, (uintptr_t)sys_syscall);
1397 wrmsr(MSR_AMD_CSTAR, (uintptr_t)sys_syscall32);
1398 }
1399
1400 #endif
1401 #endif /* __amd64 */
1402
1403 if (is_x86_feature(x86_featureset, X86FSET_SEP))
1404 wrmsr(MSR_INTC_SEP_EIP, (uintptr_t)sys_sysenter);
1405 }