Bring back LX zones.
--- old/usr/src/uts/intel/ia32/os/desctbls.c
+++ new/usr/src/uts/intel/ia32/os/desctbls.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
24 24 */
25 25
26 26 /*
27 27 * Copyright 2011 Joyent, Inc. All rights reserved.
28 28 */
29 29
30 30 /*
31 31 * Copyright (c) 1992 Terrence R. Lambert.
32 32 * Copyright (c) 1990 The Regents of the University of California.
33 33 * All rights reserved.
34 34 *
35 35 * This code is derived from software contributed to Berkeley by
36 36 * William Jolitz.
37 37 *
38 38 * Redistribution and use in source and binary forms, with or without
39 39 * modification, are permitted provided that the following conditions
40 40 * are met:
41 41 * 1. Redistributions of source code must retain the above copyright
42 42 * notice, this list of conditions and the following disclaimer.
43 43 * 2. Redistributions in binary form must reproduce the above copyright
44 44 * notice, this list of conditions and the following disclaimer in the
45 45 * documentation and/or other materials provided with the distribution.
46 46 * 3. All advertising materials mentioning features or use of this software
47 47 * must display the following acknowledgement:
48 48 * This product includes software developed by the University of
49 49 * California, Berkeley and its contributors.
50 50 * 4. Neither the name of the University nor the names of its contributors
51 51 * may be used to endorse or promote products derived from this software
52 52 * without specific prior written permission.
53 53 *
54 54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55 55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56 56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57 57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58 58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59 59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60 60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61 61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62 62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63 63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64 64 * SUCH DAMAGE.
65 65 *
66 66 * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91
67 67 */
68 68
69 69 #include <sys/types.h>
70 70 #include <sys/sysmacros.h>
71 71 #include <sys/tss.h>
72 72 #include <sys/segments.h>
73 73 #include <sys/trap.h>
74 74 #include <sys/cpuvar.h>
75 75 #include <sys/bootconf.h>
76 76 #include <sys/x86_archext.h>
77 77 #include <sys/controlregs.h>
78 78 #include <sys/archsystm.h>
79 79 #include <sys/machsystm.h>
80 80 #include <sys/kobj.h>
81 81 #include <sys/cmn_err.h>
82 82 #include <sys/reboot.h>
83 83 #include <sys/kdi.h>
84 84 #include <sys/mach_mmu.h>
85 85 #include <sys/systm.h>
86 86
87 87 #ifdef __xpv
88 88 #include <sys/hypervisor.h>
89 89 #include <vm/as.h>
90 90 #endif
91 91
92 92 #include <sys/promif.h>
93 93 #include <sys/bootinfo.h>
94 94 #include <vm/kboot_mmu.h>
95 95 #include <vm/hat_pte.h>
96 96
97 97 /*
98 98 * cpu0 and default tables and structures.
99 99 */
100 100 user_desc_t *gdt0;
101 101 #if !defined(__xpv)
102 102 desctbr_t gdt0_default_r;
103 103 #endif
104 104
105 105 gate_desc_t *idt0; /* interrupt descriptor table */
106 106 #if defined(__i386)
107 107 desctbr_t idt0_default_r; /* describes idt0 in IDTR format */
108 108 #endif
109 109
110 110 tss_t *ktss0; /* kernel task state structure */
111 111
112 112 #if defined(__i386)
113 113 tss_t *dftss0; /* #DF double-fault exception */
114 114 #endif /* __i386 */
115 115
116 116 user_desc_t zero_udesc; /* base zero user desc native procs */
117 117 user_desc_t null_udesc; /* null user descriptor */
118 118 system_desc_t null_sdesc; /* null system descriptor */
119 119
120 120 #if defined(__amd64)
121 121 user_desc_t zero_u32desc; /* 32-bit compatibility procs */
122 122 #endif /* __amd64 */
123 123
124 124 #if defined(__amd64)
125 125 user_desc_t ucs_on;
126 126 user_desc_t ucs_off;
127 127 user_desc_t ucs32_on;
128 128 user_desc_t ucs32_off;
129 129 #endif /* __amd64 */
130 130
131 131 #pragma align 16(dblfault_stack0)
132 132 char dblfault_stack0[DEFAULTSTKSZ];
133 133
134 134 extern void fast_null(void);
135 135 extern hrtime_t get_hrtime(void);
136 136 extern hrtime_t gethrvtime(void);
137 137 extern hrtime_t get_hrestime(void);
138 138 extern uint64_t getlgrp(void);
139 139
140 140 void (*(fasttable[]))(void) = {
141 141 fast_null, /* T_FNULL routine */
142 142 fast_null, /* T_FGETFP routine (initially null) */
143 143 fast_null, /* T_FSETFP routine (initially null) */
144 144 (void (*)())get_hrtime, /* T_GETHRTIME */
145 145 (void (*)())gethrvtime, /* T_GETHRVTIME */
146 146 (void (*)())get_hrestime, /* T_GETHRESTIME */
147 147 (void (*)())getlgrp /* T_GETLGRP */
148 148 };
149 149
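
The table above is the fast-trap dispatch table: a plain array of handler pointers indexed by the T_F* codes, with fast_null filling slots that have no real handler installed yet. A minimal userland sketch of the same table-driven dispatch, using illustrative codes and handlers rather than the kernel's:

    #include <stdio.h>

    /* Illustrative stand-ins for the T_F* fast-trap codes; not the kernel's values. */
    enum { FT_NULL, FT_GETFP, FT_SETFP, FT_GETHRTIME, FT_NFAST };

    static void fast_null(void) { }                        /* placeholder handler */
    static void do_gethrtime(void) { (void) puts("gethrtime"); }

    /* Every slot holds a valid handler, never NULL, so dispatch needs no checks. */
    static void (*demo_fasttable[FT_NFAST])(void) = {
        fast_null,          /* FT_NULL */
        fast_null,          /* FT_GETFP (initially null) */
        fast_null,          /* FT_SETFP (initially null) */
        do_gethrtime        /* FT_GETHRTIME */
    };

    int
    main(void)
    {
        int t = FT_GETHRTIME;       /* pretend this arrived as a fast-trap number */

        demo_fasttable[t]();        /* one indexed, indirect call */
        return (0);
    }
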
150 150 /*
151 151 * Structure containing pre-computed descriptors to allow us to temporarily
152 152 * interpose on a standard handler.
153 153 */
154 154 struct interposing_handler {
155 155 int ih_inum;
156 156 gate_desc_t ih_interp_desc;
157 157 gate_desc_t ih_default_desc;
158 158 };
159 159
160 160 /*
161 161 * The brand infrastructure interposes on two handlers, and we use one as a
162 162 * NULL signpost.
163 163 */
164 -static struct interposing_handler brand_tbl[2];
164 +static struct interposing_handler brand_tbl[3];
165 165
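
Code that consumes these tables walks them until it reaches an entry whose ih_inum is 0; that zero entry is the NULL signpost mentioned above (see brand_interpositioning_enable() and _disable() near the end of this file). A small, self-contained sketch of that sentinel-terminated walk, with a simplified stand-in for gate_desc_t and illustrative vector numbers:

    #include <stdio.h>

    /* Simplified stand-in for gate_desc_t; only the shape of the walk matters here. */
    typedef struct { unsigned long offset; } fake_desc_t;

    struct demo_handler {
        int ih_inum;                    /* IDT vector; 0 terminates the table */
        fake_desc_t ih_interp_desc;     /* interposing (branded) descriptor */
        fake_desc_t ih_default_desc;    /* saved copy of the default descriptor */
    };

    /* Two live entries plus the NULL signpost, mirroring brand_tbl[3]. */
    static struct demo_handler demo_tbl[3] = {
        { 0x80, { 0x1111 }, { 0xaaaa } },   /* illustrative "int80" vector */
        { 0x91, { 0x2222 }, { 0xbbbb } },   /* illustrative syscall vector */
        { 0 }                               /* signpost: stop here */
    };

    int
    main(void)
    {
        fake_desc_t idt[256] = { { 0 } };
        int i;

        /* Same loop shape as brand_interpositioning_enable(). */
        for (i = 0; demo_tbl[i].ih_inum; i++)
            idt[demo_tbl[i].ih_inum] = demo_tbl[i].ih_interp_desc;

        (void) printf("idt[0x80].offset = 0x%lx\n", idt[0x80].offset);
        return (0);
    }
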
166 166 /*
167 167 * software prototypes for default local descriptor table
168 168 */
169 169
170 170 /*
171 171 * Routines for loading segment descriptors in format the hardware
172 172 * can understand.
173 173 */
174 174
175 175 #if defined(__amd64)
176 176
177 177 /*
178 178 * In long mode we have the new L or long mode attribute bit
179 179 * for code segments. Only the conforming bit in type is used along
180 180 * with descriptor priority and present bits. Default operand size must
181 181 * be zero when in long mode. In 32-bit compatibility mode all fields
182 182 * are treated as in legacy mode. For data segments while in long mode
183 183 * only the present bit is loaded.
184 184 */
185 185 void
186 186 set_usegd(user_desc_t *dp, uint_t lmode, void *base, size_t size,
187 187 uint_t type, uint_t dpl, uint_t gran, uint_t defopsz)
188 188 {
189 189 ASSERT(lmode == SDP_SHORT || lmode == SDP_LONG);
190 190
191 191 /*
192 192 * 64-bit long mode.
193 193 */
194 194 if (lmode == SDP_LONG)
195 195 dp->usd_def32 = 0; /* 32-bit operands only */
196 196 else
197 197 /*
198 198 * 32-bit compatibility mode.
199 199 */
200 200 dp->usd_def32 = defopsz; /* 0 = 16, 1 = 32-bit ops */
201 201
202 202 dp->usd_long = lmode; /* 64-bit mode */
203 203 dp->usd_type = type;
204 204 dp->usd_dpl = dpl;
205 205 dp->usd_p = 1;
206 206 dp->usd_gran = gran; /* 0 = bytes, 1 = pages */
207 207
208 208 dp->usd_lobase = (uintptr_t)base;
209 209 dp->usd_midbase = (uintptr_t)base >> 16;
210 210 dp->usd_hibase = (uintptr_t)base >> (16 + 8);
211 211 dp->usd_lolimit = size;
212 212 dp->usd_hilimit = (uintptr_t)size >> 16;
213 213 }
214 214
215 215 #elif defined(__i386)
216 216
217 217 /*
218 218 * Install user segment descriptor for code and data.
219 219 */
220 220 void
221 221 set_usegd(user_desc_t *dp, void *base, size_t size, uint_t type,
222 222 uint_t dpl, uint_t gran, uint_t defopsz)
223 223 {
224 224 dp->usd_lolimit = size;
225 225 dp->usd_hilimit = (uintptr_t)size >> 16;
226 226
227 227 dp->usd_lobase = (uintptr_t)base;
228 228 dp->usd_midbase = (uintptr_t)base >> 16;
229 229 dp->usd_hibase = (uintptr_t)base >> (16 + 8);
230 230
231 231 dp->usd_type = type;
232 232 dp->usd_dpl = dpl;
233 233 dp->usd_p = 1;
234 234 dp->usd_def32 = defopsz; /* 0 = 16, 1 = 32 bit operands */
235 235 dp->usd_gran = gran; /* 0 = bytes, 1 = pages */
236 236 }
237 237
238 238 #endif /* __i386 */
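
The long-mode rules described above come down to two attribute bits: the L bit (usd_long) selects 64-bit code, and whenever it is set the D bit (usd_def32) must be 0; only in compatibility mode does usd_def32 carry the 16/32-bit default operand size. A standalone sketch of just that decision, using illustrative SDP_* values rather than the kernel's headers:

    #include <stdio.h>

    /* Illustrative values; the kernel's SDP_* constants live in its own headers. */
    #define SDP_SHORT   0
    #define SDP_LONG    1

    struct demo_attrs {
        unsigned usd_long;      /* L bit: 1 = 64-bit code segment */
        unsigned usd_def32;     /* D bit: default operand size */
    };

    static void
    pick_attrs(struct demo_attrs *dp, unsigned lmode, unsigned defopsz)
    {
        if (lmode == SDP_LONG)
            dp->usd_def32 = 0;          /* must be 0 whenever L = 1 */
        else
            dp->usd_def32 = defopsz;    /* 0 = 16-bit, 1 = 32-bit ops */
        dp->usd_long = lmode;
    }

    int
    main(void)
    {
        struct demo_attrs a;

        pick_attrs(&a, SDP_LONG, 1);    /* 64-bit code: L=1, D forced to 0 */
        (void) printf("long=%u def32=%u\n", a.usd_long, a.usd_def32);

        pick_attrs(&a, SDP_SHORT, 1);   /* 32-bit compatibility code: L=0, D=1 */
        (void) printf("long=%u def32=%u\n", a.usd_long, a.usd_def32);
        return (0);
    }
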
239 239
240 240 /*
241 241 * Install system segment descriptor for LDT and TSS segments.
242 242 */
243 243
244 244 #if defined(__amd64)
245 245
246 246 void
247 247 set_syssegd(system_desc_t *dp, void *base, size_t size, uint_t type,
248 248 uint_t dpl)
249 249 {
250 250 dp->ssd_lolimit = size;
251 251 dp->ssd_hilimit = (uintptr_t)size >> 16;
252 252
253 253 dp->ssd_lobase = (uintptr_t)base;
254 254 dp->ssd_midbase = (uintptr_t)base >> 16;
255 255 dp->ssd_hibase = (uintptr_t)base >> (16 + 8);
256 256 dp->ssd_hi64base = (uintptr_t)base >> (16 + 8 + 8);
257 257
258 258 dp->ssd_type = type;
259 259 dp->ssd_zero1 = 0; /* must be zero */
260 260 dp->ssd_zero2 = 0;
261 261 dp->ssd_dpl = dpl;
262 262 dp->ssd_p = 1;
263 263 dp->ssd_gran = 0; /* force byte units */
264 264 }
265 265
266 266 void *
267 267 get_ssd_base(system_desc_t *dp)
268 268 {
269 269 uintptr_t base;
270 270
271 271 base = (uintptr_t)dp->ssd_lobase |
272 272 (uintptr_t)dp->ssd_midbase << 16 |
273 273 (uintptr_t)dp->ssd_hibase << (16 + 8) |
274 274 (uintptr_t)dp->ssd_hi64base << (16 + 8 + 8);
275 275 return ((void *)base);
276 276 }
277 277
278 278 #elif defined(__i386)
279 279
280 280 void
281 281 set_syssegd(system_desc_t *dp, void *base, size_t size, uint_t type,
282 282 uint_t dpl)
283 283 {
284 284 dp->ssd_lolimit = size;
285 285 dp->ssd_hilimit = (uintptr_t)size >> 16;
286 286
287 287 dp->ssd_lobase = (uintptr_t)base;
288 288 dp->ssd_midbase = (uintptr_t)base >> 16;
289 289 dp->ssd_hibase = (uintptr_t)base >> (16 + 8);
290 290
291 291 dp->ssd_type = type;
292 292 dp->ssd_zero = 0; /* must be zero */
293 293 dp->ssd_dpl = dpl;
294 294 dp->ssd_p = 1;
295 295 dp->ssd_gran = 0; /* force byte units */
296 296 }
297 297
298 298 void *
299 299 get_ssd_base(system_desc_t *dp)
300 300 {
301 301 uintptr_t base;
302 302
303 303 base = (uintptr_t)dp->ssd_lobase |
304 304 (uintptr_t)dp->ssd_midbase << 16 |
305 305 (uintptr_t)dp->ssd_hibase << (16 + 8);
306 306 return ((void *)base);
307 307 }
308 308
309 309 #endif /* __i386 */
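
In both set_syssegd() variants the segment base is scattered across the lobase, midbase, hibase (and, on amd64, hi64base) fields with widths of 16, 8, 8 and 32 bits, and get_ssd_base() reverses exactly that split. A worked example of the shift arithmetic, assuming a 64-bit build so the full address fits in a uintptr_t:

    #include <stdio.h>
    #include <stdint.h>

    int
    main(void)
    {
        uintptr_t base = 0xfffffe01d2345678ul;  /* arbitrary example address */

        /* Split as the amd64 set_syssegd() does (16, 8, 8 and 32 bit fields). */
        uint16_t lobase = (uint16_t)base;
        uint8_t midbase = (uint8_t)(base >> 16);
        uint8_t hibase = (uint8_t)(base >> (16 + 8));
        uint32_t hi64base = (uint32_t)(base >> (16 + 8 + 8));

        /* Reassemble exactly as get_ssd_base() does. */
        uintptr_t out = (uintptr_t)lobase |
            (uintptr_t)midbase << 16 |
            (uintptr_t)hibase << (16 + 8) |
            (uintptr_t)hi64base << (16 + 8 + 8);

        (void) printf("round trip ok: %d\n", out == base);  /* prints 1 */
        return (0);
    }
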
310 310
311 311 /*
312 312 * Install gate segment descriptor for interrupt, trap, call and task gates.
313 313 */
314 314
315 315 #if defined(__amd64)
316 316
317 317 /*ARGSUSED*/
318 318 void
319 319 set_gatesegd(gate_desc_t *dp, void (*func)(void), selector_t sel,
320 320 uint_t type, uint_t dpl, uint_t vector)
321 321 {
322 322 dp->sgd_looffset = (uintptr_t)func;
323 323 dp->sgd_hioffset = (uintptr_t)func >> 16;
324 324 dp->sgd_hi64offset = (uintptr_t)func >> (16 + 16);
325 325
326 326 dp->sgd_selector = (uint16_t)sel;
327 327
328 328 /*
329 329 * For 64 bit native we use the IST stack mechanism
330 330 * for double faults. All other traps use the CPL = 0
331 331 * (tss_rsp0) stack.
332 332 */
333 333 #if !defined(__xpv)
334 334 if (vector == T_DBLFLT)
335 335 dp->sgd_ist = 1;
336 336 else
337 337 #endif
338 338 dp->sgd_ist = 0;
339 339
340 340 dp->sgd_type = type;
341 341 dp->sgd_dpl = dpl;
342 342 dp->sgd_p = 1;
343 343 }
344 344
345 345 #elif defined(__i386)
346 346
347 347 /*ARGSUSED*/
348 348 void
349 349 set_gatesegd(gate_desc_t *dp, void (*func)(void), selector_t sel,
350 350 uint_t type, uint_t dpl, uint_t unused)
351 351 {
352 352 dp->sgd_looffset = (uintptr_t)func;
353 353 dp->sgd_hioffset = (uintptr_t)func >> 16;
354 354
355 355 dp->sgd_selector = (uint16_t)sel;
356 356 dp->sgd_stkcpy = 0; /* always zero bytes */
357 357 dp->sgd_type = type;
358 358 dp->sgd_dpl = dpl;
359 359 dp->sgd_p = 1;
360 360 }
361 361
362 362 #endif /* __i386 */
363 363
364 364 /*
 365  365  * Updates a single user descriptor in the GDT of the current cpu.
366 366 * Caller is responsible for preventing cpu migration.
367 367 */
368 368
369 369 void
370 370 gdt_update_usegd(uint_t sidx, user_desc_t *udp)
371 371 {
372 372 #if defined(__xpv)
373 373
374 374 uint64_t dpa = CPU->cpu_m.mcpu_gdtpa + sizeof (*udp) * sidx;
375 375
376 376 if (HYPERVISOR_update_descriptor(pa_to_ma(dpa), *(uint64_t *)udp))
377 377 panic("gdt_update_usegd: HYPERVISOR_update_descriptor");
378 378
379 379 #else /* __xpv */
380 380
381 381 CPU->cpu_gdt[sidx] = *udp;
382 382
383 383 #endif /* __xpv */
384 384 }
385 385
386 386 /*
 387  387  * Writes single descriptor pointed to by udp into a process's
388 388 * LDT entry pointed to by ldp.
389 389 */
390 390 int
391 391 ldt_update_segd(user_desc_t *ldp, user_desc_t *udp)
392 392 {
393 393 #if defined(__xpv)
394 394
395 395 uint64_t dpa;
396 396
397 397 dpa = mmu_ptob(hat_getpfnum(kas.a_hat, (caddr_t)ldp)) |
398 398 ((uintptr_t)ldp & PAGEOFFSET);
399 399
400 400 /*
401 401 * The hypervisor is a little more restrictive about what it
402 402 * supports in the LDT.
403 403 */
404 404 if (HYPERVISOR_update_descriptor(pa_to_ma(dpa), *(uint64_t *)udp) != 0)
405 405 return (EINVAL);
406 406
407 407 #else /* __xpv */
408 408
409 409 *ldp = *udp;
410 410
411 411 #endif /* __xpv */
412 412 return (0);
413 413 }
414 414
415 415 #if defined(__xpv)
416 416
417 417 /*
418 418 * Converts hw format gate descriptor into pseudo-IDT format for the hypervisor.
419 419 * Returns true if a valid entry was written.
420 420 */
421 421 int
422 422 xen_idt_to_trap_info(uint_t vec, gate_desc_t *sgd, void *ti_arg)
423 423 {
424 424 trap_info_t *ti = ti_arg; /* XXPV Aargh - segments.h comment */
425 425
426 426 /*
427 427 * skip holes in the IDT
428 428 */
429 429 if (GATESEG_GETOFFSET(sgd) == 0)
430 430 return (0);
431 431
432 432 ASSERT(sgd->sgd_type == SDT_SYSIGT);
433 433 ti->vector = vec;
434 434 TI_SET_DPL(ti, sgd->sgd_dpl);
435 435
436 436 /*
437 437 * Is this an interrupt gate?
438 438 */
439 439 if (sgd->sgd_type == SDT_SYSIGT) {
440 440 /* LINTED */
441 441 TI_SET_IF(ti, 1);
442 442 }
443 443 ti->cs = sgd->sgd_selector;
444 444 #if defined(__amd64)
445 445 ti->cs |= SEL_KPL; /* force into ring 3. see KCS_SEL */
446 446 #endif
447 447 ti->address = GATESEG_GETOFFSET(sgd);
448 448 return (1);
449 449 }
450 450
451 451 /*
452 452 * Convert a single hw format gate descriptor and write it into our virtual IDT.
453 453 */
454 454 void
455 455 xen_idt_write(gate_desc_t *sgd, uint_t vec)
456 456 {
457 457 trap_info_t trapinfo[2];
458 458
459 459 bzero(trapinfo, sizeof (trapinfo));
460 460 if (xen_idt_to_trap_info(vec, sgd, &trapinfo[0]) == 0)
461 461 return;
462 462 if (xen_set_trap_table(trapinfo) != 0)
463 463 panic("xen_idt_write: xen_set_trap_table() failed");
464 464 }
465 465
466 466 #endif /* __xpv */
467 467
468 468 #if defined(__amd64)
469 469
470 470 /*
471 471 * Build kernel GDT.
472 472 */
473 473
474 474 static void
475 475 init_gdt_common(user_desc_t *gdt)
476 476 {
477 477 int i;
478 478
479 479 /*
480 480 * 64-bit kernel code segment.
481 481 */
482 482 set_usegd(&gdt[GDT_KCODE], SDP_LONG, NULL, 0, SDT_MEMERA, SEL_KPL,
483 483 SDP_PAGES, SDP_OP32);
484 484
485 485 /*
486 486 * 64-bit kernel data segment. The limit attribute is ignored in 64-bit
487 487 * mode, but we set it here to 0xFFFF so that we can use the SYSRET
488 488 * instruction to return from system calls back to 32-bit applications.
489 489 * SYSRET doesn't update the base, limit, or attributes of %ss or %ds
490 490 * descriptors. We therefore must ensure that the kernel uses something,
491 491 * though it will be ignored by hardware, that is compatible with 32-bit
492 492 * apps. For the same reason we must set the default op size of this
493 493 * descriptor to 32-bit operands.
494 494 */
495 495 set_usegd(&gdt[GDT_KDATA], SDP_LONG, NULL, -1, SDT_MEMRWA,
496 496 SEL_KPL, SDP_PAGES, SDP_OP32);
497 497 gdt[GDT_KDATA].usd_def32 = 1;
498 498
499 499 /*
500 500 * 64-bit user code segment.
501 501 */
502 502 set_usegd(&gdt[GDT_UCODE], SDP_LONG, NULL, 0, SDT_MEMERA, SEL_UPL,
503 503 SDP_PAGES, SDP_OP32);
504 504
505 505 /*
506 506 * 32-bit user code segment.
507 507 */
508 508 set_usegd(&gdt[GDT_U32CODE], SDP_SHORT, NULL, -1, SDT_MEMERA,
509 509 SEL_UPL, SDP_PAGES, SDP_OP32);
510 510
511 511 /*
512 512 * See gdt_ucode32() and gdt_ucode_native().
513 513 */
514 514 ucs_on = ucs_off = gdt[GDT_UCODE];
515 515 ucs_off.usd_p = 0; /* forces #np fault */
516 516
517 517 ucs32_on = ucs32_off = gdt[GDT_U32CODE];
518 518 ucs32_off.usd_p = 0; /* forces #np fault */
519 519
520 520 /*
521 521 * 32 and 64 bit data segments can actually share the same descriptor.
522 522 * In long mode only the present bit is checked but all other fields
523 523 * are loaded. But in compatibility mode all fields are interpreted
524 524 * as in legacy mode so they must be set correctly for a 32-bit data
525 525 * segment.
526 526 */
527 527 set_usegd(&gdt[GDT_UDATA], SDP_SHORT, NULL, -1, SDT_MEMRWA, SEL_UPL,
528 528 SDP_PAGES, SDP_OP32);
529 529
530 530 #if !defined(__xpv)
531 531
532 532 /*
533 533 * The 64-bit kernel has no default LDT. By default, the LDT descriptor
534 534 * in the GDT is 0.
535 535 */
536 536
537 537 /*
538 538 * Kernel TSS
539 539 */
540 540 set_syssegd((system_desc_t *)&gdt[GDT_KTSS], ktss0,
541 541 sizeof (*ktss0) - 1, SDT_SYSTSS, SEL_KPL);
542 542
543 543 #endif /* !__xpv */
544 544
545 545 /*
546 546 * Initialize fs and gs descriptors for 32 bit processes.
547 547 * Only attributes and limits are initialized, the effective
548 548 * base address is programmed via fsbase/gsbase.
549 549 */
550 550 set_usegd(&gdt[GDT_LWPFS], SDP_SHORT, NULL, -1, SDT_MEMRWA,
551 551 SEL_UPL, SDP_PAGES, SDP_OP32);
552 552 set_usegd(&gdt[GDT_LWPGS], SDP_SHORT, NULL, -1, SDT_MEMRWA,
553 553 SEL_UPL, SDP_PAGES, SDP_OP32);
554 554
555 555 /*
556 556 * Initialize the descriptors set aside for brand usage.
557 557 * Only attributes and limits are initialized.
558 558 */
559 559 for (i = GDT_BRANDMIN; i <= GDT_BRANDMAX; i++)
560 560 set_usegd(&gdt0[i], SDP_SHORT, NULL, -1, SDT_MEMRWA,
561 561 SEL_UPL, SDP_PAGES, SDP_OP32);
562 562
563 563 /*
564 564 * Initialize convenient zero base user descriptors for clearing
565 565 * lwp private %fs and %gs descriptors in GDT. See setregs() for
566 566 * an example.
567 567 */
568 568 set_usegd(&zero_udesc, SDP_LONG, 0, 0, SDT_MEMRWA, SEL_UPL,
569 569 SDP_BYTES, SDP_OP32);
570 570 set_usegd(&zero_u32desc, SDP_SHORT, 0, -1, SDT_MEMRWA, SEL_UPL,
571 571 SDP_PAGES, SDP_OP32);
572 572 }
573 573
574 574 #if defined(__xpv)
575 575
576 576 static user_desc_t *
577 577 init_gdt(void)
578 578 {
579 579 uint64_t gdtpa;
580 580 ulong_t ma[1]; /* XXPV should be a memory_t */
581 581 ulong_t addr;
582 582
583 583 #if !defined(__lint)
584 584 /*
585 585 * Our gdt is never larger than a single page.
586 586 */
587 587 ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE);
588 588 #endif
589 589 gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA,
590 590 PAGESIZE, PAGESIZE);
591 591 bzero(gdt0, PAGESIZE);
592 592
593 593 init_gdt_common(gdt0);
594 594
595 595 /*
596 596 * XXX Since we never invoke kmdb until after the kernel takes
597 597 * over the descriptor tables why not have it use the kernel's
598 598 * selectors?
599 599 */
600 600 if (boothowto & RB_DEBUG) {
601 601 set_usegd(&gdt0[GDT_B32DATA], SDP_LONG, NULL, -1, SDT_MEMRWA,
602 602 SEL_KPL, SDP_PAGES, SDP_OP32);
603 603 set_usegd(&gdt0[GDT_B64CODE], SDP_LONG, NULL, -1, SDT_MEMERA,
604 604 SEL_KPL, SDP_PAGES, SDP_OP32);
605 605 }
606 606
607 607 /*
608 608 * Clear write permission for page containing the gdt and install it.
609 609 */
610 610 gdtpa = pfn_to_pa(va_to_pfn(gdt0));
611 611 ma[0] = (ulong_t)(pa_to_ma(gdtpa) >> PAGESHIFT);
612 612 kbm_read_only((uintptr_t)gdt0, gdtpa);
613 613 xen_set_gdt(ma, NGDT);
614 614
615 615 /*
616 616 * Reload the segment registers to use the new GDT.
617 617 * On 64-bit, fixup KCS_SEL to be in ring 3.
618 618 * See KCS_SEL in segments.h.
619 619 */
620 620 load_segment_registers((KCS_SEL | SEL_KPL), KFS_SEL, KGS_SEL, KDS_SEL);
621 621
622 622 /*
623 623 * setup %gs for kernel
624 624 */
625 625 xen_set_segment_base(SEGBASE_GS_KERNEL, (ulong_t)&cpus[0]);
626 626
627 627 /*
628 628 * XX64 We should never dereference off "other gsbase" or
629 629 * "fsbase". So, we should arrange to point FSBASE and
630 630 * KGSBASE somewhere truly awful e.g. point it at the last
631 631 * valid address below the hole so that any attempts to index
632 632 * off them cause an exception.
633 633 *
634 634 * For now, point it at 8G -- at least it should be unmapped
635 635 * until some 64-bit processes run.
636 636 */
637 637 addr = 0x200000000ul;
638 638 xen_set_segment_base(SEGBASE_FS, addr);
639 639 xen_set_segment_base(SEGBASE_GS_USER, addr);
640 640 xen_set_segment_base(SEGBASE_GS_USER_SEL, 0);
641 641
642 642 return (gdt0);
643 643 }
644 644
645 645 #else /* __xpv */
646 646
647 647 static user_desc_t *
648 648 init_gdt(void)
649 649 {
650 650 desctbr_t r_bgdt, r_gdt;
651 651 user_desc_t *bgdt;
652 652
653 653 #if !defined(__lint)
654 654 /*
655 655 * Our gdt is never larger than a single page.
656 656 */
657 657 ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE);
658 658 #endif
659 659 gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA,
660 660 PAGESIZE, PAGESIZE);
661 661 bzero(gdt0, PAGESIZE);
662 662
663 663 init_gdt_common(gdt0);
664 664
665 665 /*
666 666 * Copy in from boot's gdt to our gdt.
667 667 * Entry 0 is the null descriptor by definition.
668 668 */
669 669 rd_gdtr(&r_bgdt);
670 670 bgdt = (user_desc_t *)r_bgdt.dtr_base;
671 671 if (bgdt == NULL)
672 672 panic("null boot gdt");
673 673
674 674 gdt0[GDT_B32DATA] = bgdt[GDT_B32DATA];
675 675 gdt0[GDT_B32CODE] = bgdt[GDT_B32CODE];
676 676 gdt0[GDT_B16CODE] = bgdt[GDT_B16CODE];
677 677 gdt0[GDT_B16DATA] = bgdt[GDT_B16DATA];
678 678 gdt0[GDT_B64CODE] = bgdt[GDT_B64CODE];
679 679
680 680 /*
681 681 * Install our new GDT
682 682 */
683 683 r_gdt.dtr_limit = (sizeof (*gdt0) * NGDT) - 1;
684 684 r_gdt.dtr_base = (uintptr_t)gdt0;
685 685 wr_gdtr(&r_gdt);
686 686
687 687 /*
688 688 * Reload the segment registers to use the new GDT
689 689 */
690 690 load_segment_registers(KCS_SEL, KFS_SEL, KGS_SEL, KDS_SEL);
691 691
692 692 /*
693 693 * setup %gs for kernel
694 694 */
695 695 wrmsr(MSR_AMD_GSBASE, (uint64_t)&cpus[0]);
696 696
697 697 /*
698 698 * XX64 We should never dereference off "other gsbase" or
699 699 * "fsbase". So, we should arrange to point FSBASE and
700 700 * KGSBASE somewhere truly awful e.g. point it at the last
701 701 * valid address below the hole so that any attempts to index
702 702 * off them cause an exception.
703 703 *
704 704 * For now, point it at 8G -- at least it should be unmapped
705 705 * until some 64-bit processes run.
706 706 */
707 707 wrmsr(MSR_AMD_FSBASE, 0x200000000ul);
708 708 wrmsr(MSR_AMD_KGSBASE, 0x200000000ul);
709 709 return (gdt0);
710 710 }
711 711
712 712 #endif /* __xpv */
713 713
714 714 #elif defined(__i386)
715 715
716 716 static void
717 717 init_gdt_common(user_desc_t *gdt)
718 718 {
719 719 int i;
720 720
721 721 /*
722 722 * Text and data for both kernel and user span entire 32 bit
723 723 * address space.
724 724 */
725 725
726 726 /*
727 727 * kernel code segment.
728 728 */
729 729 set_usegd(&gdt[GDT_KCODE], NULL, -1, SDT_MEMERA, SEL_KPL, SDP_PAGES,
730 730 SDP_OP32);
731 731
732 732 /*
733 733 * kernel data segment.
734 734 */
735 735 set_usegd(&gdt[GDT_KDATA], NULL, -1, SDT_MEMRWA, SEL_KPL, SDP_PAGES,
736 736 SDP_OP32);
737 737
738 738 /*
739 739 * user code segment.
740 740 */
741 741 set_usegd(&gdt[GDT_UCODE], NULL, -1, SDT_MEMERA, SEL_UPL, SDP_PAGES,
742 742 SDP_OP32);
743 743
744 744 /*
745 745 * user data segment.
746 746 */
747 747 set_usegd(&gdt[GDT_UDATA], NULL, -1, SDT_MEMRWA, SEL_UPL, SDP_PAGES,
748 748 SDP_OP32);
749 749
750 750 #if !defined(__xpv)
751 751
752 752 /*
753 753 * TSS for T_DBLFLT (double fault) handler
754 754 */
755 755 set_syssegd((system_desc_t *)&gdt[GDT_DBFLT], dftss0,
756 756 sizeof (*dftss0) - 1, SDT_SYSTSS, SEL_KPL);
757 757
758 758 /*
759 759 * TSS for kernel
760 760 */
761 761 set_syssegd((system_desc_t *)&gdt[GDT_KTSS], ktss0,
762 762 sizeof (*ktss0) - 1, SDT_SYSTSS, SEL_KPL);
763 763
764 764 #endif /* !__xpv */
765 765
766 766 /*
767 767 * %gs selector for kernel
768 768 */
769 769 set_usegd(&gdt[GDT_GS], &cpus[0], sizeof (struct cpu) -1, SDT_MEMRWA,
770 770 SEL_KPL, SDP_BYTES, SDP_OP32);
771 771
772 772 /*
773 773 * Initialize lwp private descriptors.
774 774 * Only attributes and limits are initialized, the effective
775 775 * base address is programmed via fsbase/gsbase.
776 776 */
777 777 set_usegd(&gdt[GDT_LWPFS], NULL, (size_t)-1, SDT_MEMRWA, SEL_UPL,
778 778 SDP_PAGES, SDP_OP32);
779 779 set_usegd(&gdt[GDT_LWPGS], NULL, (size_t)-1, SDT_MEMRWA, SEL_UPL,
780 780 SDP_PAGES, SDP_OP32);
781 781
782 782 /*
783 783 * Initialize the descriptors set aside for brand usage.
784 784 * Only attributes and limits are initialized.
785 785 */
786 786 for (i = GDT_BRANDMIN; i <= GDT_BRANDMAX; i++)
787 787 set_usegd(&gdt0[i], NULL, (size_t)-1, SDT_MEMRWA, SEL_UPL,
788 788 SDP_PAGES, SDP_OP32);
789 789 /*
790 790 * Initialize convenient zero base user descriptor for clearing
791 791 * lwp private %fs and %gs descriptors in GDT. See setregs() for
792 792 * an example.
793 793 */
794 794 set_usegd(&zero_udesc, NULL, -1, SDT_MEMRWA, SEL_UPL,
795 795 SDP_BYTES, SDP_OP32);
796 796 }
797 797
798 798 #if defined(__xpv)
799 799
800 800 static user_desc_t *
801 801 init_gdt(void)
802 802 {
803 803 uint64_t gdtpa;
804 804 ulong_t ma[1]; /* XXPV should be a memory_t */
805 805
806 806 #if !defined(__lint)
807 807 /*
808 808 * Our gdt is never larger than a single page.
809 809 */
810 810 ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE);
811 811 #endif
812 812 gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA,
813 813 PAGESIZE, PAGESIZE);
814 814 bzero(gdt0, PAGESIZE);
815 815
816 816 init_gdt_common(gdt0);
817 817 gdtpa = pfn_to_pa(va_to_pfn(gdt0));
818 818
819 819 /*
820 820 * XXX Since we never invoke kmdb until after the kernel takes
821 821 * over the descriptor tables why not have it use the kernel's
822 822 * selectors?
823 823 */
824 824 if (boothowto & RB_DEBUG) {
825 825 set_usegd(&gdt0[GDT_B32DATA], NULL, -1, SDT_MEMRWA, SEL_KPL,
826 826 SDP_PAGES, SDP_OP32);
827 827 set_usegd(&gdt0[GDT_B32CODE], NULL, -1, SDT_MEMERA, SEL_KPL,
828 828 SDP_PAGES, SDP_OP32);
829 829 }
830 830
831 831 /*
832 832 * Clear write permission for page containing the gdt and install it.
833 833 */
834 834 ma[0] = (ulong_t)(pa_to_ma(gdtpa) >> PAGESHIFT);
835 835 kbm_read_only((uintptr_t)gdt0, gdtpa);
836 836 xen_set_gdt(ma, NGDT);
837 837
838 838 /*
839 839 * Reload the segment registers to use the new GDT
840 840 */
841 841 load_segment_registers(
842 842 KCS_SEL, KDS_SEL, KDS_SEL, KFS_SEL, KGS_SEL, KDS_SEL);
843 843
844 844 return (gdt0);
845 845 }
846 846
847 847 #else /* __xpv */
848 848
849 849 static user_desc_t *
850 850 init_gdt(void)
851 851 {
852 852 desctbr_t r_bgdt, r_gdt;
853 853 user_desc_t *bgdt;
854 854
855 855 #if !defined(__lint)
856 856 /*
857 857 * Our gdt is never larger than a single page.
858 858 */
859 859 ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE);
860 860 #endif
861 861 /*
862 862 * XXX this allocation belongs in our caller, not here.
863 863 */
864 864 gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA,
865 865 PAGESIZE, PAGESIZE);
866 866 bzero(gdt0, PAGESIZE);
867 867
868 868 init_gdt_common(gdt0);
869 869
870 870 /*
871 871 * Copy in from boot's gdt to our gdt entries.
872 872 * Entry 0 is null descriptor by definition.
873 873 */
874 874 rd_gdtr(&r_bgdt);
875 875 bgdt = (user_desc_t *)r_bgdt.dtr_base;
876 876 if (bgdt == NULL)
877 877 panic("null boot gdt");
878 878
879 879 gdt0[GDT_B32DATA] = bgdt[GDT_B32DATA];
880 880 gdt0[GDT_B32CODE] = bgdt[GDT_B32CODE];
881 881 gdt0[GDT_B16CODE] = bgdt[GDT_B16CODE];
882 882 gdt0[GDT_B16DATA] = bgdt[GDT_B16DATA];
883 883
884 884 /*
885 885 * Install our new GDT
886 886 */
887 887 r_gdt.dtr_limit = (sizeof (*gdt0) * NGDT) - 1;
888 888 r_gdt.dtr_base = (uintptr_t)gdt0;
889 889 wr_gdtr(&r_gdt);
890 890
891 891 /*
892 892 * Reload the segment registers to use the new GDT
893 893 */
894 894 load_segment_registers(
895 895 KCS_SEL, KDS_SEL, KDS_SEL, KFS_SEL, KGS_SEL, KDS_SEL);
896 896
897 897 return (gdt0);
898 898 }
899 899
900 900 #endif /* __xpv */
901 901 #endif /* __i386 */
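
init_gdt() ends by reloading the segment registers with selectors such as KCS_SEL, and the hypervisor variant ORs SEL_KPL onto KCS_SEL to force ring 3. That works because of the standard x86 selector layout: bits 15..3 are the descriptor index, bit 2 is the table indicator, and the low two bits are the requested privilege level, so OR-ing a privilege level onto a selector only changes the RPL. A small sketch, with a hypothetical GDT index:

    #include <stdio.h>

    /*
     * Standard x86 selector layout: bits 15..3 descriptor index, bit 2 table
     * indicator (0 = GDT, 1 = LDT), bits 1..0 requested privilege level (RPL).
     */
    static unsigned
    mksel(unsigned index, unsigned ti, unsigned rpl)
    {
        return ((index << 3) | (ti << 2) | (rpl & 3));
    }

    int
    main(void)
    {
        unsigned kcs = mksel(4, 0, 0);  /* hypothetical kernel code slot */
        unsigned kcs_ring3 = kcs | 3;   /* OR-ing on a PL only changes the RPL */

        (void) printf("0x%x: index %u rpl %u\n", kcs, kcs >> 3, kcs & 3);
        (void) printf("0x%x: index %u rpl %u\n",
            kcs_ring3, kcs_ring3 >> 3, kcs_ring3 & 3);
        return (0);
    }
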
902 902
903 903 /*
904 904 * Build kernel IDT.
905 905 *
906 906 * Note that for amd64 we pretty much require every gate to be an interrupt
907 907 * gate which blocks interrupts atomically on entry; that's because of our
908 908 * dependency on using 'swapgs' every time we come into the kernel to find
909 909 * the cpu structure. If we get interrupted just before doing that, %cs could
910 910 * be in kernel mode (so that the trap prolog doesn't do a swapgs), but
911 911 * %gsbase is really still pointing at something in userland. Bad things will
912 912 * ensue. We also use interrupt gates for i386 as well even though this is not
913 913 * required for some traps.
914 914 *
915 915 * Perhaps they should have invented a trap gate that does an atomic swapgs?
916 916 */
917 917 static void
918 918 init_idt_common(gate_desc_t *idt)
919 919 {
920 920 set_gatesegd(&idt[T_ZERODIV], &div0trap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
921 921 0);
922 922 set_gatesegd(&idt[T_SGLSTP], &dbgtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
923 923 0);
924 924 set_gatesegd(&idt[T_NMIFLT], &nmiint, KCS_SEL, SDT_SYSIGT, TRP_KPL,
925 925 0);
926 926 set_gatesegd(&idt[T_BPTFLT], &brktrap, KCS_SEL, SDT_SYSIGT, TRP_UPL,
927 927 0);
928 928 set_gatesegd(&idt[T_OVFLW], &ovflotrap, KCS_SEL, SDT_SYSIGT, TRP_UPL,
929 929 0);
930 930 set_gatesegd(&idt[T_BOUNDFLT], &boundstrap, KCS_SEL, SDT_SYSIGT,
931 931 TRP_KPL, 0);
932 932 set_gatesegd(&idt[T_ILLINST], &invoptrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
933 933 0);
934 934 set_gatesegd(&idt[T_NOEXTFLT], &ndptrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
935 935 0);
936 936
937 937 /*
938 938 * double fault handler.
939 939 *
940 940 * Note that on the hypervisor a guest does not receive #df faults.
941 941 * Instead a failsafe event is injected into the guest if its selectors
942 942 * and/or stack is in a broken state. See xen_failsafe_callback.
943 943 */
944 944 #if !defined(__xpv)
945 945 #if defined(__amd64)
946 946
947 947 set_gatesegd(&idt[T_DBLFLT], &syserrtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
948 948 T_DBLFLT);
949 949
950 950 #elif defined(__i386)
951 951
952 952 /*
953 953 * task gate required.
954 954 */
955 955 set_gatesegd(&idt[T_DBLFLT], NULL, DFTSS_SEL, SDT_SYSTASKGT, TRP_KPL,
956 956 0);
957 957
958 958 #endif /* __i386 */
959 959 #endif /* !__xpv */
960 960
961 961 /*
962 962 * T_EXTOVRFLT coprocessor-segment-overrun not supported.
963 963 */
964 964
965 965 set_gatesegd(&idt[T_TSSFLT], &invtsstrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
966 966 0);
967 967 set_gatesegd(&idt[T_SEGFLT], &segnptrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
968 968 0);
969 969 set_gatesegd(&idt[T_STKFLT], &stktrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
970 970 set_gatesegd(&idt[T_GPFLT], &gptrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
971 971 set_gatesegd(&idt[T_PGFLT], &pftrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
972 972 set_gatesegd(&idt[T_EXTERRFLT], &ndperr, KCS_SEL, SDT_SYSIGT, TRP_KPL,
973 973 0);
974 974 set_gatesegd(&idt[T_ALIGNMENT], &achktrap, KCS_SEL, SDT_SYSIGT,
975 975 TRP_KPL, 0);
976 976 set_gatesegd(&idt[T_MCE], &mcetrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
977 977 set_gatesegd(&idt[T_SIMDFPE], &xmtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
978 978
979 + /*
980 + * install "int80" handler at, well, 0x80.
981 + */
982 + set_gatesegd(&idt0[T_INT80], &sys_int80, KCS_SEL, SDT_SYSIGT, TRP_UPL,
983 + 0);
984 +
979 985 /*
980 986 * install fast trap handler at 210.
981 987 */
982 988 set_gatesegd(&idt[T_FASTTRAP], &fasttrap, KCS_SEL, SDT_SYSIGT, TRP_UPL,
983 989 0);
984 990
985 991 /*
986 992 * System call handler.
987 993 */
988 994 #if defined(__amd64)
989 995 set_gatesegd(&idt[T_SYSCALLINT], &sys_syscall_int, KCS_SEL, SDT_SYSIGT,
990 996 TRP_UPL, 0);
991 997
992 998 #elif defined(__i386)
993 999 set_gatesegd(&idt[T_SYSCALLINT], &sys_call, KCS_SEL, SDT_SYSIGT,
994 1000 TRP_UPL, 0);
995 1001 #endif /* __i386 */
996 1002
997 1003 /*
998 1004 * Install the DTrace interrupt handler for the pid provider.
999 1005 */
1000 1006 set_gatesegd(&idt[T_DTRACE_RET], &dtrace_ret, KCS_SEL,
1001 1007 SDT_SYSIGT, TRP_UPL, 0);
1002 1008
1003 1009 /*
1004 - * Prepare interposing descriptor for the syscall handler
1005 - * and cache copy of the default descriptor.
1006 - */
1007 - brand_tbl[0].ih_inum = T_SYSCALLINT;
1008 - brand_tbl[0].ih_default_desc = idt0[T_SYSCALLINT];
1010 + * Prepare interposing descriptors for the branded "int80"
1011 + * and syscall handlers and cache copies of the default
1012 + * descriptors.
1013 + */
1014 + brand_tbl[0].ih_inum = T_INT80;
1015 + brand_tbl[0].ih_default_desc = idt0[T_INT80];
1016 + set_gatesegd(&(brand_tbl[0].ih_interp_desc), &brand_sys_int80, KCS_SEL,
1017 + SDT_SYSIGT, TRP_UPL, 0);
1018 +
1019 + brand_tbl[1].ih_inum = T_SYSCALLINT;
1020 + brand_tbl[1].ih_default_desc = idt0[T_SYSCALLINT];
1009 1021
1010 1022 #if defined(__amd64)
1011 - set_gatesegd(&(brand_tbl[0].ih_interp_desc), &brand_sys_syscall_int,
1023 + set_gatesegd(&(brand_tbl[1].ih_interp_desc), &brand_sys_syscall_int,
1012 1024 KCS_SEL, SDT_SYSIGT, TRP_UPL, 0);
1013 1025 #elif defined(__i386)
1014 - set_gatesegd(&(brand_tbl[0].ih_interp_desc), &brand_sys_call,
1026 + set_gatesegd(&(brand_tbl[1].ih_interp_desc), &brand_sys_call,
1015 1027 KCS_SEL, SDT_SYSIGT, TRP_UPL, 0);
1016 1028 #endif /* __i386 */
1017 1029
1018 - brand_tbl[1].ih_inum = 0;
1030 + brand_tbl[2].ih_inum = 0;
1019 1031 }
1020 1032
1021 1033 #if defined(__xpv)
1022 1034
1023 1035 static void
1024 1036 init_idt(gate_desc_t *idt)
1025 1037 {
1026 1038 init_idt_common(idt);
1027 1039 }
1028 1040
1029 1041 #else /* __xpv */
1030 1042
1031 1043 static void
1032 1044 init_idt(gate_desc_t *idt)
1033 1045 {
1034 1046 char ivctname[80];
1035 1047 void (*ivctptr)(void);
1036 1048 int i;
1037 1049
1038 1050 /*
1039 1051 * Initialize entire table with 'reserved' trap and then overwrite
1040 1052 * specific entries. T_EXTOVRFLT (9) is unsupported and reserved
1041 1053 * since it can only be generated on a 386 processor. 15 is also
1042 1054 * unsupported and reserved.
1043 1055 */
1044 1056 for (i = 0; i < NIDT; i++)
1045 1057 set_gatesegd(&idt[i], &resvtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
1046 1058 0);
1047 1059
1048 1060 /*
1049 1061 * 20-31 reserved
1050 1062 */
1051 1063 for (i = 20; i < 32; i++)
1052 1064 set_gatesegd(&idt[i], &invaltrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
1053 1065 0);
1054 1066
1055 1067 /*
1056 1068 * interrupts 32 - 255
1057 1069 */
1058 1070 for (i = 32; i < 256; i++) {
1059 1071 (void) snprintf(ivctname, sizeof (ivctname), "ivct%d", i);
1060 1072 ivctptr = (void (*)(void))kobj_getsymvalue(ivctname, 0);
1061 1073 if (ivctptr == NULL)
1062 1074 panic("kobj_getsymvalue(%s) failed", ivctname);
1063 1075
1064 1076 set_gatesegd(&idt[i], ivctptr, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
1065 1077 }
1066 1078
1067 1079 /*
1068 1080 * Now install the common ones. Note that it will overlay some
1069 1081 * entries installed above like T_SYSCALLINT, T_FASTTRAP etc.
1070 1082 */
1071 1083 init_idt_common(idt);
1072 1084 }
1073 1085
1074 1086 #endif /* __xpv */
1075 1087
1076 1088 /*
1077 1089 * The kernel does not deal with LDTs unless a user explicitly creates
1078 1090 * one. Under normal circumstances, the LDTR contains 0. Any process attempting
1079 1091 * to reference the LDT will therefore cause a #gp. System calls made via the
1080 1092 * obsolete lcall mechanism are emulated by the #gp fault handler.
1081 1093 */
1082 1094 static void
1083 1095 init_ldt(void)
1084 1096 {
1085 1097 #if defined(__xpv)
1086 1098 xen_set_ldt(NULL, 0);
1087 1099 #else
1088 1100 wr_ldtr(0);
1089 1101 #endif
1090 1102 }
1091 1103
1092 1104 #if !defined(__xpv)
1093 1105 #if defined(__amd64)
1094 1106
1095 1107 static void
1096 1108 init_tss(void)
1097 1109 {
1098 1110 /*
1099 1111 * tss_rsp0 is dynamically filled in by resume() on each context switch.
1100 1112 * All exceptions but #DF will run on the thread stack.
1101 1113 * Set up the double fault stack here.
1102 1114 */
1103 1115 ktss0->tss_ist1 =
1104 1116 (uint64_t)&dblfault_stack0[sizeof (dblfault_stack0)];
1105 1117
1106 1118 /*
1107 1119 * Set I/O bit map offset equal to size of TSS segment limit
1108 1120 * for no I/O permission map. This will force all user I/O
1109 1121 * instructions to generate #gp fault.
1110 1122 */
1111 1123 ktss0->tss_bitmapbase = sizeof (*ktss0);
1112 1124
1113 1125 /*
1114 1126 * Point %tr to descriptor for ktss0 in gdt.
1115 1127 */
1116 1128 wr_tsr(KTSS_SEL);
1117 1129 }
1118 1130
1119 1131 #elif defined(__i386)
1120 1132
1121 1133 static void
1122 1134 init_tss(void)
1123 1135 {
1124 1136 /*
1125 1137 * ktss0->tss_esp dynamically filled in by resume() on each
1126 1138 * context switch.
1127 1139 */
1128 1140 ktss0->tss_ss0 = KDS_SEL;
1129 1141 ktss0->tss_eip = (uint32_t)_start;
1130 1142 ktss0->tss_ds = ktss0->tss_es = ktss0->tss_ss = KDS_SEL;
1131 1143 ktss0->tss_cs = KCS_SEL;
1132 1144 ktss0->tss_fs = KFS_SEL;
1133 1145 ktss0->tss_gs = KGS_SEL;
1134 1146 ktss0->tss_ldt = ULDT_SEL;
1135 1147
1136 1148 /*
1137 1149 * Initialize double fault tss.
1138 1150 */
1139 1151 dftss0->tss_esp0 = (uint32_t)&dblfault_stack0[sizeof (dblfault_stack0)];
1140 1152 dftss0->tss_ss0 = KDS_SEL;
1141 1153
1142 1154 /*
1143 1155 * tss_cr3 will get initialized in hat_kern_setup() once our page
1144 1156 * tables have been setup.
1145 1157 */
1146 1158 dftss0->tss_eip = (uint32_t)syserrtrap;
1147 1159 dftss0->tss_esp = (uint32_t)&dblfault_stack0[sizeof (dblfault_stack0)];
1148 1160 dftss0->tss_cs = KCS_SEL;
1149 1161 dftss0->tss_ds = KDS_SEL;
1150 1162 dftss0->tss_es = KDS_SEL;
1151 1163 dftss0->tss_ss = KDS_SEL;
1152 1164 dftss0->tss_fs = KFS_SEL;
1153 1165 dftss0->tss_gs = KGS_SEL;
1154 1166
1155 1167 /*
1156 1168 * Set I/O bit map offset equal to size of TSS segment limit
1157 1169 * for no I/O permission map. This will force all user I/O
1158 1170 * instructions to generate #gp fault.
1159 1171 */
1160 1172 ktss0->tss_bitmapbase = sizeof (*ktss0);
1161 1173
1162 1174 /*
1163 1175 * Point %tr to descriptor for ktss0 in gdt.
1164 1176 */
1165 1177 wr_tsr(KTSS_SEL);
1166 1178 }
1167 1179
1168 1180 #endif /* __i386 */
1169 1181 #endif /* !__xpv */
1170 1182
1171 1183 #if defined(__xpv)
1172 1184
1173 1185 void
1174 1186 init_desctbls(void)
1175 1187 {
1176 1188 uint_t vec;
1177 1189 user_desc_t *gdt;
1178 1190
1179 1191 /*
1180 1192 * Setup and install our GDT.
1181 1193 */
1182 1194 gdt = init_gdt();
1183 1195
1184 1196 /*
1185 1197 * Store static pa of gdt to speed up pa_to_ma() translations
1186 1198 * on lwp context switches.
1187 1199 */
1188 1200 ASSERT(IS_P2ALIGNED((uintptr_t)gdt, PAGESIZE));
1189 1201 CPU->cpu_gdt = gdt;
1190 1202 CPU->cpu_m.mcpu_gdtpa = pfn_to_pa(va_to_pfn(gdt));
1191 1203
1192 1204 /*
1193 1205 * Setup and install our IDT.
1194 1206 */
1195 1207 #if !defined(__lint)
1196 1208 ASSERT(NIDT * sizeof (*idt0) <= PAGESIZE);
1197 1209 #endif
1198 1210 idt0 = (gate_desc_t *)BOP_ALLOC(bootops, (caddr_t)IDT_VA,
1199 1211 PAGESIZE, PAGESIZE);
1200 1212 bzero(idt0, PAGESIZE);
1201 1213 init_idt(idt0);
1202 1214 for (vec = 0; vec < NIDT; vec++)
1203 1215 xen_idt_write(&idt0[vec], vec);
1204 1216
1205 1217 CPU->cpu_idt = idt0;
1206 1218
1207 1219 /*
1208 1220 * set default kernel stack
1209 1221 */
1210 1222 xen_stack_switch(KDS_SEL,
1211 1223 (ulong_t)&dblfault_stack0[sizeof (dblfault_stack0)]);
1212 1224
1213 1225 xen_init_callbacks();
1214 1226
1215 1227 init_ldt();
1216 1228 }
1217 1229
1218 1230 #else /* __xpv */
1219 1231
1220 1232 void
1221 1233 init_desctbls(void)
1222 1234 {
1223 1235 user_desc_t *gdt;
1224 1236 desctbr_t idtr;
1225 1237
1226 1238 /*
1227 1239 * Allocate IDT and TSS structures on unique pages for better
1228 1240 * performance in virtual machines.
1229 1241 */
1230 1242 #if !defined(__lint)
1231 1243 ASSERT(NIDT * sizeof (*idt0) <= PAGESIZE);
1232 1244 #endif
1233 1245 idt0 = (gate_desc_t *)BOP_ALLOC(bootops, (caddr_t)IDT_VA,
1234 1246 PAGESIZE, PAGESIZE);
1235 1247 bzero(idt0, PAGESIZE);
1236 1248 #if !defined(__lint)
1237 1249 ASSERT(sizeof (*ktss0) <= PAGESIZE);
1238 1250 #endif
1239 1251 ktss0 = (tss_t *)BOP_ALLOC(bootops, (caddr_t)KTSS_VA,
1240 1252 PAGESIZE, PAGESIZE);
1241 1253 bzero(ktss0, PAGESIZE);
1242 1254
1243 1255 #if defined(__i386)
1244 1256 #if !defined(__lint)
1245 1257 ASSERT(sizeof (*dftss0) <= PAGESIZE);
1246 1258 #endif
1247 1259 dftss0 = (tss_t *)BOP_ALLOC(bootops, (caddr_t)DFTSS_VA,
1248 1260 PAGESIZE, PAGESIZE);
1249 1261 bzero(dftss0, PAGESIZE);
1250 1262 #endif
1251 1263
1252 1264 /*
1253 1265 * Setup and install our GDT.
1254 1266 */
1255 1267 gdt = init_gdt();
1256 1268 ASSERT(IS_P2ALIGNED((uintptr_t)gdt, PAGESIZE));
1257 1269 CPU->cpu_gdt = gdt;
1258 1270
1259 1271 /*
1260 1272 * Setup and install our IDT.
1261 1273 */
1262 1274 init_idt(idt0);
1263 1275
1264 1276 idtr.dtr_base = (uintptr_t)idt0;
1265 1277 idtr.dtr_limit = (NIDT * sizeof (*idt0)) - 1;
1266 1278 wr_idtr(&idtr);
1267 1279 CPU->cpu_idt = idt0;
1268 1280
1269 1281 #if defined(__i386)
1270 1282 /*
1271 1283 * We maintain a description of idt0 in convenient IDTR format
1272 1284 * for #pf's on some older pentium processors. See pentium_pftrap().
1273 1285 */
1274 1286 idt0_default_r = idtr;
1275 1287 #endif /* __i386 */
1276 1288
1277 1289 init_tss();
1278 1290 CPU->cpu_tss = ktss0;
1279 1291 init_ldt();
1280 1292 }
1281 1293
1282 1294 #endif /* __xpv */
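
Both init_gdt() and init_desctbls() load a descriptor-table register with a limit of the table size in bytes minus one, e.g. idtr.dtr_limit = (NIDT * sizeof (*idt0)) - 1. A quick standalone check of that arithmetic, assuming 256 IDT vectors and 16-byte gate descriptors as on a 64-bit build:

    #include <stdio.h>
    #include <stddef.h>

    int
    main(void)
    {
        /*
         * Assumed sizes: 256 IDT vectors and 16-byte gate descriptors, as on
         * a 64-bit build; the loaded limit is the table size in bytes minus 1.
         */
        size_t nidt = 256;
        size_t gate_size = 16;
        size_t dtr_limit = nidt * gate_size - 1;

        (void) printf("IDTR limit = %zu (0x%zx)\n", dtr_limit, dtr_limit);  /* 4095 */
        return (0);
    }
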
1283 1295
1284 1296 /*
1285 1297 * In the early kernel, we need to set up a simple GDT to run on.
1286 1298 *
1287 1299 * XXPV Can dboot use this too? See dboot_gdt.s
1288 1300 */
1289 1301 void
1290 1302 init_boot_gdt(user_desc_t *bgdt)
1291 1303 {
1292 1304 #if defined(__amd64)
1293 1305 set_usegd(&bgdt[GDT_B32DATA], SDP_LONG, NULL, -1, SDT_MEMRWA, SEL_KPL,
1294 1306 SDP_PAGES, SDP_OP32);
1295 1307 set_usegd(&bgdt[GDT_B64CODE], SDP_LONG, NULL, -1, SDT_MEMERA, SEL_KPL,
1296 1308 SDP_PAGES, SDP_OP32);
1297 1309 #elif defined(__i386)
1298 1310 set_usegd(&bgdt[GDT_B32DATA], NULL, -1, SDT_MEMRWA, SEL_KPL,
1299 1311 SDP_PAGES, SDP_OP32);
1300 1312 set_usegd(&bgdt[GDT_B32CODE], NULL, -1, SDT_MEMERA, SEL_KPL,
1301 1313 SDP_PAGES, SDP_OP32);
1302 1314 #endif /* __i386 */
1303 1315 }
1304 1316
1305 1317 /*
1306 1318 * Enable interpositioning on the system call path by rewriting the
1307 1319 * sys{call|enter} MSRs and the syscall-related entries in the IDT to use
1308 1320 * the branded entry points.
1309 1321 */
1310 1322 void
1311 1323 brand_interpositioning_enable(void)
1312 1324 {
1313 1325 gate_desc_t *idt = CPU->cpu_idt;
1314 1326 int i;
1315 1327
1316 1328 ASSERT(curthread->t_preempt != 0 || getpil() >= DISP_LEVEL);
1317 1329
1318 1330 for (i = 0; brand_tbl[i].ih_inum; i++) {
1319 1331 idt[brand_tbl[i].ih_inum] = brand_tbl[i].ih_interp_desc;
1320 1332 #if defined(__xpv)
1321 1333 xen_idt_write(&idt[brand_tbl[i].ih_inum],
1322 1334 brand_tbl[i].ih_inum);
1323 1335 #endif
1324 1336 }
1325 1337
1326 1338 #if defined(__amd64)
1327 1339 #if defined(__xpv)
1328 1340
1329 1341 /*
1330 1342 * Currently the hypervisor only supports 64-bit syscalls via
1331 1343 * syscall instruction. The 32-bit syscalls are handled by
1332 1344 * interrupt gate above.
1333 1345 */
1334 1346 xen_set_callback(brand_sys_syscall, CALLBACKTYPE_syscall,
1335 1347 CALLBACKF_mask_events);
1336 1348
1337 1349 #else
1338 1350
1339 1351 if (is_x86_feature(x86_featureset, X86FSET_ASYSC)) {
1340 1352 wrmsr(MSR_AMD_LSTAR, (uintptr_t)brand_sys_syscall);
1341 1353 wrmsr(MSR_AMD_CSTAR, (uintptr_t)brand_sys_syscall32);
1342 1354 }
1343 1355
1344 1356 #endif
1345 1357 #endif /* __amd64 */
1346 1358
1347 1359 if (is_x86_feature(x86_featureset, X86FSET_SEP))
1348 1360 wrmsr(MSR_INTC_SEP_EIP, (uintptr_t)brand_sys_sysenter);
1349 1361 }
1350 1362
1351 1363 /*
1352 1364 * Disable interpositioning on the system call path by rewriting the
1353 1365 * sys{call|enter} MSRs and the syscall-related entries in the IDT to use
1354 1366 * the standard entry points, which bypass the interpositioning hooks.
1355 1367 */
1356 1368 void
1357 1369 brand_interpositioning_disable(void)
1358 1370 {
1359 1371 gate_desc_t *idt = CPU->cpu_idt;
1360 1372 int i;
1361 1373
1362 1374 ASSERT(curthread->t_preempt != 0 || getpil() >= DISP_LEVEL);
1363 1375
1364 1376 for (i = 0; brand_tbl[i].ih_inum; i++) {
1365 1377 idt[brand_tbl[i].ih_inum] = brand_tbl[i].ih_default_desc;
1366 1378 #if defined(__xpv)
1367 1379 xen_idt_write(&idt[brand_tbl[i].ih_inum],
1368 1380 brand_tbl[i].ih_inum);
1369 1381 #endif
1370 1382 }
1371 1383
1372 1384 #if defined(__amd64)
1373 1385 #if defined(__xpv)
1374 1386
1375 1387 /*
1376 1388 * See comment above in brand_interpositioning_enable.
1377 1389 */
1378 1390 xen_set_callback(sys_syscall, CALLBACKTYPE_syscall,
1379 1391 CALLBACKF_mask_events);
1380 1392
1381 1393 #else
1382 1394
1383 1395 if (is_x86_feature(x86_featureset, X86FSET_ASYSC)) {
1384 1396 wrmsr(MSR_AMD_LSTAR, (uintptr_t)sys_syscall);
1385 1397 wrmsr(MSR_AMD_CSTAR, (uintptr_t)sys_syscall32);
1386 1398 }
1387 1399
1388 1400 #endif
1389 1401 #endif /* __amd64 */
1390 1402
1391 1403 if (is_x86_feature(x86_featureset, X86FSET_SEP))
1392 1404 wrmsr(MSR_INTC_SEP_EIP, (uintptr_t)sys_sysenter);
1393 1405 }