8956 Implement KPTI
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
--- old/usr/src/uts/i86pc/ml/locore.s
+++ new/usr/src/uts/i86pc/ml/locore.s
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
24 24 */
25 25 /*
26 - * Copyright (c) 2016, Joyent, Inc. All rights reserved.
26 + * Copyright (c) 2018 Joyent, Inc.
27 27 */
28 28
29 29 /* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */
30 30 /* Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T */
31 31 /* All Rights Reserved */
32 32
33 33 /* Copyright (c) 1987, 1988 Microsoft Corporation */
34 34 /* All Rights Reserved */
35 35
36 36
37 37 #include <sys/asm_linkage.h>
38 38 #include <sys/asm_misc.h>
39 39 #include <sys/regset.h>
40 40 #include <sys/privregs.h>
41 41 #include <sys/psw.h>
42 42 #include <sys/reboot.h>
43 43 #include <sys/x86_archext.h>
44 44 #include <sys/machparam.h>
45 45
46 46 #if defined(__lint)
47 47
48 48 #include <sys/types.h>
49 49 #include <sys/thread.h>
50 50 #include <sys/systm.h>
51 51 #include <sys/lgrp.h>
52 52 #include <sys/regset.h>
53 53 #include <sys/link.h>
54 54 #include <sys/bootconf.h>
55 55 #include <sys/bootsvcs.h>
56 56
57 57 #else /* __lint */
58 58
59 59 #include <sys/segments.h>
60 60 #include <sys/pcb.h>
61 61 #include <sys/trap.h>
62 62 #include <sys/ftrace.h>
63 63 #include <sys/traptrace.h>
64 64 #include <sys/clock.h>
65 65 #include <sys/cmn_err.h>
66 66 #include <sys/pit.h>
67 67 #include <sys/panic.h>
68 68
69 69 #if defined(__xpv)
70 70 #include <sys/hypervisor.h>
71 71 #endif
72 72
73 73 #include "assym.h"
74 74
75 75 /*
76 76 * Our assumptions:
77 77 * - We are running in protected-paged mode.
78 78 * - Interrupts are disabled.
79 79 * - The GDT and IDT are the caller's; we need our own copies.
80 80 * - The kernel's text, initialized data and bss are mapped.
81 81 *
82 82 * Our actions:
83 83 * - Save arguments
84 84 * - Initialize our stack pointer to the thread 0 stack (t0stack)
85 85 * and leave room for a phony "struct regs".
86 86 * - Our GDT and IDT need to get munged.
87 87 * - Since we are using the boot's GDT descriptors, we need
88 88 * to copy them into our GDT before we switch to ours.
89 89 * - We start using our GDT by loading correct values in the
90 90 * selector registers (cs=KCS_SEL, ds=es=ss=KDS_SEL, fs=KFS_SEL,
91 91 * gs=KGS_SEL).
92 92 * - The default LDT entry for syscall is set.
93 93 * - We load the default LDT into the hardware LDT register.
94 94 * - We load the default TSS into the hardware task register.
95 95 * - Check for cpu type, i.e. 486 vs. P5 vs. P6 etc.
96 96 * - mlsetup(%esp) gets called.
97 97 * - We change our appearance to look like the real thread 0.
98 98 * (NOTE: making ourselves to be a real thread may be a noop)
99 99 * - main() gets called. (NOTE: main() never returns).
100 100 *
101 101 * NOW, the real code!
102 102 */
103 103 /*
104 104 * The very first thing in the kernel's text segment must be a jump
105 105 * to the os/fakebop.c startup code.
106 106 */
107 107 .text
108 108 jmp _start
109 109
110 110 /*
111 111 * Globals:
112 112 */
113 113 .globl _locore_start
114 114 .globl mlsetup
115 115 .globl main
116 116 .globl panic
117 117 .globl t0stack
118 118 .globl t0
119 119 .globl sysp
120 120 .globl edata
121 121
122 122 /*
123 123 * call back into boot - sysp (bootsvcs.h) and bootops (bootconf.h)
124 124 */
125 125 .globl bootops
126 126 .globl bootopsp
127 127
128 128 /*
129 129 * NOTE: t0stack should be the first thing in the data section so that
130 130 * if it ever overflows, it will fault on the last kernel text page.
131 131 */
132 132 .data
133 133 .comm t0stack, DEFAULTSTKSZ, 32
134 134 .comm t0, 4094, 32
135 135
136 136 #endif /* __lint */
137 137
138 138
139 139 #if defined(__amd64)
140 140
141 141 #if defined(__lint)
142 142
143 143 /* ARGSUSED */
144 144 void
145 145 _locore_start(struct boot_syscalls *sysp, ulong_t rsi, struct bootops *bop)
146 146 {}
147 147
148 148 #else /* __lint */
149 149
150 150 /*
151 151 * kobj_init() vectors us back to here with (note) a slightly different
152 152 * set of arguments than _start is given (see lint prototypes above).
153 153 *
154 154 * XXX Make this less vile, please.
155 155 */
156 156 ENTRY_NP(_locore_start)
157 157
158 158 /*
159 159 * %rdi = boot services (should die someday)
160 160 * %rdx = bootops
161 161 * end
162 162 */
163 163
164 164 leaq edata(%rip), %rbp /* reference edata for ksyms */
165 165 movq $0, (%rbp) /* limit stack back trace */
166 166
167 167 /*
168 168 * Initialize our stack pointer to the thread 0 stack (t0stack)
169 169 * and leave room for a "struct regs" for lwp0. Note that the
170 170 * stack doesn't actually align to a 16-byte boundary until just
171 171 * before we call mlsetup because we want to use %rsp to point at
172 172 * our regs structure.
173 173 */
174 174 leaq t0stack(%rip), %rsp
175 175 addq $_CONST(DEFAULTSTKSZ - REGSIZE), %rsp
176 176 #if (REGSIZE & 15) == 0
177 177 subq $8, %rsp
178 178 #endif
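	/*
	 * Alignment sketch (assuming t0stack and DEFAULTSTKSZ are both
	 * multiples of 16): %rsp is now t0stack + DEFAULTSTKSZ - REGSIZE,
	 * less 8 when REGSIZE is itself a multiple of 16, i.e. congruent
	 * to 8 (mod 16) either way.  The single pushq %rbp done just
	 * before the call to mlsetup then leaves %rsp 16-byte aligned at
	 * the call site, as the comment there notes.
	 */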
179 179 /*
180 180 * Save call back for special x86 boot services vector
181 181 */
182 182 movq %rdi, sysp(%rip)
183 183
184 184 movq %rdx, bootops(%rip) /* save bootops */
185 185 movq $bootops, bootopsp(%rip)
186 186
187 187 /*
188 188 * Save arguments and flags, if only for debugging ..
189 189 */
190 190 movq %rdi, REGOFF_RDI(%rsp)
191 191 movq %rsi, REGOFF_RSI(%rsp)
192 192 movq %rdx, REGOFF_RDX(%rsp)
193 193 movq %rcx, REGOFF_RCX(%rsp)
194 194 movq %r8, REGOFF_R8(%rsp)
195 195 movq %r9, REGOFF_R9(%rsp)
196 196 pushf
197 197 popq %r11
198 198 movq %r11, REGOFF_RFL(%rsp)
199 199
200 200 #if !defined(__xpv)
201 201 /*
202 202 * Enable write protect and alignment check faults.
203 203 */
204 204 movq %cr0, %rax
205 205 orq $_CONST(CR0_WP|CR0_AM), %rax
206 206 andq $_BITNOT(CR0_WT|CR0_CE), %rax
207 207 movq %rax, %cr0
208 208 #endif /* __xpv */
209 209
210 210 /*
211 211 * (We just assert this works by virtue of being here)
212 212 */
213 213 bts $X86FSET_CPUID, x86_featureset(%rip)
214 214
215 215 /*
216 216 * mlsetup() gets called with a struct regs as argument, while
217 217 * main takes no args and should never return.
218 218 */
219 219 xorl %ebp, %ebp
220 220 movq %rsp, %rdi
221 221 pushq %rbp
222 222 /* (stack pointer now aligned on 16-byte boundary right here) */
223 223 movq %rsp, %rbp
224 224 call mlsetup
225 225 call main
226 226 /* NOTREACHED */
227 227 leaq __return_from_main(%rip), %rdi
228 228 xorl %eax, %eax
229 229 call panic
230 230 SET_SIZE(_locore_start)
231 231
232 232 #endif /* __amd64 */
233 233 #endif /* __lint */
234 234
235 235 #if !defined(__lint)
236 236
237 237 __return_from_main:
238 238 .string "main() returned"
239 239 __unsupported_cpu:
240 240 .string "486 style cpu detected - no longer supported!"
241 241
242 242 #endif /* !__lint */
243 243
244 244 #if !defined(__amd64)
245 245
246 246 #if defined(__lint)
247 247
248 248 /* ARGSUSED */
249 249 void
250 250 _locore_start(struct boot_syscalls *sysp, struct bootops *bop)
251 251 {}
252 252
253 253 #else /* __lint */
254 254
255 255 /*
256 256 * kobj_init() vectors us back to here with (note) a slightly different
257 257 * set of arguments than _start is given (see lint prototypes above).
258 258 *
259 259 * XXX Make this less vile, please.
260 260 */
261 261 ENTRY_NP(_locore_start)
262 262
263 263 /*
264 264 * %ecx = boot services (should die someday)
265 265 * %ebx = bootops
266 266 */
267 267 mov $edata, %ebp / edata needs to be defined for ksyms
268 268 movl $0, (%ebp) / limit stack back trace
269 269
270 270 /*
271 271 * Initialize our stack pointer to the thread 0 stack (t0stack)
272 272 * and leave room for a phony "struct regs".
273 273 */
274 274 movl $t0stack + DEFAULTSTKSZ - REGSIZE, %esp
275 275
276 276 /*
277 277 * Save call back for special x86 boot services vector
278 278 */
279 279 mov %ecx, sysp / save call back for boot services
280 280
281 281 mov %ebx, bootops / save bootops
282 282 movl $bootops, bootopsp
283 283
284 284
285 285 /*
286 286 * Save all registers and flags
287 287 */
288 288 pushal
289 289 pushfl
290 290
291 291 #if !defined(__xpv)
292 292 /*
293 293 * Override bios settings and enable write protect and
294 294 * alignment check faults.
295 295 */
296 296 movl %cr0, %eax
297 297
298 298 /*
299 299 * enable WP for detecting faults, and enable alignment checking.
300 300 */
301 301 orl $_CONST(CR0_WP|CR0_AM), %eax
302 302 andl $_BITNOT(CR0_WT|CR0_CE), %eax
303 303 movl %eax, %cr0 / set the cr0 register correctly and
304 304 / override the BIOS setup
305 305
306 306 /*
307 307 * If bit 21 of eflags can be flipped, then cpuid is present
308 308 * and enabled.
309 309 */
310 310 pushfl
311 311 popl %ecx
312 312 movl %ecx, %eax
313 313 xorl $PS_ID, %eax / try complemented bit
314 314 pushl %eax
315 315 popfl
316 316 pushfl
317 317 popl %eax
318 318 cmpl %eax, %ecx
319 319 jne have_cpuid
320 320
321 321 /*
322 322 * cpuid may be disabled on Cyrix, try to detect Cyrix by the 5/2 test
323 323 * div does not modify the cc flags on Cyrix, even though this may
324 324 * also be true for other vendors, this is generally true only for
325 325 * newer models from those vendors that support and do not disable
326 326 * cpuid (usually because cpuid cannot be disabled)
327 327 */
328 328
329 329 /*
330 330 * clear cc flags
331 331 */
332 332 xorb %ah, %ah
333 333 sahf
334 334
335 335 /*
336 336 * perform 5/2 test
337 337 */
338 338 movw $5, %ax
339 339 movb $2, %bl
340 340 divb %bl
341 341
342 342 lahf
343 343 cmpb $2, %ah
344 344 jne cpu_486
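	/*
	 * Why the compare against 2 works: sahf with a zero %ah clears
	 * SF, ZF, AF, PF and CF, and lahf reads the low flag byte back,
	 * in which reserved bit 1 always reads as 1.  So if divb left
	 * the flags untouched (the Cyrix behaviour described above), the
	 * byte read back is exactly 0x02 and we fall through to the
	 * Cyrix probe; anything else means the flags moved, and the part
	 * is treated as a plain 486.
	 */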
345 345
346 346 /*
347 347 * div did not modify the cc flags, chances are the vendor is Cyrix
348 348 * assume the vendor is Cyrix and use the CCR's to enable cpuid
349 349 */
350 350 .set CYRIX_CRI, 0x22 / CR Index Register
351 351 .set CYRIX_CRD, 0x23 / CR Data Register
352 352
353 353 .set CYRIX_CCR3, 0xc3 / Config Control Reg 3
354 354 .set CYRIX_CCR4, 0xe8 / Config Control Reg 4
355 355 .set CYRIX_DIR0, 0xfe / Device Identification Reg 0
356 356 .set CYRIX_DIR1, 0xff / Device Identification Reg 1
357 357
358 358 /*
359 359 * even if the cpu vendor is Cyrix and the motherboard/chipset
360 360 * vendor decided to ignore lines A1-A4 for I/O addresses, I/O port
361 361 * 0x21 corresponds with 0x23 and since 0x22 is still untouched,
362 362 * the reads and writes of 0x21 are guaranteed to be off-chip of
363 363 * the cpu
364 364 */
365 365
366 366 /*
367 367 * enable read of ISR at I/O port 0x20
368 368 */
369 369 movb $0xb, %al
370 370 outb $MCMD_PORT
371 371
372 372 /*
373 373 * read IMR and store in %bl
374 374 */
375 375 inb $MIMR_PORT
376 376 movb %al, %bl
377 377
378 378 /*
379 379 * mask out all interrupts so that ISR will not change
380 380 */
381 381 movb $0xff, %al
382 382 outb $MIMR_PORT
383 383
384 384 /*
385 385 * reads of I/O port 0x22 on Cyrix are always directed off-chip
386 386 * make use of I/O pull-up to test for an unknown device on 0x22
387 387 */
388 388 inb $CYRIX_CRI
389 389 cmpb $0xff, %al
390 390 je port_22_free
391 391
392 392 /*
393 393 * motherboard/chipset vendor may be ignoring line A1 of I/O address
394 394 */
395 395 movb %al, %cl
396 396
397 397 /*
398 398 * if the ISR and the value read from 0x22 do not match then we have
399 399 * detected some unknown device, probably a chipset, at 0x22
400 400 */
401 401 inb $MCMD_PORT
402 402 cmpb %al, %cl
403 403 jne restore_IMR
404 404
405 405 port_22_free:
406 406 /*
407 407 * now test to see if some unknown device is using I/O port 0x23
408 408 *
409 409 * read the external I/O port at 0x23
410 410 */
411 411 inb $CYRIX_CRD
412 412
413 413 /*
414 414 * Test for pull-up at 0x23 or if I/O address line A1 is being ignored.
415 415 * IMR is 0xff so both tests are performed simultaneously.
416 416 */
417 417 cmpb $0xff, %al
418 418 jne restore_IMR
419 419
420 420 /*
421 421 * We are a Cyrix part. In case we are some model of Cx486 or a Cx586,
422 422 * record the type and fix it later if not.
423 423 */
424 424 movl $X86_VENDOR_Cyrix, x86_vendor
425 425 movl $X86_TYPE_CYRIX_486, x86_type
426 426
427 427 /*
428 428 * Try to read CCR3. All Cyrix cpu's which support cpuid have CCR3.
429 429 *
430 430 * load CCR3 index into CCR index register
431 431 */
432 432
433 433 movb $CYRIX_CCR3, %al
434 434 outb $CYRIX_CRI
435 435
436 436 /*
437 437 * If we are not a Cyrix cpu, then we have performed an external I/O
438 438 * cycle. If the CCR index was not valid for this Cyrix model, we may
439 439 * have performed an external I/O cycle as well. In these cases and
440 440 * if the motherboard/chipset vendor ignores I/O address line A1,
441 441 * then the PIC will have IRQ3 set at the lowest priority as a side
442 442 * effect of the above outb. We are reasonably confident that there
443 443 * is not an unknown device on I/O port 0x22, so there should have been
444 444 * no unpredictable side-effect of the above outb.
445 445 */
446 446
447 447 /*
448 448 * read CCR3
449 449 */
450 450 inb $CYRIX_CRD
451 451
452 452 /*
453 453 * If we are not a Cyrix cpu the inb above produced an external I/O
454 454 * cycle. If we are a Cyrix model that does not support CCR3, we
455 455 * produced an external I/O cycle. In all known Cyrix models 6x86 and
456 456 * above, bit 3 of CCR3 is reserved and cannot be set to 1. In all
457 457 * Cyrix models prior to the 6x86 that supported CCR3, bits 4-7 are
458 458 * reserved as well. It is highly unlikely that CCR3 contains the value
459 459 * 0xff. We test to see if I/O port 0x23 is pull-up or the IMR and
460 460 * deduce we are not a Cyrix with support for cpuid if so.
461 461 */
462 462 cmpb $0xff, %al
463 463 je restore_PIC
464 464
465 465 /*
466 466 * There exist 486 ISA Cyrix chips that support CCR3 but do not support
467 467 * DIR0 and DIR1. If we try to read DIR0, we may generate external I/O
468 468 * cycles, the exact behavior is model specific and undocumented.
469 469 * Unfortunately these external I/O cycles may confuse some PIC's beyond
470 470 * recovery. Fortunately we can use the following undocumented trick:
471 471 * if bit 4 of CCR3 can be toggled, then DIR0 and DIR1 are supported.
472 472 * Pleasantly MAPEN contains bit 4 of CCR3, so this trick is guaranteed
473 473 * to work on all Cyrix cpu's which support cpuid.
474 474 */
475 475 movb %al, %dl
476 476 xorb $0x10, %dl
477 477 movb %al, %cl
478 478
479 479 /*
480 480 * write CCR3 back with bit 4 toggled
481 481 */
482 482 movb $CYRIX_CCR3, %al
483 483 outb $CYRIX_CRI
484 484
485 485 movb %dl, %al
486 486 outb $CYRIX_CRD
487 487
488 488 /*
489 489 * read CCR3
490 490 */
491 491 movb $CYRIX_CCR3, %al
492 492 outb $CYRIX_CRI
493 493 inb $CYRIX_CRD
494 494 movb %al, %dl
495 495
496 496 /*
497 497 * restore CCR3
498 498 */
499 499 movb $CYRIX_CCR3, %al
500 500 outb $CYRIX_CRI
501 501
502 502 movb %cl, %al
503 503 outb $CYRIX_CRD
504 504
505 505 /*
506 506 * if bit 4 was not toggled DIR0 and DIR1 are not supported in which
507 507 * case we do not have cpuid anyway
508 508 */
509 509 andb $0x10, %al
510 510 andb $0x10, %dl
511 511 cmpb %al, %dl
512 512 je restore_PIC
513 513
514 514 /*
515 515 * read DIR0
516 516 */
517 517 movb $CYRIX_DIR0, %al
518 518 outb $CYRIX_CRI
519 519 inb $CYRIX_CRD
520 520
521 521 /*
522 522 * test for pull-up
523 523 */
524 524 cmpb $0xff, %al
525 525 je restore_PIC
526 526
527 527 /*
528 528 * Values of 0x20-0x27 in DIR0 are currently reserved by Cyrix for
529 529 * future use. If Cyrix ever produces a cpu that supports cpuid with
530 530 * these ids, the following test will have to change. For now we remain
531 531 * pessimistic since the formats of the CRR's may be different then.
532 532 *
533 533 * test for at least a 6x86, to see if we support both MAPEN and CPUID
534 534 */
535 535 cmpb $0x30, %al
536 536 jb restore_IMR
537 537
538 538 /*
539 539 * enable MAPEN
540 540 */
541 541 movb $CYRIX_CCR3, %al
542 542 outb $CYRIX_CRI
543 543
544 544 andb $0xf, %cl
545 545 movb %cl, %al
546 546 orb $0x10, %al
547 547 outb $CYRIX_CRD
548 548
549 549 /*
550 550 * select CCR4
551 551 */
552 552 movb $CYRIX_CCR4, %al
553 553 outb $CYRIX_CRI
554 554
555 555 /*
556 556 * read CCR4
557 557 */
558 558 inb $CYRIX_CRD
559 559
560 560 /*
561 561 * enable cpuid
562 562 */
563 563 orb $0x80, %al
564 564 movb %al, %dl
565 565
566 566 /*
567 567 * select CCR4
568 568 */
569 569 movb $CYRIX_CCR4, %al
570 570 outb $CYRIX_CRI
571 571
572 572 /*
573 573 * write CCR4
574 574 */
575 575 movb %dl, %al
576 576 outb $CYRIX_CRD
577 577
578 578 /*
579 579 * select CCR3
580 580 */
581 581 movb $CYRIX_CCR3, %al
582 582 outb $CYRIX_CRI
583 583
584 584 /*
585 585 * disable MAPEN and write CCR3
586 586 */
587 587 movb %cl, %al
588 588 outb $CYRIX_CRD
589 589
590 590 /*
591 591 * restore IMR
592 592 */
593 593 movb %bl, %al
594 594 outb $MIMR_PORT
595 595
596 596 /*
597 597 * test to see if cpuid available
598 598 */
599 599 pushfl
600 600 popl %ecx
601 601 movl %ecx, %eax
602 602 xorl $PS_ID, %eax / try complemented bit
603 603 pushl %eax
604 604 popfl
605 605 pushfl
606 606 popl %eax
607 607 cmpl %eax, %ecx
608 608 jne have_cpuid
609 609 jmp cpu_486
610 610
611 611 restore_PIC:
612 612 /*
613 613 * In case the motherboard/chipset vendor is ignoring line A1 of the
614 614 * I/O address, we set the PIC priorities to sane values.
615 615 */
616 616 movb $0xc7, %al / irq 7 lowest priority
617 617 outb $MCMD_PORT
618 618
619 619 restore_IMR:
620 620 movb %bl, %al
621 621 outb $MIMR_PORT
622 622 jmp cpu_486
623 623
624 624 have_cpuid:
625 625 /*
626 626 * cpuid instruction present
627 627 */
628 628 bts $X86FSET_CPUID, x86_featureset / Just to set; Ignore the CF
629 629 movl $0, %eax
630 630 cpuid
631 631
632 632 movl %ebx, cpu_vendor
633 633 movl %edx, cpu_vendor+4
634 634 movl %ecx, cpu_vendor+8
635 635
636 636 /*
637 637 * early cyrix cpus are somewhat strange and need to be
638 638 * probed in curious ways to determine their identity
639 639 */
640 640
641 641 leal cpu_vendor, %esi
642 642 leal CyrixInstead, %edi
643 643 movl $12, %ecx
644 644 repz
645 645 cmpsb
646 646 je vendor_is_cyrix
647 647
648 648 / let mlsetup()/cpuid_pass1() handle everything else in C
649 649
650 650 jmp cpu_done
651 651
652 652 is486:
653 653 /*
654 654 * test to see if a useful cpuid
655 655 */
656 656 testl %eax, %eax
657 657 jz isa486
658 658
659 659 movl $1, %eax
660 660 cpuid
661 661
662 662 movl %eax, %ebx
663 663 andl $0xF00, %ebx
664 664 cmpl $0x400, %ebx
665 665 je isa486
666 666
667 667 rep; ret /* use 2 byte return instruction */
668 668 /* AMD Software Optimization Guide - Section 6.2 */
669 669 isa486:
670 670 /*
671 671 * lose the return address
672 672 */
673 673 popl %eax
674 674 jmp cpu_486
675 675
676 676 vendor_is_cyrix:
677 677 call is486
678 678
679 679 /*
680 680 * Processor signature and feature flags for Cyrix are insane.
681 681 * BIOS can play with semi-documented registers, so cpuid must be used
682 682 * cautiously. Since we are Cyrix that has cpuid, we have DIR0 and DIR1
683 683 * Keep the family in %ebx and feature flags in %edx until not needed
684 684 */
685 685
686 686 /*
687 687 * read DIR0
688 688 */
689 689 movb $CYRIX_DIR0, %al
690 690 outb $CYRIX_CRI
691 691 inb $CYRIX_CRD
692 692
693 693 /*
694 694 * First we handle the cases where we are a 6x86 or 6x86L.
695 695 * The 6x86 is basically a 486, the only reliable bit in the
696 696 * feature flags is for FPU. The 6x86L is better, unfortunately
697 697 * there is no really good way to distinguish between these two
698 698 * cpu's. We are pessimistic and when in doubt assume 6x86.
699 699 */
700 700
701 701 cmpb $0x40, %al
702 702 jae maybeGX
703 703
704 704 /*
705 705 * We are an M1, either a 6x86 or 6x86L.
706 706 */
707 707 cmpb $0x30, %al
708 708 je maybe6x86L
709 709 cmpb $0x31, %al
710 710 je maybe6x86L
711 711 cmpb $0x34, %al
712 712 je maybe6x86L
713 713 cmpb $0x35, %al
714 714 je maybe6x86L
715 715
716 716 /*
717 717 * although it is possible that we are a 6x86L, the cpu and
718 718 * documentation are so buggy, we just do not care.
719 719 */
720 720 jmp likely6x86
721 721
722 722 maybe6x86L:
723 723 /*
724 724 * read DIR1
725 725 */
726 726 movb $CYRIX_DIR1, %al
727 727 outb $CYRIX_CRI
728 728 inb $CYRIX_CRD
729 729 cmpb $0x22, %al
730 730 jb likely6x86
731 731
732 732 /*
733 733 * We are a 6x86L, or at least a 6x86 with honest cpuid feature flags
734 734 */
735 735 movl $X86_TYPE_CYRIX_6x86L, x86_type
736 736 jmp coma_bug
737 737
738 738 likely6x86:
739 739 /*
740 740 * We are likely a 6x86, or a 6x86L without a way of knowing
741 741 *
742 742 * The 6x86 has NO Pentium or Pentium Pro compatible features even
743 743 * though it claims to be a Pentium Pro compatible!
744 744 *
745 745 * The 6x86 core used in the 6x86 may have most of the Pentium system
746 746 * registers and largely conform to the Pentium System Programming
747 747 * Reference. Documentation on these parts is long gone. Treat it as
748 748 * a crippled Pentium and hope for the best.
749 749 */
750 750
751 751 movl $X86_TYPE_CYRIX_6x86, x86_type
752 752 jmp coma_bug
753 753
754 754 maybeGX:
755 755 /*
756 756 * Now we check whether we are a MediaGX or GXm. We have particular
757 757 * reason for concern here. Even though most of the GXm's
758 758 * report having TSC in the cpuid feature flags, the TSC may be
759 759 * horribly broken. What is worse, is that MediaGX's are basically
760 760 * 486's while the good GXm's are more like Pentium Pro's!
761 761 */
762 762
763 763 cmpb $0x50, %al
764 764 jae maybeM2
765 765
766 766 /*
767 767 * We are either a MediaGX (sometimes called a Gx86) or GXm
768 768 */
769 769
770 770 cmpb $41, %al
771 771 je maybeMediaGX
772 772
773 773 cmpb $44, %al
774 774 jb maybeGXm
775 775
776 776 cmpb $47, %al
777 777 jbe maybeMediaGX
778 778
779 779 /*
780 780 * We do not honestly know what we are, so assume a MediaGX
781 781 */
782 782 jmp media_gx
783 783
784 784 maybeGXm:
785 785 /*
786 786 * It is still possible we are either a MediaGX or GXm, trust cpuid
787 787 * family should be 5 on a GXm
788 788 */
789 789 cmpl $0x500, %ebx
790 790 je GXm
791 791
792 792 /*
793 793 * BIOS/Cyrix might set family to 6 on a GXm
794 794 */
795 795 cmpl $0x600, %ebx
796 796 jne media_gx
797 797
798 798 GXm:
799 799 movl $X86_TYPE_CYRIX_GXm, x86_type
800 800 jmp cpu_done
801 801
802 802 maybeMediaGX:
803 803 /*
804 804 * read DIR1
805 805 */
806 806 movb $CYRIX_DIR1, %al
807 807 outb $CYRIX_CRI
808 808 inb $CYRIX_CRD
809 809
810 810 cmpb $0x30, %al
811 811 jae maybeGXm
812 812
813 813 /*
814 814 * we are a MediaGX for which we do not trust cpuid
815 815 */
816 816 media_gx:
817 817 movl $X86_TYPE_CYRIX_MediaGX, x86_type
818 818 jmp cpu_486
819 819
820 820 maybeM2:
821 821 /*
822 822 * Now we check whether we are a 6x86MX or MII. These cpu's are
823 823 * virtually identical, but we care because for the 6x86MX, we
824 824 * must work around the coma bug. Also for 6x86MX prior to revision
825 825 * 1.4, the TSC may have serious bugs.
826 826 */
827 827
828 828 cmpb $0x60, %al
829 829 jae maybeM3
830 830
831 831 /*
832 832 * family should be 6, but BIOS/Cyrix might set it to 5
833 833 */
834 834 cmpl $0x600, %ebx
835 835 ja cpu_486
836 836
837 837 /*
838 838 * read DIR1
839 839 */
840 840 movb $CYRIX_DIR1, %al
841 841 outb $CYRIX_CRI
842 842 inb $CYRIX_CRD
843 843
844 844 cmpb $0x8, %al
845 845 jb cyrix6x86MX
846 846 cmpb $0x80, %al
847 847 jb MII
848 848
849 849 cyrix6x86MX:
850 850 /*
851 851 * It is altogether unclear how the revision stamped on the cpu
852 852 * maps to the values in DIR0 and DIR1. Just assume TSC is broken.
853 853 */
854 854 movl $X86_TYPE_CYRIX_6x86MX, x86_type
855 855 jmp coma_bug
856 856
857 857 MII:
858 858 movl $X86_TYPE_CYRIX_MII, x86_type
859 859 likeMII:
860 860 jmp cpu_done
861 861
862 862 maybeM3:
863 863 /*
864 864 * We are some chip that we cannot identify yet, an MIII perhaps.
865 865 * We will be optimistic and hope that the chip is much like an MII,
866 866 * and that cpuid is sane. Cyrix seemed to have gotten it right in
867 867 * time for the MII, we can only hope it stayed that way.
868 868 * Maybe the BIOS or Cyrix is trying to hint at something
869 869 */
870 870 cmpl $0x500, %ebx
871 871 je GXm
872 872
873 873 cmpb $0x80, %al
874 874 jae likelyM3
875 875
876 876 /*
877 877 * Just test for the features Cyrix is known for
878 878 */
879 879
880 880 jmp MII
881 881
882 882 likelyM3:
883 883 /*
884 884 * DIR0 with values from 0x80 to 0x8f indicates a VIA Cyrix III, aka
885 885 * the Cyrix MIII. There may be parts later that use the same ranges
886 886 * for DIR0 with special values in DIR1, maybe the VIA CIII, but for
887 887 * now we will call anything with a DIR0 of 0x80 or higher an MIII.
888 888 * The MIII is supposed to support large pages, but we will believe
889 889 * it when we see it. For now we just enable and test for MII features.
890 890 */
891 891 movl $X86_TYPE_VIA_CYRIX_III, x86_type
892 892 jmp likeMII
893 893
894 894 coma_bug:
895 895
896 896 /*
897 897 * With NO_LOCK set to 0 in CCR1, the usual state that BIOS enforces, some
898 898 * bus cycles are issued with LOCK# asserted. With NO_LOCK set to 1, all bus
899 899 * cycles except page table accesses and interrupt ACK cycles do not assert
900 900 * LOCK#. xchgl is an instruction that asserts LOCK# if NO_LOCK is set to 0.
901 901 * Due to a bug in the cpu core involving over-optimization of branch
902 902 * prediction, register renaming, and execution of instructions down both the
903 903 * X and Y pipes for the xchgl instruction, short loops can be written that
904 904 * never de-assert LOCK# from one invocation of the loop to the next, ad
905 905 * infinitum. The undesirable effect of this situation is that interrupts are
906 906 * not serviced. The ideal workaround to this bug would be to set NO_LOCK to
907 907 * 1. Unfortunately bus cycles that would otherwise have asserted LOCK# no
908 908 * longer do, unless they are page table accesses or interrupt ACK cycles.
909 909 * With LOCK# not asserted, these bus cycles are now cached. This can cause
910 910 * undesirable behaviour if the ARR's are not configured correctly. Solaris
911 911 * does not configure the ARR's, nor does it provide any useful mechanism for
912 912 * doing so, thus the ideal workaround is not viable. Fortunately, the only
913 913 * known exploits for this bug involve the xchgl instruction specifically.
914 914 * There is a group of undocumented registers on Cyrix 6x86, 6x86L, and
915 915 * 6x86MX cpu's which can be used to specify one instruction as a serializing
916 916 * instruction. With the xchgl instruction serialized, LOCK# is still
917 917 * asserted, but it is the sole instruction for which LOCK# is asserted.
918 918 * There is now some added penalty for the xchgl instruction, but the usual
919 919 * bus locking is preserved. This ingenious workaround was discovered by
920 920 * disassembling a binary provided by Cyrix as a workaround for this bug on
921 921 * Windows, but it's not documented anywhere by Cyrix, nor is the bug actually
922 922 * mentioned in any public errata! The only concern for this workaround is
923 923 * that there may be similar undiscovered bugs with other instructions that
924 924 * assert LOCK# that may be leveraged to similar ends. The fact that Cyrix
925 925 * fixed this bug sometime late in 1997 and no other exploits other than
926 926 * xchgl have been discovered is good indication that this workaround is
927 927 * reasonable.
928 928 */
929 929
930 930 .set CYRIX_DBR0, 0x30 / Debug Register 0
931 931 .set CYRIX_DBR1, 0x31 / Debug Register 1
932 932 .set CYRIX_DBR2, 0x32 / Debug Register 2
933 933 .set CYRIX_DBR3, 0x33 / Debug Register 3
934 934 .set CYRIX_DOR, 0x3c / Debug Opcode Register
935 935
936 936 /*
937 937 * What is known about DBR1, DBR2, DBR3, and DOR is that for normal
938 938 * cpu execution DBR1, DBR2, and DBR3 are set to 0. To obtain opcode
939 939 * serialization, DBR1, DBR2, and DBR3 are loaded with 0xb8, 0x7f,
940 940 * and 0xff. Then, DOR is loaded with the one byte opcode.
941 941 */
942 942
943 943 /*
944 944 * select CCR3
945 945 */
946 946 movb $CYRIX_CCR3, %al
947 947 outb $CYRIX_CRI
948 948
949 949 /*
950 950 * read CCR3 and mask out MAPEN
951 951 */
952 952 inb $CYRIX_CRD
953 953 andb $0xf, %al
954 954
955 955 /*
956 956 * save masked CCR3 in %ah
957 957 */
958 958 movb %al, %ah
959 959
960 960 /*
961 961 * select CCR3
962 962 */
963 963 movb $CYRIX_CCR3, %al
964 964 outb $CYRIX_CRI
965 965
966 966 /*
967 967 * enable MAPEN
968 968 */
969 969 movb %ah, %al
970 970 orb $0x10, %al
971 971 outb $CYRIX_CRD
972 972
973 973 /*
974 974 * read DBR0
975 975 */
976 976 movb $CYRIX_DBR0, %al
977 977 outb $CYRIX_CRI
978 978 inb $CYRIX_CRD
979 979
980 980 /*
981 981 * disable MATCH and save in %bh
982 982 */
983 983 orb $0x80, %al
984 984 movb %al, %bh
985 985
986 986 /*
987 987 * write DBR0
988 988 */
989 989 movb $CYRIX_DBR0, %al
990 990 outb $CYRIX_CRI
991 991 movb %bh, %al
992 992 outb $CYRIX_CRD
993 993
994 994 /*
995 995 * write DBR1
996 996 */
997 997 movb $CYRIX_DBR1, %al
998 998 outb $CYRIX_CRI
999 999 movb $0xf8, %al
1000 1000 outb $CYRIX_CRD
1001 1001
1002 1002 /*
1003 1003 * write DBR2
1004 1004 */
1005 1005 movb $CYRIX_DBR2, %al
1006 1006 outb $CYRIX_CRI
1007 1007 movb $0x7f, %al
1008 1008 outb $CYRIX_CRD
1009 1009
1010 1010 /*
1011 1011 * write DBR3
1012 1012 */
1013 1013 movb $CYRIX_DBR3, %al
1014 1014 outb $CYRIX_CRI
1015 1015 xorb %al, %al
1016 1016 outb $CYRIX_CRD
1017 1017
1018 1018 /*
1019 1019 * write DOR
1020 1020 */
1021 1021 movb $CYRIX_DOR, %al
1022 1022 outb $CYRIX_CRI
1023 1023 movb $0x87, %al
1024 1024 outb $CYRIX_CRD
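	/*
	 * (0x87 is the one-byte opcode of xchgl with a register/memory
	 * operand, i.e. the instruction being singled out for
	 * serialization as described above.)
	 */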
1025 1025
1026 1026 /*
1027 1027 * enable MATCH
1028 1028 */
1029 1029 movb $CYRIX_DBR0, %al
1030 1030 outb $CYRIX_CRI
1031 1031 movb %bh, %al
1032 1032 andb $0x7f, %al
1033 1033 outb $CYRIX_CRD
1034 1034
1035 1035 /*
1036 1036 * disable MAPEN
1037 1037 */
1038 1038 movb $0xc3, %al
1039 1039 outb $CYRIX_CRI
1040 1040 movb %ah, %al
1041 1041 outb $CYRIX_CRD
1042 1042
1043 1043 jmp cpu_done
1044 1044
1045 1045 cpu_done:
1046 1046
1047 1047 popfl /* Restore original FLAGS */
1048 1048 popal /* Restore all registers */
1049 1049
1050 1050 #endif /* !__xpv */
1051 1051
1052 1052 /*
1053 1053 * mlsetup(%esp) gets called.
1054 1054 */
1055 1055 pushl %esp
1056 1056 call mlsetup
1057 1057 addl $4, %esp
1058 1058
1059 1059 /*
1060 1060 * We change our appearance to look like the real thread 0.
1061 1061 * (NOTE: making ourselves to be a real thread may be a noop)
1062 1062 * main() gets called. (NOTE: main() never returns).
1063 1063 */
1064 1064 call main
1065 1065 /* NOTREACHED */
1066 1066 pushl $__return_from_main
1067 1067 call panic
1068 1068
1069 1069 /* NOTREACHED */
1070 1070 cpu_486:
1071 1071 pushl $__unsupported_cpu
1072 1072 call panic
1073 1073 SET_SIZE(_locore_start)
1074 1074
1075 1075 #endif /* __lint */
1076 1076 #endif /* !__amd64 */
1077 1077
1078 1078
1079 1079 /*
1080 1080 * For stack layout, see privregs.h
1081 1081 * When cmntrap gets called, the error code and trap number have been pushed.
1082 1082 * When cmntrap_pushed gets called, the entire struct regs has been pushed.
1083 1083 */
1084 1084
1085 1085 #if defined(__lint)
1086 1086
1087 1087 /* ARGSUSED */
1088 1088 void
1089 1089 cmntrap()
1090 1090 {}
1091 1091
1092 1092 #else /* __lint */
1093 1093
1094 1094 .globl trap /* C handler called below */
1095 1095
1096 1096 #if defined(__amd64)
1097 1097
1098 1098 ENTRY_NP2(cmntrap, _cmntrap)
1099 1099
1100 1100 INTR_PUSH
1101 1101
1102 1102 ALTENTRY(cmntrap_pushed)
1103 1103
1104 1104 movq %rsp, %rbp
1105 1105
1106 1106 /*
1107 1107 * - if this is a #pf i.e. T_PGFLT, %r15 is live
1108 1108 * and contains the faulting address i.e. a copy of %cr2
1109 1109 *
1110 1110 * - if this is a #db i.e. T_SGLSTP, %r15 is live
1111 1111 * and contains the value of %db6
1112 1112 */
1113 1113
1114 1114 TRACE_PTR(%rdi, %rbx, %ebx, %rcx, $TT_TRAP) /* Uses labels 8 and 9 */
1115 1115 TRACE_REGS(%rdi, %rsp, %rbx, %rcx) /* Uses label 9 */
1116 1116 TRACE_STAMP(%rdi) /* Clobbers %eax, %edx, uses 9 */
1117 1117
1118 1118 /*
1119 1119 * We must first check if DTrace has set its NOFAULT bit. This
1120 1120 * regrettably must happen before the trap stack is recorded, because
1121 1121 * this requires a call to getpcstack() and may induce recursion if an
1122 1122 * fbt::getpcstack: enabling is inducing the bad load.
1123 1123 */
1124 1124 movl %gs:CPU_ID, %eax
1125 1125 shlq $CPU_CORE_SHIFT, %rax
1126 1126 leaq cpu_core(%rip), %r8
1127 1127 addq %r8, %rax
1128 1128 movw CPUC_DTRACE_FLAGS(%rax), %cx
1129 1129 testw $CPU_DTRACE_NOFAULT, %cx
1130 1130 jnz .dtrace_induced
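	/*
	 * Roughly the C equivalent of the check above, as a sketch in
	 * terms of the cpu_core[] array that CPU_CORE_SHIFT and
	 * CPUC_DTRACE_FLAGS index into:
	 *
	 *	if (cpu_core[CPU->cpu_id].cpuc_dtrace_flags &
	 *	    CPU_DTRACE_NOFAULT)
	 *		goto dtrace_induced;
	 *
	 * %rax is left pointing at this CPU's cpu_core entry and %cx
	 * holds its flags; both are reused by .dtrace_induced below.
	 */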
1131 1131
1132 1132 TRACE_STACK(%rdi)
1133 1133
1134 1134 movq %rbp, %rdi
1135 1135 movq %r15, %rsi
1136 1136 movl %gs:CPU_ID, %edx
1137 1137
1138 1138 /*
1139 1139 * We know that this isn't a DTrace non-faulting load; we can now safely
1140 1140 * reenable interrupts. (In the case of pagefaults, we enter through an
1141 1141 * interrupt gate.)
1142 1142 */
1143 1143 ENABLE_INTR_FLAGS
1144 1144
1145 1145 call trap /* trap(rp, addr, cpuid) handles all traps */
1146 1146 jmp _sys_rtt
1147 1147
1148 1148 .dtrace_induced:
1149 1149 cmpw $KCS_SEL, REGOFF_CS(%rbp) /* test CS for user-mode trap */
1150 1150 jne 3f /* if from user, panic */
1151 1151
1152 1152 cmpl $T_PGFLT, REGOFF_TRAPNO(%rbp)
1153 1153 je 1f
1154 1154
1155 1155 cmpl $T_GPFLT, REGOFF_TRAPNO(%rbp)
1156 1156 je 0f
1157 1157
1158 1158 cmpl $T_ILLINST, REGOFF_TRAPNO(%rbp)
1159 1159 je 0f
1160 1160
1161 1161 cmpl $T_ZERODIV, REGOFF_TRAPNO(%rbp)
1162 1162 jne 4f /* if not PF/GP/UD/DE, panic */
1163 1163
1164 1164 orw $CPU_DTRACE_DIVZERO, %cx
1165 1165 movw %cx, CPUC_DTRACE_FLAGS(%rax)
1166 1166 jmp 2f
1167 1167
1168 1168 /*
1169 1169 * If we've taken a GPF, we don't (unfortunately) have the address that
1170 1170 * induced the fault. So instead of setting the fault to BADADDR,
1171 1171 * we'll set the fault to ILLOP.
1172 1172 */
1173 1173 0:
1174 1174 orw $CPU_DTRACE_ILLOP, %cx
1175 1175 movw %cx, CPUC_DTRACE_FLAGS(%rax)
1176 1176 jmp 2f
1177 1177 1:
1178 1178 orw $CPU_DTRACE_BADADDR, %cx
1179 1179 movw %cx, CPUC_DTRACE_FLAGS(%rax) /* set fault to bad addr */
1180 1180 movq %r15, CPUC_DTRACE_ILLVAL(%rax)
1181 1181 /* fault addr is illegal value */
1182 1182 2:
1183 1183 movq REGOFF_RIP(%rbp), %rdi
1184 1184 movq %rdi, %r12
1185 1185 call dtrace_instr_size
1186 1186 addq %rax, %r12
1187 1187 movq %r12, REGOFF_RIP(%rbp)
1188 1188 INTR_POP
1189 - IRET
1189 + jmp tr_iret_auto
1190 1190 /*NOTREACHED*/
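	/*
	 * tr_iret_auto lives in the new KPTI trampoline code rather than
	 * in this file.  Roughly: since this return may be headed to
	 * either kernel or user context, it examines the saved %cs on
	 * the interrupt frame and takes the kernel or the user return
	 * path accordingly, the latter switching back to the user page
	 * tables before the final iretq.
	 */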
1191 1191 3:
1192 1192 leaq dtrace_badflags(%rip), %rdi
1193 1193 xorl %eax, %eax
1194 1194 call panic
1195 1195 4:
1196 1196 leaq dtrace_badtrap(%rip), %rdi
1197 1197 xorl %eax, %eax
1198 1198 call panic
1199 1199 SET_SIZE(cmntrap)
1200 1200 SET_SIZE(_cmntrap)
1201 1201
1202 1202 #elif defined(__i386)
1203 1203
1204 1204
1205 1205 ENTRY_NP2(cmntrap, _cmntrap)
1206 1206
1207 1207 INTR_PUSH
1208 1208
1209 1209 ALTENTRY(cmntrap_pushed)
1210 1210
1211 1211 movl %esp, %ebp
1212 1212
1213 1213 /*
1214 1214 * - if this is a #pf i.e. T_PGFLT, %esi is live
1215 1215 * and contains the faulting address i.e. a copy of %cr2
1216 1216 *
1217 1217 * - if this is a #db i.e. T_SGLSTP, %esi is live
1218 1218 * and contains the value of %db6
1219 1219 */
1220 1220
1221 1221 TRACE_PTR(%edi, %ebx, %ebx, %ecx, $TT_TRAP) /* Uses labels 8 and 9 */
1222 1222 TRACE_REGS(%edi, %esp, %ebx, %ecx) /* Uses label 9 */
1223 1223 TRACE_STAMP(%edi) /* Clobbers %eax, %edx, uses 9 */
1224 1224
1225 1225 /*
1226 1226 * We must first check if DTrace has set its NOFAULT bit. This
1227 1227 * regrettably must happen before the trap stack is recorded, because
1228 1228 * this requires a call to getpcstack() and may induce recursion if an
1229 1229 * fbt::getpcstack: enabling is inducing the bad load.
1230 1230 */
1231 1231 movl %gs:CPU_ID, %eax
1232 1232 shll $CPU_CORE_SHIFT, %eax
1233 1233 addl $cpu_core, %eax
1234 1234 movw CPUC_DTRACE_FLAGS(%eax), %cx
1235 1235 testw $CPU_DTRACE_NOFAULT, %cx
1236 1236 jnz .dtrace_induced
1237 1237
1238 1238 TRACE_STACK(%edi)
1239 1239
1240 1240 pushl %gs:CPU_ID
1241 1241 pushl %esi /* fault address for PGFLTs */
1242 1242 	pushl	%ebp		/* &regs */
1243 1243
1244 1244 /*
1245 1245 * We know that this isn't a DTrace non-faulting load; we can now safely
1246 1246 * reenable interrupts. (In the case of pagefaults, we enter through an
1247 1247 * interrupt gate.)
1248 1248 */
1249 1249 ENABLE_INTR_FLAGS
1250 1250
1251 1251 call trap /* trap(rp, addr, cpuid) handles all traps */
1252 1252 addl $12, %esp /* get argument off stack */
1253 1253 jmp _sys_rtt
1254 1254
1255 1255 .dtrace_induced:
1256 1256 cmpw $KCS_SEL, REGOFF_CS(%ebp) /* test CS for user-mode trap */
1257 1257 jne 3f /* if from user, panic */
1258 1258
1259 1259 cmpl $T_PGFLT, REGOFF_TRAPNO(%ebp)
1260 1260 je 1f
1261 1261
1262 1262 cmpl $T_GPFLT, REGOFF_TRAPNO(%ebp)
1263 1263 je 0f
1264 1264
1265 1265 cmpl $T_ZERODIV, REGOFF_TRAPNO(%ebp)
1266 1266 jne 4f /* if not PF/GP/UD/DE, panic */
1267 1267
1268 1268 orw $CPU_DTRACE_DIVZERO, %cx
1269 1269 movw %cx, CPUC_DTRACE_FLAGS(%eax)
1270 1270 jmp 2f
1271 1271
1272 1272 0:
1273 1273 /*
1274 1274 * If we've taken a GPF, we don't (unfortunately) have the address that
1275 1275 * induced the fault. So instead of setting the fault to BADADDR,
1276 1276 * we'll set the fault to ILLOP.
1277 1277 */
1278 1278 orw $CPU_DTRACE_ILLOP, %cx
1279 1279 movw %cx, CPUC_DTRACE_FLAGS(%eax)
1280 1280 jmp 2f
1281 1281 1:
1282 1282 orw $CPU_DTRACE_BADADDR, %cx
1283 1283 movw %cx, CPUC_DTRACE_FLAGS(%eax) /* set fault to bad addr */
1284 1284 movl %esi, CPUC_DTRACE_ILLVAL(%eax)
1285 1285 /* fault addr is illegal value */
1286 1286 2:
1287 1287 pushl REGOFF_EIP(%ebp)
1288 1288 call dtrace_instr_size
1289 1289 addl $4, %esp
1290 1290 movl REGOFF_EIP(%ebp), %ecx
1291 1291 addl %eax, %ecx
1292 1292 movl %ecx, REGOFF_EIP(%ebp)
1293 1293 INTR_POP_KERNEL
1294 1294 IRET
1295 1295 /*NOTREACHED*/
1296 1296 3:
1297 1297 pushl $dtrace_badflags
1298 1298 call panic
1299 1299 4:
1300 1300 pushl $dtrace_badtrap
1301 1301 call panic
1302 1302 SET_SIZE(cmntrap)
1303 1303 SET_SIZE(_cmntrap)
1304 1304
1305 1305 #endif /* __i386 */
1306 1306
1307 1307 /*
1308 1308 * Declare a uintptr_t which has the size of _cmntrap to enable stack
1309 1309 * traceback code to know when a regs structure is on the stack.
1310 1310 */
1311 1311 .globl _cmntrap_size
1312 1312 .align CLONGSIZE
1313 1313 _cmntrap_size:
1314 1314 .NWORD . - _cmntrap
1315 1315 .type _cmntrap_size, @object
1316 1316
1317 1317 dtrace_badflags:
1318 1318 .string "bad DTrace flags"
1319 1319
1320 1320 dtrace_badtrap:
1321 1321 .string "bad DTrace trap"
1322 1322
1323 1323 #endif /* __lint */
1324 1324
1325 1325 #if defined(__lint)
1326 1326
1327 1327 /* ARGSUSED */
1328 1328 void
1329 1329 cmninttrap()
1330 1330 {}
1331 1331
1332 1332 #if !defined(__xpv)
1333 1333 void
1334 1334 bop_trap_handler(void)
1335 1335 {}
1336 1336 #endif
1337 1337
1338 1338 #else /* __lint */
1339 1339
1340 1340 .globl trap /* C handler called below */
1341 1341
1342 1342 #if defined(__amd64)
1343 1343
1344 1344 ENTRY_NP(cmninttrap)
1345 1345
1346 1346 INTR_PUSH
1347 1347 INTGATE_INIT_KERNEL_FLAGS
1348 1348
1349 1349 TRACE_PTR(%rdi, %rbx, %ebx, %rcx, $TT_TRAP) /* Uses labels 8 and 9 */
1350 1350 TRACE_REGS(%rdi, %rsp, %rbx, %rcx) /* Uses label 9 */
1351 1351 TRACE_STAMP(%rdi) /* Clobbers %eax, %edx, uses 9 */
1352 1352
1353 1353 movq %rsp, %rbp
1354 1354
1355 1355 movl %gs:CPU_ID, %edx
1356 1356 xorl %esi, %esi
1357 1357 movq %rsp, %rdi
1358 1358 call trap /* trap(rp, addr, cpuid) handles all traps */
1359 1359 jmp _sys_rtt
1360 1360 SET_SIZE(cmninttrap)
1361 1361
1362 1362 #if !defined(__xpv)
1363 1363 /*
1364 1364 * Handle traps early in boot. Just revectors into C quickly as
1365 1365 * these are always fatal errors.
1366 1366 *
1367 1367 * Adjust %rsp to get same stack layout as in 32bit mode for bop_trap().
1368 1368 */
1369 1369 ENTRY(bop_trap_handler)
1370 1370 movq %rsp, %rdi
1371 1371 sub $8, %rsp
1372 1372 call bop_trap
1373 1373 SET_SIZE(bop_trap_handler)
1374 1374 #endif
1375 1375
1376 1376 #elif defined(__i386)
1377 1377
1378 1378 ENTRY_NP(cmninttrap)
1379 1379
1380 1380 INTR_PUSH
1381 1381 INTGATE_INIT_KERNEL_FLAGS
1382 1382
1383 1383 TRACE_PTR(%edi, %ebx, %ebx, %ecx, $TT_TRAP) /* Uses labels 8 and 9 */
1384 1384 TRACE_REGS(%edi, %esp, %ebx, %ecx) /* Uses label 9 */
1385 1385 TRACE_STAMP(%edi) /* Clobbers %eax, %edx, uses 9 */
1386 1386
1387 1387 movl %esp, %ebp
1388 1388
1389 1389 TRACE_STACK(%edi)
1390 1390
1391 1391 pushl %gs:CPU_ID
1392 1392 pushl $0
1393 1393 pushl %ebp
1394 1394 call trap /* trap(rp, addr, cpuid) handles all traps */
1395 1395 addl $12, %esp
1396 1396 jmp _sys_rtt
1397 1397 SET_SIZE(cmninttrap)
1398 1398
1399 1399 #if !defined(__xpv)
1400 1400 /*
1401 1401 * Handle traps early in boot. Just revectors into C quickly as
1402 1402 * these are always fatal errors.
1403 1403 */
1404 1404 ENTRY(bop_trap_handler)
1405 1405 movl %esp, %eax
1406 1406 pushl %eax
1407 1407 call bop_trap
1408 1408 SET_SIZE(bop_trap_handler)
1409 1409 #endif
1410 1410
1411 1411 #endif /* __i386 */
1412 1412
1413 1413 #endif /* __lint */
1414 1414
1415 1415 #if defined(__lint)
1416 1416
1417 1417 /* ARGSUSED */
1418 1418 void
1419 1419 dtrace_trap()
1420 1420 {}
1421 1421
1422 1422 #else /* __lint */
1423 1423
1424 1424 .globl dtrace_user_probe
1425 1425
1426 1426 #if defined(__amd64)
1427 1427
1428 1428 ENTRY_NP(dtrace_trap)
1429 1429
1430 1430 INTR_PUSH
1431 1431
1432 1432 TRACE_PTR(%rdi, %rbx, %ebx, %rcx, $TT_TRAP) /* Uses labels 8 and 9 */
1433 1433 TRACE_REGS(%rdi, %rsp, %rbx, %rcx) /* Uses label 9 */
1434 1434 TRACE_STAMP(%rdi) /* Clobbers %eax, %edx, uses 9 */
1435 1435
1436 1436 movq %rsp, %rbp
1437 1437
1438 1438 movl %gs:CPU_ID, %edx
1439 1439 #if defined(__xpv)
1440 1440 movq %gs:CPU_VCPU_INFO, %rsi
1441 1441 movq VCPU_INFO_ARCH_CR2(%rsi), %rsi
1442 1442 #else
1443 1443 movq %cr2, %rsi
1444 1444 #endif
1445 1445 movq %rsp, %rdi
1446 1446
1447 1447 ENABLE_INTR_FLAGS
1448 1448
1449 1449 call dtrace_user_probe /* dtrace_user_probe(rp, addr, cpuid) */
1450 1450 jmp _sys_rtt
1451 1451
1452 1452 SET_SIZE(dtrace_trap)
1453 1453
1454 1454 #elif defined(__i386)
1455 1455
1456 1456 ENTRY_NP(dtrace_trap)
1457 1457
1458 1458 INTR_PUSH
1459 1459
1460 1460 TRACE_PTR(%edi, %ebx, %ebx, %ecx, $TT_TRAP) /* Uses labels 8 and 9 */
1461 1461 TRACE_REGS(%edi, %esp, %ebx, %ecx) /* Uses label 9 */
1462 1462 TRACE_STAMP(%edi) /* Clobbers %eax, %edx, uses 9 */
1463 1463
1464 1464 movl %esp, %ebp
1465 1465
1466 1466 pushl %gs:CPU_ID
1467 1467 #if defined(__xpv)
1468 1468 movl %gs:CPU_VCPU_INFO, %eax
1469 1469 movl VCPU_INFO_ARCH_CR2(%eax), %eax
1470 1470 #else
1471 1471 movl %cr2, %eax
1472 1472 #endif
1473 1473 pushl %eax
1474 1474 pushl %ebp
1475 1475
1476 1476 ENABLE_INTR_FLAGS
1477 1477
1478 1478 call dtrace_user_probe /* dtrace_user_probe(rp, addr, cpuid) */
1479 1479 addl $12, %esp /* get argument off stack */
1480 1480
1481 1481 jmp _sys_rtt
1482 1482 SET_SIZE(dtrace_trap)
1483 1483
1484 1484 #endif /* __i386 */
1485 1485
1486 1486 #endif /* __lint */
1487 1487
1488 1488 /*
1489 1489 * Return from _sys_trap routine.
1490 1490 */
1491 1491
1492 1492 #if defined(__lint)
1493 1493
1494 1494 void
1495 1495 lwp_rtt_initial(void)
1496 1496 {}
1497 1497
1498 1498 void
1499 1499 lwp_rtt(void)
1500 1500 {}
1501 1501
1502 1502 void
1503 1503 _sys_rtt(void)
1504 1504 {}
1505 1505
1506 1506 #else /* __lint */
1507 1507
1508 1508 #if defined(__amd64)
1509 1509
1510 1510 ENTRY_NP(lwp_rtt_initial)
1511 1511 movq %gs:CPU_THREAD, %r15
1512 1512 movq T_STACK(%r15), %rsp /* switch to the thread stack */
1513 1513 movq %rsp, %rbp
1514 1514 call __dtrace_probe___proc_start
1515 1515 jmp _lwp_rtt
1516 1516
1517 1517 ENTRY_NP(lwp_rtt)
1518 1518
1519 1519 /*
1520 1520 * r14 lwp
1521 1521 * rdx lwp->lwp_procp
1522 1522 * r15 curthread
1523 1523 */
1524 1524
1525 1525 movq %gs:CPU_THREAD, %r15
1526 1526 movq T_STACK(%r15), %rsp /* switch to the thread stack */
1527 1527 movq %rsp, %rbp
1528 1528 _lwp_rtt:
1529 1529 call __dtrace_probe___proc_lwp__start
1530 1530 movq %gs:CPU_LWP, %r14
1531 1531 movq LWP_PROCP(%r14), %rdx
1532 1532
1533 1533 /*
1534 1534 * XX64 Is the stack misaligned correctly at this point?
1535 1535 * If not, we need to do a push before calling anything ..
1536 1536 */
1537 1537
1538 1538 #if defined(DEBUG)
1539 1539 /*
1540 1540 * If we were to run lwp_savectx at this point -without-
1541 1541 * pcb_rupdate being set to 1, we'd end up sampling the hardware
1542 1542 * state left by the previous running lwp, rather than setting
1543 1543 * the values requested by the lwp creator. Bad.
1544 1544 */
1545 1545 testb $0x1, PCB_RUPDATE(%r14)
1546 1546 jne 1f
1547 1547 leaq _no_pending_updates(%rip), %rdi
1548 1548 movl $__LINE__, %esi
1549 1549 movq %r14, %rdx
1550 1550 xorl %eax, %eax
1551 1551 call panic
1552 1552 _no_pending_updates:
1553 1553 .string "locore.s:%d lwp_rtt(lwp %p) but pcb_rupdate != 1"
1554 1554 1:
1555 1555 #endif
1556 1556
1557 1557 /*
1558 1558 * If agent lwp, clear %fs and %gs
1559 1559 */
1560 1560 cmpq %r15, P_AGENTTP(%rdx)
1561 1561 jne 1f
1562 1562 xorl %ecx, %ecx
1563 1563 movq %rcx, REGOFF_FS(%rsp)
1564 1564 movq %rcx, REGOFF_GS(%rsp)
1565 1565 movw %cx, LWP_PCB_FS(%r14)
1566 1566 movw %cx, LWP_PCB_GS(%r14)
1567 1567 1:
1568 1568 call dtrace_systrace_rtt
1569 1569 movq REGOFF_RDX(%rsp), %rsi
1570 1570 movq REGOFF_RAX(%rsp), %rdi
1571 1571 call post_syscall /* post_syscall(rval1, rval2) */
1572 1572
1573 1573 /*
1574 1574 * set up to take fault on first use of fp
1575 1575 */
1576 1576 STTS(%rdi)
1577 1577
1578 1578 /*
1579 1579 * XXX - may want a fast path that avoids sys_rtt_common in the
1580 1580 * most common case.
1581 1581 */
1582 1582 ALTENTRY(_sys_rtt)
1583 1583 CLI(%rax) /* disable interrupts */
1584 1584 ALTENTRY(_sys_rtt_ints_disabled)
1585 1585 movq %rsp, %rdi /* pass rp to sys_rtt_common */
1586 1586 call sys_rtt_common /* do common sys_rtt tasks */
1587 1587 testq %rax, %rax /* returning to userland? */
1588 1588 jz sr_sup
1589 1589
1590 1590 /*
1591 1591 * Return to user
1592 1592 */
1593 1593 ASSERT_UPCALL_MASK_IS_SET
1594 1594 cmpw $UCS_SEL, REGOFF_CS(%rsp) /* test for native (64-bit) lwp? */
1595 1595 je sys_rtt_syscall
1596 1596
1597 1597 /*
1598 1598 * Return to 32-bit userland
1599 1599 */
1600 1600 ALTENTRY(sys_rtt_syscall32)
1601 1601 USER32_POP
1602 - IRET
1602 + jmp tr_iret_user
1603 1603 /*NOTREACHED*/
1604 1604
1605 1605 ALTENTRY(sys_rtt_syscall)
1606 1606 /*
1607 1607 * Return to 64-bit userland
1608 1608 */
1609 1609 USER_POP
1610 1610 ALTENTRY(nopop_sys_rtt_syscall)
1611 - IRET
1611 + jmp tr_iret_user
1612 1612 /*NOTREACHED*/
1613 1613 SET_SIZE(nopop_sys_rtt_syscall)
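	/*
	 * Both userland return paths above now leave through the KPTI
	 * trampoline tr_iret_user instead of issuing iretq directly;
	 * it is expected to restore the user %cr3 before the final
	 * iretq, while the return to supervisor below uses
	 * tr_iret_kernel and needs no page-table switch.
	 */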
1614 1614
1615 1615 /*
1616 1616 * Return to supervisor
1617 1617 * NOTE: to make the check in trap() that tests if we are executing
1618 1618 * segment register fixup/restore code work properly, sr_sup MUST be
1619 1619 * after _sys_rtt .
1620 1620 */
1621 1621 ALTENTRY(sr_sup)
1622 1622 /*
1623 1623 * Restore regs before doing iretq to kernel mode
1624 1624 */
1625 1625 INTR_POP
1626 - IRET
1626 + jmp tr_iret_kernel
1627 1627 .globl _sys_rtt_end
1628 1628 _sys_rtt_end:
1629 1629 /*NOTREACHED*/
1630 1630 SET_SIZE(sr_sup)
1631 1631 SET_SIZE(_sys_rtt_end)
1632 1632 SET_SIZE(lwp_rtt)
1633 1633 SET_SIZE(lwp_rtt_initial)
1634 1634 SET_SIZE(_sys_rtt_ints_disabled)
1635 1635 SET_SIZE(_sys_rtt)
1636 1636 SET_SIZE(sys_rtt_syscall)
1637 1637 SET_SIZE(sys_rtt_syscall32)
1638 1638
1639 1639 #elif defined(__i386)
1640 1640
1641 1641 ENTRY_NP(lwp_rtt_initial)
1642 1642 movl %gs:CPU_THREAD, %eax
1643 1643 movl T_STACK(%eax), %esp /* switch to the thread stack */
1644 1644 movl %esp, %ebp
1645 1645 call __dtrace_probe___proc_start
1646 1646 jmp _lwp_rtt
1647 1647
1648 1648 ENTRY_NP(lwp_rtt)
1649 1649 movl %gs:CPU_THREAD, %eax
1650 1650 movl T_STACK(%eax), %esp /* switch to the thread stack */
1651 1651 movl %esp, %ebp
1652 1652 _lwp_rtt:
1653 1653 call __dtrace_probe___proc_lwp__start
1654 1654
1655 1655 /*
1656 1656 * If agent lwp, clear %fs and %gs.
1657 1657 */
1658 1658 movl %gs:CPU_LWP, %eax
1659 1659 movl LWP_PROCP(%eax), %edx
1660 1660
1661 1661 cmpl %eax, P_AGENTTP(%edx)
1662 1662 jne 1f
1663 1663 movl $0, REGOFF_FS(%esp)
1664 1664 movl $0, REGOFF_GS(%esp)
1665 1665 1:
1666 1666 call dtrace_systrace_rtt
1667 1667 movl REGOFF_EDX(%esp), %edx
1668 1668 movl REGOFF_EAX(%esp), %eax
1669 1669 pushl %edx
1670 1670 pushl %eax
1671 1671 call post_syscall /* post_syscall(rval1, rval2) */
1672 1672 addl $8, %esp
1673 1673
1674 1674 /*
1675 1675 * set up to take fault on first use of fp
1676 1676 */
1677 1677 STTS(%eax)
1678 1678
1679 1679 /*
1680 1680 * XXX - may want a fast path that avoids sys_rtt_common in the
1681 1681 * most common case.
1682 1682 */
1683 1683 ALTENTRY(_sys_rtt)
1684 1684 CLI(%eax) /* disable interrupts */
1685 1685 ALTENTRY(_sys_rtt_ints_disabled)
1686 1686 pushl %esp /* pass rp to sys_rtt_common */
1687 1687 call sys_rtt_common
1688 1688 addl $4, %esp /* pop arg */
1689 1689 testl %eax, %eax /* test for return to user mode */
1690 1690 jz sr_sup
1691 1691
1692 1692 /*
1693 1693 * Return to User.
1694 1694 */
1695 1695 ALTENTRY(sys_rtt_syscall)
1696 1696 INTR_POP_USER
1697 1697
1698 1698 /*
1699 1699 * There can be no instructions between this label and IRET or
1700 1700 * we could end up breaking linux brand support. See label usage
1701 1701 * in lx_brand_int80_callback for an example.
1702 1702 */
1703 1703 ALTENTRY(nopop_sys_rtt_syscall)
1704 1704 IRET
1705 1705 /*NOTREACHED*/
1706 1706 SET_SIZE(nopop_sys_rtt_syscall)
1707 1707
1708 1708 ALTENTRY(_sys_rtt_end)
1709 1709
1710 1710 /*
1711 1711 * Return to supervisor
1712 1712 */
1713 1713 ALTENTRY(sr_sup)
1714 1714
1715 1715 /*
1716 1716 * Restore regs before doing iret to kernel mode
1717 1717 */
1718 1718 INTR_POP_KERNEL
1719 1719 IRET
1720 1720 /*NOTREACHED*/
1721 1721
1722 1722 SET_SIZE(sr_sup)
1723 1723 SET_SIZE(_sys_rtt_end)
1724 1724 SET_SIZE(lwp_rtt)
1725 1725 SET_SIZE(lwp_rtt_initial)
1726 1726 SET_SIZE(_sys_rtt_ints_disabled)
1727 1727 SET_SIZE(_sys_rtt)
1728 1728 SET_SIZE(sys_rtt_syscall)
1729 1729
1730 1730 #endif /* __i386 */
1731 1731
1732 1732 #endif /* __lint */
1733 1733
1734 1734 #if defined(__lint)
1735 1735
1736 1736 /*
1737 1737 * So why do we have to deal with all this crud in the world of ia32?
1738 1738 *
1739 1739 * Basically there are four classes of ia32 implementations, those that do not
1740 1740 * have a TSC, those that have a marginal TSC that is broken to the extent
1741 1741 * that it is useless, those that have a marginal TSC that is not quite so
1742 1742 * horribly broken and can be used with some care, and those that have a
1743 1743 * reliable TSC. This crud has to be here in order to sift through all the
1744 1744 * variants.
1745 1745 */
1746 1746
1747 1747 /*ARGSUSED*/
1748 1748 uint64_t
1749 1749 freq_tsc(uint32_t *pit_counter)
1750 1750 {
1751 1751 return (0);
1752 1752 }
1753 1753
1754 1754 #else /* __lint */
1755 1755
1756 1756 #if defined(__amd64)
1757 1757
1758 1758 /*
1759 1759 * XX64 quick and dirty port from the i386 version. Since we
1760 1760 * believe the amd64 tsc is more reliable, could this code be
1761 1761 * simpler?
1762 1762 */
1763 1763 ENTRY_NP(freq_tsc)
1764 1764 pushq %rbp
1765 1765 movq %rsp, %rbp
1766 1766 movq %rdi, %r9 /* save pit_counter */
1767 1767 pushq %rbx
1768 1768
1769 1769 / We have a TSC, but we have no way in general to know how reliable it is.
1770 1770 / Usually a marginal TSC behaves appropriately unless not enough time
1771 1771 / elapses between reads. A reliable TSC can be read as often and as rapidly
1772 1772 / as desired. The simplistic approach of reading the TSC counter and
1773 1773 / correlating to the PIT counter cannot be naively followed. Instead estimates
1774 1774 / have to be taken to successively refine a guess at the speed of the cpu
1775 1775 / and then the TSC and PIT counter are correlated. In practice very rarely
1776 1776 / is more than one quick loop required for an estimate. Measures have to be
1777 1777 / taken to prevent the PIT counter from wrapping beyond its resolution and for
1778 1778 / measuring the clock rate of very fast processors.
1779 1779 /
1780 1780 / The following constant can be tuned. It should be such that the loop does
1781 1781 / not take too many nor too few PIT counts to execute. If this value is too
1782 1782 / large, then on slow machines the loop will take a long time, or the PIT
1783 1783 / counter may even wrap. If this value is too small, then on fast machines
1784 1784 / the PIT counter may count so few ticks that the resolution of the PIT
1785 1785 / itself causes a bad guess. Because this code is used in machines with
1786 1786 / marginal TSC's and/or IO, if this value is too small on those, it may
1787 1787 / cause the calculated cpu frequency to vary slightly from boot to boot.
1788 1788 /
1789 1789 / In all cases even if this constant is set inappropriately, the algorithm
1790 1790 / will still work and the caller should be able to handle variances in the
1791 1791 / calculation of cpu frequency, but the calculation will be inefficient and
1792 1792 / take a disproportionate amount of time relative to a well selected value.
1793 1793 / As the slowest supported cpu becomes faster, this constant should be
1794 1794 / carefully increased.
1795 1795
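/ As a rough sketch of what a caller can do with the two results: this routine
/ returns the number of TSC ticks that elapsed and stores the corresponding
/ number of PIT ticks through pit_counter, so with the i8254 input clock of
/ roughly 1.193182 MHz the cpu frequency works out to approximately
/
/	cpu_hz ~= tsc_delta * 1193182 / pit_count
/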
1796 1796 movl $0x8000, %ecx
1797 1797
1798 1798 / to make sure the instruction cache has been warmed
1799 1799 clc
1800 1800
1801 1801 jmp freq_tsc_loop
1802 1802
1803 1803 / The following block of code up to and including the latching of the PIT
1804 1804 / counter after freq_tsc_perf_loop is very critical and very carefully
1805 1805 / written, it should only be modified with great care. freq_tsc_loop to
1806 1806 / freq_tsc_perf_loop fits exactly in 16 bytes as do the instructions in
1807 1807 / freq_tsc_perf_loop up to the unlatching of the PIT counter.
1808 1808
1809 1809 .align 32
1810 1810 freq_tsc_loop:
1811 1811 / save the loop count in %ebx
1812 1812 movl %ecx, %ebx
1813 1813
1814 1814 / initialize the PIT counter and start a count down
1815 1815 movb $PIT_LOADMODE, %al
1816 1816 outb $PITCTL_PORT
1817 1817 movb $0xff, %al
1818 1818 outb $PITCTR0_PORT
1819 1819 outb $PITCTR0_PORT
1820 1820
1821 1821 / read the TSC and store the TS in %edi:%esi
1822 1822 rdtsc
1823 1823 movl %eax, %esi
1824 1824
1825 1825 freq_tsc_perf_loop:
1826 1826 movl %edx, %edi
1827 1827 movl %eax, %esi
1828 1828 movl %edx, %edi
1829 1829 loop freq_tsc_perf_loop
1830 1830
1831 1831 / read the TSC and store the LSW in %ecx
1832 1832 rdtsc
1833 1833 movl %eax, %ecx
1834 1834
1835 1835 / latch the PIT counter and status
1836 1836 movb $_CONST(PIT_READBACK|PIT_READBACKC0), %al
1837 1837 outb $PITCTL_PORT
1838 1838
1839 1839 / remember if the icache has been warmed
1840 1840 setc %ah
1841 1841
1842 1842 / read the PIT status
1843 1843 inb $PITCTR0_PORT
1844 1844 shll $8, %eax
1845 1845
1846 1846 / read PIT count
1847 1847 inb $PITCTR0_PORT
1848 1848 shll $8, %eax
1849 1849 inb $PITCTR0_PORT
1850 1850 bswap %eax
1851 1851
1852 1852 / check to see if the PIT count was loaded into the CE
1853 1853 btw $_CONST(PITSTAT_NULLCNT+8), %ax
1854 1854 jc freq_tsc_increase_count
1855 1855
1856 1856 / check to see if PIT counter wrapped
1857 1857 btw $_CONST(PITSTAT_OUTPUT+8), %ax
1858 1858 jnc freq_tsc_pit_did_not_wrap
1859 1859
1860 1860 / halve count
1861 1861 shrl $1, %ebx
1862 1862 movl %ebx, %ecx
1863 1863
1864 1864 / the instruction cache has been warmed
1865 1865 stc
1866 1866
1867 1867 jmp freq_tsc_loop
1868 1868
1869 1869 freq_tsc_increase_count:
1870 1870 shll $1, %ebx
1871 1871 jc freq_tsc_too_fast
1872 1872
1873 1873 movl %ebx, %ecx
1874 1874
1875 1875 / the instruction cache has been warmed
1876 1876 stc
1877 1877
1878 1878 jmp freq_tsc_loop
1879 1879
1880 1880 freq_tsc_pit_did_not_wrap:
1881 1881 roll $16, %eax
1882 1882
1883 1883 cmpw $0x2000, %ax
1884 1884 notw %ax
1885 1885 jb freq_tsc_sufficient_duration
1886 1886
1887 1887 freq_tsc_calculate:
1888 1888 / in mode 0, the PIT loads the count into the CE on the first CLK pulse,
1889 1889 / then on the second CLK pulse the CE is decremented, therefore mode 0
1890 1890 / is really a (count + 1) counter, ugh
1891 1891 xorl %esi, %esi
1892 1892 movw %ax, %si
1893 1893 incl %esi
1894 1894
1895 1895 movl $0xf000, %eax
1896 1896 mull %ebx
1897 1897
1898 1898 / tuck away (target_pit_count * loop_count)
1899 1899 movl %edx, %ecx
1900 1900 movl %eax, %ebx
1901 1901
1902 1902 movl %esi, %eax
1903 1903 movl $0xffffffff, %edx
1904 1904 mull %edx
1905 1905
1906 1906 addl %esi, %eax
1907 1907 adcl $0, %edx
1908 1908
1909 1909 cmpl %ecx, %edx
1910 1910 ja freq_tsc_div_safe
1911 1911 jb freq_tsc_too_fast
1912 1912
1913 1913 cmpl %ebx, %eax
1914 1914 jbe freq_tsc_too_fast
1915 1915
1916 1916 freq_tsc_div_safe:
1917 1917 movl %ecx, %edx
1918 1918 movl %ebx, %eax
1919 1919
1920 1920 movl %esi, %ecx
1921 1921 divl %ecx
1922 1922
1923 1923 movl %eax, %ecx
1924 1924
1925 1925 / the instruction cache has been warmed
1926 1926 stc
1927 1927
1928 1928 jmp freq_tsc_loop
1929 1929
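The arithmetic from freq_tsc_calculate through freq_tsc_div_safe rescales the loop count so the next pass spans roughly 0xf000 PIT counts, refusing the divide when the 64-bit quotient would not fit in 32 bits. A minimal C sketch with illustrative names (pit_ticks already carries the mode-0 "+1" adjustment applied above):

#include <stdint.h>

static uint32_t
retarget_loop_count(uint32_t loop_count, uint32_t pit_ticks, int *too_fast)
{
	uint64_t product = (uint64_t)0xf000 * loop_count;

	/* Same guard as the mull/cmpl sequence: the quotient must fit in 32 bits. */
	if (product / pit_ticks > UINT32_MAX) {
		*too_fast = 1;
		return (0);
	}
	*too_fast = 0;
	return ((uint32_t)(product / pit_ticks));
}

With the icache warmed and the count rescaled, the following pass should land in the freq_tsc_sufficient_duration window.
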
1930 1930 freq_tsc_sufficient_duration:
1931 1931 / test to see if the icache has been warmed
1932 1932 btl $16, %eax
1933 1933 jnc freq_tsc_calculate
1934 1934
1935 1935 / recall mode 0 is a (count + 1) counter
1936 1936 andl $0xffff, %eax
1937 1937 incl %eax
1938 1938
1939 1939 / save the number of PIT counts
1940 1940 movl %eax, (%r9)
1941 1941
1942 1942 / calculate the number of TS's that elapsed
1943 1943 movl %ecx, %eax
1944 1944 subl %esi, %eax
1945 1945 sbbl %edi, %edx
1946 1946
1947 1947 jmp freq_tsc_end
1948 1948
1949 1949 freq_tsc_too_fast:
1950 1950 / return 0 as a 64 bit quantity
1951 1951 xorl %eax, %eax
1952 1952 xorl %edx, %edx
1953 1953
1954 1954 freq_tsc_end:
1955 1955 shlq $32, %rdx
1956 1956 orq %rdx, %rax
1957 1957
1958 1958 popq %rbx
1959 1959 leaveq
1960 1960 ret
1961 1961 SET_SIZE(freq_tsc)
1962 1962
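Taken together, the routine returns the 64-bit TSC delta across the measured interval (or 0 on the too-fast path) and stores the elapsed PIT counts through its argument. A minimal sketch of a hypothetical caller, assuming the prototype from the __lint stubs and the nominal 1193182 Hz PIT input clock; this is inferred from what the routine returns, not lifted from the kernel's actual calibration code.

#include <stdint.h>

#define	PIT_INPUT_HZ	1193182ULL	/* nominal 8254 clock; assumption */

extern uint64_t freq_tsc(uint32_t *pit_counter);

static uint64_t
estimate_cpu_hz(void)
{
	uint32_t pit_counts;
	uint64_t tsc_delta = freq_tsc(&pit_counts);

	if (tsc_delta == 0 || pit_counts == 0)
		return (0);	/* measurement failed or cpu was "too fast" */

	/* TSC ticks per PIT count, scaled by PIT counts per second. */
	return (tsc_delta * PIT_INPUT_HZ / pit_counts);
}
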
1963 1963 #elif defined(__i386)
1964 1964
1965 1965 ENTRY_NP(freq_tsc)
1966 1966 pushl %ebp
1967 1967 movl %esp, %ebp
1968 1968 pushl %edi
1969 1969 pushl %esi
1970 1970 pushl %ebx
1971 1971
1972 1972 / We have a TSC, but we have no way in general to know how reliable it is.
1973 1973 / Usually a marginal TSC behaves appropriately unless not enough time
1974 1974 / elapses between reads. A reliable TSC can be read as often and as rapidly
1975 1975 / as desired. The simplistic approach of reading the TSC counter and
1976 1976 / correlating to the PIT counter cannot be naively followed. Instead estimates
1977 1977 / have to be taken to successively refine a guess at the speed of the cpu
1978 1978 / and then the TSC and PIT counter are correlated. In practice very rarely
1979 1979 / is more than one quick loop required for an estimate. Measures have to be
1980 1980 / taken to prevent the PIT counter from wrapping beyond its resolution and for
1981 1981 / measuring the clock rate of very fast processors.
1982 1982 /
1983 1983 / The following constant can be tuned. It should be such that the loop does
1984 1984 / not take too many nor too few PIT counts to execute. If this value is too
1985 1985 / large, then on slow machines the loop will take a long time, or the PIT
1986 1986 / counter may even wrap. If this value is too small, then on fast machines
1987 1987 / the PIT counter may count so few ticks that the resolution of the PIT
1988 1988 / itself causes a bad guess. Because this code is used in machines with
1989 1989 / marginal TSC's and/or IO, if this value is too small on those, it may
1990 1990 / cause the calculated cpu frequency to vary slightly from boot to boot.
1991 1991 /
1992 1992 / In all cases even if this constant is set inappropriately, the algorithm
1993 1993 / will still work and the caller should be able to handle variances in the
1994 1994 / calculation of cpu frequency, but the calculation will be inefficient and
1995 1995 / take a disproportionate amount of time relative to a well selected value.
1996 1996 / As the slowest supported cpu becomes faster, this constant should be
1997 1997 / carefully increased.
1998 1998
1999 1999 movl $0x8000, %ecx
2000 2000
2001 2001 / to make sure the instruction cache has been warmed
2002 2002 clc
2003 2003
2004 2004 jmp freq_tsc_loop
2005 2005
2006 2006 / The following block of code up to and including the latching of the PIT
2007 2007 / counter after freq_tsc_perf_loop is very critical and very carefully
2008 2008 / written; it should only be modified with great care. freq_tsc_loop to
2009 2009 / freq_tsc_perf_loop fits exactly in 16 bytes as do the instructions in
2010 2010 / freq_tsc_perf_loop up to the unlatching of the PIT counter.
2011 2011
2012 2012 .align 32
2013 2013 freq_tsc_loop:
2014 2014 / save the loop count in %ebx
2015 2015 movl %ecx, %ebx
2016 2016
2017 2017 / initialize the PIT counter and start a count down
2018 2018 movb $PIT_LOADMODE, %al
2019 2019 outb $PITCTL_PORT
2020 2020 movb $0xff, %al
2021 2021 outb $PITCTR0_PORT
2022 2022 outb $PITCTR0_PORT
2023 2023
2024 2024 / read the TSC and store the TS in %edi:%esi
2025 2025 rdtsc
2026 2026 movl %eax, %esi
2027 2027
2028 2028 freq_tsc_perf_loop:
2029 2029 movl %edx, %edi
2030 2030 movl %eax, %esi
2031 2031 movl %edx, %edi
2032 2032 loop freq_tsc_perf_loop
2033 2033
2034 2034 / read the TSC and store the LSW in %ecx
2035 2035 rdtsc
2036 2036 movl %eax, %ecx
2037 2037
2038 2038 / latch the PIT counter and status
2039 2039 movb $_CONST(PIT_READBACK|PIT_READBACKC0), %al
2040 2040 outb $PITCTL_PORT
2041 2041
2042 2042 / remember if the icache has been warmed
2043 2043 setc %ah
2044 2044
2045 2045 / read the PIT status
2046 2046 inb $PITCTR0_PORT
2047 2047 shll $8, %eax
2048 2048
2049 2049 / read PIT count
2050 2050 inb $PITCTR0_PORT
2051 2051 shll $8, %eax
2052 2052 inb $PITCTR0_PORT
2053 2053 bswap %eax
2054 2054
2055 2055 / check to see if the PIT count was loaded into the CE
2056 2056 btw $_CONST(PITSTAT_NULLCNT+8), %ax
2057 2057 jc freq_tsc_increase_count
2058 2058
2059 2059 / check to see if PIT counter wrapped
2060 2060 btw $_CONST(PITSTAT_OUTPUT+8), %ax
2061 2061 jnc freq_tsc_pit_did_not_wrap
2062 2062
2063 2063 / halve count
2064 2064 shrl $1, %ebx
2065 2065 movl %ebx, %ecx
2066 2066
2067 2067 / the instruction cache has been warmed
2068 2068 stc
2069 2069
2070 2070 jmp freq_tsc_loop
2071 2071
2072 2072 freq_tsc_increase_count:
2073 2073 shll $1, %ebx
2074 2074 jc freq_tsc_too_fast
2075 2075
2076 2076 movl %ebx, %ecx
2077 2077
2078 2078 / the instruction cache has been warmed
2079 2079 stc
2080 2080
2081 2081 jmp freq_tsc_loop
2082 2082
2083 2083 freq_tsc_pit_did_not_wrap:
2084 2084 roll $16, %eax
2085 2085
2086 2086 cmpw $0x2000, %ax
2087 2087 notw %ax
2088 2088 jb freq_tsc_sufficient_duration
2089 2089
2090 2090 freq_tsc_calculate:
2091 2091 / in mode 0, the PIT loads the count into the CE on the first CLK pulse,
2092 2092 / then on the second CLK pulse the CE is decremented, therefore mode 0
2093 2093 / is really a (count + 1) counter, ugh
2094 2094 xorl %esi, %esi
2095 2095 movw %ax, %si
2096 2096 incl %esi
2097 2097
2098 2098 movl $0xf000, %eax
2099 2099 mull %ebx
2100 2100
2101 2101 / tuck away (target_pit_count * loop_count)
2102 2102 movl %edx, %ecx
2103 2103 movl %eax, %ebx
2104 2104
2105 2105 movl %esi, %eax
2106 2106 movl $0xffffffff, %edx
2107 2107 mull %edx
2108 2108
2109 2109 addl %esi, %eax
2110 2110 adcl $0, %edx
2111 2111
2112 2112 cmpl %ecx, %edx
2113 2113 ja freq_tsc_div_safe
2114 2114 jb freq_tsc_too_fast
2115 2115
2116 2116 cmpl %ebx, %eax
2117 2117 jbe freq_tsc_too_fast
2118 2118
2119 2119 freq_tsc_div_safe:
2120 2120 movl %ecx, %edx
2121 2121 movl %ebx, %eax
2122 2122
2123 2123 movl %esi, %ecx
2124 2124 divl %ecx
2125 2125
2126 2126 movl %eax, %ecx
2127 2127
2128 2128 / the instruction cache has been warmed
2129 2129 stc
2130 2130
2131 2131 jmp freq_tsc_loop
2132 2132
2133 2133 freq_tsc_sufficient_duration:
2134 2134 / test to see if the icache has been warmed
2135 2135 btl $16, %eax
2136 2136 jnc freq_tsc_calculate
2137 2137
2138 2138 / recall mode 0 is a (count + 1) counter
2139 2139 andl $0xffff, %eax
2140 2140 incl %eax
2141 2141
2142 2142 / save the number of PIT counts
2143 2143 movl 8(%ebp), %ebx
2144 2144 movl %eax, (%ebx)
2145 2145
2146 2146 / calculate the number of TS's that elapsed
2147 2147 movl %ecx, %eax
2148 2148 subl %esi, %eax
2149 2149 sbbl %edi, %edx
2150 2150
2151 2151 jmp freq_tsc_end
2152 2152
2153 2153 freq_tsc_too_fast:
2154 2154 / return 0 as a 64 bit quantity
2155 2155 xorl %eax, %eax
2156 2156 xorl %edx, %edx
2157 2157
2158 2158 freq_tsc_end:
2159 2159 popl %ebx
2160 2160 popl %esi
2161 2161 popl %edi
2162 2162 popl %ebp
2163 2163 ret
2164 2164 SET_SIZE(freq_tsc)
2165 2165
2166 2166 #endif /* __i386 */
2167 2167 #endif /* __lint */
2168 2168
2169 2169 #if !defined(__amd64)
2170 2170 #if defined(__lint)
2171 2171
2172 2172 /*
2173 2173 * We do not have a TSC so we use a block of instructions with well known
2174 2174 * timings.
2175 2175 */
2176 2176
2177 2177 /*ARGSUSED*/
2178 2178 uint64_t
2179 2179 freq_notsc(uint32_t *pit_counter)
2180 2180 {
2181 2181 return (0);
2182 2182 }
2183 2183
2184 2184 #else /* __lint */
2185 2185 ENTRY_NP(freq_notsc)
2186 2186 pushl %ebp
2187 2187 movl %esp, %ebp
2188 2188 pushl %edi
2189 2189 pushl %esi
2190 2190 pushl %ebx
2191 2191
2192 2192 / initial count for the idivl loop
2193 2193 movl $0x1000, %ecx
2194 2194
2195 2195 / load the divisor
2196 2196 movl $1, %ebx
2197 2197
2198 2198 jmp freq_notsc_loop
2199 2199
2200 2200 .align 16
2201 2201 freq_notsc_loop:
2202 2202 / set high 32 bits of dividend to zero
2203 2203 xorl %edx, %edx
2204 2204
2205 2205 / save the loop count in %edi
2206 2206 movl %ecx, %edi
2207 2207
2208 2208 / initialize the PIT counter and start a count down
2209 2209 movb $PIT_LOADMODE, %al
2210 2210 outb $PITCTL_PORT
2211 2211 movb $0xff, %al
2212 2212 outb $PITCTR0_PORT
2213 2213 outb $PITCTR0_PORT
2214 2214
2215 2215 / set low 32 bits of dividend to zero
2216 2216 xorl %eax, %eax
2217 2217
2218 2218 / It is vital that the arguments to idivl be set appropriately because on some
2219 2219 / cpu's this instruction takes more or less clock ticks depending on its
2220 2220 / arguments.
2221 2221 freq_notsc_perf_loop:
2222 2222 idivl %ebx
2223 2223 idivl %ebx
2224 2224 idivl %ebx
2225 2225 idivl %ebx
2226 2226 idivl %ebx
2227 2227 loop freq_notsc_perf_loop
2228 2228
2229 2229 / latch the PIT counter and status
2230 2230 movb $_CONST(PIT_READBACK|PIT_READBACKC0), %al
2231 2231 outb $PITCTL_PORT
2232 2232
2233 2233 / read the PIT status
2234 2234 inb $PITCTR0_PORT
2235 2235 shll $8, %eax
2236 2236
2237 2237 / read PIT count
2238 2238 inb $PITCTR0_PORT
2239 2239 shll $8, %eax
2240 2240 inb $PITCTR0_PORT
2241 2241 bswap %eax
2242 2242
2243 2243 / check to see if the PIT count was loaded into the CE
2244 2244 btw $_CONST(PITSTAT_NULLCNT+8), %ax
2245 2245 jc freq_notsc_increase_count
2246 2246
2247 2247 / check to see if PIT counter wrapped
2248 2248 btw $_CONST(PITSTAT_OUTPUT+8), %ax
2249 2249 jnc freq_notsc_pit_did_not_wrap
2250 2250
2251 2251 / halve count
2252 2252 shrl $1, %edi
2253 2253 movl %edi, %ecx
2254 2254
2255 2255 jmp freq_notsc_loop
2256 2256
2257 2257 freq_notsc_increase_count:
2258 2258 shll $1, %edi
2259 2259 jc freq_notsc_too_fast
2260 2260
2261 2261 movl %edi, %ecx
2262 2262
2263 2263 jmp freq_notsc_loop
2264 2264
2265 2265 freq_notsc_pit_did_not_wrap:
2266 2266 shrl $16, %eax
2267 2267
2268 2268 cmpw $0x2000, %ax
2269 2269 notw %ax
2270 2270 jb freq_notsc_sufficient_duration
2271 2271
2272 2272 freq_notsc_calculate:
2273 2273 / in mode 0, the PIT loads the count into the CE on the first CLK pulse,
2274 2274 / then on the second CLK pulse the CE is decremented, therefore mode 0
2275 2275 / is really a (count + 1) counter, ugh
2276 2276 xorl %esi, %esi
2277 2277 movw %ax, %si
2278 2278 incl %esi
2279 2279
2280 2280 movl %edi, %eax
2281 2281 movl $0xf000, %ecx
2282 2282 mull %ecx
2283 2283
2284 2284 / tuck away (target_pit_count * loop_count)
2285 2285 movl %edx, %edi
2286 2286 movl %eax, %ecx
2287 2287
2288 2288 movl %esi, %eax
2289 2289 movl $0xffffffff, %edx
2290 2290 mull %edx
2291 2291
2292 2292 addl %esi, %eax
2293 2293 adcl $0, %edx
2294 2294
2295 2295 cmpl %edi, %edx
2296 2296 ja freq_notsc_div_safe
2297 2297 jb freq_notsc_too_fast
2298 2298
2299 2299 cmpl %ecx, %eax
2300 2300 jbe freq_notsc_too_fast
2301 2301
2302 2302 freq_notsc_div_safe:
2303 2303 movl %edi, %edx
2304 2304 movl %ecx, %eax
2305 2305
2306 2306 movl %esi, %ecx
2307 2307 divl %ecx
2308 2308
2309 2309 movl %eax, %ecx
2310 2310
2311 2311 jmp freq_notsc_loop
2312 2312
2313 2313 freq_notsc_sufficient_duration:
2314 2314 / recall mode 0 is a (count + 1) counter
2315 2315 incl %eax
2316 2316
2317 2317 / save the number of PIT counts
2318 2318 movl 8(%ebp), %ebx
2319 2319 movl %eax, (%ebx)
2320 2320
2321 2321 / calculate the number of cpu clock ticks that elapsed
2322 2322 cmpl $X86_VENDOR_Cyrix, x86_vendor
2323 2323 jz freq_notsc_notcyrix
2324 2324
2325 2325 / freq_notsc_perf_loop takes 86 clock cycles on Cyrix 6x86 cores
2326 2326 movl $86, %eax
2327 2327 jmp freq_notsc_calculate_tsc
2328 2328
2329 2329 freq_notsc_notcyrix:
2330 2330 / freq_notsc_perf_loop takes 237 clock cycles on Intel Pentiums
2331 2331 movl $237, %eax
2332 2332
2333 2333 freq_notsc_calculate_tsc:
2334 2334 mull %edi
2335 2335
2336 2336 jmp freq_notsc_end
2337 2337
2338 2338 freq_notsc_too_fast:
2339 2339 / return 0 as a 64 bit quantity
2340 2340 xorl %eax, %eax
2341 2341 xorl %edx, %edx
2342 2342
2343 2343 freq_notsc_end:
2344 2344 popl %ebx
2345 2345 popl %esi
2346 2346 popl %edi
2347 2347 popl %ebp
2348 2348
2349 2349 ret
2350 2350 SET_SIZE(freq_notsc)
2351 2351
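freq_notsc follows the same shape but, lacking a TSC, reports elapsed CPU clocks computed from the loop count and the fixed per-pass cycle cost noted above (86 or 237). A parallel sketch of a hypothetical caller, using the freq_notsc prototype from the __lint stub and the same nominal PIT clock assumption:

#include <stdint.h>

#define	PIT_INPUT_HZ	1193182ULL	/* nominal 8254 clock; assumption */

extern uint64_t freq_notsc(uint32_t *pit_counter);

static uint64_t
estimate_cpu_hz_notsc(void)
{
	uint32_t pit_counts;
	uint64_t cpu_clocks = freq_notsc(&pit_counts);

	if (cpu_clocks == 0 || pit_counts == 0)
		return (0);
	return (cpu_clocks * PIT_INPUT_HZ / pit_counts);
}
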
2352 2352 #endif /* __lint */
2353 2353 #endif /* !__amd64 */
2354 2354
2355 2355 #if !defined(__lint)
2356 2356 .data
2357 2357 #if !defined(__amd64)
2358 2358 .align 4
2359 2359 cpu_vendor:
2360 2360 .long 0, 0, 0 /* Vendor ID string returned */
2361 2361
2362 2362 .globl CyrixInstead
2363 2363
2364 2364 .globl x86_featureset
2365 2365 .globl x86_type
2366 2366 .globl x86_vendor
2367 2367 #endif
2368 2368
2369 2369 #endif /* __lint */