/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
 */
/*
 * Copyright (c) 2018 Joyent, Inc.
 */

/* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */
/* Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T   */
/*        All Rights Reserved                              */

/* Copyright (c) 1987, 1988 Microsoft Corporation          */
/*        All Rights Reserved                              */


#include <sys/asm_linkage.h>
#include <sys/asm_misc.h>
#include <sys/regset.h>
#include <sys/privregs.h>
#include <sys/psw.h>
#include <sys/reboot.h>
#include <sys/x86_archext.h>
#include <sys/machparam.h>

#if defined(__lint)

#include <sys/types.h>
#include <sys/thread.h>
#include <sys/systm.h>
#include <sys/lgrp.h>
#include <sys/regset.h>
#include <sys/link.h>
#include <sys/bootconf.h>
#include <sys/bootsvcs.h>

#else   /* __lint */

#include <sys/segments.h>
#include <sys/pcb.h>
#include <sys/trap.h>
#include <sys/ftrace.h>
#include <sys/traptrace.h>
#include <sys/clock.h>
#include <sys/cmn_err.h>
#include <sys/pit.h>
#include <sys/panic.h>

#if defined(__xpv)
#include <sys/hypervisor.h>
#endif

#include "assym.h"

/*
 * Our assumptions:
 *      - We are running in protected-paged mode.
 *      - Interrupts are disabled.
 *      - The GDT and IDT are the caller's; we need our own copies.
 *      - The kernel's text, initialized data and bss are mapped.
 *
 * Our actions:
 *      - Save arguments
 *      - Initialize our stack pointer to the thread 0 stack (t0stack)
 *        and leave room for a phony "struct regs".
 *      - Our GDT and IDT need to get munged.
 *      - Since we are using the boot's GDT descriptors, we need
 *        to copy them into our GDT before we switch to ours.
 *      - We start using our GDT by loading correct values in the
 *        selector registers (cs=KCS_SEL, ds=es=ss=KDS_SEL, fs=KFS_SEL,
 *        gs=KGS_SEL).
 *      - The default LDT entry for syscall is set.
 *      - We load the default LDT into the hardware LDT register.
 *      - We load the default TSS into the hardware task register.
 *      - Check for cpu type, i.e. 486 vs. P5 vs. P6 etc.
 *      - mlsetup(%esp) gets called.
 *      - We change our appearance to look like the real thread 0.
 *        (NOTE: making ourselves a real thread may be a noop)
 *      - main() gets called.  (NOTE: main() never returns).
 *
 * NOW, the real code!
 */
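/*
 * Illustrative sketch (not part of the original source): the stack setup
 * described above amounts to carving a "struct regs" out of the top of the
 * statically allocated thread 0 stack and starting the stack just below it.
 * In rough C, with REGSIZE being sizeof (struct regs):
 *
 *      extern char t0stack[];          // DEFAULTSTKSZ bytes, in .data
 *
 *      struct regs *rp =
 *          (struct regs *)&t0stack[DEFAULTSTKSZ - REGSIZE];
 *      void *initial_sp = rp;          // %esp / %rsp starts here
 */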
/*
 * The very first thing in the kernel's text segment must be a jump
 * to the os/fakebop.c startup code.
 */
        .text
        jmp     _start

/*
 * Globals:
 */
        .globl  _locore_start
        .globl  mlsetup
        .globl  main
        .globl  panic
        .globl  t0stack
        .globl  t0
        .globl  sysp
        .globl  edata

/*
 * call back into boot - sysp (bootsvcs.h) and bootops (bootconf.h)
 */
        .globl  bootops
        .globl  bootopsp

/*
 * NOTE: t0stack should be the first thing in the data section so that
 * if it ever overflows, it will fault on the last kernel text page.
 */
        .data
        .comm   t0stack, DEFAULTSTKSZ, 32
        .comm   t0, 4094, 32

#endif  /* __lint */


#if defined(__amd64)

#if defined(__lint)

/* ARGSUSED */
void
_locore_start(struct boot_syscalls *sysp, ulong_t rsi, struct bootops *bop)
{}

#else   /* __lint */

/*
 * kobj_init() vectors us back to here with (note) a slightly different
 * set of arguments than _start is given (see lint prototypes above).
 *
 * XXX  Make this less vile, please.
 */
        ENTRY_NP(_locore_start)

        /*
         * %rdi = boot services (should die someday)
         * %rdx = bootops
         */

        leaq    edata(%rip), %rbp       /* reference edata for ksyms */
        movq    $0, (%rbp)              /* limit stack back trace */

        /*
         * Initialize our stack pointer to the thread 0 stack (t0stack)
         * and leave room for a "struct regs" for lwp0.  Note that the
         * stack doesn't actually align to a 16-byte boundary until just
         * before we call mlsetup because we want to use %rsp to point at
         * our regs structure.
         */
        leaq    t0stack(%rip), %rsp
        addq    $_CONST(DEFAULTSTKSZ - REGSIZE), %rsp
#if (REGSIZE & 15) == 0
        subq    $8, %rsp
#endif
        /*
         * Save call back for special x86 boot services vector
         */
        movq    %rdi, sysp(%rip)

        movq    %rdx, bootops(%rip)     /* save bootops */
        movq    $bootops, bootopsp(%rip)

        /*
         * Save arguments and flags, if only for debugging ..
         */
        movq    %rdi, REGOFF_RDI(%rsp)
        movq    %rsi, REGOFF_RSI(%rsp)
        movq    %rdx, REGOFF_RDX(%rsp)
        movq    %rcx, REGOFF_RCX(%rsp)
        movq    %r8, REGOFF_R8(%rsp)
        movq    %r9, REGOFF_R9(%rsp)
        pushf
        popq    %r11
        movq    %r11, REGOFF_RFL(%rsp)

#if !defined(__xpv)
        /*
         * Enable write protect and alignment check faults.
         */
        movq    %cr0, %rax
        orq     $_CONST(CR0_WP|CR0_AM), %rax
        andq    $_BITNOT(CR0_WT|CR0_CE), %rax
        movq    %rax, %cr0
#endif  /* __xpv */

        /*
         * (We just assert this works by virtue of being here)
         */
        bts     $X86FSET_CPUID, x86_featureset(%rip)

        /*
         * mlsetup() gets called with a struct regs as argument, while
         * main takes no args and should never return.
         */
        xorl    %ebp, %ebp
        movq    %rsp, %rdi
        pushq   %rbp
        /* (stack pointer now aligned on 16-byte boundary right here) */
        movq    %rsp, %rbp
        call    mlsetup
        call    main
        /* NOTREACHED */
        leaq    __return_from_main(%rip), %rdi
        xorl    %eax, %eax
        call    panic
        SET_SIZE(_locore_start)

#endif  /* __lint */
#endif  /* __amd64 */
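/*
 * Illustrative sketch (not part of the original source): the %cr0
 * read-modify-write in _locore_start above is roughly the following C,
 * assuming hypothetical getcr0()/setcr0() accessors:
 *
 *      ulong_t cr0 = getcr0();
 *      cr0 |= (CR0_WP | CR0_AM);       // fault on kernel writes to
 *                                      // read-only pages; allow AC
 *      cr0 &= ~(CR0_WT | CR0_CE);      // leave caching enabled
 *      setcr0(cr0);
 */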
#if !defined(__lint)

__return_from_main:
        .string "main() returned"
__unsupported_cpu:
        .string "486 style cpu detected - no longer supported!"

#endif  /* !__lint */

#if !defined(__amd64)

#if defined(__lint)

/* ARGSUSED */
void
_locore_start(struct boot_syscalls *sysp, struct bootops *bop)
{}

#else   /* __lint */

/*
 * kobj_init() vectors us back to here with (note) a slightly different
 * set of arguments than _start is given (see lint prototypes above).
 *
 * XXX  Make this less vile, please.
 */
        ENTRY_NP(_locore_start)

        /*
         * %ecx = boot services (should die someday)
         * %ebx = bootops
         */
        mov     $edata, %ebp            / edata needs to be defined for ksyms
        movl    $0, (%ebp)              / limit stack back trace

        /*
         * Initialize our stack pointer to the thread 0 stack (t0stack)
         * and leave room for a phony "struct regs".
         */
        movl    $t0stack + DEFAULTSTKSZ - REGSIZE, %esp

        /*
         * Save call back for special x86 boot services vector
         */
        mov     %ecx, sysp              / save call back for boot services

        mov     %ebx, bootops           / save bootops
        movl    $bootops, bootopsp


        /*
         * Save all registers and flags
         */
        pushal
        pushfl

#if !defined(__xpv)
        /*
         * Override bios settings and enable write protect and
         * alignment check faults.
         */
        movl    %cr0, %eax

        /*
         * enable WP for detecting faults, and enable alignment checking.
         */
        orl     $_CONST(CR0_WP|CR0_AM), %eax
        andl    $_BITNOT(CR0_WT|CR0_CE), %eax
        movl    %eax, %cr0              / set the cr0 register correctly and
                                        / override the BIOS setup

        /*
         * If bit 21 of eflags can be flipped, then cpuid is present
         * and enabled.
         */
        pushfl
        popl    %ecx
        movl    %ecx, %eax
        xorl    $PS_ID, %eax            / try complemented bit
        pushl   %eax
        popfl
        pushfl
        popl    %eax
        cmpl    %eax, %ecx
        jne     have_cpuid

        /*
         * cpuid may be disabled on Cyrix; try to detect Cyrix with the 5/2
         * test.  div does not modify the cc flags on Cyrix.  Even though this
         * may also be true for other vendors, it is generally true only for
         * newer models from those vendors that support and do not disable
         * cpuid (usually because cpuid cannot be disabled).
         */

        /*
         * clear cc flags
         */
        xorb    %ah, %ah
        sahf

        /*
         * perform 5/2 test
         */
        movw    $5, %ax
        movb    $2, %bl
        divb    %bl

        lahf
        cmpb    $2, %ah
        jne     cpu_486

        /*
         * div did not modify the cc flags; chances are the vendor is Cyrix.
         * assume the vendor is Cyrix and use the CCR's to enable cpuid
         */
        .set    CYRIX_CRI, 0x22         / CR Index Register
        .set    CYRIX_CRD, 0x23         / CR Data Register

        .set    CYRIX_CCR3, 0xc3        / Config Control Reg 3
        .set    CYRIX_CCR4, 0xe8        / Config Control Reg 4
        .set    CYRIX_DIR0, 0xfe        / Device Identification Reg 0
        .set    CYRIX_DIR1, 0xff        / Device Identification Reg 1
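        /*
         * Illustrative sketch (not part of the original source): CYRIX_CRI
         * and CYRIX_CRD form a classic index/data port pair.  Every CCR/DIR
         * access below follows this pattern, shown here as hypothetical C
         * helpers assuming outb(port, val)/inb(port) accessors:
         *
         *      static uint8_t
         *      cyrix_read(uint8_t reg)
         *      {
         *              outb(CYRIX_CRI, reg);    // select the register ...
         *              return (inb(CYRIX_CRD)); // ... then read its contents
         *      }
         *
         *      static void
         *      cyrix_write(uint8_t reg, uint8_t val)
         *      {
         *              outb(CYRIX_CRI, reg);
         *              outb(CYRIX_CRD, val);
         *      }
         */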
        /*
         * even if the cpu vendor is Cyrix and the motherboard/chipset
         * vendor decided to ignore lines A1-A4 for I/O addresses, I/O port
         * 0x21 corresponds with 0x23 and since 0x22 is still untouched,
         * the reads and writes of 0x21 are guaranteed to be off-chip of
         * the cpu
         */

        /*
         * enable read of ISR at I/O port 0x20
         */
        movb    $0xb, %al
        outb    $MCMD_PORT

        /*
         * read IMR and store in %bl
         */
        inb     $MIMR_PORT
        movb    %al, %bl

        /*
         * mask out all interrupts so that ISR will not change
         */
        movb    $0xff, %al
        outb    $MIMR_PORT

        /*
         * reads of I/O port 0x22 on Cyrix are always directed off-chip;
         * make use of I/O pull-up to test for an unknown device on 0x22
         */
        inb     $CYRIX_CRI
        cmpb    $0xff, %al
        je      port_22_free

        /*
         * motherboard/chipset vendor may be ignoring line A1 of I/O address
         */
        movb    %al, %cl

        /*
         * if the ISR and the value read from 0x22 do not match then we have
         * detected some unknown device, probably a chipset, at 0x22
         */
        inb     $MCMD_PORT
        cmpb    %al, %cl
        jne     restore_IMR

port_22_free:
        /*
         * now test to see if some unknown device is using I/O port 0x23
         *
         * read the external I/O port at 0x23
         */
        inb     $CYRIX_CRD

        /*
         * Test for pull-up at 0x23 or if I/O address line A1 is being ignored.
         * IMR is 0xff so both tests are performed simultaneously.
         */
        cmpb    $0xff, %al
        jne     restore_IMR

        /*
         * We are a Cyrix part. In case we are some model of Cx486 or a Cx586,
         * record the type and fix it later if not.
         */
        movl    $X86_VENDOR_Cyrix, x86_vendor
        movl    $X86_TYPE_CYRIX_486, x86_type

        /*
         * Try to read CCR3. All Cyrix cpu's which support cpuid have CCR3.
         *
         * load CCR3 index into CCR index register
         */
        movb    $CYRIX_CCR3, %al
        outb    $CYRIX_CRI

        /*
         * If we are not a Cyrix cpu, then we have performed an external I/O
         * cycle. If the CCR index was not valid for this Cyrix model, we may
         * have performed an external I/O cycle as well. In these cases and
         * if the motherboard/chipset vendor ignores I/O address line A1,
         * then the PIC will have IRQ3 set at the lowest priority as a side
         * effect of the above outb. We are reasonably confident that there
         * is not an unknown device on I/O port 0x22, so there should have been
         * no unpredictable side-effect of the above outb.
         */

        /*
         * read CCR3
         */
        inb     $CYRIX_CRD

        /*
         * If we are not a Cyrix cpu the inb above produced an external I/O
         * cycle. If we are a Cyrix model that does not support CCR3 we
         * produced an external I/O cycle. In all known Cyrix models 6x86 and
         * above, bit 3 of CCR3 is reserved and cannot be set to 1. In all
         * Cyrix models prior to the 6x86 that supported CCR3, bits 4-7 are
         * reserved as well. It is highly unlikely that CCR3 contains the value
         * 0xff. We test to see if I/O port 0x23 is pull-up or the IMR and
         * deduce we are not a Cyrix with support for cpuid if so.
         */
        cmpb    $0xff, %al
        je      restore_PIC

        /*
         * There exist 486 ISA Cyrix chips that support CCR3 but do not support
         * DIR0 and DIR1. If we try to read DIR0, we may generate external I/O
         * cycles, the exact behavior is model specific and undocumented.
         * Unfortunately these external I/O cycles may confuse some PIC's beyond
         * recovery. Fortunately we can use the following undocumented trick:
         * if bit 4 of CCR3 can be toggled, then DIR0 and DIR1 are supported.
         * Pleasantly MAPEN contains bit 4 of CCR3, so this trick is guaranteed
         * to work on all Cyrix cpu's which support cpuid.
         */
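        /*
         * Illustrative sketch (not part of the original source): the bit 4
         * toggle test that follows, in rough C, reusing the hypothetical
         * cyrix_read()/cyrix_write() helpers sketched earlier:
         *
         *      uint8_t ccr3 = cyrix_read(CYRIX_CCR3);
         *      cyrix_write(CYRIX_CCR3, ccr3 ^ 0x10);   // try to flip MAPEN
         *      uint8_t seen = cyrix_read(CYRIX_CCR3);
         *      cyrix_write(CYRIX_CCR3, ccr3);          // restore original
         *
         *      if (((seen ^ ccr3) & 0x10) == 0)
         *              goto restore_PIC;  // bit stuck: no DIR0/DIR1, no cpuid
         */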
        movb    %al, %dl
        xorb    $0x10, %dl
        movb    %al, %cl

        /*
         * write back CCR3 with bit 4 toggled
         */
        movb    $CYRIX_CCR3, %al
        outb    $CYRIX_CRI

        movb    %dl, %al
        outb    $CYRIX_CRD

        /*
         * read CCR3
         */
        movb    $CYRIX_CCR3, %al
        outb    $CYRIX_CRI
        inb     $CYRIX_CRD
        movb    %al, %dl

        /*
         * restore CCR3
         */
        movb    $CYRIX_CCR3, %al
        outb    $CYRIX_CRI

        movb    %cl, %al
        outb    $CYRIX_CRD

        /*
         * if bit 4 was not toggled DIR0 and DIR1 are not supported in which
         * case we do not have cpuid anyway
         */
        andb    $0x10, %al
        andb    $0x10, %dl
        cmpb    %al, %dl
        je      restore_PIC

        /*
         * read DIR0
         */
        movb    $CYRIX_DIR0, %al
        outb    $CYRIX_CRI
        inb     $CYRIX_CRD

        /*
         * test for pull-up
         */
        cmpb    $0xff, %al
        je      restore_PIC

        /*
         * Values of 0x20-0x27 in DIR0 are currently reserved by Cyrix for
         * future use. If Cyrix ever produces a cpu that supports cpuid with
         * these ids, the following test will have to change. For now we remain
         * pessimistic since the formats of the CCR's may be different then.
         *
         * test for at least a 6x86, to see if we support both MAPEN and CPUID
         */
        cmpb    $0x30, %al
        jb      restore_IMR

        /*
         * enable MAPEN
         */
        movb    $CYRIX_CCR3, %al
        outb    $CYRIX_CRI

        andb    $0xf, %cl
        movb    %cl, %al
        orb     $0x10, %al
        outb    $CYRIX_CRD

        /*
         * select CCR4
         */
        movb    $CYRIX_CCR4, %al
        outb    $CYRIX_CRI

        /*
         * read CCR4
         */
        inb     $CYRIX_CRD

        /*
         * enable cpuid
         */
        orb     $0x80, %al
        movb    %al, %dl

        /*
         * select CCR4
         */
        movb    $CYRIX_CCR4, %al
        outb    $CYRIX_CRI

        /*
         * write CCR4
         */
        movb    %dl, %al
        outb    $CYRIX_CRD

        /*
         * select CCR3
         */
        movb    $CYRIX_CCR3, %al
        outb    $CYRIX_CRI

        /*
         * disable MAPEN and write CCR3
         */
        movb    %cl, %al
        outb    $CYRIX_CRD

        /*
         * restore IMR
         */
        movb    %bl, %al
        outb    $MIMR_PORT

        /*
         * test to see if cpuid available
         */
        pushfl
        popl    %ecx
        movl    %ecx, %eax
        xorl    $PS_ID, %eax            / try complemented bit
        pushl   %eax
        popfl
        pushfl
        popl    %eax
        cmpl    %eax, %ecx
        jne     have_cpuid
        jmp     cpu_486
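        /*
         * Illustrative sketch (not part of the original source): the cpuid
         * enable sequence above, in rough C (hypothetical helpers as before).
         * MAPEN (bit 4 of CCR3) must be set for CCR4 to be accessible:
         *
         *      cyrix_write(CYRIX_CCR3, (ccr3 & 0xf) | 0x10);   // MAPEN on
         *      cyrix_write(CYRIX_CCR4,
         *          cyrix_read(CYRIX_CCR4) | 0x80);             // CPUID bit
         *      cyrix_write(CYRIX_CCR3, ccr3 & 0xf);            // MAPEN off
         */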
restore_PIC:
        /*
         * In case the motherboard/chipset vendor is ignoring line A1 of the
         * I/O address, we set the PIC priorities to sane values.
         */
        movb    $0xc7, %al              / irq 7 lowest priority
        outb    $MCMD_PORT

restore_IMR:
        movb    %bl, %al
        outb    $MIMR_PORT
        jmp     cpu_486

have_cpuid:
        /*
         * cpuid instruction present
         */
        bts     $X86FSET_CPUID, x86_featureset  / Just to set; Ignore the CF
        movl    $0, %eax
        cpuid

        movl    %ebx, cpu_vendor
        movl    %edx, cpu_vendor+4
        movl    %ecx, cpu_vendor+8

        /*
         * early cyrix cpus are somewhat strange and need to be
         * probed in curious ways to determine their identity
         */
        leal    cpu_vendor, %esi
        leal    CyrixInstead, %edi
        movl    $12, %ecx
        repz
        cmpsb
        je      vendor_is_cyrix

        / let mlsetup()/cpuid_pass1() handle everything else in C

        jmp     cpu_done
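        /*
         * Illustrative sketch (not part of the original source): the probe
         * above executes cpuid leaf 0, which returns the 12-byte vendor
         * string in %ebx:%edx:%ecx.  Rough C, assuming a hypothetical
         * cpuid() helper:
         *
         *      uint32_t eax, ebx, ecx, edx;
         *      char vendor[13];
         *
         *      cpuid(0, &eax, &ebx, &ecx, &edx);
         *      memcpy(&vendor[0], &ebx, 4);    // "Cyri"
         *      memcpy(&vendor[4], &edx, 4);    // "xIns"
         *      memcpy(&vendor[8], &ecx, 4);    // "tead"
         *      vendor[12] = '\0';
         *
         *      if (strcmp(vendor, "CyrixInstead") == 0)
         *              goto vendor_is_cyrix;   // probe DIR0/DIR1 further
         */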
is486:
        /*
         * test to see if a useful cpuid
         */
        testl   %eax, %eax
        jz      isa486

        movl    $1, %eax
        cpuid

        movl    %eax, %ebx
        andl    $0xF00, %ebx
        cmpl    $0x400, %ebx
        je      isa486

        rep;    ret     /* use 2 byte return instruction */
                        /* AMD Software Optimization Guide - Section 6.2 */
isa486:
        /*
         * lose the return address
         */
        popl    %eax
        jmp     cpu_486

vendor_is_cyrix:
        call    is486

        /*
         * Processor signature and feature flags for Cyrix are insane.
         * BIOS can play with semi-documented registers, so cpuid must be used
         * cautiously. Since we are Cyrix that has cpuid, we have DIR0 and DIR1
         * Keep the family in %ebx and feature flags in %edx until no longer
         * needed
         */

        /*
         * read DIR0
         */
        movb    $CYRIX_DIR0, %al
        outb    $CYRIX_CRI
        inb     $CYRIX_CRD

        /*
         * First we handle the cases where we are a 6x86 or 6x86L.
         * The 6x86 is basically a 486, the only reliable bit in the
         * feature flags is for FPU. The 6x86L is better, unfortunately
         * there is no really good way to distinguish between these two
         * cpu's. We are pessimistic and when in doubt assume 6x86.
         */
        cmpb    $0x40, %al
        jae     maybeGX

        /*
         * We are an M1, either a 6x86 or 6x86L.
         */
        cmpb    $0x30, %al
        je      maybe6x86L
        cmpb    $0x31, %al
        je      maybe6x86L
        cmpb    $0x34, %al
        je      maybe6x86L
        cmpb    $0x35, %al
        je      maybe6x86L

        /*
         * although it is possible that we are a 6x86L, the cpu and
         * documentation are so buggy, we just do not care.
         */
        jmp     likely6x86

maybe6x86L:
        /*
         * read DIR1
         */
        movb    $CYRIX_DIR1, %al
        outb    $CYRIX_CRI
        inb     $CYRIX_CRD
        cmpb    $0x22, %al
        jb      likely6x86

        /*
         * We are a 6x86L, or at least a 6x86 with honest cpuid feature flags
         */
        movl    $X86_TYPE_CYRIX_6x86L, x86_type
        jmp     coma_bug

likely6x86:
        /*
         * We are likely a 6x86, or a 6x86L without a way of knowing
         *
         * The 6x86 has NO Pentium or Pentium Pro compatible features even
         * though it claims to be a Pentium Pro compatible!
         *
         * The 6x86 core used in the 6x86 may have most of the Pentium system
         * registers and largely conform to the Pentium System Programming
         * Reference. Documentation on these parts is long gone. Treat it as
         * a crippled Pentium and hope for the best.
         */
        movl    $X86_TYPE_CYRIX_6x86, x86_type
        jmp     coma_bug

maybeGX:
        /*
         * Now we check whether we are a MediaGX or GXm. We have particular
         * reason for concern here. Even though most of the GXm's
         * report having TSC in the cpuid feature flags, the TSC may be
         * horribly broken. What is worse is that MediaGX's are basically
         * 486's while the good GXm's are more like Pentium Pro's!
         */
        cmpb    $0x50, %al
        jae     maybeM2

        /*
         * We are either a MediaGX (sometimes called a Gx86) or GXm
         */
        cmpb    $0x41, %al
        je      maybeMediaGX

        cmpb    $0x44, %al
        jb      maybeGXm

        cmpb    $0x47, %al
        jbe     maybeMediaGX

        /*
         * We do not honestly know what we are, so assume a MediaGX
         */
        jmp     media_gx

maybeGXm:
        /*
         * It is still possible we are either a MediaGX or GXm; trust cpuid.
         * family should be 5 on a GXm
         */
        cmpl    $0x500, %ebx
        je      GXm

        /*
         * BIOS/Cyrix might set family to 6 on a GXm
         */
        cmpl    $0x600, %ebx
        jne     media_gx

GXm:
        movl    $X86_TYPE_CYRIX_GXm, x86_type
        jmp     cpu_done

maybeMediaGX:
        /*
         * read DIR1
         */
        movb    $CYRIX_DIR1, %al
        outb    $CYRIX_CRI
        inb     $CYRIX_CRD

        cmpb    $0x30, %al
        jae     maybeGXm

        /*
         * we are a MediaGX for which we do not trust cpuid
         */
media_gx:
        movl    $X86_TYPE_CYRIX_MediaGX, x86_type
        jmp     cpu_486

maybeM2:
        /*
         * Now we check whether we are a 6x86MX or MII. These cpu's are
         * virtually identical, but we care because for the 6x86MX, we
         * must work around the coma bug. Also for 6x86MX prior to revision
         * 1.4, the TSC may have serious bugs.
         */
        cmpb    $0x60, %al
        jae     maybeM3

        /*
         * family should be 6, but BIOS/Cyrix might set it to 5
         */
        cmpl    $0x600, %ebx
        ja      cpu_486

        /*
         * read DIR1
         */
        movb    $CYRIX_DIR1, %al
        outb    $CYRIX_CRI
        inb     $CYRIX_CRD

        cmpb    $0x8, %al
        jb      cyrix6x86MX
        cmpb    $0x80, %al
        jb      MII

cyrix6x86MX:
        /*
         * It is altogether unclear how the revision stamped on the cpu
         * maps to the values in DIR0 and DIR1. Just assume TSC is broken.
         */
        movl    $X86_TYPE_CYRIX_6x86MX, x86_type
        jmp     coma_bug

MII:
        movl    $X86_TYPE_CYRIX_MII, x86_type
likeMII:
        jmp     cpu_done

maybeM3:
        /*
         * We are some chip that we cannot identify yet, an MIII perhaps.
         * We will be optimistic and hope that the chip is much like an MII,
         * and that cpuid is sane. Cyrix seemed to have gotten it right in
         * time for the MII, we can only hope it stayed that way.
         * Maybe the BIOS or Cyrix is trying to hint at something
         */
        cmpl    $0x500, %ebx
        je      GXm

        cmpb    $0x80, %al
        jae     likelyM3

        /*
         * Just test for the features Cyrix is known for
         */
        jmp     MII

likelyM3:
        /*
         * DIR0 with values from 0x80 to 0x8f indicates a VIA Cyrix III, aka
         * the Cyrix MIII. There may be parts later that use the same ranges
         * for DIR0 with special values in DIR1, maybe the VIA CIII, but for
         * now we will call anything with a DIR0 of 0x80 or higher an MIII.
         * The MIII is supposed to support large pages, but we will believe
         * it when we see it. For now we just enable and test for MII features.
         */
        movl    $X86_TYPE_VIA_CYRIX_III, x86_type
        jmp     likeMII

coma_bug:

/*
 * With NO_LOCK set to 0 in CCR1, the usual state that BIOS enforces, some
 * bus cycles are issued with LOCK# asserted. With NO_LOCK set to 1, all bus
 * cycles except page table accesses and interrupt ACK cycles do not assert
 * LOCK#. xchgl is an instruction that asserts LOCK# if NO_LOCK is set to 0.
 * Due to a bug in the cpu core involving over-optimization of branch
 * prediction, register renaming, and execution of instructions down both the
 * X and Y pipes for the xchgl instruction, short loops can be written that
 * never de-assert LOCK# from one invocation of the loop to the next, ad
 * infinitum. The undesirable effect of this situation is that interrupts are
 * not serviced. The ideal workaround to this bug would be to set NO_LOCK to
 * 1. Unfortunately bus cycles that would otherwise have asserted LOCK# no
 * longer do, unless they are page table accesses or interrupt ACK cycles.
 * With LOCK# not asserted, these bus cycles are now cached. This can cause
 * undesirable behaviour if the ARR's are not configured correctly. Solaris
 * does not configure the ARR's, nor does it provide any useful mechanism for
 * doing so, thus the ideal workaround is not viable. Fortunately, the only
 * known exploits for this bug involve the xchgl instruction specifically.
 * There is a group of undocumented registers on Cyrix 6x86, 6x86L, and
 * 6x86MX cpu's which can be used to specify one instruction as a serializing
 * instruction. With the xchgl instruction serialized, LOCK# is still
 * asserted, but it is the sole instruction for which LOCK# is asserted.
 * There is now some added penalty for the xchgl instruction, but the usual
 * bus locking is preserved. This ingenious workaround was discovered by
 * disassembling a binary provided by Cyrix as a workaround for this bug on
 * Windows, but it's not documented anywhere by Cyrix, nor is the bug actually
 * mentioned in any public errata! The only concern for this workaround is
 * that there may be similar undiscovered bugs with other instructions that
 * assert LOCK# that may be leveraged to similar ends. The fact that Cyrix
 * fixed this bug sometime late in 1997 and no other exploits other than
 * xchgl have been discovered is good indication that this workaround is
 * reasonable.
 */

        .set    CYRIX_DBR0, 0x30        / Debug Register 0
        .set    CYRIX_DBR1, 0x31        / Debug Register 1
        .set    CYRIX_DBR2, 0x32        / Debug Register 2
        .set    CYRIX_DBR3, 0x33        / Debug Register 3
        .set    CYRIX_DOR, 0x3c         / Debug Opcode Register

        /*
         * What is known about DBR1, DBR2, DBR3, and DOR is that for normal
         * cpu execution DBR1, DBR2, and DBR3 are set to 0. To obtain opcode
         * serialization, DBR1, DBR2, and DBR3 are loaded with 0xf8, 0x7f,
         * and 0x00.  Then, DOR is loaded with the one byte opcode.
         */
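        /*
         * Illustrative sketch (not part of the original source): the DBR/DOR
         * programming that follows, in rough C (hypothetical helpers as
         * before).  0x87 is the one-byte opcode of the 32-bit xchg
         * instruction:
         *
         *      uint8_t ccr3 = cyrix_read(CYRIX_CCR3) & 0xf;
         *      cyrix_write(CYRIX_CCR3, ccr3 | 0x10);           // MAPEN on
         *
         *      uint8_t dbr0 = cyrix_read(CYRIX_DBR0) | 0x80;   // MATCH off
         *      cyrix_write(CYRIX_DBR0, dbr0);
         *      cyrix_write(CYRIX_DBR1, 0xf8);
         *      cyrix_write(CYRIX_DBR2, 0x7f);
         *      cyrix_write(CYRIX_DBR3, 0x00);
         *      cyrix_write(CYRIX_DOR, 0x87);                   // xchg opcode
         *      cyrix_write(CYRIX_DBR0, dbr0 & 0x7f);           // MATCH on
         *
         *      cyrix_write(CYRIX_CCR3, ccr3);                  // MAPEN off
         */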
        /*
         * select CCR3
         */
        movb    $CYRIX_CCR3, %al
        outb    $CYRIX_CRI

        /*
         * read CCR3 and mask out MAPEN
         */
        inb     $CYRIX_CRD
        andb    $0xf, %al

        /*
         * save masked CCR3 in %ah
         */
        movb    %al, %ah

        /*
         * select CCR3
         */
        movb    $CYRIX_CCR3, %al
        outb    $CYRIX_CRI

        /*
         * enable MAPEN
         */
        movb    %ah, %al
        orb     $0x10, %al
        outb    $CYRIX_CRD

        /*
         * read DBR0
         */
        movb    $CYRIX_DBR0, %al
        outb    $CYRIX_CRI
        inb     $CYRIX_CRD

        /*
         * disable MATCH and save in %bh
         */
        orb     $0x80, %al
        movb    %al, %bh

        /*
         * write DBR0
         */
        movb    $CYRIX_DBR0, %al
        outb    $CYRIX_CRI
        movb    %bh, %al
        outb    $CYRIX_CRD

        /*
         * write DBR1
         */
        movb    $CYRIX_DBR1, %al
        outb    $CYRIX_CRI
        movb    $0xf8, %al
        outb    $CYRIX_CRD

        /*
         * write DBR2
         */
        movb    $CYRIX_DBR2, %al
        outb    $CYRIX_CRI
        movb    $0x7f, %al
        outb    $CYRIX_CRD

        /*
         * write DBR3
         */
        movb    $CYRIX_DBR3, %al
        outb    $CYRIX_CRI
        xorb    %al, %al
        outb    $CYRIX_CRD

        /*
         * write DOR
         */
        movb    $CYRIX_DOR, %al
        outb    $CYRIX_CRI
        movb    $0x87, %al
        outb    $CYRIX_CRD

        /*
         * enable MATCH
         */
        movb    $CYRIX_DBR0, %al
        outb    $CYRIX_CRI
        movb    %bh, %al
        andb    $0x7f, %al
        outb    $CYRIX_CRD

        /*
         * disable MAPEN
         */
        movb    $0xc3, %al
        outb    $CYRIX_CRI
        movb    %ah, %al
        outb    $CYRIX_CRD

        jmp     cpu_done

cpu_done:

        popfl                           /* Restore original FLAGS */
        popal                           /* Restore all registers */

#endif  /* !__xpv */

        /*
         * mlsetup(%esp) gets called.
         */
        pushl   %esp
        call    mlsetup
        addl    $4, %esp

        /*
         * We change our appearance to look like the real thread 0.
         * (NOTE: making ourselves a real thread may be a noop)
         * main() gets called.  (NOTE: main() never returns).
         */
        call    main
        /* NOTREACHED */
        pushl   $__return_from_main
        call    panic

        /* NOTREACHED */
cpu_486:
        pushl   $__unsupported_cpu
        call    panic
        SET_SIZE(_locore_start)

#endif  /* __lint */
#endif  /* !__amd64 */


/*
 *  For stack layout, see privregs.h
 *  When cmntrap gets called, the error code and trap number have been pushed.
 *  When cmntrap_pushed gets called, the entire struct regs has been pushed.
 */

#if defined(__lint)

/* ARGSUSED */
void
cmntrap()
{}

#else   /* __lint */

        .globl  trap            /* C handler called below */

#if defined(__amd64)

        ENTRY_NP2(cmntrap, _cmntrap)

        INTR_PUSH

        ALTENTRY(cmntrap_pushed)

        movq    %rsp, %rbp

        /*
         * - if this is a #pf i.e. T_PGFLT, %r15 is live
         *   and contains the faulting address i.e. a copy of %cr2
         *
         * - if this is a #db i.e. T_SGLSTP, %r15 is live
         *   and contains the value of %db6
         */

        TRACE_PTR(%rdi, %rbx, %ebx, %rcx, $TT_TRAP) /* Uses labels 8 and 9 */
        TRACE_REGS(%rdi, %rsp, %rbx, %rcx)      /* Uses label 9 */
        TRACE_STAMP(%rdi)               /* Clobbers %eax, %edx, uses 9 */

        /*
         * We must first check if DTrace has set its NOFAULT bit.  This
         * regrettably must happen before the trap stack is recorded, because
         * this requires a call to getpcstack() and may induce recursion if an
         * fbt::getpcstack: enabling is inducing the bad load.
         */
        movl    %gs:CPU_ID, %eax
        shlq    $CPU_CORE_SHIFT, %rax
        leaq    cpu_core(%rip), %r8
        addq    %r8, %rax
        movw    CPUC_DTRACE_FLAGS(%rax), %cx
        testw   $CPU_DTRACE_NOFAULT, %cx
        jnz     .dtrace_induced

        TRACE_STACK(%rdi)

        movq    %rbp, %rdi
        movq    %r15, %rsi
        movl    %gs:CPU_ID, %edx

        /*
         * We know that this isn't a DTrace non-faulting load; we can now safely
         * reenable interrupts.  (In the case of pagefaults, we enter through an
         * interrupt gate.)
         */
        ENABLE_INTR_FLAGS

        call    trap            /* trap(rp, addr, cpuid) handles all traps */
        jmp     _sys_rtt

.dtrace_induced:
        cmpw    $KCS_SEL, REGOFF_CS(%rbp)       /* test CS for user-mode trap */
        jne     3f                              /* if from user, panic */

        cmpl    $T_PGFLT, REGOFF_TRAPNO(%rbp)
        je      1f

        cmpl    $T_GPFLT, REGOFF_TRAPNO(%rbp)
        je      0f

        cmpl    $T_ILLINST, REGOFF_TRAPNO(%rbp)
        je      0f

        cmpl    $T_ZERODIV, REGOFF_TRAPNO(%rbp)
        jne     4f                              /* if not PF/GP/UD/DE, panic */

        orw     $CPU_DTRACE_DIVZERO, %cx
        movw    %cx, CPUC_DTRACE_FLAGS(%rax)
        jmp     2f

        /*
         * If we've taken a GPF, we don't (unfortunately) have the address that
         * induced the fault.  So instead of setting the fault to BADADDR,
         * we'll set the fault to ILLOP.
         */
0:
        orw     $CPU_DTRACE_ILLOP, %cx
        movw    %cx, CPUC_DTRACE_FLAGS(%rax)
        jmp     2f
1:
        orw     $CPU_DTRACE_BADADDR, %cx
        movw    %cx, CPUC_DTRACE_FLAGS(%rax)    /* set fault to bad addr */
        movq    %r15, CPUC_DTRACE_ILLVAL(%rax)
                                        /* fault addr is illegal value */
2:
        movq    REGOFF_RIP(%rbp), %rdi
        movq    %rdi, %r12
        call    dtrace_instr_size
        addq    %rax, %r12
        movq    %r12, REGOFF_RIP(%rbp)
        INTR_POP
        jmp     tr_iret_auto
        /*NOTREACHED*/
3:
        leaq    dtrace_badflags(%rip), %rdi
        xorl    %eax, %eax
        call    panic
4:
        leaq    dtrace_badtrap(%rip), %rdi
        xorl    %eax, %eax
        call    panic
        SET_SIZE(cmntrap)
        SET_SIZE(_cmntrap)
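        /*
         * Illustrative sketch (not part of the original source): the
         * .dtrace_induced path above, in rough C.  A kernel-mode fault taken
         * with CPU_DTRACE_NOFAULT set is recorded in cpu_core[] and then
         * skipped, rather than handed to trap():
         *
         *      cpu_core_t *cpup = &cpu_core[CPU->cpu_id];
         *
         *      if (rp->r_trapno == T_PGFLT) {
         *              cpup->cpuc_dtrace_flags |= CPU_DTRACE_BADADDR;
         *              cpup->cpuc_dtrace_illval = addr;  // faulting address
         *      } else if (rp->r_trapno == T_ZERODIV) {
         *              cpup->cpuc_dtrace_flags |= CPU_DTRACE_DIVZERO;
         *      } else {
         *              cpup->cpuc_dtrace_flags |= CPU_DTRACE_ILLOP; // GP/UD
         *      }
         *      rp->r_pc += dtrace_instr_size((uchar_t *)rp->r_pc);
         */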
#elif defined(__i386)


        ENTRY_NP2(cmntrap, _cmntrap)

        INTR_PUSH

        ALTENTRY(cmntrap_pushed)

        movl    %esp, %ebp

        /*
         * - if this is a #pf i.e. T_PGFLT, %esi is live
         *   and contains the faulting address i.e. a copy of %cr2
         *
         * - if this is a #db i.e. T_SGLSTP, %esi is live
         *   and contains the value of %db6
         */

        TRACE_PTR(%edi, %ebx, %ebx, %ecx, $TT_TRAP) /* Uses labels 8 and 9 */
        TRACE_REGS(%edi, %esp, %ebx, %ecx)      /* Uses label 9 */
        TRACE_STAMP(%edi)               /* Clobbers %eax, %edx, uses 9 */

        /*
         * We must first check if DTrace has set its NOFAULT bit.  This
         * regrettably must happen before the trap stack is recorded, because
         * this requires a call to getpcstack() and may induce recursion if an
         * fbt::getpcstack: enabling is inducing the bad load.
         */
        movl    %gs:CPU_ID, %eax
        shll    $CPU_CORE_SHIFT, %eax
        addl    $cpu_core, %eax
        movw    CPUC_DTRACE_FLAGS(%eax), %cx
        testw   $CPU_DTRACE_NOFAULT, %cx
        jnz     .dtrace_induced

        TRACE_STACK(%edi)

        pushl   %gs:CPU_ID
        pushl   %esi            /* fault address for PGFLTs */
        pushl   %ebp            /* &regs */

        /*
         * We know that this isn't a DTrace non-faulting load; we can now safely
         * reenable interrupts.  (In the case of pagefaults, we enter through an
         * interrupt gate.)
         */
        ENABLE_INTR_FLAGS

        call    trap            /* trap(rp, addr, cpuid) handles all traps */
        addl    $12, %esp       /* get argument off stack */
        jmp     _sys_rtt

.dtrace_induced:
        cmpw    $KCS_SEL, REGOFF_CS(%ebp)       /* test CS for user-mode trap */
        jne     3f                              /* if from user, panic */

        cmpl    $T_PGFLT, REGOFF_TRAPNO(%ebp)
        je      1f

        cmpl    $T_GPFLT, REGOFF_TRAPNO(%ebp)
        je      0f

        cmpl    $T_ZERODIV, REGOFF_TRAPNO(%ebp)
        jne     4f                              /* if not PF/GP/DE, panic */

        orw     $CPU_DTRACE_DIVZERO, %cx
        movw    %cx, CPUC_DTRACE_FLAGS(%eax)
        jmp     2f

0:
        /*
         * If we've taken a GPF, we don't (unfortunately) have the address that
         * induced the fault.  So instead of setting the fault to BADADDR,
         * we'll set the fault to ILLOP.
         */
        orw     $CPU_DTRACE_ILLOP, %cx
        movw    %cx, CPUC_DTRACE_FLAGS(%eax)
        jmp     2f
1:
        orw     $CPU_DTRACE_BADADDR, %cx
        movw    %cx, CPUC_DTRACE_FLAGS(%eax)    /* set fault to bad addr */
        movl    %esi, CPUC_DTRACE_ILLVAL(%eax)
                                        /* fault addr is illegal value */
2:
        pushl   REGOFF_EIP(%ebp)
        call    dtrace_instr_size
        addl    $4, %esp
        movl    REGOFF_EIP(%ebp), %ecx
        addl    %eax, %ecx
        movl    %ecx, REGOFF_EIP(%ebp)
        INTR_POP_KERNEL
        IRET
        /*NOTREACHED*/
3:
        pushl   $dtrace_badflags
        call    panic
4:
        pushl   $dtrace_badtrap
        call    panic
        SET_SIZE(cmntrap)
        SET_SIZE(_cmntrap)

#endif  /* __i386 */

/*
 * Declare a uintptr_t which has the size of _cmntrap to enable stack
 * traceback code to know when a regs structure is on the stack.
 */
        .globl  _cmntrap_size
        .align  CLONGSIZE
_cmntrap_size:
        .NWORD  . - _cmntrap
        .type   _cmntrap_size, @object

dtrace_badflags:
        .string "bad DTrace flags"

dtrace_badtrap:
        .string "bad DTrace trap"

#endif  /* __lint */

#if defined(__lint)

/* ARGSUSED */
void
cmninttrap()
{}

#if !defined(__xpv)
void
bop_trap_handler(void)
{}
#endif

#else   /* __lint */

        .globl  trap            /* C handler called below */

#if defined(__amd64)

        ENTRY_NP(cmninttrap)

        INTR_PUSH
        INTGATE_INIT_KERNEL_FLAGS

        TRACE_PTR(%rdi, %rbx, %ebx, %rcx, $TT_TRAP) /* Uses labels 8 and 9 */
        TRACE_REGS(%rdi, %rsp, %rbx, %rcx)      /* Uses label 9 */
        TRACE_STAMP(%rdi)               /* Clobbers %eax, %edx, uses 9 */

        movq    %rsp, %rbp

        movl    %gs:CPU_ID, %edx
        xorl    %esi, %esi
        movq    %rsp, %rdi
        call    trap            /* trap(rp, addr, cpuid) handles all traps */
        jmp     _sys_rtt
        SET_SIZE(cmninttrap)

#if !defined(__xpv)
        /*
         * Handle traps early in boot. Just revectors into C quickly as
         * these are always fatal errors.
         *
         * Adjust %rsp to get same stack layout as in 32-bit mode for
         * bop_trap().
         */
        ENTRY(bop_trap_handler)
        movq    %rsp, %rdi
        sub     $8, %rsp
        call    bop_trap
        SET_SIZE(bop_trap_handler)
#endif

#elif defined(__i386)

        ENTRY_NP(cmninttrap)

        INTR_PUSH
        INTGATE_INIT_KERNEL_FLAGS

        TRACE_PTR(%edi, %ebx, %ebx, %ecx, $TT_TRAP) /* Uses labels 8 and 9 */
        TRACE_REGS(%edi, %esp, %ebx, %ecx)      /* Uses label 9 */
        TRACE_STAMP(%edi)               /* Clobbers %eax, %edx, uses 9 */

        movl    %esp, %ebp

        TRACE_STACK(%edi)

        pushl   %gs:CPU_ID
        pushl   $0
        pushl   %ebp
        call    trap            /* trap(rp, addr, cpuid) handles all traps */
        addl    $12, %esp
        jmp     _sys_rtt
        SET_SIZE(cmninttrap)

#if !defined(__xpv)
        /*
         * Handle traps early in boot. Just revectors into C quickly as
         * these are always fatal errors.
         */
        ENTRY(bop_trap_handler)
        movl    %esp, %eax
        pushl   %eax
        call    bop_trap
        SET_SIZE(bop_trap_handler)
#endif

#endif  /* __i386 */

#endif  /* __lint */

#if defined(__lint)

/* ARGSUSED */
void
dtrace_trap()
{}

#else   /* __lint */

        .globl  dtrace_user_probe

#if defined(__amd64)

        ENTRY_NP(dtrace_trap)

        INTR_PUSH

        TRACE_PTR(%rdi, %rbx, %ebx, %rcx, $TT_TRAP) /* Uses labels 8 and 9 */
        TRACE_REGS(%rdi, %rsp, %rbx, %rcx)      /* Uses label 9 */
        TRACE_STAMP(%rdi)               /* Clobbers %eax, %edx, uses 9 */

        movq    %rsp, %rbp

        movl    %gs:CPU_ID, %edx
#if defined(__xpv)
        movq    %gs:CPU_VCPU_INFO, %rsi
        movq    VCPU_INFO_ARCH_CR2(%rsi), %rsi
#else
        movq    %cr2, %rsi
#endif
        movq    %rsp, %rdi

        ENABLE_INTR_FLAGS

        call    dtrace_user_probe /* dtrace_user_probe(rp, addr, cpuid) */
        jmp     _sys_rtt

        SET_SIZE(dtrace_trap)

#elif defined(__i386)

        ENTRY_NP(dtrace_trap)

        INTR_PUSH

        TRACE_PTR(%edi, %ebx, %ebx, %ecx, $TT_TRAP) /* Uses labels 8 and 9 */
        TRACE_REGS(%edi, %esp, %ebx, %ecx)      /* Uses label 9 */
        TRACE_STAMP(%edi)               /* Clobbers %eax, %edx, uses 9 */

        movl    %esp, %ebp

        pushl   %gs:CPU_ID
#if defined(__xpv)
        movl    %gs:CPU_VCPU_INFO, %eax
        movl    VCPU_INFO_ARCH_CR2(%eax), %eax
#else
        movl    %cr2, %eax
#endif
        pushl   %eax
        pushl   %ebp

        ENABLE_INTR_FLAGS

        call    dtrace_user_probe /* dtrace_user_probe(rp, addr, cpuid) */
        addl    $12, %esp               /* get argument off stack */

        jmp     _sys_rtt
        SET_SIZE(dtrace_trap)

#endif  /* __i386 */

#endif  /* __lint */

/*
 * Return from _sys_trap routine.
 */

#if defined(__lint)

void
lwp_rtt_initial(void)
{}

void
lwp_rtt(void)
{}

void
_sys_rtt(void)
{}

#else   /* __lint */

#if defined(__amd64)

        ENTRY_NP(lwp_rtt_initial)
        movq    %gs:CPU_THREAD, %r15
        movq    T_STACK(%r15), %rsp     /* switch to the thread stack */
        movq    %rsp, %rbp
        call    __dtrace_probe___proc_start
        jmp     _lwp_rtt

        ENTRY_NP(lwp_rtt)

        /*
         * r14  lwp
         * rdx  lwp->lwp_procp
         * r15  curthread
         */

        movq    %gs:CPU_THREAD, %r15
        movq    T_STACK(%r15), %rsp     /* switch to the thread stack */
        movq    %rsp, %rbp
_lwp_rtt:
        call    __dtrace_probe___proc_lwp__start
        movq    %gs:CPU_LWP, %r14
        movq    LWP_PROCP(%r14), %rdx

        /*
         * XX64 Is the stack misaligned correctly at this point?
         *      If not, we need to do a push before calling anything ..
         */
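        /*
         * Illustrative sketch (not part of the original source): the DEBUG
         * block below is roughly this C:
         *
         *      if ((lwp->lwp_pcb.pcb_rupdate & 1) == 0)
         *              panic("locore.s:%d lwp_rtt(lwp %p) but pcb_rupdate "
         *                  "!= 1", __LINE__, (void *)lwp);
         */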
#if defined(DEBUG)
        /*
         * If we were to run lwp_savectx at this point -without-
         * pcb_rupdate being set to 1, we'd end up sampling the hardware
         * state left by the previous running lwp, rather than setting
         * the values requested by the lwp creator.  Bad.
         */
        testb   $0x1, PCB_RUPDATE(%r14)
        jne     1f
        leaq    _no_pending_updates(%rip), %rdi
        movl    $__LINE__, %esi
        movq    %r14, %rdx
        xorl    %eax, %eax
        call    panic
_no_pending_updates:
        .string "locore.s:%d lwp_rtt(lwp %p) but pcb_rupdate != 1"
1:
#endif

        /*
         * If agent lwp, clear %fs and %gs
         */
        cmpq    %r15, P_AGENTTP(%rdx)
        jne     1f
        xorl    %ecx, %ecx
        movq    %rcx, REGOFF_FS(%rsp)
        movq    %rcx, REGOFF_GS(%rsp)
        movw    %cx, LWP_PCB_FS(%r14)
        movw    %cx, LWP_PCB_GS(%r14)
1:
        call    dtrace_systrace_rtt
        movq    REGOFF_RDX(%rsp), %rsi
        movq    REGOFF_RAX(%rsp), %rdi
        call    post_syscall            /* post_syscall(rval1, rval2) */

        /*
         * set up to take fault on first use of fp
         */
        STTS(%rdi)

        /*
         * XXX - may want a fast path that avoids sys_rtt_common in the
         * most common case.
         */
        ALTENTRY(_sys_rtt)
        CLI(%rax)                       /* disable interrupts */
        ALTENTRY(_sys_rtt_ints_disabled)
        movq    %rsp, %rdi              /* pass rp to sys_rtt_common */
        call    sys_rtt_common          /* do common sys_rtt tasks */
        testq   %rax, %rax              /* returning to userland? */
        jz      sr_sup

        /*
         * Return to user
         */
        ASSERT_UPCALL_MASK_IS_SET
        cmpw    $UCS_SEL, REGOFF_CS(%rsp) /* test for native (64-bit) lwp? */
        je      sys_rtt_syscall

        /*
         * Return to 32-bit userland
         */
        ALTENTRY(sys_rtt_syscall32)
        USER32_POP
        jmp     tr_iret_user
        /*NOTREACHED*/

        ALTENTRY(sys_rtt_syscall)
        /*
         * Return to 64-bit userland
         */
        USER_POP
        ALTENTRY(nopop_sys_rtt_syscall)
        jmp     tr_iret_user
        /*NOTREACHED*/
        SET_SIZE(nopop_sys_rtt_syscall)

        /*
         * Return to supervisor
         * NOTE: to make the check in trap() that tests if we are executing
         * segment register fixup/restore code work properly, sr_sup MUST be
         * after _sys_rtt .
         */
        ALTENTRY(sr_sup)
        /*
         * Restore regs before doing iretq to kernel mode
         */
        INTR_POP
        jmp     tr_iret_kernel
        .globl  _sys_rtt_end
_sys_rtt_end:
        /*NOTREACHED*/
        SET_SIZE(sr_sup)
        SET_SIZE(_sys_rtt_end)
        SET_SIZE(lwp_rtt)
        SET_SIZE(lwp_rtt_initial)
        SET_SIZE(_sys_rtt_ints_disabled)
        SET_SIZE(_sys_rtt)
        SET_SIZE(sys_rtt_syscall)
        SET_SIZE(sys_rtt_syscall32)
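        /*
         * Illustrative sketch (not part of the original source): the control
         * flow of _sys_rtt above, in rough C:
         *
         *      cli();                          // CLI(): block interrupts
         *      if (sys_rtt_common(rp) == 0) {
         *              // sr_sup: pop regs, iret back into the kernel
         *      } else if (rp->r_cs == UCS_SEL) {
         *              // sys_rtt_syscall: return to 64-bit userland
         *      } else {
         *              // sys_rtt_syscall32: return to 32-bit userland
         *      }
         */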
#elif defined(__i386)

        ENTRY_NP(lwp_rtt_initial)
        movl    %gs:CPU_THREAD, %eax
        movl    T_STACK(%eax), %esp     /* switch to the thread stack */
        movl    %esp, %ebp
        call    __dtrace_probe___proc_start
        jmp     _lwp_rtt

        ENTRY_NP(lwp_rtt)
        movl    %gs:CPU_THREAD, %eax
        movl    T_STACK(%eax), %esp     /* switch to the thread stack */
        movl    %esp, %ebp
_lwp_rtt:
        call    __dtrace_probe___proc_lwp__start

        /*
         * If agent lwp, clear %fs and %gs.
         */
        movl    %gs:CPU_LWP, %eax
        movl    LWP_PROCP(%eax), %edx

        cmpl    %eax, P_AGENTTP(%edx)
        jne     1f
        movl    $0, REGOFF_FS(%esp)
        movl    $0, REGOFF_GS(%esp)
1:
        call    dtrace_systrace_rtt
        movl    REGOFF_EDX(%esp), %edx
        movl    REGOFF_EAX(%esp), %eax
        pushl   %edx
        pushl   %eax
        call    post_syscall            /* post_syscall(rval1, rval2) */
        addl    $8, %esp

        /*
         * set up to take fault on first use of fp
         */
        STTS(%eax)

        /*
         * XXX - may want a fast path that avoids sys_rtt_common in the
         * most common case.
         */
        ALTENTRY(_sys_rtt)
        CLI(%eax)                       /* disable interrupts */
        ALTENTRY(_sys_rtt_ints_disabled)
        pushl   %esp                    /* pass rp to sys_rtt_common */
        call    sys_rtt_common
        addl    $4, %esp                /* pop arg */
        testl   %eax, %eax              /* test for return to user mode */
        jz      sr_sup

        /*
         * Return to User.
         */
        ALTENTRY(sys_rtt_syscall)
        INTR_POP_USER

        /*
         * There can be no instructions between this label and IRET or
         * we could end up breaking linux brand support. See label usage
         * in lx_brand_int80_callback for an example.
         */
        ALTENTRY(nopop_sys_rtt_syscall)
        IRET
        /*NOTREACHED*/
        SET_SIZE(nopop_sys_rtt_syscall)

        ALTENTRY(_sys_rtt_end)

        /*
         * Return to supervisor
         */
        ALTENTRY(sr_sup)

        /*
         * Restore regs before doing iret to kernel mode
         */
        INTR_POP_KERNEL
        IRET
        /*NOTREACHED*/

        SET_SIZE(sr_sup)
        SET_SIZE(_sys_rtt_end)
        SET_SIZE(lwp_rtt)
        SET_SIZE(lwp_rtt_initial)
        SET_SIZE(_sys_rtt_ints_disabled)
        SET_SIZE(_sys_rtt)
        SET_SIZE(sys_rtt_syscall)

#endif  /* __i386 */

#endif  /* __lint */

#if defined(__lint)

/*
 * So why do we have to deal with all this crud in the world of ia32?
 *
 * Basically there are four classes of ia32 implementations, those that do not
 * have a TSC, those that have a marginal TSC that is broken to the extent
 * that it is useless, those that have a marginal TSC that is not quite so
 * horribly broken and can be used with some care, and those that have a
 * reliable TSC.  This crud has to be here in order to sift through all the
 * variants.
 */

/*ARGSUSED*/
uint64_t
freq_tsc(uint32_t *pit_counter)
{
        return (0);
}

#else   /* __lint */

#if defined(__amd64)

        /*
         * XX64 quick and dirty port from the i386 version. Since we
         *      believe the amd64 tsc is more reliable, could this code be
         *      simpler?
         */
        ENTRY_NP(freq_tsc)
        pushq   %rbp
        movq    %rsp, %rbp
        movq    %rdi, %r9       /* save pit_counter */
        pushq   %rbx

/ We have a TSC, but we have no way in general to know how reliable it is.
/ Usually a marginal TSC behaves appropriately unless not enough time
/ elapses between reads. A reliable TSC can be read as often and as rapidly
/ as desired. The simplistic approach of reading the TSC counter and
/ correlating to the PIT counter cannot be naively followed. Instead estimates
/ have to be taken to successively refine a guess at the speed of the cpu
/ and then the TSC and PIT counter are correlated. In practice very rarely
/ is more than one quick loop required for an estimate. Measures have to be
/ taken to prevent the PIT counter from wrapping beyond its resolution and for
/ measuring the clock rate of very fast processors.
/
/ The following constant can be tuned. It should be such that the loop does
/ not take too many nor too few PIT counts to execute. If this value is too
/ large, then on slow machines the loop will take a long time, or the PIT
/ counter may even wrap. If this value is too small, then on fast machines
/ the PIT counter may count so few ticks that the resolution of the PIT
/ itself causes a bad guess. Because this code is used in machines with
/ marginal TSC's and/or IO, if this value is too small on those, it may
/ cause the calculated cpu frequency to vary slightly from boot to boot.
/
/ In all cases even if this constant is set inappropriately, the algorithm
/ will still work and the caller should be able to handle variances in the
/ calculation of cpu frequency, but the calculation will be inefficient and
/ take a disproportionate amount of time relative to a well selected value.
/ As the slowest supported cpu becomes faster, this constant should be
/ carefully increased.

        movl    $0x8000, %ecx

        / to make sure the instruction cache has been warmed
        clc

        jmp     freq_tsc_loop

/ The following block of code up to and including the latching of the PIT
/ counter after freq_tsc_perf_loop is very critical and very carefully
/ written, it should only be modified with great care. freq_tsc_loop to
/ freq_tsc_perf_loop fits exactly in 16 bytes as do the instructions in
/ freq_tsc_perf_loop up to the unlatching of the PIT counter.

        .align  32
freq_tsc_loop:
        / save the loop count in %ebx
        movl    %ecx, %ebx

        / initialize the PIT counter and start a count down
        movb    $PIT_LOADMODE, %al
        outb    $PITCTL_PORT
        movb    $0xff, %al
        outb    $PITCTR0_PORT
        outb    $PITCTR0_PORT

        / read the TSC and store the TS in %edi:%esi
        rdtsc
        movl    %eax, %esi

freq_tsc_perf_loop:
        movl    %edx, %edi
        movl    %eax, %esi
        movl    %edx, %edi
        loop    freq_tsc_perf_loop

        / read the TSC and store the LSW in %ecx
        rdtsc
        movl    %eax, %ecx

        / latch the PIT counter and status
        movb    $_CONST(PIT_READBACK|PIT_READBACKC0), %al
        outb    $PITCTL_PORT

        / remember if the icache has been warmed
        setc    %ah

        / read the PIT status
        inb     $PITCTR0_PORT
        shll    $8, %eax

        / read PIT count
        inb     $PITCTR0_PORT
        shll    $8, %eax
        inb     $PITCTR0_PORT
        bswap   %eax

        / check to see if the PIT count was loaded into the CE
        btw     $_CONST(PITSTAT_NULLCNT+8), %ax
        jc      freq_tsc_increase_count

        / check to see if PIT counter wrapped
        btw     $_CONST(PITSTAT_OUTPUT+8), %ax
        jnc     freq_tsc_pit_did_not_wrap

        / halve count
        shrl    $1, %ebx
        movl    %ebx, %ecx

        / the instruction cache has been warmed
        stc

        jmp     freq_tsc_loop

freq_tsc_increase_count:
        shll    $1, %ebx
        jc      freq_tsc_too_fast

        movl    %ebx, %ecx

        / the instruction cache has been warmed
        stc

        jmp     freq_tsc_loop

freq_tsc_pit_did_not_wrap:
        roll    $16, %eax

        cmpw    $0x2000, %ax
        notw    %ax
        jb      freq_tsc_sufficient_duration

freq_tsc_calculate:
        / in mode 0, the PIT loads the count into the CE on the first CLK pulse,
        / then on the second CLK pulse the CE is decremented, therefore mode 0
        / is really a (count + 1) counter, ugh
        xorl    %esi, %esi
        movw    %ax, %si
        incl    %esi
        movl    $0xf000, %eax
        mull    %ebx

        / tuck away (target_pit_count * loop_count)
        movl    %edx, %ecx
        movl    %eax, %ebx

        movl    %esi, %eax
        movl    $0xffffffff, %edx
        mull    %edx

        addl    %esi, %eax
        adcl    $0, %edx

        cmpl    %ecx, %edx
        ja      freq_tsc_div_safe
        jb      freq_tsc_too_fast

        cmpl    %ebx, %eax
        jbe     freq_tsc_too_fast

freq_tsc_div_safe:
        movl    %ecx, %edx
        movl    %ebx, %eax

        movl    %esi, %ecx
        divl    %ecx

        movl    %eax, %ecx

        / the instruction cache has been warmed
        stc

        jmp     freq_tsc_loop

freq_tsc_sufficient_duration:
        / test to see if the icache has been warmed
        btl     $16, %eax
        jnc     freq_tsc_calculate

        / recall mode 0 is a (count + 1) counter
        andl    $0xffff, %eax
        incl    %eax

        / save the number of PIT counts
        movl    %eax, (%r9)

        / calculate the number of TS's that elapsed
        movl    %ecx, %eax
        subl    %esi, %eax
        sbbl    %edi, %edx

        jmp     freq_tsc_end

freq_tsc_too_fast:
        / return 0 as a 64 bit quantity
        xorl    %eax, %eax
        xorl    %edx, %edx

freq_tsc_end:
        shlq    $32, %rdx
        orq     %rdx, %rax

        popq    %rbx
        leaveq
        ret
        SET_SIZE(freq_tsc)
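/*
 * Illustrative sketch (not part of the original source): the shape of the
 * calibration above, in rough C with hypothetical helpers.  PIT_HZ is the
 * 8254 input clock; the loop count is retuned until the PIT delta is long
 * enough to be meaningful but short of a wrap:
 *
 *      uint64_t tsc_before, tsc_after;
 *      uint32_t pit_ticks;
 *
 *      pit_start_countdown(0xffff);    // hypothetical: mode 0, counter 0
 *      tsc_before = rdtsc();
 *      spin(loop_count);               // fixed-length delay loop
 *      tsc_after = rdtsc();
 *      pit_ticks = pit_read_elapsed(); // hypothetical readback
 *
 *      *pit_counter = pit_ticks;
 *      return (tsc_after - tsc_before);        // caller derives
 *                                              // hz = tsc_delta * PIT_HZ
 *                                              //     / pit_ticks
 */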
#elif defined(__i386)

        ENTRY_NP(freq_tsc)
        pushl   %ebp
        movl    %esp, %ebp
        pushl   %edi
        pushl   %esi
        pushl   %ebx

/ We have a TSC, but we have no way in general to know how reliable it is.
/ Usually a marginal TSC behaves appropriately unless not enough time
/ elapses between reads. A reliable TSC can be read as often and as rapidly
/ as desired. The simplistic approach of reading the TSC counter and
/ correlating to the PIT counter cannot be naively followed. Instead estimates
/ have to be taken to successively refine a guess at the speed of the cpu
/ and then the TSC and PIT counter are correlated. In practice very rarely
/ is more than one quick loop required for an estimate. Measures have to be
/ taken to prevent the PIT counter from wrapping beyond its resolution and for
/ measuring the clock rate of very fast processors.
/
/ The following constant can be tuned. It should be such that the loop does
/ not take too many nor too few PIT counts to execute. If this value is too
/ large, then on slow machines the loop will take a long time, or the PIT
/ counter may even wrap. If this value is too small, then on fast machines
/ the PIT counter may count so few ticks that the resolution of the PIT
/ itself causes a bad guess. Because this code is used in machines with
/ marginal TSC's and/or IO, if this value is too small on those, it may
/ cause the calculated cpu frequency to vary slightly from boot to boot.
/
/ In all cases even if this constant is set inappropriately, the algorithm
/ will still work and the caller should be able to handle variances in the
/ calculation of cpu frequency, but the calculation will be inefficient and
/ take a disproportionate amount of time relative to a well selected value.
/ As the slowest supported cpu becomes faster, this constant should be
/ carefully increased.

        movl    $0x8000, %ecx

        / to make sure the instruction cache has been warmed
        clc

        jmp     freq_tsc_loop

/ The following block of code up to and including the latching of the PIT
/ counter after freq_tsc_perf_loop is very critical and very carefully
/ written, it should only be modified with great care. freq_tsc_loop to
/ freq_tsc_perf_loop fits exactly in 16 bytes as do the instructions in
/ freq_tsc_perf_loop up to the unlatching of the PIT counter.

        .align  32
freq_tsc_loop:
        / save the loop count in %ebx
        movl    %ecx, %ebx

        / initialize the PIT counter and start a count down
        movb    $PIT_LOADMODE, %al
        outb    $PITCTL_PORT
        movb    $0xff, %al
        outb    $PITCTR0_PORT
        outb    $PITCTR0_PORT

        / read the TSC and store the TS in %edi:%esi
        rdtsc
        movl    %eax, %esi

freq_tsc_perf_loop:
        movl    %edx, %edi
        movl    %eax, %esi
        movl    %edx, %edi
        loop    freq_tsc_perf_loop

        / read the TSC and store the LSW in %ecx
        rdtsc
        movl    %eax, %ecx

        / latch the PIT counter and status
        movb    $_CONST(PIT_READBACK|PIT_READBACKC0), %al
        outb    $PITCTL_PORT

        / remember if the icache has been warmed
        setc    %ah

        / read the PIT status
        inb     $PITCTR0_PORT
        shll    $8, %eax

        / read PIT count
        inb     $PITCTR0_PORT
        shll    $8, %eax
        inb     $PITCTR0_PORT
        bswap   %eax

        / check to see if the PIT count was loaded into the CE
        btw     $_CONST(PITSTAT_NULLCNT+8), %ax
        jc      freq_tsc_increase_count

        / check to see if PIT counter wrapped
        btw     $_CONST(PITSTAT_OUTPUT+8), %ax
        jnc     freq_tsc_pit_did_not_wrap

        / halve count
        shrl    $1, %ebx
        movl    %ebx, %ecx

        / the instruction cache has been warmed
        stc

        jmp     freq_tsc_loop

freq_tsc_increase_count:
        shll    $1, %ebx
        jc      freq_tsc_too_fast

        movl    %ebx, %ecx

        / the instruction cache has been warmed
        stc

        jmp     freq_tsc_loop

freq_tsc_pit_did_not_wrap:
        roll    $16, %eax

        cmpw    $0x2000, %ax
        notw    %ax
        jb      freq_tsc_sufficient_duration

freq_tsc_calculate:
        / in mode 0, the PIT loads the count into the CE on the first CLK pulse,
        / then on the second CLK pulse the CE is decremented, therefore mode 0
        / is really a (count + 1) counter, ugh
        xorl    %esi, %esi
        movw    %ax, %si
        incl    %esi

        movl    $0xf000, %eax
        mull    %ebx

        / tuck away (target_pit_count * loop_count)
        movl    %edx, %ecx
        movl    %eax, %ebx

        movl    %esi, %eax
        movl    $0xffffffff, %edx
        mull    %edx

        addl    %esi, %eax
        adcl    $0, %edx

        cmpl    %ecx, %edx
        ja      freq_tsc_div_safe
        jb      freq_tsc_too_fast

        cmpl    %ebx, %eax
        jbe     freq_tsc_too_fast

freq_tsc_div_safe:
        movl    %ecx, %edx
        movl    %ebx, %eax

        movl    %esi, %ecx
        divl    %ecx

        movl    %eax, %ecx

        / the instruction cache has been warmed
        stc

        jmp     freq_tsc_loop

freq_tsc_sufficient_duration:
        / test to see if the icache has been warmed
        btl     $16, %eax
        jnc     freq_tsc_calculate

        / recall mode 0 is a (count + 1) counter
        andl    $0xffff, %eax
        incl    %eax

        / save the number of PIT counts
        movl    8(%ebp), %ebx
        movl    %eax, (%ebx)

        / calculate the number of TS's that elapsed
        movl    %ecx, %eax
        subl    %esi, %eax
        sbbl    %edi, %edx

        jmp     freq_tsc_end
freq_tsc_too_fast:
        / return 0 as a 64 bit quantity
        xorl    %eax, %eax
        xorl    %edx, %edx

freq_tsc_end:
        popl    %ebx
        popl    %esi
        popl    %edi
        popl    %ebp
        ret
        SET_SIZE(freq_tsc)

#endif  /* __i386 */
#endif  /* __lint */

#if !defined(__amd64)
#if defined(__lint)

/*
 * We do not have a TSC so we use a block of instructions with well known
 * timings.
 */

/*ARGSUSED*/
uint64_t
freq_notsc(uint32_t *pit_counter)
{
        return (0);
}

#else   /* __lint */
        ENTRY_NP(freq_notsc)
        pushl   %ebp
        movl    %esp, %ebp
        pushl   %edi
        pushl   %esi
        pushl   %ebx

        / initial count for the idivl loop
        movl    $0x1000, %ecx

        / load the divisor
        movl    $1, %ebx

        jmp     freq_notsc_loop

        .align  16
freq_notsc_loop:
        / set high 32 bits of dividend to zero
        xorl    %edx, %edx

        / save the loop count in %edi
        movl    %ecx, %edi

        / initialize the PIT counter and start a count down
        movb    $PIT_LOADMODE, %al
        outb    $PITCTL_PORT
        movb    $0xff, %al
        outb    $PITCTR0_PORT
        outb    $PITCTR0_PORT

        / set low 32 bits of dividend to zero
        xorl    %eax, %eax

/ It is vital that the arguments to idivl be set appropriately because on some
/ cpu's this instruction takes more or less clock ticks depending on its
/ arguments.
freq_notsc_perf_loop:
        idivl   %ebx
        idivl   %ebx
        idivl   %ebx
        idivl   %ebx
        idivl   %ebx
        loop    freq_notsc_perf_loop

        / latch the PIT counter and status
        movb    $_CONST(PIT_READBACK|PIT_READBACKC0), %al
        outb    $PITCTL_PORT

        / read the PIT status
        inb     $PITCTR0_PORT
        shll    $8, %eax

        / read PIT count
        inb     $PITCTR0_PORT
        shll    $8, %eax
        inb     $PITCTR0_PORT
        bswap   %eax

        / check to see if the PIT count was loaded into the CE
        btw     $_CONST(PITSTAT_NULLCNT+8), %ax
        jc      freq_notsc_increase_count

        / check to see if PIT counter wrapped
        btw     $_CONST(PITSTAT_OUTPUT+8), %ax
        jnc     freq_notsc_pit_did_not_wrap

        / halve count
        shrl    $1, %edi
        movl    %edi, %ecx

        jmp     freq_notsc_loop

freq_notsc_increase_count:
        shll    $1, %edi
        jc      freq_notsc_too_fast

        movl    %edi, %ecx

        jmp     freq_notsc_loop

freq_notsc_pit_did_not_wrap:
        shrl    $16, %eax

        cmpw    $0x2000, %ax
        notw    %ax
        jb      freq_notsc_sufficient_duration

freq_notsc_calculate:
        / in mode 0, the PIT loads the count into the CE on the first CLK pulse,
        / then on the second CLK pulse the CE is decremented, therefore mode 0
        / is really a (count + 1) counter, ugh
        xorl    %esi, %esi
        movw    %ax, %si
        incl    %esi

        movl    %edi, %eax
        movl    $0xf000, %ecx
        mull    %ecx

        / tuck away (target_pit_count * loop_count)
        movl    %edx, %edi
        movl    %eax, %ecx

        movl    %esi, %eax
        movl    $0xffffffff, %edx
        mull    %edx

        addl    %esi, %eax
        adcl    $0, %edx

        cmpl    %edi, %edx
        ja      freq_notsc_div_safe
        jb      freq_notsc_too_fast

        cmpl    %ecx, %eax
        jbe     freq_notsc_too_fast

freq_notsc_div_safe:
        movl    %edi, %edx
        movl    %ecx, %eax

        movl    %esi, %ecx
        divl    %ecx

        movl    %eax, %ecx

        jmp     freq_notsc_loop

freq_notsc_sufficient_duration:
        / recall mode 0 is a (count + 1) counter
        incl    %eax

        / save the number of PIT counts
        movl    8(%ebp), %ebx
        movl    %eax, (%ebx)

        / calculate the number of cpu clock ticks that elapsed
        cmpl    $X86_VENDOR_Cyrix, x86_vendor
        jnz     freq_notsc_notcyrix

        / freq_notsc_perf_loop takes 86 clock cycles on Cyrix 6x86 cores
        movl    $86, %eax
        jmp     freq_notsc_calculate_tsc

freq_notsc_notcyrix:
        / freq_notsc_perf_loop takes 237 clock cycles on Intel Pentiums
        movl    $237, %eax

freq_notsc_calculate_tsc:
        mull    %edi

        jmp     freq_notsc_end

freq_notsc_too_fast:
        / return 0 as a 64 bit quantity
        xorl    %eax, %eax
        xorl    %edx, %edx

freq_notsc_end:
        popl    %ebx
        popl    %esi
        popl    %edi
        popl    %ebp

        ret
        SET_SIZE(freq_notsc)

#endif  /* __lint */
#endif  /* !__amd64 */

#if !defined(__lint)
        .data
#if !defined(__amd64)
        .align  4
cpu_vendor:
        .long   0, 0, 0         /* Vendor ID string returned */

        .globl  CyrixInstead

        .globl  x86_featureset
        .globl  x86_type
        .globl  x86_vendor
#endif

#endif  /* __lint */