/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
 */
/*
 * Copyright (c) 2018 Joyent, Inc.
 */

/*	Copyright (c) 1990, 1991 UNIX System Laboratories, Inc.	*/
/*	Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T		*/
/*	  All Rights Reserved						*/

/*	Copyright (c) 1987, 1988 Microsoft Corporation			*/
/*	  All Rights Reserved						*/

#include <sys/asm_linkage.h>
#include <sys/asm_misc.h>
#include <sys/regset.h>
#include <sys/privregs.h>
#include <sys/psw.h>
#include <sys/reboot.h>
#include <sys/x86_archext.h>
#include <sys/machparam.h>

#if defined(__lint)

#include <sys/types.h>
#include <sys/thread.h>
#include <sys/systm.h>
#include <sys/lgrp.h>
#include <sys/regset.h>
#include <sys/link.h>
#include <sys/bootconf.h>
#include <sys/bootsvcs.h>

#else	/* __lint */

#include <sys/segments.h>
#include <sys/pcb.h>
#include <sys/trap.h>
#include <sys/ftrace.h>
#include <sys/traptrace.h>
#include <sys/clock.h>
#include <sys/cmn_err.h>
#include <sys/pit.h>
#include <sys/panic.h>

#if defined(__xpv)
#include <sys/hypervisor.h>
#endif

#include "assym.h"

/*
 * Our assumptions:
 *	- We are running in protected-paged mode.
 *	- Interrupts are disabled.
 *	- The GDT and IDT are the caller's; we need our own copies.
 *	- The kernel's text, initialized data and bss are mapped.
 *
 * Our actions:
 *	- Save arguments
 *	- Initialize our stack pointer to the thread 0 stack (t0stack)
 *	  and leave room for a phony "struct regs".
 *	- Our GDT and IDT need to get munged.
 *	- Since we are using the boot's GDT descriptors, we need
 *	  to copy them into our GDT before we switch to ours.
 *	- We start using our GDT by loading correct values in the
 *	  selector registers (cs=KCS_SEL, ds=es=ss=KDS_SEL, fs=KFS_SEL,
 *	  gs=KGS_SEL).
 *	- The default LDT entry for syscall is set.
 *	- We load the default LDT into the hardware LDT register.
 *	- We load the default TSS into the hardware task register.
 *	- Check for cpu type, i.e. 486 vs. P5 vs. P6 etc.
 *	- mlsetup(%esp) gets called.
 *	- We change our appearance to look like the real thread 0.
 *	  (NOTE: making ourselves a real thread may be a noop)
 *	- main() gets called.  (NOTE: main() never returns).
 *
 * NOW, the real code!
 */
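
/*
 * In outline, the code below amounts to (a C-flavored sketch of the
 * sequence, not code that is ever compiled):
 *
 *	_locore_start(...)
 *	{
 *		switch to t0stack and fake up a "struct regs";
 *		stash sysp/bootops, set cr0 flags, identify the cpu;
 *		mlsetup(rp);
 *		main();			/ never returns
 *		panic("main() returned");
 *	}
 */
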
	/*
	 * The very first thing in the kernel's text segment must be a jump
	 * to the os/fakebop.c startup code.
	 */
	.text
	jmp	_start

	/*
	 * Globals:
	 */
	.globl	_locore_start
	.globl	mlsetup
	.globl	main
	.globl	panic
	.globl	t0stack
	.globl	t0
	.globl	sysp
	.globl	edata

	/*
	 * call back into boot - sysp (bootsvcs.h) and bootops (bootconf.h)
	 */
	.globl	bootops
	.globl	bootopsp

	/*
	 * NOTE: t0stack should be the first thing in the data section so
	 * that if it ever overflows, it will fault on the last kernel text
	 * page; the kernel's text is mapped write-protected (we turn on
	 * CR0_WP below), so the errant store faults at once instead of
	 * silently corrupting whatever follows.
	 */
	.data
	.comm	t0stack, DEFAULTSTKSZ, 32
	.comm	t0, 4094, 32

#endif	/* __lint */


#if defined(__amd64)

#if defined(__lint)

/* ARGSUSED */
void
_locore_start(struct boot_syscalls *sysp, ulong_t rsi, struct bootops *bop)
{}

#else	/* __lint */

	/*
	 * kobj_init() vectors us back to here with (note) a slightly different
	 * set of arguments than _start is given (see lint prototypes above).
	 *
	 * XXX	Make this less vile, please.
	 */
	ENTRY_NP(_locore_start)

	/*
	 * %rdi = boot services (should die someday)
	 * %rdx = bootops
	 */

	leaq	edata(%rip), %rbp	/* reference edata for ksyms */
	movq	$0, (%rbp)		/* limit stack back trace */

	/*
	 * Initialize our stack pointer to the thread 0 stack (t0stack)
	 * and leave room for a "struct regs" for lwp0.  Note that the
	 * stack doesn't actually align to a 16-byte boundary until just
	 * before we call mlsetup because we want to use %rsp to point at
	 * our regs structure.
	 */
	leaq	t0stack(%rip), %rsp
	addq	$_CONST(DEFAULTSTKSZ - REGSIZE), %rsp
#if (REGSIZE & 15) == 0
	subq	$8, %rsp
#endif
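	/*
	 * Working the arithmetic (assuming t0stack and DEFAULTSTKSZ are
	 * both multiples of 16): if REGSIZE is a multiple of 16 we drop 8
	 * more bytes, otherwise REGSIZE is already 8 mod 16; either way
	 * %rsp is 8 mod 16 here, so the pushq of %rbp below leaves the
	 * stack 16-byte aligned at the call to mlsetup().
	 */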
	/*
	 * Save call back for special x86 boot services vector
	 */
	movq	%rdi, sysp(%rip)

	movq	%rdx, bootops(%rip)		/* save bootops */
	movq	$bootops, bootopsp(%rip)

	/*
	 * Save arguments and flags, if only for debugging ..
	 */
	movq	%rdi, REGOFF_RDI(%rsp)
	movq	%rsi, REGOFF_RSI(%rsp)
	movq	%rdx, REGOFF_RDX(%rsp)
	movq	%rcx, REGOFF_RCX(%rsp)
	movq	%r8, REGOFF_R8(%rsp)
	movq	%r9, REGOFF_R9(%rsp)
	pushf
	popq	%r11
	movq	%r11, REGOFF_RFL(%rsp)

#if !defined(__xpv)
	/*
	 * Enable write protect and alignment check faults.
	 */
	movq	%cr0, %rax
	orq	$_CONST(CR0_WP|CR0_AM), %rax
	andq	$_BITNOT(CR0_WT|CR0_CE), %rax
	movq	%rax, %cr0
#endif	/* __xpv */

	/*
	 * (We just assert this works by virtue of being here)
	 */
	bts	$X86FSET_CPUID, x86_featureset(%rip)

	/*
	 * mlsetup() gets called with a struct regs as argument, while
	 * main takes no args and should never return.
	 */
	xorl	%ebp, %ebp
	movq	%rsp, %rdi
	pushq	%rbp
	/* (stack pointer now aligned on 16-byte boundary right here) */
	movq	%rsp, %rbp
	call	mlsetup
	call	main
	/* NOTREACHED */
	leaq	__return_from_main(%rip), %rdi
	xorl	%eax, %eax
	call	panic
	SET_SIZE(_locore_start)

#endif	/* __lint */
#endif	/* __amd64 */

#if !defined(__lint)

__return_from_main:
	.string	"main() returned"
__unsupported_cpu:
	.string	"486 style cpu detected - no longer supported!"

#if defined(DEBUG)
_no_pending_updates:
	.string	"locore.s:%d lwp_rtt(lwp %p) but pcb_rupdate != 1"
#endif

#endif	/* !__lint */

#if !defined(__amd64)

#if defined(__lint)

/* ARGSUSED */
void
_locore_start(struct boot_syscalls *sysp, struct bootops *bop)
{}

#else	/* __lint */

	/*
	 * kobj_init() vectors us back to here with (note) a slightly different
	 * set of arguments than _start is given (see lint prototypes above).
	 *
	 * XXX	Make this less vile, please.
	 */
	ENTRY_NP(_locore_start)

	/*
	 *	%ecx = boot services (should die someday)
	 *	%ebx = bootops
	 */
	mov	$edata, %ebp		/ edata needs to be defined for ksyms
	movl	$0, (%ebp)		/ limit stack back trace

	/*
	 * Initialize our stack pointer to the thread 0 stack (t0stack)
	 * and leave room for a phony "struct regs".
	 */
	movl	$t0stack + DEFAULTSTKSZ - REGSIZE, %esp

	/*
	 * Save call back for special x86 boot services vector
	 */
	mov	%ecx, sysp		/ save call back for boot services

	mov	%ebx, bootops		/ save bootops
	movl	$bootops, bootopsp


	/*
	 * Save all registers and flags
	 */
	pushal
	pushfl

#if !defined(__xpv)
	/*
	 * Override bios settings and enable write protect and
	 * alignment check faults.
	 */
	movl	%cr0, %eax

	/*
	 * enable WP for detecting faults, and enable alignment checking.
	 */
	orl	$_CONST(CR0_WP|CR0_AM), %eax
	andl	$_BITNOT(CR0_WT|CR0_CE), %eax
	movl	%eax, %cr0		/ set the cr0 register correctly and
					/ override the BIOS setup

	/*
	 * If bit 21 of eflags can be flipped, then cpuid is present
	 * and enabled.
	 */
	pushfl
	popl	%ecx
	movl	%ecx, %eax
	xorl	$PS_ID, %eax		/ try complemented bit
	pushl	%eax
	popfl
	pushfl
	popl	%eax
	cmpl	%eax, %ecx
	jne	have_cpuid
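
	/*
	 * The probe above, rendered as C for reference (a sketch only;
	 * getfl()/setfl() are illustrative stand-ins for the pushfl/popfl
	 * pairs and do not exist here):
	 */
#if 0
	int
	cpuid_present(void)
	{
		ulong_t before = getfl();	/* read EFLAGS */

		setfl(before ^ PS_ID);		/* try to flip bit 21 */
		return (((getfl() ^ before) & PS_ID) != 0);
	}
#endif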

	/*
	 * cpuid may be disabled on Cyrix; try to detect Cyrix with the 5/2
	 * test: div does not modify the cc flags on Cyrix.  Though this may
	 * also be true for other vendors, it is generally true only for
	 * newer models from those vendors that support and do not disable
	 * cpuid (usually because cpuid cannot be disabled).
	 */

	/*
	 * clear cc flags
	 */
	xorb	%ah, %ah
	sahf

	/*
	 * perform 5/2 test
	 */
	movw	$5, %ax
	movb	$2, %bl
	divb	%bl

	lahf
	cmpb	$2, %ah
	jne	cpu_486

	/*
	 * div did not modify the cc flags; chances are the vendor is Cyrix.
	 * Assume the vendor is Cyrix and use the CCR's to enable cpuid.
	 */
	.set	CYRIX_CRI, 0x22		/ CR Index Register
	.set	CYRIX_CRD, 0x23		/ CR Data Register

	.set	CYRIX_CCR3, 0xc3	/ Config Control Reg 3
	.set	CYRIX_CCR4, 0xe8	/ Config Control Reg 4
	.set	CYRIX_DIR0, 0xfe	/ Device Identification Reg 0
	.set	CYRIX_DIR1, 0xff	/ Device Identification Reg 1

	/*
	 * even if the cpu vendor is Cyrix and the motherboard/chipset
	 * vendor decided to ignore lines A1-A4 for I/O addresses, I/O port
	 * 0x21 corresponds with 0x23 (with A1 ignored, 0x23 decodes as the
	 * PIC mask register at 0x21 and 0x22 as the PIC command/status
	 * port at 0x20); since 0x22 is still untouched, the reads and
	 * writes of 0x21 are guaranteed to be off-chip of the cpu
	 */

	/*
	 * enable read of ISR at I/O port 0x20
	 */
	movb	$0xb, %al
	outb	$MCMD_PORT

	/*
	 * read IMR and store in %bl
	 */
	inb	$MIMR_PORT
	movb	%al, %bl

	/*
	 * mask out all interrupts so that ISR will not change
	 */
	movb	$0xff, %al
	outb	$MIMR_PORT

	/*
	 * reads of I/O port 0x22 on Cyrix are always directed off-chip;
	 * make use of I/O pull-up to test for an unknown device on 0x22
	 */
	inb	$CYRIX_CRI
	cmpb	$0xff, %al
	je	port_22_free

	/*
	 * motherboard/chipset vendor may be ignoring line A1 of I/O address
	 */
	movb	%al, %cl

	/*
	 * if the ISR and the value read from 0x22 do not match then we have
	 * detected some unknown device, probably a chipset, at 0x22
	 */
	inb	$MCMD_PORT
	cmpb	%al, %cl
	jne	restore_IMR

port_22_free:
	/*
	 * now test to see if some unknown device is using I/O port 0x23
	 *
	 * read the external I/O port at 0x23
	 */
	inb	$CYRIX_CRD

	/*
	 * Test for pull-up at 0x23 or if I/O address line A1 is being ignored.
	 * IMR is 0xff so both tests are performed simultaneously.
	 */
	cmpb	$0xff, %al
	jne	restore_IMR

	/*
	 * We are a Cyrix part. In case we are some model of Cx486 or a Cx586,
	 * record the type and fix it later if not.
	 */
	movl	$X86_VENDOR_Cyrix, x86_vendor
	movl	$X86_TYPE_CYRIX_486, x86_type

	/*
	 * Try to read CCR3. All Cyrix cpu's which support cpuid have CCR3.
	 *
	 * load CCR3 index into CCR index register
	 */

	movb	$CYRIX_CCR3, %al
	outb	$CYRIX_CRI

	/*
	 * If we are not a Cyrix cpu, then we have performed an external I/O
	 * cycle. If the CCR index was not valid for this Cyrix model, we may
	 * have performed an external I/O cycle as well. In these cases and
	 * if the motherboard/chipset vendor ignores I/O address line A1,
	 * then the PIC will have IRQ3 set at the lowest priority as a side
	 * effect of the above outb. We are reasonably confident that there
	 * is not an unknown device on I/O port 0x22, so there should have been
	 * no unpredictable side-effect of the above outb.
	 */

	/*
	 * read CCR3
	 */
	inb	$CYRIX_CRD

	/*
	 * If we are not a Cyrix cpu the inb above produced an external I/O
	 * cycle. If we are a Cyrix model that does not support CCR3, we also
	 * produced an external I/O cycle. In all known Cyrix models 6x86 and
	 * above, bit 3 of CCR3 is reserved and cannot be set to 1. In all
	 * Cyrix models prior to the 6x86 that supported CCR3, bits 4-7 are
	 * reserved as well. It is highly unlikely that CCR3 contains the value
	 * 0xff. We test whether I/O port 0x23 reads as the pull-up value or
	 * as the IMR (both 0xff), and if so deduce we are not a Cyrix with
	 * support for cpuid.
	 */
	cmpb	$0xff, %al
	je	restore_PIC

	/*
	 * There exist 486 ISA Cyrix chips that support CCR3 but do not support
	 * DIR0 and DIR1. If we try to read DIR0, we may generate external I/O
	 * cycles; the exact behavior is model specific and undocumented.
	 * Unfortunately these external I/O cycles may confuse some PIC's beyond
	 * recovery. Fortunately we can use the following undocumented trick:
	 * if bit 4 of CCR3 can be toggled, then DIR0 and DIR1 are supported.
	 * Pleasantly, MAPEN is bit 4 of CCR3, so this trick is guaranteed
	 * to work on all Cyrix cpu's which support cpuid.
	 */
	movb	%al, %dl
	xorb	$0x10, %dl
	movb	%al, %cl

	/*
	 * write CCR3 back with bit 4 toggled
	 */
	movb	$CYRIX_CCR3, %al
	outb	$CYRIX_CRI

	movb	%dl, %al
	outb	$CYRIX_CRD

	/*
	 * read CCR3
	 */
	movb	$CYRIX_CCR3, %al
	outb	$CYRIX_CRI
	inb	$CYRIX_CRD
	movb	%al, %dl

	/*
	 * restore CCR3
	 */
	movb	$CYRIX_CCR3, %al
	outb	$CYRIX_CRI

	movb	%cl, %al
	outb	$CYRIX_CRD

	/*
	 * if bit 4 was not toggled, DIR0 and DIR1 are not supported, in
	 * which case we do not have cpuid anyway
	 */
	andb	$0x10, %al
	andb	$0x10, %dl
	cmpb	%al, %dl
	je	restore_PIC

	/*
	 * read DIR0
	 */
	movb	$CYRIX_DIR0, %al
	outb	$CYRIX_CRI
	inb	$CYRIX_CRD

	/*
	 * test for pull-up
	 */
	cmpb	$0xff, %al
	je	restore_PIC

	/*
	 * Values of 0x20-0x27 in DIR0 are currently reserved by Cyrix for
	 * future use. If Cyrix ever produces a cpu that supports cpuid with
	 * these ids, the following test will have to change. For now we remain
	 * pessimistic since the formats of the CCR's may be different then.
	 *
	 * test for at least a 6x86, to see if we support both MAPEN and CPUID
	 */
	cmpb	$0x30, %al
	jb	restore_IMR

	/*
	 * enable MAPEN
	 */
	movb	$CYRIX_CCR3, %al
	outb	$CYRIX_CRI

	andb	$0xf, %cl
	movb	%cl, %al
	orb	$0x10, %al
	outb	$CYRIX_CRD

	/*
	 * select CCR4
	 */
	movb	$CYRIX_CCR4, %al
	outb	$CYRIX_CRI

	/*
	 * read CCR4
	 */
	inb	$CYRIX_CRD

	/*
	 * enable cpuid
	 */
	orb	$0x80, %al
	movb	%al, %dl

	/*
	 * select CCR4
	 */
	movb	$CYRIX_CCR4, %al
	outb	$CYRIX_CRI

	/*
	 * write CCR4
	 */
	movb	%dl, %al
	outb	$CYRIX_CRD

	/*
	 * select CCR3
	 */
	movb	$CYRIX_CCR3, %al
	outb	$CYRIX_CRI

	/*
	 * disable MAPEN and write CCR3
	 */
	movb	%cl, %al
	outb	$CYRIX_CRD

	/*
	 * restore IMR
	 */
	movb	%bl, %al
	outb	$MIMR_PORT

	/*
	 * test to see if cpuid available
	 */
	pushfl
	popl	%ecx
	movl	%ecx, %eax
	xorl	$PS_ID, %eax		/ try complemented bit
	pushl	%eax
	popfl
	pushfl
	popl	%eax
	cmpl	%eax, %ecx
	jne	have_cpuid
	jmp	cpu_486

restore_PIC:
	/*
	 * In case the motherboard/chipset vendor is ignoring line A1 of the
	 * I/O address, we set the PIC priorities to sane values.
	 */
	movb	$0xc7, %al	/ irq 7 lowest priority
	outb	$MCMD_PORT

restore_IMR:
	movb	%bl, %al
	outb	$MIMR_PORT
	jmp	cpu_486

have_cpuid:
	/*
	 * cpuid instruction present
	 */
	bts	$X86FSET_CPUID, x86_featureset	/ Just to set; Ignore the CF
	movl	$0, %eax
	cpuid

	movl	%ebx, cpu_vendor
	movl	%edx, cpu_vendor+4
	movl	%ecx, cpu_vendor+8

	/*
	 * early cyrix cpus are somewhat strange and need to be
	 * probed in curious ways to determine their identity
	 */

	leal	cpu_vendor, %esi
	leal	CyrixInstead, %edi
	movl	$12, %ecx
	repz
	  cmpsb
	je	vendor_is_cyrix

	/ let mlsetup()/cpuid_pass1() handle everything else in C

	jmp	cpu_done

is486:
	/*
	 * test to see if cpuid returned anything useful
	 */
	testl	%eax, %eax
	jz	isa486

	movl	$1, %eax
	cpuid

	movl	%eax, %ebx
	andl	$0xF00, %ebx
	cmpl	$0x400, %ebx
	je	isa486

	rep;	ret	/* use 2 byte return instruction; a 1 byte ret */
			/* directly at a branch target can be mispredicted */
			/* AMD Software Optimization Guide - Section 6.2 */
isa486:
	/*
	 * lose the return address
	 */
	popl	%eax
	jmp	cpu_486

vendor_is_cyrix:
	call	is486

	/*
	 * Processor signature and feature flags for Cyrix are insane.
	 * BIOS can play with semi-documented registers, so cpuid must be used
	 * cautiously. Since we are Cyrix and have cpuid, we have DIR0 and
	 * DIR1. Keep the family in %ebx and feature flags in %edx until no
	 * longer needed.
	 */

	/*
	 * read DIR0
	 */
	movb	$CYRIX_DIR0, %al
	outb	$CYRIX_CRI
	inb	$CYRIX_CRD

	/*
	 * First we handle the cases where we are a 6x86 or 6x86L.
	 * The 6x86 is basically a 486; the only reliable bit in the
	 * feature flags is for FPU. The 6x86L is better; unfortunately
	 * there is no really good way to distinguish between these two
	 * cpu's. We are pessimistic and when in doubt assume 6x86.
	 */

	cmpb	$0x40, %al
	jae	maybeGX

	/*
	 * We are an M1, either a 6x86 or 6x86L.
	 */
	cmpb	$0x30, %al
	je	maybe6x86L
	cmpb	$0x31, %al
	je	maybe6x86L
	cmpb	$0x34, %al
	je	maybe6x86L
	cmpb	$0x35, %al
	je	maybe6x86L

	/*
	 * although it is possible that we are a 6x86L, the cpu and
	 * documentation are so buggy that we just do not care.
	 */
	jmp	likely6x86

maybe6x86L:
	/*
	 * read DIR1
	 */
	movb	$CYRIX_DIR1, %al
	outb	$CYRIX_CRI
	inb	$CYRIX_CRD
	cmpb	$0x22, %al
	jb	likely6x86

	/*
	 * We are a 6x86L, or at least a 6x86 with honest cpuid feature flags
	 */
	movl	$X86_TYPE_CYRIX_6x86L, x86_type
	jmp	coma_bug

likely6x86:
	/*
	 * We are likely a 6x86, or a 6x86L without a way of knowing.
	 *
	 * The 6x86 has NO Pentium or Pentium Pro compatible features even
	 * though it claims to be a Pentium Pro compatible!
	 *
	 * The core used in the 6x86 may have most of the Pentium system
	 * registers and largely conform to the Pentium System Programming
	 * Reference. Documentation on these parts is long gone. Treat it as
	 * a crippled Pentium and hope for the best.
	 */

	movl	$X86_TYPE_CYRIX_6x86, x86_type
	jmp	coma_bug

maybeGX:
	/*
	 * Now we check whether we are a MediaGX or GXm. We have particular
	 * reason for concern here. Even though most of the GXm's
	 * report having TSC in the cpuid feature flags, the TSC may be
	 * horribly broken. What is worse is that MediaGX's are basically
	 * 486's while the good GXm's are more like Pentium Pro's!
	 */

	cmpb	$0x50, %al
	jae	maybeM2

	/*
	 * We are either a MediaGX (sometimes called a Gx86) or GXm
	 */

	cmpb	$0x41, %al
	je	maybeMediaGX

	cmpb	$0x44, %al
	jb	maybeGXm

	cmpb	$0x47, %al
	jbe	maybeMediaGX

	/*
	 * We do not honestly know what we are, so assume a MediaGX
	 */
	jmp	media_gx

maybeGXm:
	/*
	 * It is still possible we are either a MediaGX or GXm; trust cpuid.
	 * Family should be 5 on a GXm.
	 */
	cmpl	$0x500, %ebx
	je	GXm

	/*
	 * BIOS/Cyrix might set family to 6 on a GXm
	 */
	cmpl	$0x600, %ebx
	jne	media_gx

GXm:
	movl	$X86_TYPE_CYRIX_GXm, x86_type
	jmp	cpu_done

maybeMediaGX:
	/*
	 * read DIR1
	 */
	movb	$CYRIX_DIR1, %al
	outb	$CYRIX_CRI
	inb	$CYRIX_CRD

	cmpb	$0x30, %al
	jae	maybeGXm

	/*
	 * we are a MediaGX for which we do not trust cpuid
	 */
media_gx:
	movl	$X86_TYPE_CYRIX_MediaGX, x86_type
	jmp	cpu_486

maybeM2:
	/*
	 * Now we check whether we are a 6x86MX or MII. These cpu's are
	 * virtually identical, but we care because for the 6x86MX, we
	 * must work around the coma bug. Also for 6x86MX prior to revision
	 * 1.4, the TSC may have serious bugs.
	 */

	cmpb	$0x60, %al
	jae	maybeM3

	/*
	 * family should be 6, but BIOS/Cyrix might set it to 5
	 */
	cmpl	$0x600, %ebx
	ja	cpu_486

	/*
	 * read DIR1
	 */
	movb	$CYRIX_DIR1, %al
	outb	$CYRIX_CRI
	inb	$CYRIX_CRD

	cmpb	$0x8, %al
	jb	cyrix6x86MX
	cmpb	$0x80, %al
	jb	MII

cyrix6x86MX:
	/*
	 * It is altogether unclear how the revision stamped on the cpu
	 * maps to the values in DIR0 and DIR1. Just assume TSC is broken.
	 */
	movl	$X86_TYPE_CYRIX_6x86MX, x86_type
	jmp	coma_bug

MII:
	movl	$X86_TYPE_CYRIX_MII, x86_type
likeMII:
	jmp	cpu_done

maybeM3:
	/*
	 * We are some chip that we cannot identify yet, an MIII perhaps.
	 * We will be optimistic and hope that the chip is much like an MII,
	 * and that cpuid is sane. Cyrix seemed to have gotten it right in
	 * time for the MII; we can only hope it stayed that way.
	 * Maybe the BIOS or Cyrix is trying to hint at something.
	 */
	cmpl	$0x500, %ebx
	je	GXm

	cmpb	$0x80, %al
	jae	likelyM3

	/*
	 * Just test for the features Cyrix is known for
	 */

	jmp	MII

likelyM3:
	/*
	 * DIR0 with values from 0x80 to 0x8f indicates a VIA Cyrix III, aka
	 * the Cyrix MIII. There may be parts later that use the same ranges
	 * for DIR0 with special values in DIR1, maybe the VIA CIII, but for
	 * now we will call anything with a DIR0 of 0x80 or higher an MIII.
	 * The MIII is supposed to support large pages, but we will believe
	 * it when we see it. For now we just enable and test for MII features.
	 */
	movl	$X86_TYPE_VIA_CYRIX_III, x86_type
	jmp	likeMII

coma_bug:

/*
 * With NO_LOCK set to 0 in CCR1, the usual state that BIOS enforces, some
 * bus cycles are issued with LOCK# asserted. With NO_LOCK set to 1, all bus
 * cycles except page table accesses and interrupt ACK cycles do not assert
 * LOCK#. xchgl is an instruction that asserts LOCK# if NO_LOCK is set to 0.
 * Due to a bug in the cpu core involving over-optimization of branch
 * prediction, register renaming, and execution of instructions down both the
 * X and Y pipes for the xchgl instruction, short loops can be written that
 * never de-assert LOCK# from one invocation of the loop to the next, ad
 * infinitum. The undesirable effect of this situation is that interrupts are
 * not serviced. The ideal workaround to this bug would be to set NO_LOCK to
 * 1. Unfortunately bus cycles that would otherwise have asserted LOCK# no
 * longer do, unless they are page table accesses or interrupt ACK cycles.
 * With LOCK# not asserted, these bus cycles are now cached. This can cause
 * undesirable behaviour if the ARR's are not configured correctly. Solaris
 * does not configure the ARR's, nor does it provide any useful mechanism for
 * doing so, thus the ideal workaround is not viable. Fortunately, the only
 * known exploits for this bug involve the xchgl instruction specifically.
 * There is a group of undocumented registers on Cyrix 6x86, 6x86L, and
 * 6x86MX cpu's which can be used to specify one instruction as a serializing
 * instruction. With the xchgl instruction serialized, LOCK# is still
 * asserted, but it is the sole instruction for which LOCK# is asserted.
 * There is now some added penalty for the xchgl instruction, but the usual
 * bus locking is preserved. This ingenious workaround was discovered by
 * disassembling a binary provided by Cyrix as a workaround for this bug on
 * Windows, but it's not documented anywhere by Cyrix, nor is the bug actually
 * mentioned in any public errata! The only concern for this workaround is
 * that there may be similar undiscovered bugs with other instructions that
 * assert LOCK# that may be leveraged to similar ends. The fact that Cyrix
 * fixed this bug sometime late in 1997 and no exploits other than
 * xchgl have been discovered is a good indication that this workaround is
 * reasonable.
 */
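
/*
 * To make the hazard concrete, a sketch of the kind of loop that trips the
 * bug (ordinary spin-lock acquisition code, nothing exotic):
 *
 *	spin:	movl	$1, %eax
 *		xchgl	%eax, (lockp)		/ asserts LOCK#
 *		testl	%eax, %eax
 *		jnz	spin
 *
 * On an affected part this loop can re-assert LOCK# before it is ever
 * de-asserted, so pending interrupts are never taken.
 */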

	.set	CYRIX_DBR0, 0x30	/ Debug Register 0
	.set	CYRIX_DBR1, 0x31	/ Debug Register 1
	.set	CYRIX_DBR2, 0x32	/ Debug Register 2
	.set	CYRIX_DBR3, 0x33	/ Debug Register 3
	.set	CYRIX_DOR, 0x3c		/ Debug Opcode Register

	/*
	 * What is known about DBR1, DBR2, DBR3, and DOR is that for normal
	 * cpu execution DBR1, DBR2, and DBR3 are set to 0. To obtain opcode
	 * serialization, DBR1, DBR2, and DBR3 are loaded with 0xb8, 0x7f,
	 * and 0xff. Then, DOR is loaded with the one byte opcode.
	 */

	/*
	 * select CCR3
	 */
	movb	$CYRIX_CCR3, %al
	outb	$CYRIX_CRI

	/*
	 * read CCR3 and mask out MAPEN
	 */
	inb	$CYRIX_CRD
	andb	$0xf, %al

	/*
	 * save masked CCR3 in %ah
	 */
	movb	%al, %ah

	/*
	 * select CCR3
	 */
	movb	$CYRIX_CCR3, %al
	outb	$CYRIX_CRI

	/*
	 * enable MAPEN
	 */
	movb	%ah, %al
	orb	$0x10, %al
	outb	$CYRIX_CRD

	/*
	 * read DBR0
	 */
	movb	$CYRIX_DBR0, %al
	outb	$CYRIX_CRI
	inb	$CYRIX_CRD

	/*
	 * disable MATCH and save in %bh
	 */
	orb	$0x80, %al
	movb	%al, %bh

	/*
	 * write DBR0
	 */
	movb	$CYRIX_DBR0, %al
	outb	$CYRIX_CRI
	movb	%bh, %al
	outb	$CYRIX_CRD

	/*
	 * write DBR1
	 */
	movb	$CYRIX_DBR1, %al
	outb	$CYRIX_CRI
	movb	$0xf8, %al
	outb	$CYRIX_CRD

	/*
	 * write DBR2
	 */
	movb	$CYRIX_DBR2, %al
	outb	$CYRIX_CRI
	movb	$0x7f, %al
	outb	$CYRIX_CRD

	/*
	 * write DBR3
	 */
	movb	$CYRIX_DBR3, %al
	outb	$CYRIX_CRI
	xorb	%al, %al
	outb	$CYRIX_CRD

	/*
	 * write DOR
	 */
	movb	$CYRIX_DOR, %al
	outb	$CYRIX_CRI
	movb	$0x87, %al
	outb	$CYRIX_CRD

	/*
	 * enable MATCH
	 */
	movb	$CYRIX_DBR0, %al
	outb	$CYRIX_CRI
	movb	%bh, %al
	andb	$0x7f, %al
	outb	$CYRIX_CRD

	/*
	 * disable MAPEN
	 */
	movb	$0xc3, %al
	outb	$CYRIX_CRI
	movb	%ah, %al
	outb	$CYRIX_CRD

	jmp	cpu_done

cpu_done:

	popfl					/* Restore original FLAGS */
	popal					/* Restore all registers */

#endif	/* !__xpv */

	/*
	 *  mlsetup(%esp) gets called.
	 */
	pushl	%esp
	call	mlsetup
	addl	$4, %esp

	/*
	 * We change our appearance to look like the real thread 0.
	 * (NOTE: making ourselves a real thread may be a noop)
	 * main() gets called.  (NOTE: main() never returns).
	 */
	call	main
	/* NOTREACHED */
	pushl	$__return_from_main
	call	panic

	/* NOTREACHED */
cpu_486:
	pushl	$__unsupported_cpu
	call	panic
	SET_SIZE(_locore_start)

#endif	/* __lint */
#endif	/* !__amd64 */


/*
 *  For stack layout, see privregs.h
 *  When cmntrap gets called, the error code and trap number have been pushed.
 *  When cmntrap_pushed gets called, the entire struct regs has been pushed.
 */

#if defined(__lint)

/* ARGSUSED */
void
cmntrap()
{}

#else	/* __lint */

	.globl	trap		/* C handler called below */

#if defined(__amd64)

	ENTRY_NP2(cmntrap, _cmntrap)

	INTR_PUSH

	ALTENTRY(cmntrap_pushed)

	movq	%rsp, %rbp

	/*
	 * - if this is a #pf i.e. T_PGFLT, %r15 is live
	 *   and contains the faulting address i.e. a copy of %cr2
	 *
	 * - if this is a #db i.e. T_SGLSTP, %r15 is live
	 *   and contains the value of %db6
	 */

	TRACE_PTR(%rdi, %rbx, %ebx, %rcx, $TT_TRAP) /* Uses labels 8 and 9 */
	TRACE_REGS(%rdi, %rsp, %rbx, %rcx)	/* Uses label 9 */
	TRACE_STAMP(%rdi)		/* Clobbers %eax, %edx, uses 9 */

	/*
	 * We must first check if DTrace has set its NOFAULT bit.  This
	 * regrettably must happen before the trap stack is recorded, because
	 * this requires a call to getpcstack() and may induce recursion if an
	 * fbt::getpcstack: enabling is inducing the bad load.
	 */
	movl	%gs:CPU_ID, %eax
	shlq	$CPU_CORE_SHIFT, %rax
	leaq	cpu_core(%rip), %r8
	addq	%r8, %rax
	movw	CPUC_DTRACE_FLAGS(%rax), %cx
	testw	$CPU_DTRACE_NOFAULT, %cx
	jnz	.dtrace_induced

	TRACE_STACK(%rdi)

	movq	%rbp, %rdi
	movq	%r15, %rsi
	movl	%gs:CPU_ID, %edx

	/*
	 * We know that this isn't a DTrace non-faulting load; we can now safely
	 * reenable interrupts.  (In the case of pagefaults, we enter through an
	 * interrupt gate.)
	 */
	ENABLE_INTR_FLAGS

	call	trap		/* trap(rp, addr, cpuid) handles all traps */
	jmp	_sys_rtt

.dtrace_induced:
	cmpw	$KCS_SEL, REGOFF_CS(%rbp)	/* test CS for user-mode trap */
	jne	3f				/* if from user, panic */

	cmpl	$T_PGFLT, REGOFF_TRAPNO(%rbp)
	je	1f

	cmpl	$T_GPFLT, REGOFF_TRAPNO(%rbp)
	je	0f

	cmpl	$T_ILLINST, REGOFF_TRAPNO(%rbp)
	je	0f

	cmpl	$T_ZERODIV, REGOFF_TRAPNO(%rbp)
	jne	4f				/* if not PF/GP/UD/DE, panic */

	orw	$CPU_DTRACE_DIVZERO, %cx
	movw	%cx, CPUC_DTRACE_FLAGS(%rax)
	jmp	2f

	/*
	 * If we've taken a GPF, we don't (unfortunately) have the address that
	 * induced the fault.  So instead of setting the fault to BADADDR,
	 * we'll set the fault to ILLOP.
	 */
0:
	orw	$CPU_DTRACE_ILLOP, %cx
	movw	%cx, CPUC_DTRACE_FLAGS(%rax)
	jmp	2f
1:
	orw	$CPU_DTRACE_BADADDR, %cx
	movw	%cx, CPUC_DTRACE_FLAGS(%rax)	/* set fault to bad addr */
	movq	%r15, CPUC_DTRACE_ILLVAL(%rax)
					    /* fault addr is illegal value */
2:
	movq	REGOFF_RIP(%rbp), %rdi
	movq	%rdi, %r12
	call	dtrace_instr_size
	addq	%rax, %r12
	movq	%r12, REGOFF_RIP(%rbp)
	INTR_POP
	jmp	tr_iret_auto
	/*NOTREACHED*/
3:
	leaq	dtrace_badflags(%rip), %rdi
	xorl	%eax, %eax
	call	panic
4:
	leaq	dtrace_badtrap(%rip), %rdi
	xorl	%eax, %eax
	call	panic
	SET_SIZE(cmntrap_pushed)
	SET_SIZE(cmntrap)
	SET_SIZE(_cmntrap)

#elif defined(__i386)


	ENTRY_NP2(cmntrap, _cmntrap)

	INTR_PUSH

	ALTENTRY(cmntrap_pushed)

	movl	%esp, %ebp

	/*
	 * - if this is a #pf i.e. T_PGFLT, %esi is live
	 *   and contains the faulting address i.e. a copy of %cr2
	 *
	 * - if this is a #db i.e. T_SGLSTP, %esi is live
	 *   and contains the value of %db6
	 */

	TRACE_PTR(%edi, %ebx, %ebx, %ecx, $TT_TRAP) /* Uses labels 8 and 9 */
	TRACE_REGS(%edi, %esp, %ebx, %ecx)	/* Uses label 9 */
	TRACE_STAMP(%edi)		/* Clobbers %eax, %edx, uses 9 */

	/*
	 * We must first check if DTrace has set its NOFAULT bit.  This
	 * regrettably must happen before the trap stack is recorded, because
	 * this requires a call to getpcstack() and may induce recursion if an
	 * fbt::getpcstack: enabling is inducing the bad load.
	 */
	movl	%gs:CPU_ID, %eax
	shll	$CPU_CORE_SHIFT, %eax
	addl	$cpu_core, %eax
	movw	CPUC_DTRACE_FLAGS(%eax), %cx
	testw	$CPU_DTRACE_NOFAULT, %cx
	jnz	.dtrace_induced

	TRACE_STACK(%edi)

	pushl	%gs:CPU_ID
	pushl	%esi		/* fault address for PGFLTs */
	pushl	%ebp		/* &regs */

	/*
	 * We know that this isn't a DTrace non-faulting load; we can now safely
	 * reenable interrupts.  (In the case of pagefaults, we enter through an
	 * interrupt gate.)
	 */
	ENABLE_INTR_FLAGS

	call	trap		/* trap(rp, addr, cpuid) handles all traps */
	addl	$12, %esp	/* get arguments off stack */
	jmp	_sys_rtt

.dtrace_induced:
	cmpw	$KCS_SEL, REGOFF_CS(%ebp)	/* test CS for user-mode trap */
	jne	3f				/* if from user, panic */

	cmpl	$T_PGFLT, REGOFF_TRAPNO(%ebp)
	je	1f

	cmpl	$T_GPFLT, REGOFF_TRAPNO(%ebp)
	je	0f

	cmpl	$T_ZERODIV, REGOFF_TRAPNO(%ebp)
	jne	4f				/* if not PF/GP/DE, panic */

	orw	$CPU_DTRACE_DIVZERO, %cx
	movw	%cx, CPUC_DTRACE_FLAGS(%eax)
	jmp	2f

0:
	/*
	 * If we've taken a GPF, we don't (unfortunately) have the address that
	 * induced the fault.  So instead of setting the fault to BADADDR,
	 * we'll set the fault to ILLOP.
	 */
	orw	$CPU_DTRACE_ILLOP, %cx
	movw	%cx, CPUC_DTRACE_FLAGS(%eax)
	jmp	2f
1:
	orw	$CPU_DTRACE_BADADDR, %cx
	movw	%cx, CPUC_DTRACE_FLAGS(%eax)	/* set fault to bad addr */
	movl	%esi, CPUC_DTRACE_ILLVAL(%eax)
					    /* fault addr is illegal value */
2:
	pushl	REGOFF_EIP(%ebp)
	call	dtrace_instr_size
	addl	$4, %esp
	movl	REGOFF_EIP(%ebp), %ecx
	addl	%eax, %ecx
	movl	%ecx, REGOFF_EIP(%ebp)
	INTR_POP_KERNEL
	IRET
	/*NOTREACHED*/
3:
	pushl	$dtrace_badflags
	call	panic
4:
	pushl	$dtrace_badtrap
	call	panic
	SET_SIZE(cmntrap)
	SET_SIZE(_cmntrap)

#endif	/* __i386 */

/*
 * Declare a uintptr_t that holds the size of _cmntrap, so that stack
 * traceback code can tell when a regs structure is on the stack.
 */
	.globl	_cmntrap_size
	.align	CLONGSIZE
_cmntrap_size:
	.NWORD	. - _cmntrap
	.type	_cmntrap_size, @object
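
/*
 * A consumer-side sketch (assumed here for illustration, not a routine in
 * this file): a traceback walker can test whether a saved return PC lies
 * inside cmntrap, and if so treat the frame above it as a struct regs
 * rather than an ordinary stack frame:
 *
 *	if (pc >= (uintptr_t)_cmntrap &&
 *	    pc < (uintptr_t)_cmntrap + _cmntrap_size)
 *		rp = (struct regs *)fp;
 */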

dtrace_badflags:
	.string "bad DTrace flags"

dtrace_badtrap:
	.string "bad DTrace trap"

#endif	/* __lint */

#if defined(__lint)

/* ARGSUSED */
void
cmninttrap()
{}

#if !defined(__xpv)
void
bop_trap_handler(void)
{}
#endif

#else	/* __lint */

	.globl	trap		/* C handler called below */

#if defined(__amd64)

	ENTRY_NP(cmninttrap)

	INTR_PUSH
	INTGATE_INIT_KERNEL_FLAGS

	TRACE_PTR(%rdi, %rbx, %ebx, %rcx, $TT_TRAP) /* Uses labels 8 and 9 */
	TRACE_REGS(%rdi, %rsp, %rbx, %rcx)	/* Uses label 9 */
	TRACE_STAMP(%rdi)		/* Clobbers %eax, %edx, uses 9 */

	movq	%rsp, %rbp

	movl	%gs:CPU_ID, %edx
	xorl	%esi, %esi
	movq	%rsp, %rdi
	call	trap		/* trap(rp, addr, cpuid) handles all traps */
	jmp	_sys_rtt
	SET_SIZE(cmninttrap)

#if !defined(__xpv)
	/*
	 * Handle traps early in boot. This just revectors into C quickly,
	 * as these are always fatal errors.
	 *
	 * Adjust %rsp to get the same stack layout as in 32-bit mode for
	 * bop_trap().
	 */
	ENTRY(bop_trap_handler)
	movq	%rsp, %rdi
	sub	$8, %rsp
	call	bop_trap
	SET_SIZE(bop_trap_handler)
#endif

#elif defined(__i386)

	ENTRY_NP(cmninttrap)

	INTR_PUSH
	INTGATE_INIT_KERNEL_FLAGS

	TRACE_PTR(%edi, %ebx, %ebx, %ecx, $TT_TRAP) /* Uses labels 8 and 9 */
	TRACE_REGS(%edi, %esp, %ebx, %ecx)	/* Uses label 9 */
	TRACE_STAMP(%edi)		/* Clobbers %eax, %edx, uses 9 */

	movl	%esp, %ebp

	TRACE_STACK(%edi)

	pushl	%gs:CPU_ID
	pushl	$0
	pushl	%ebp
	call	trap		/* trap(rp, addr, cpuid) handles all traps */
	addl	$12, %esp
	jmp	_sys_rtt
	SET_SIZE(cmninttrap)

#if !defined(__xpv)
	/*
	 * Handle traps early in boot. This just revectors into C quickly,
	 * as these are always fatal errors.
	 */
	ENTRY(bop_trap_handler)
	movl	%esp, %eax
	pushl	%eax
	call	bop_trap
	SET_SIZE(bop_trap_handler)
#endif

#endif	/* __i386 */

#endif	/* __lint */

#if defined(__lint)

/* ARGSUSED */
void
dtrace_trap()
{}

#else	/* __lint */

	.globl	dtrace_user_probe

#if defined(__amd64)

	ENTRY_NP(dtrace_trap)

	INTR_PUSH

	TRACE_PTR(%rdi, %rbx, %ebx, %rcx, $TT_TRAP) /* Uses labels 8 and 9 */
	TRACE_REGS(%rdi, %rsp, %rbx, %rcx)	/* Uses label 9 */
	TRACE_STAMP(%rdi)		/* Clobbers %eax, %edx, uses 9 */

	movq	%rsp, %rbp

	movl	%gs:CPU_ID, %edx
#if defined(__xpv)
	movq	%gs:CPU_VCPU_INFO, %rsi
	movq	VCPU_INFO_ARCH_CR2(%rsi), %rsi
#else
	movq	%cr2, %rsi
#endif
	movq	%rsp, %rdi

	ENABLE_INTR_FLAGS

	call	dtrace_user_probe /* dtrace_user_probe(rp, addr, cpuid) */
	jmp	_sys_rtt

	SET_SIZE(dtrace_trap)

#elif defined(__i386)

	ENTRY_NP(dtrace_trap)

	INTR_PUSH

	TRACE_PTR(%edi, %ebx, %ebx, %ecx, $TT_TRAP) /* Uses labels 8 and 9 */
	TRACE_REGS(%edi, %esp, %ebx, %ecx)	/* Uses label 9 */
	TRACE_STAMP(%edi)		/* Clobbers %eax, %edx, uses 9 */

	movl	%esp, %ebp

	pushl	%gs:CPU_ID
#if defined(__xpv)
	movl	%gs:CPU_VCPU_INFO, %eax
	movl	VCPU_INFO_ARCH_CR2(%eax), %eax
#else
	movl	%cr2, %eax
#endif
	pushl	%eax
	pushl	%ebp

	ENABLE_INTR_FLAGS

	call	dtrace_user_probe /* dtrace_user_probe(rp, addr, cpuid) */
	addl	$12, %esp		/* get arguments off stack */

	jmp	_sys_rtt
	SET_SIZE(dtrace_trap)

#endif	/* __i386 */

#endif	/* __lint */


/*
 * Return from _sys_trap routine.
 */

#if defined(__lint)

void
lwp_rtt_initial(void)
{}

void
lwp_rtt(void)
{}

void
_sys_rtt(void)
{}

#else	/* __lint */

	ENTRY_NP(lwp_rtt_initial)
	movq	%gs:CPU_THREAD, %r15
	movq	T_STACK(%r15), %rsp	/* switch to the thread stack */
	movq	%rsp, %rbp
	call	__dtrace_probe___proc_start
	jmp	_lwp_rtt

	ENTRY_NP(lwp_rtt)

	/*
	 * r14	lwp
	 * rdx	lwp->lwp_procp
	 * r15	curthread
	 */

	movq	%gs:CPU_THREAD, %r15
	movq	T_STACK(%r15), %rsp	/* switch to the thread stack */
	movq	%rsp, %rbp
_lwp_rtt:
	call	__dtrace_probe___proc_lwp__start
	movq	%gs:CPU_LWP, %r14
	movq	LWP_PROCP(%r14), %rdx

	/*
	 * XX64	Is the stack misaligned correctly at this point?
	 *	If not, we need to do a push before calling anything ..
	 */

#if defined(DEBUG)
	/*
	 * If we were to run lwp_savectx at this point -without-
	 * pcb_rupdate being set to 1, we'd end up sampling the hardware
	 * state left by the previous running lwp, rather than setting
	 * the values requested by the lwp creator.  Bad.
	 */
	testb	$0x1, PCB_RUPDATE(%r14)
	jne	1f
	leaq	_no_pending_updates(%rip), %rdi
	movl	$__LINE__, %esi
	movq	%r14, %rdx
	xorl	%eax, %eax
	call	panic
1:
#endif

	/*
	 * If agent lwp, clear %fs and %gs
	 */
	cmpq	%r15, P_AGENTTP(%rdx)
	jne	1f
	xorl	%ecx, %ecx
	movq	%rcx, REGOFF_FS(%rsp)
	movq	%rcx, REGOFF_GS(%rsp)
	movw	%cx, LWP_PCB_FS(%r14)
	movw	%cx, LWP_PCB_GS(%r14)
1:
	call	dtrace_systrace_rtt
	movq	REGOFF_RDX(%rsp), %rsi
	movq	REGOFF_RAX(%rsp), %rdi
	call	post_syscall		/* post_syscall(rval1, rval2) */

	/*
	 * XXX - may want a fast path that avoids sys_rtt_common in the
	 * most common case.
	 */
	ALTENTRY(_sys_rtt)
	CLI(%rax)			/* disable interrupts */
	ALTENTRY(_sys_rtt_ints_disabled)
	movq	%rsp, %rdi		/* pass rp to sys_rtt_common */
	call	sys_rtt_common		/* do common sys_rtt tasks */
	testq	%rax, %rax		/* returning to userland? */
	jz	sr_sup

	/*
	 * Return to user
	 */
	ASSERT_UPCALL_MASK_IS_SET
	cmpw	$UCS_SEL, REGOFF_CS(%rsp) /* test for native (64-bit) lwp? */
	je	sys_rtt_syscall

	/*
	 * Return to 32-bit userland
	 */
	ALTENTRY(sys_rtt_syscall32)
	USER32_POP
	jmp	tr_iret_user
	/*NOTREACHED*/

	ALTENTRY(sys_rtt_syscall)
	/*
	 * Return to 64-bit userland
	 */
	USER_POP
	ALTENTRY(nopop_sys_rtt_syscall)
	jmp	tr_iret_user
	/*NOTREACHED*/
	SET_SIZE(nopop_sys_rtt_syscall)

	/*
	 * Return to supervisor
	 * NOTE: to make the check in trap() that tests if we are executing
	 * segment register fixup/restore code work properly, sr_sup MUST be
	 * after _sys_rtt .
	 */
	ALTENTRY(sr_sup)
	/*
	 * Restore regs before doing iretq to kernel mode
	 */
	INTR_POP
	jmp	tr_iret_kernel
	.globl	_sys_rtt_end
_sys_rtt_end:
	/*NOTREACHED*/
	SET_SIZE(sr_sup)
	SET_SIZE(_sys_rtt_end)
	SET_SIZE(lwp_rtt)
	SET_SIZE(lwp_rtt_initial)
	SET_SIZE(_sys_rtt_ints_disabled)
	SET_SIZE(_sys_rtt)
	SET_SIZE(sys_rtt_syscall)
	SET_SIZE(sys_rtt_syscall32)

#endif	/* __lint */


#if defined(__lint)

/*
 * So why do we have to deal with all this crud in the world of ia32?
 *
 * Basically there are four classes of ia32 implementations, those that do not
 * have a TSC, those that have a marginal TSC that is broken to the extent
 * that it is useless, those that have a marginal TSC that is not quite so
 * horribly broken and can be used with some care, and those that have a
 * reliable TSC. This crud has to be here in order to sift through all the
 * variants.
 */

/*ARGSUSED*/
uint64_t
freq_tsc(uint32_t *pit_counter)
{
	return (0);
}
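
/*
 * The arithmetic underneath (a sketch; the caller does the real
 * computation): the TSC and the PIT count over the same wall-clock
 * interval, so the cpu clock can be recovered as
 *
 *	cpu_hz = tsc_delta * PIT_HZ / pit_delta;
 *
 * where PIT_HZ is the PIT input clock, nominally 1193182 Hz.
 */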

#else	/* __lint */

#if defined(__amd64)

	/*
	 * XX64 quick and dirty port from the i386 version. Since we
	 * believe the amd64 tsc is more reliable, could this code be
	 * simpler?
	 */
	ENTRY_NP(freq_tsc)
	pushq	%rbp
	movq	%rsp, %rbp
	movq	%rdi, %r9	/* save pit_counter */
	pushq	%rbx

/ We have a TSC, but we have no way in general to know how reliable it is.
/ Usually a marginal TSC behaves appropriately unless not enough time
/ elapses between reads. A reliable TSC can be read as often and as rapidly
/ as desired. The simplistic approach of reading the TSC and correlating it
/ to the PIT counter cannot be naively followed. Instead, successive
/ estimates refine a guess at the speed of the cpu, and only then are the
/ TSC and PIT counter correlated. In practice, more than one quick loop is
/ very rarely required for an estimate. Measures have to be taken to prevent
/ the PIT counter from wrapping beyond its resolution and to measure the
/ clock rate of very fast processors.
/
/ The following constant can be tuned. It should be such that the loop takes
/ neither too many nor too few PIT counts to execute. If this value is too
/ large, then on slow machines the loop will take a long time, or the PIT
/ counter may even wrap. If this value is too small, then on fast machines
/ the PIT counter may count so few ticks that the resolution of the PIT
/ itself causes a bad guess. Because this code is used on machines with
/ marginal TSCs and/or I/O, if this value is too small on those, it may
/ cause the calculated cpu frequency to vary slightly from boot to boot.
/
/ In all cases, even if this constant is set inappropriately, the algorithm
/ will still work and the caller should be able to handle variances in the
/ calculation of cpu frequency, but the calculation will be inefficient and
/ take a disproportionate amount of time relative to a well-selected value.
/ As the slowest supported cpu becomes faster, this constant should be
/ carefully increased.
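/
/ As a rough sketch (hypothetical C, for illustration only; helper names
/ are made up), the refinement loop below behaves like:
/
/	loop_count = 0x8000;
/	for (;;) {
/		start_pit_countdown(0xffff);	/* counter 0, mode 0 */
/		tsc0 = rdtsc();
/		spin(loop_count);
/		tsc1 = rdtsc();
/		ticks = 0xffff - read_pit();	/* elapsed PIT ticks */
/		if (pit_wrapped)
/			loop_count /= 2;	/* ran too long; retry */
/		else if (ticks > 0xdfff && icache_warm)
/			break;			/* long enough; done */
/		else				/* rescale toward 0xf000 */
/			loop_count = loop_count * 0xf000ULL / ticks;
/	}
/	*pit_counter = ticks;
/	return (tsc1 - tsc0);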
1699 
1700         movl    $0x8000, %ecx
1701 
        / the carry flag tracks whether the instruction cache has been
        / warmed; it starts out clear (cold)
        clc
1704 
1705         jmp     freq_tsc_loop
1706 
/ The following block of code, up to and including the latching of the PIT
/ counter after freq_tsc_perf_loop, is very critical and very carefully
/ written; it should only be modified with great care. freq_tsc_loop to
/ freq_tsc_perf_loop fits exactly in 16 bytes, as do the instructions in
/ freq_tsc_perf_loop up to the latching of the PIT counter.
1712 
1713         .align  32
1714 freq_tsc_loop:
1715         / save the loop count in %ebx
1716         movl    %ecx, %ebx
1717 
1718         / initialize the PIT counter and start a count down
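        / (PIT_LOADMODE programs counter 0 for mode 0 with LSB-then-MSB
        / access, so the two 0xff writes below load an initial count of
        / 0xffff)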
1719         movb    $PIT_LOADMODE, %al
1720         outb    $PITCTL_PORT
1721         movb    $0xff, %al
1722         outb    $PITCTR0_PORT
1723         outb    $PITCTR0_PORT
1724 
        / read the TSC; the starting timestamp is kept in %edi:%esi
1726         rdtsc
1727         movl    %eax, %esi
1728 
1729 freq_tsc_perf_loop:
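        / the apparently redundant moves below are deliberate: they complete
        / the %edi:%esi snapshot of the starting timestamp and keep the loop
        / at the fixed size and constant per-iteration cost that the critical
        / 16-byte layout above depends on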
1730         movl    %edx, %edi
1731         movl    %eax, %esi
1732         movl    %edx, %edi
1733         loop    freq_tsc_perf_loop
1734 
1735         / read the TSC and store the LSW in %ecx
1736         rdtsc
1737         movl    %eax, %ecx
1738 
1739         / latch the PIT counter and status
1740         movb    $_CONST(PIT_READBACK|PIT_READBACKC0), %al
1741         outb    $PITCTL_PORT
1742 
1743         / remember if the icache has been warmed
1744         setc    %ah
1745 
1746         / read the PIT status
1747         inb     $PITCTR0_PORT
1748         shll    $8, %eax
1749 
1750         / read PIT count
1751         inb     $PITCTR0_PORT
1752         shll    $8, %eax
1753         inb     $PITCTR0_PORT
1754         bswap   %eax
1755 
        / check to see if the PIT count was loaded into the CE (counting
        / element)
1757         btw     $_CONST(PITSTAT_NULLCNT+8), %ax
1758         jc      freq_tsc_increase_count
1759 
1760         / check to see if PIT counter wrapped
1761         btw     $_CONST(PITSTAT_OUTPUT+8), %ax
1762         jnc     freq_tsc_pit_did_not_wrap
1763 
1764         / halve count
1765         shrl    $1, %ebx
1766         movl    %ebx, %ecx
1767 
1768         / the instruction cache has been warmed
1769         stc
1770 
1771         jmp     freq_tsc_loop
1772 
1773 freq_tsc_increase_count:
1774         shll    $1, %ebx
1775         jc      freq_tsc_too_fast
1776 
1777         movl    %ebx, %ecx
1778 
1779         / the instruction cache has been warmed
1780         stc
1781 
1782         jmp     freq_tsc_loop
1783 
1784 freq_tsc_pit_did_not_wrap:
1785         roll    $16, %eax
1786 
        / if fewer than 0x2000 counts remain, then more than 0xdfff PIT ticks
        / elapsed and the run was long enough; notw does not modify the flags
        / set by cmpw, and turns the remaining down-count into the elapsed
        / tick count (0xffff - count)
        cmpw    $0x2000, %ax
        notw    %ax
        jb      freq_tsc_sufficient_duration
1790 
1791 freq_tsc_calculate:
1792         / in mode 0, the PIT loads the count into the CE on the first CLK pulse,
1793         / then on the second CLK pulse the CE is decremented, therefore mode 0
1794         / is really a (count + 1) counter, ugh
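        /
        / the arithmetic below rescales the loop count toward the target of
        / 0xf000 elapsed PIT ticks:
        /
        /	new_loop_count = (loop_count * 0xf000) / pit_ticks
        /
        / the 64-bit product is kept in %ecx:%ebx; before dividing, we check
        / that pit_ticks * 2^32 (computed as pit_ticks * 0xffffffff +
        / pit_ticks) exceeds the product, since divl faults if the quotient
        / does not fit in 32 bits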
1795         xorl    %esi, %esi
1796         movw    %ax, %si
1797         incl    %esi
1798 
1799         movl    $0xf000, %eax
1800         mull    %ebx
1801 
1802         / tuck away (target_pit_count * loop_count)
1803         movl    %edx, %ecx
1804         movl    %eax, %ebx
1805 
1806         movl    %esi, %eax
1807         movl    $0xffffffff, %edx
1808         mull    %edx
1809 
1810         addl    %esi, %eax
1811         adcl    $0, %edx
1812 
1813         cmpl    %ecx, %edx
1814         ja      freq_tsc_div_safe
1815         jb      freq_tsc_too_fast
1816 
1817         cmpl    %ebx, %eax
1818         jbe     freq_tsc_too_fast
1819 
1820 freq_tsc_div_safe:
1821         movl    %ecx, %edx
1822         movl    %ebx, %eax
1823 
1824         movl    %esi, %ecx
1825         divl    %ecx
1826 
1827         movl    %eax, %ecx
1828 
1829         / the instruction cache has been warmed
1830         stc
1831 
1832         jmp     freq_tsc_loop
1833 
1834 freq_tsc_sufficient_duration:
1835         / test to see if the icache has been warmed
1836         btl     $16, %eax
1837         jnc     freq_tsc_calculate
1838 
1839         / recall mode 0 is a (count + 1) counter
1840         andl    $0xffff, %eax
1841         incl    %eax
1842 
1843         / save the number of PIT counts
1844         movl    %eax, (%r9)
1845 
        / calculate the number of TSC ticks that elapsed
1847         movl    %ecx, %eax
1848         subl    %esi, %eax
1849         sbbl    %edi, %edx
1850 
1851         jmp     freq_tsc_end
1852 
1853 freq_tsc_too_fast:
1854         / return 0 as a 64 bit quantity
1855         xorl    %eax, %eax
1856         xorl    %edx, %edx
1857 
1858 freq_tsc_end:
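        / assemble the 64-bit tick count from %edx:%eax into %rax for return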
1859         shlq    $32, %rdx
1860         orq     %rdx, %rax
1861 
1862         popq    %rbx
1863         leaveq
1864         ret
1865         SET_SIZE(freq_tsc)
1866 
1867 #elif defined(__i386)
1868 
1869         ENTRY_NP(freq_tsc)
1870         pushl   %ebp
1871         movl    %esp, %ebp
1872         pushl   %edi
1873         pushl   %esi
1874         pushl   %ebx
1875 
/ We have a TSC, but we have no way in general to know how reliable it is.
/ Usually a marginal TSC behaves appropriately unless not enough time
/ elapses between reads. A reliable TSC can be read as often and as rapidly
/ as desired. The simplistic approach of reading the TSC and correlating it
/ to the PIT counter cannot be naively followed. Instead, successive
/ estimates refine a guess at the speed of the cpu, and only then are the
/ TSC and PIT counter correlated. In practice, more than one quick loop is
/ very rarely required for an estimate. Measures have to be taken to prevent
/ the PIT counter from wrapping beyond its resolution and to measure the
/ clock rate of very fast processors.
/
/ The following constant can be tuned. It should be such that the loop takes
/ neither too many nor too few PIT counts to execute. If this value is too
/ large, then on slow machines the loop will take a long time, or the PIT
/ counter may even wrap. If this value is too small, then on fast machines
/ the PIT counter may count so few ticks that the resolution of the PIT
/ itself causes a bad guess. Because this code is used on machines with
/ marginal TSCs and/or I/O, if this value is too small on those, it may
/ cause the calculated cpu frequency to vary slightly from boot to boot.
/
/ In all cases, even if this constant is set inappropriately, the algorithm
/ will still work and the caller should be able to handle variances in the
/ calculation of cpu frequency, but the calculation will be inefficient and
/ take a disproportionate amount of time relative to a well-selected value.
/ As the slowest supported cpu becomes faster, this constant should be
/ carefully increased.
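/
/ (see the C-style sketch above the amd64 version of this loop for a summary
/ of the refinement algorithm; the code below is its i386 twin)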
1902 
1903         movl    $0x8000, %ecx
1904 
        / the carry flag tracks whether the instruction cache has been
        / warmed; it starts out clear (cold)
        clc
1907 
1908         jmp     freq_tsc_loop
1909 
/ The following block of code, up to and including the latching of the PIT
/ counter after freq_tsc_perf_loop, is very critical and very carefully
/ written; it should only be modified with great care. freq_tsc_loop to
/ freq_tsc_perf_loop fits exactly in 16 bytes, as do the instructions in
/ freq_tsc_perf_loop up to the latching of the PIT counter.
1915 
1916         .align  32
1917 freq_tsc_loop:
1918         / save the loop count in %ebx
1919         movl    %ecx, %ebx
1920 
1921         / initialize the PIT counter and start a count down
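        / (PIT_LOADMODE programs counter 0 for mode 0 with LSB-then-MSB
        / access, so the two 0xff writes below load an initial count of
        / 0xffff)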
1922         movb    $PIT_LOADMODE, %al
1923         outb    $PITCTL_PORT
1924         movb    $0xff, %al
1925         outb    $PITCTR0_PORT
1926         outb    $PITCTR0_PORT
1927 
        / read the TSC; the starting timestamp is kept in %edi:%esi
1929         rdtsc
1930         movl    %eax, %esi
1931 
1932 freq_tsc_perf_loop:
1933         movl    %edx, %edi
1934         movl    %eax, %esi
1935         movl    %edx, %edi
1936         loop    freq_tsc_perf_loop
1937 
1938         / read the TSC and store the LSW in %ecx
1939         rdtsc
1940         movl    %eax, %ecx
1941 
1942         / latch the PIT counter and status
1943         movb    $_CONST(PIT_READBACK|PIT_READBACKC0), %al
1944         outb    $PITCTL_PORT
1945 
1946         / remember if the icache has been warmed
1947         setc    %ah
1948 
1949         / read the PIT status
1950         inb     $PITCTR0_PORT
1951         shll    $8, %eax
1952 
1953         / read PIT count
1954         inb     $PITCTR0_PORT
1955         shll    $8, %eax
1956         inb     $PITCTR0_PORT
1957         bswap   %eax
1958 
        / check to see if the PIT count was loaded into the CE (counting
        / element)
1960         btw     $_CONST(PITSTAT_NULLCNT+8), %ax
1961         jc      freq_tsc_increase_count
1962 
1963         / check to see if PIT counter wrapped
1964         btw     $_CONST(PITSTAT_OUTPUT+8), %ax
1965         jnc     freq_tsc_pit_did_not_wrap
1966 
1967         / halve count
1968         shrl    $1, %ebx
1969         movl    %ebx, %ecx
1970 
1971         / the instruction cache has been warmed
1972         stc
1973 
1974         jmp     freq_tsc_loop
1975 
1976 freq_tsc_increase_count:
1977         shll    $1, %ebx
1978         jc      freq_tsc_too_fast
1979 
1980         movl    %ebx, %ecx
1981 
1982         / the instruction cache has been warmed
1983         stc
1984 
1985         jmp     freq_tsc_loop
1986 
1987 freq_tsc_pit_did_not_wrap:
1988         roll    $16, %eax
1989 
        / if fewer than 0x2000 counts remain, then more than 0xdfff PIT ticks
        / elapsed and the run was long enough; notw does not modify the flags
        / set by cmpw, and turns the remaining down-count into the elapsed
        / tick count (0xffff - count)
        cmpw    $0x2000, %ax
        notw    %ax
        jb      freq_tsc_sufficient_duration
1993 
1994 freq_tsc_calculate:
1995         / in mode 0, the PIT loads the count into the CE on the first CLK pulse,
1996         / then on the second CLK pulse the CE is decremented, therefore mode 0
1997         / is really a (count + 1) counter, ugh
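        / (the rescale arithmetic below is explained above the amd64 copy of
        / this block)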
1998         xorl    %esi, %esi
1999         movw    %ax, %si
2000         incl    %esi
2001 
2002         movl    $0xf000, %eax
2003         mull    %ebx
2004 
2005         / tuck away (target_pit_count * loop_count)
2006         movl    %edx, %ecx
2007         movl    %eax, %ebx
2008 
2009         movl    %esi, %eax
2010         movl    $0xffffffff, %edx
2011         mull    %edx
2012 
2013         addl    %esi, %eax
2014         adcl    $0, %edx
2015 
2016         cmpl    %ecx, %edx
2017         ja      freq_tsc_div_safe
2018         jb      freq_tsc_too_fast
2019 
2020         cmpl    %ebx, %eax
2021         jbe     freq_tsc_too_fast
2022 
2023 freq_tsc_div_safe:
2024         movl    %ecx, %edx
2025         movl    %ebx, %eax
2026 
2027         movl    %esi, %ecx
2028         divl    %ecx
2029 
2030         movl    %eax, %ecx
2031 
2032         / the instruction cache has been warmed
2033         stc
2034 
2035         jmp     freq_tsc_loop
2036 
2037 freq_tsc_sufficient_duration:
2038         / test to see if the icache has been warmed
2039         btl     $16, %eax
2040         jnc     freq_tsc_calculate
2041 
2042         / recall mode 0 is a (count + 1) counter
2043         andl    $0xffff, %eax
2044         incl    %eax
2045 
2046         / save the number of PIT counts
2047         movl    8(%ebp), %ebx
2048         movl    %eax, (%ebx)
2049 
        / calculate the number of TSC ticks that elapsed
2051         movl    %ecx, %eax
2052         subl    %esi, %eax
2053         sbbl    %edi, %edx
2054 
2055         jmp     freq_tsc_end
2056 
2057 freq_tsc_too_fast:
2058         / return 0 as a 64 bit quantity
2059         xorl    %eax, %eax
2060         xorl    %edx, %edx
2061 
2062 freq_tsc_end:
2063         popl    %ebx
2064         popl    %esi
2065         popl    %edi
2066         popl    %ebp
2067         ret
2068         SET_SIZE(freq_tsc)
2069 
2070 #endif  /* __i386 */
2071 #endif  /* __lint */
2072 
2073 #if !defined(__amd64)
2074 #if defined(__lint)
2075 
2076 /*
 * We do not have a TSC, so we use a block of instructions with well-known
 * timings.
2079  */
2080 
2081 /*ARGSUSED*/
2082 uint64_t
2083 freq_notsc(uint32_t *pit_counter)
2084 {
2085         return (0);
2086 }
2087 
2088 #else   /* __lint */
2089         ENTRY_NP(freq_notsc)
2090         pushl   %ebp
2091         movl    %esp, %ebp
2092         pushl   %edi
2093         pushl   %esi
2094         pushl   %ebx
2095 
2096         / initial count for the idivl loop
2097         movl    $0x1000, %ecx
2098 
2099         / load the divisor
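        / (the divisor stays 1 and the dividend 0 so that every idivl below
        / sees identical operands and therefore takes a fixed number of
        / clock ticks)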
2100         movl    $1, %ebx
2101 
2102         jmp     freq_notsc_loop
2103 
        .align  16
2105 freq_notsc_loop:
2106         / set high 32 bits of dividend to zero
2107         xorl    %edx, %edx
2108 
2109         / save the loop count in %edi
2110         movl    %ecx, %edi
2111 
2112         / initialize the PIT counter and start a count down
2113         movb    $PIT_LOADMODE, %al
2114         outb    $PITCTL_PORT
2115         movb    $0xff, %al
2116         outb    $PITCTR0_PORT
2117         outb    $PITCTR0_PORT
2118 
2119         / set low 32 bits of dividend to zero
2120         xorl    %eax, %eax
2121 
/ It is vital that the arguments to idivl be set appropriately, because on
/ some cpus this instruction takes more or fewer clock ticks depending on
/ its arguments.
2125 freq_notsc_perf_loop:
2126         idivl   %ebx
2127         idivl   %ebx
2128         idivl   %ebx
2129         idivl   %ebx
2130         idivl   %ebx
2131         loop    freq_notsc_perf_loop
2132 
2133         / latch the PIT counter and status
2134         movb    $_CONST(PIT_READBACK|PIT_READBACKC0), %al
2135         outb    $PITCTL_PORT
2136 
2137         / read the PIT status
2138         inb     $PITCTR0_PORT
2139         shll    $8, %eax
2140 
2141         / read PIT count
2142         inb     $PITCTR0_PORT
2143         shll    $8, %eax
2144         inb     $PITCTR0_PORT
2145         bswap   %eax
2146 
        / check to see if the PIT count was loaded into the CE (counting
        / element)
2148         btw     $_CONST(PITSTAT_NULLCNT+8), %ax
2149         jc      freq_notsc_increase_count
2150 
2151         / check to see if PIT counter wrapped
2152         btw     $_CONST(PITSTAT_OUTPUT+8), %ax
2153         jnc     freq_notsc_pit_did_not_wrap
2154 
2155         / halve count
2156         shrl    $1, %edi
2157         movl    %edi, %ecx
2158 
2159         jmp     freq_notsc_loop
2160 
2161 freq_notsc_increase_count:
2162         shll    $1, %edi
2163         jc      freq_notsc_too_fast
2164 
2165         movl    %edi, %ecx
2166 
2167         jmp     freq_notsc_loop
2168 
2169 freq_notsc_pit_did_not_wrap:
2170         shrl    $16, %eax
2171 
        / if fewer than 0x2000 counts remain, then more than 0xdfff PIT ticks
        / elapsed and the run was long enough; notw does not modify the flags
        / set by cmpw, and turns the remaining down-count into the elapsed
        / tick count (0xffff - count)
        cmpw    $0x2000, %ax
        notw    %ax
        jb      freq_notsc_sufficient_duration
2175 
2176 freq_notsc_calculate:
2177         / in mode 0, the PIT loads the count into the CE on the first CLK pulse,
2178         / then on the second CLK pulse the CE is decremented, therefore mode 0
2179         / is really a (count + 1) counter, ugh
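        / (the rescale arithmetic below mirrors the freq_tsc version
        / explained above, with the 64-bit product kept in %edi:%ecx instead
        / of %ecx:%ebx)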
2180         xorl    %esi, %esi
2181         movw    %ax, %si
2182         incl    %esi
2183 
2184         movl    %edi, %eax
2185         movl    $0xf000, %ecx
2186         mull    %ecx
2187 
2188         / tuck away (target_pit_count * loop_count)
2189         movl    %edx, %edi
2190         movl    %eax, %ecx
2191 
2192         movl    %esi, %eax
2193         movl    $0xffffffff, %edx
2194         mull    %edx
2195 
2196         addl    %esi, %eax
2197         adcl    $0, %edx
2198 
2199         cmpl    %edi, %edx
2200         ja      freq_notsc_div_safe
2201         jb      freq_notsc_too_fast
2202 
2203         cmpl    %ecx, %eax
2204         jbe     freq_notsc_too_fast
2205 
2206 freq_notsc_div_safe:
2207         movl    %edi, %edx
2208         movl    %ecx, %eax
2209 
2210         movl    %esi, %ecx
2211         divl    %ecx
2212 
2213         movl    %eax, %ecx
2214 
2215         jmp     freq_notsc_loop
2216 
2217 freq_notsc_sufficient_duration:
2218         / recall mode 0 is a (count + 1) counter
2219         incl    %eax
2220 
2221         / save the number of PIT counts
2222         movl    8(%ebp), %ebx
2223         movl    %eax, (%ebx)
2224 
        / calculate the number of cpu clock ticks that elapsed; Cyrix cores
        / fall through to the Cyrix cycle count below, all other vendors
        / branch to the Pentium cycle count
        cmpl    $X86_VENDOR_Cyrix, x86_vendor
        jnz     freq_notsc_notcyrix
2228 
2229         / freq_notsc_perf_loop takes 86 clock cycles on Cyrix 6x86 cores
2230         movl    $86, %eax
2231         jmp     freq_notsc_calculate_tsc
2232 
2233 freq_notsc_notcyrix:
2234         / freq_notsc_perf_loop takes 237 clock cycles on Intel Pentiums
2235         movl    $237, %eax
2236 
2237 freq_notsc_calculate_tsc:
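        / 64-bit elapsed cycle count = cycles_per_loop * loop_count, left in
        / %edx:%eax as the return value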
2238         mull    %edi
2239 
2240         jmp     freq_notsc_end
2241 
2242 freq_notsc_too_fast:
2243         / return 0 as a 64 bit quantity
2244         xorl    %eax, %eax
2245         xorl    %edx, %edx
2246 
2247 freq_notsc_end:
2248         popl    %ebx
2249         popl    %esi
2250         popl    %edi
2251         popl    %ebp
2252 
2253         ret
2254         SET_SIZE(freq_notsc)
2255 
2256 #endif  /* __lint */
2257 #endif  /* !__amd64 */
2258 
2259 #if !defined(__lint)
2260         .data
2261 #if !defined(__amd64)
2262         .align  4
2263 cpu_vendor:
2264         .long   0, 0, 0         /* Vendor ID string returned */
2265 
2266         .globl  CyrixInstead
2267 
2268         .globl  x86_featureset
2269         .globl  x86_type
2270         .globl  x86_vendor
2271 #endif
2272 
2273 #endif  /* __lint */