1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25 /*
26 * Copyright (c) 2018 Joyent, Inc.
27 */
28
29 /* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */
30 /* Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T */
31 /* All Rights Reserved */
32
33 /* Copyright (c) 1987, 1988 Microsoft Corporation */
34 /* All Rights Reserved */
35
36
37 #include <sys/asm_linkage.h>
38 #include <sys/asm_misc.h>
39 #include <sys/regset.h>
40 #include <sys/privregs.h>
41 #include <sys/psw.h>
42 #include <sys/reboot.h>
43 #include <sys/x86_archext.h>
44 #include <sys/machparam.h>
45
46 #if defined(__lint)
47
48 #include <sys/types.h>
49 #include <sys/thread.h>
50 #include <sys/systm.h>
51 #include <sys/lgrp.h>
52 #include <sys/regset.h>
53 #include <sys/link.h>
54 #include <sys/bootconf.h>
55 #include <sys/bootsvcs.h>
56
57 #else /* __lint */
58
59 #include <sys/segments.h>
60 #include <sys/pcb.h>
61 #include <sys/trap.h>
62 #include <sys/ftrace.h>
63 #include <sys/traptrace.h>
64 #include <sys/clock.h>
65 #include <sys/cmn_err.h>
66 #include <sys/pit.h>
67 #include <sys/panic.h>
68
69 #if defined(__xpv)
70 #include <sys/hypervisor.h>
71 #endif
72
73 #include "assym.h"
74
75 /*
76 * Our assumptions:
77 * - We are running in protected-paged mode.
78 * - Interrupts are disabled.
79 * - The GDT and IDT are the caller's; we need our own copies.
80 * - The kernel's text, initialized data and bss are mapped.
81 *
82 * Our actions:
83 * - Save arguments
84 * - Initialize our stack pointer to the thread 0 stack (t0stack)
85 * and leave room for a phony "struct regs".
86 * - Our GDT and IDT need to get munged.
87 * - Since we are using the boot's GDT descriptors, we need
88 * to copy them into our GDT before we switch to ours.
89 * - We start using our GDT by loading correct values in the
90 * selector registers (cs=KCS_SEL, ds=es=ss=KDS_SEL, fs=KFS_SEL,
91 * gs=KGS_SEL).
92 * - The default LDT entry for syscall is set.
93 * - We load the default LDT into the hardware LDT register.
94 * - We load the default TSS into the hardware task register.
95 * - Check for cpu type, i.e. 486 vs. P5 vs. P6 etc.
96 * - mlsetup(%esp) gets called.
97 * - We change our appearance to look like the real thread 0.
98 * (NOTE: making ourselves a real thread may be a no-op)
99 * - main() gets called. (NOTE: main() never returns).
100 *
101 * NOW, the real code!
102 */
103 /*
104 * The very first thing in the kernel's text segment must be a jump
105 * to the os/fakebop.c startup code.
106 */
107 .text
108 jmp _start
109
110 /*
111 * Globals:
112 */
113 .globl _locore_start
114 .globl mlsetup
115 .globl main
116 .globl panic
117 .globl t0stack
118 .globl t0
119 .globl sysp
120 .globl edata
121
122 /*
123 * call back into boot - sysp (bootsvcs.h) and bootops (bootconf.h)
124 */
125 .globl bootops
126 .globl bootopsp
127
128 /*
129 * NOTE: t0stack should be the first thing in the data section so that
130 * if it ever overflows, it will fault on the last kernel text page.
131 */
132 .data
133 .comm t0stack, DEFAULTSTKSZ, 32
134 .comm t0, 4094, 32
135
136 #endif /* __lint */
137
138
139 #if defined(__amd64)
140
141 #if defined(__lint)
142
143 /* ARGSUSED */
144 void
145 _locore_start(struct boot_syscalls *sysp, ulong_t rsi, struct bootops *bop)
146 {}
147
148 #else /* __lint */
149
150 /*
151 * kobj_init() vectors us back to here with (note) a slightly different
152 * set of arguments than _start is given (see lint prototypes above).
153 *
154 * XXX Make this less vile, please.
155 */
156 ENTRY_NP(_locore_start)
157
158 /*
159 * %rdi = boot services (should die someday)
160 * %rdx = bootops
161 * end
162 */
163
164 leaq edata(%rip), %rbp /* reference edata for ksyms */
165 movq $0, (%rbp) /* limit stack back trace */
166
167 /*
168 * Initialize our stack pointer to the thread 0 stack (t0stack)
169 * and leave room for a "struct regs" for lwp0. Note that the
170 * stack doesn't actually align to a 16-byte boundary until just
171 * before we call mlsetup because we want to use %rsp to point at
172 * our regs structure.
173 */
174 leaq t0stack(%rip), %rsp
175 addq $_CONST(DEFAULTSTKSZ - REGSIZE), %rsp
176 #if (REGSIZE & 15) == 0
177 subq $8, %rsp
178 #endif
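/*
 * A minimal C sketch of the alignment bookkeeping above (illustrative only;
 * it assumes t0stack and DEFAULTSTKSZ are 16-byte aligned and REGSIZE is a
 * multiple of 8):
 *
 *	uintptr_t rsp = (uintptr_t)t0stack + DEFAULTSTKSZ - REGSIZE;
 *	if ((REGSIZE & 15) == 0)
 *		rsp -= 8;
 *
 * Either way rsp is now 8 (mod 16), so the "pushq %rbp" done just before
 * "call mlsetup" below leaves the stack 16-byte aligned at the call, as the
 * amd64 ABI requires.
 */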
179 /*
180 * Save call back for special x86 boot services vector
181 */
182 movq %rdi, sysp(%rip)
183
184 movq %rdx, bootops(%rip) /* save bootops */
185 movq $bootops, bootopsp(%rip)
186
187 /*
188 * Save arguments and flags, if only for debugging ..
189 */
190 movq %rdi, REGOFF_RDI(%rsp)
191 movq %rsi, REGOFF_RSI(%rsp)
192 movq %rdx, REGOFF_RDX(%rsp)
193 movq %rcx, REGOFF_RCX(%rsp)
194 movq %r8, REGOFF_R8(%rsp)
195 movq %r9, REGOFF_R9(%rsp)
196 pushf
197 popq %r11
198 movq %r11, REGOFF_RFL(%rsp)
199
200 #if !defined(__xpv)
201 /*
202 * Enable write protect and alignment check faults.
203 */
204 movq %cr0, %rax
205 orq $_CONST(CR0_WP|CR0_AM), %rax
206 andq $_BITNOT(CR0_WT|CR0_CE), %rax
207 movq %rax, %cr0
208 #endif /* !__xpv */
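/*
 * In C terms, the three instructions above amount to the following sketch;
 * getcr0()/setcr0() stand in for whatever %cr0 accessors are available:
 *
 *	ulong_t cr0 = getcr0();
 *	cr0 |= (CR0_WP | CR0_AM);	// write-protect + alignment-check faults
 *	cr0 &= ~(CR0_WT | CR0_CE);	// matching the andq above
 *	setcr0(cr0);
 */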
209
210 /*
211 * (We just assert this works by virtue of being here)
212 */
213 bts $X86FSET_CPUID, x86_featureset(%rip)
214
215 /*
216 * mlsetup() gets called with a struct regs as argument, while
217 * main takes no args and should never return.
218 */
219 xorl %ebp, %ebp
220 movq %rsp, %rdi
221 pushq %rbp
222 /* (stack pointer now aligned on 16-byte boundary right here) */
223 movq %rsp, %rbp
224 call mlsetup
225 call main
226 /* NOTREACHED */
227 leaq __return_from_main(%rip), %rdi
228 xorl %eax, %eax
229 call panic
230 SET_SIZE(_locore_start)
231
232 #endif /* __lint */
233 #endif /* __amd64 */
234
235 #if !defined(__lint)
236
237 __return_from_main:
238 .string "main() returned"
239 __unsupported_cpu:
240 .string "486 style cpu detected - no longer supported!"
241
242 #if defined(DEBUG)
243 _no_pending_updates:
244 .string "locore.s:%d lwp_rtt(lwp %p) but pcb_rupdate != 1"
245 #endif
246
247 #endif /* !__lint */
248
249 #if !defined(__amd64)
250
251 #if defined(__lint)
252
253 /* ARGSUSED */
254 void
255 _locore_start(struct boot_syscalls *sysp, struct bootops *bop)
256 {}
257
258 #else /* __lint */
259
260 /*
261 * kobj_init() vectors us back to here with (note) a slightly different
262 * set of arguments than _start is given (see lint prototypes above).
263 *
264 * XXX Make this less vile, please.
265 */
266 ENTRY_NP(_locore_start)
267
268 /*
269 * %ecx = boot services (should die someday)
270 * %ebx = bootops
271 */
272 mov $edata, %ebp / edata needs to be defined for ksyms
273 movl $0, (%ebp) / limit stack back trace
274
275 /*
276 * Initialize our stack pointer to the thread 0 stack (t0stack)
277 * and leave room for a phony "struct regs".
278 */
279 movl $t0stack + DEFAULTSTKSZ - REGSIZE, %esp
280
281 /*
282 * Save call back for special x86 boot services vector
283 */
284 mov %ecx, sysp / save call back for boot services
285
286 mov %ebx, bootops / save bootops
287 movl $bootops, bootopsp
288
289
290 /*
291 * Save all registers and flags
292 */
293 pushal
294 pushfl
295
296 #if !defined(__xpv)
297 /*
298 * Override bios settings and enable write protect and
299 * alignment check faults.
300 */
301 movl %cr0, %eax
302
303 /*
304 * enable WP for detecting faults, and enable alignment checking.
305 */
306 orl $_CONST(CR0_WP|CR0_AM), %eax
307 andl $_BITNOT(CR0_WT|CR0_CE), %eax
308 movl %eax, %cr0 / set the cr0 register correctly and
309 / override the BIOS setup
310
311 /*
312 * If bit 21 of eflags can be flipped, then cpuid is present
313 * and enabled.
314 */
315 pushfl
316 popl %ecx
317 movl %ecx, %eax
318 xorl $PS_ID, %eax / try complemented bit
319 pushl %eax
320 popfl
321 pushfl
322 popl %eax
323 cmpl %eax, %ecx
324 jne have_cpuid
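/*
 * The same probe as a hedged C sketch (PS_ID is bit 21 of EFLAGS;
 * getflags()/setflags() are illustrative stand-ins, not actual routines):
 *
 *	ulong_t orig = getflags();
 *	setflags(orig ^ PS_ID);
 *	int have_cpuid = ((getflags() ^ orig) & PS_ID) != 0;
 *	setflags(orig);
 */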
325
326 /*
327 * cpuid may be disabled on Cyrix, so try to detect Cyrix with the 5/2 test:
328 * div does not modify the cc flags on Cyrix. Even though this may
329 * also be true for other vendors, it is generally true only for
330 * newer models from those vendors, which support and do not disable
331 * cpuid (usually because cpuid cannot be disabled).
332 */
333
334 /*
335 * clear cc flags
336 */
337 xorb %ah, %ah
338 sahf
339
340 /*
341 * perform 5/2 test
342 */
343 movw $5, %ax
344 movb $2, %bl
345 divb %bl
346
347 lahf
348 cmpb $2, %ah
349 jne cpu_486
350
351 /*
352 * div did not modify the cc flags, so chances are the vendor is Cyrix;
353 * assume the vendor is Cyrix and use the CCRs to enable cpuid
354 */
355 .set CYRIX_CRI, 0x22 / CR Index Register
356 .set CYRIX_CRD, 0x23 / CR Data Register
357
358 .set CYRIX_CCR3, 0xc3 / Config Control Reg 3
359 .set CYRIX_CCR4, 0xe8 / Config Control Reg 4
360 .set CYRIX_DIR0, 0xfe / Device Identification Reg 0
361 .set CYRIX_DIR1, 0xff / Device Identification Reg 1
362
363 /*
364 * even if the cpu vendor is Cyrix and the motherboard/chipset
365 * vendor decided to ignore lines A1-A4 for I/O addresses, I/O port
366 * 0x21 aliases 0x23, and since 0x22 is still untouched,
367 * the reads and writes of 0x21 are guaranteed to go off-chip of
368 * the cpu.
369 */
370
371 /*
372 * enable read of ISR at I/O port 0x20
373 */
374 movb $0xb, %al
375 outb $MCMD_PORT
376
377 /*
378 * read IMR and store in %bl
379 */
380 inb $MIMR_PORT
381 movb %al, %bl
382
383 /*
384 * mask out all interrupts so that ISR will not change
385 */
386 movb $0xff, %al
387 outb $MIMR_PORT
388
389 /*
390 * reads of I/O port 0x22 on Cyrix are always directed off-chip;
391 * make use of the I/O pull-up to test for an unknown device on 0x22
392 */
393 inb $CYRIX_CRI
394 cmpb $0xff, %al
395 je port_22_free
396
397 /*
398 * motherboard/chipset vendor may be ignoring line A1 of I/O address
399 */
400 movb %al, %cl
401
402 /*
403 * if the ISR and the value read from 0x22 do not match then we have
404 * detected some unknown device, probably a chipset, at 0x22
405 */
406 inb $MCMD_PORT
407 cmpb %al, %cl
408 jne restore_IMR
409
410 port_22_free:
411 /*
412 * now test to see if some unknown device is using I/O port 0x23
413 *
414 * read the external I/O port at 0x23
415 */
416 inb $CYRIX_CRD
417
418 /*
419 * Test for pull-up at 0x23 or if I/O address line A1 is being ignored.
420 * IMR is 0xff so both tests are performed simultaneously.
421 */
422 cmpb $0xff, %al
423 jne restore_IMR
424
425 /*
426 * We are a Cyrix part. In case we are some model of Cx486 or a Cx586,
427 * record the type and fix it later if not.
428 */
429 movl $X86_VENDOR_Cyrix, x86_vendor
430 movl $X86_TYPE_CYRIX_486, x86_type
431
432 /*
433 * Try to read CCR3. All Cyrix cpu's which support cpuid have CCR3.
434 *
435 * load CCR3 index into CCR index register
436 */
437
438 movb $CYRIX_CCR3, %al
439 outb $CYRIX_CRI
440
441 /*
442 * If we are not a Cyrix cpu, then we have performed an external I/O
443 * cycle. If the CCR index was not valid for this Cyrix model, we may
444 * have performed an external I/O cycle as well. In these cases and
445 * if the motherboard/chipset vendor ignores I/O address line A1,
446 * then the PIC will have IRQ3 set at the lowest priority as a side
447 * effect of the above outb. We are reasonably confident that there
448 * is not an unknown device on I/O port 0x22, so there should have been
449 * no unpredictable side-effect of the above outb.
450 */
451
452 /*
453 * read CCR3
454 */
455 inb $CYRIX_CRD
456
457 /*
458 * If we are not a Cyrix cpu the inb above produced an external I/O
459 * cycle. If we are a Cyrix model that does not support CCR3, we
460 * produced an external I/O cycle. In all known Cyrix models 6x86 and
461 * above, bit 3 of CCR3 is reserved and cannot be set to 1. In all
462 * Cyrix models prior to the 6x86 that supported CCR3, bits 4-7 are
463 * reserved as well. It is highly unlikely that CCR3 contains the value
464 * 0xff. We test whether I/O port 0x23 reads back as the pull-up value or
465 * as the IMR, and if so deduce that we are not a Cyrix with cpuid support.
466 */
467 cmpb $0xff, %al
468 je restore_PIC
469
470 /*
471 * There exist 486 ISA Cyrix chips that support CCR3 but do not support
472 * DIR0 and DIR1. If we try to read DIR0, we may generate external I/O
473 * cycles, the exact behavior is model specific and undocumented.
474 * Unfortunately these external I/O cycles may confuse some PIC's beyond
475 * recovery. Fortunately we can use the following undocumented trick:
476 * if bit 4 of CCR3 can be toggled, then DIR0 and DIR1 are supported.
477 * Pleasantly, MAPEN contains bit 4 of CCR3, so this trick is guaranteed
478 * to work on all Cyrix cpu's which support cpuid.
479 */
480 movb %al, %dl
481 xorb $0x10, %dl
482 movb %al, %cl
483
484 /*
485 * write CCR3 back with bit 4 toggled
486 */
487 movb $CYRIX_CCR3, %al
488 outb $CYRIX_CRI
489
490 movb %dl, %al
491 outb $CYRIX_CRD
492
493 /*
494 * read CCR3
495 */
496 movb $CYRIX_CCR3, %al
497 outb $CYRIX_CRI
498 inb $CYRIX_CRD
499 movb %al, %dl
500
501 /*
502 * restore CCR3
503 */
504 movb $CYRIX_CCR3, %al
505 outb $CYRIX_CRI
506
507 movb %cl, %al
508 outb $CYRIX_CRD
509
510 /*
511 * if bit 4 was not toggled, DIR0 and DIR1 are not supported, in which
512 * case we do not have cpuid anyway
513 */
514 andb $0x10, %al
515 andb $0x10, %dl
516 cmpb %al, %dl
517 je restore_PIC
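/*
 * The toggle test above, as a hedged C sketch (outb(port, value)/inb(port)
 * in the usual sense; 0x10 is bit 4 of CCR3, part of the MAPEN field):
 *
 *	outb(CYRIX_CRI, CYRIX_CCR3);
 *	uint8_t ccr3 = inb(CYRIX_CRD);
 *	outb(CYRIX_CRI, CYRIX_CCR3);
 *	outb(CYRIX_CRD, ccr3 ^ 0x10);		// try to flip bit 4
 *	outb(CYRIX_CRI, CYRIX_CCR3);
 *	uint8_t readback = inb(CYRIX_CRD);
 *	outb(CYRIX_CRI, CYRIX_CCR3);
 *	outb(CYRIX_CRD, ccr3);			// restore the original value
 *	if (((readback ^ ccr3) & 0x10) == 0)
 *		goto restore_PIC;		// bit stuck: no DIR0/DIR1, no cpuid
 */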
518
519 /*
520 * read DIR0
521 */
522 movb $CYRIX_DIR0, %al
523 outb $CYRIX_CRI
524 inb $CYRIX_CRD
525
526 /*
527 * test for pull-up
528 */
529 cmpb $0xff, %al
530 je restore_PIC
531
532 /*
533 * Values of 0x20-0x27 in DIR0 are currently reserved by Cyrix for
534 * future use. If Cyrix ever produces a cpu that supports cpuid with
535 * these ids, the following test will have to change. For now we remain
536 * pessimistic since the formats of the CCRs may be different by then.
537 *
538 * test for at least a 6x86, to see if we support both MAPEN and CPUID
539 */
540 cmpb $0x30, %al
541 jb restore_IMR
542
543 /*
544 * enable MAPEN
545 */
546 movb $CYRIX_CCR3, %al
547 outb $CYRIX_CRI
548
549 andb $0xf, %cl
550 movb %cl, %al
551 orb $0x10, %al
552 outb $CYRIX_CRD
553
554 /*
555 * select CCR4
556 */
557 movb $CYRIX_CCR4, %al
558 outb $CYRIX_CRI
559
560 /*
561 * read CCR4
562 */
563 inb $CYRIX_CRD
564
565 /*
566 * enable cpuid
567 */
568 orb $0x80, %al
569 movb %al, %dl
570
571 /*
572 * select CCR4
573 */
574 movb $CYRIX_CCR4, %al
575 outb $CYRIX_CRI
576
577 /*
578 * write CCR4
579 */
580 movb %dl, %al
581 outb $CYRIX_CRD
582
583 /*
584 * select CCR3
585 */
586 movb $CYRIX_CCR3, %al
587 outb $CYRIX_CRI
588
589 /*
590 * disable MAPEN and write CCR3
591 */
592 movb %cl, %al
593 outb $CYRIX_CRD
594
595 /*
596 * restore IMR
597 */
598 movb %bl, %al
599 outb $MIMR_PORT
600
601 /*
602 * test to see if cpuid available
603 */
604 pushfl
605 popl %ecx
606 movl %ecx, %eax
607 xorl $PS_ID, %eax / try complemented bit
608 pushl %eax
609 popfl
610 pushfl
611 popl %eax
612 cmpl %eax, %ecx
613 jne have_cpuid
614 jmp cpu_486
615
616 restore_PIC:
617 /*
618 * In case the motherboard/chipset vendor is ignoring line A1 of the
619 * I/O address, we set the PIC priorities to sane values.
620 */
621 movb $0xc7, %al / irq 7 lowest priority
622 outb $MCMD_PORT
623
624 restore_IMR:
625 movb %bl, %al
626 outb $MIMR_PORT
627 jmp cpu_486
628
629 have_cpuid:
630 /*
631 * cpuid instruction present
632 */
633 bts $X86FSET_CPUID, x86_featureset / Just to set; Ignore the CF
634 movl $0, %eax
635 cpuid
636
637 movl %ebx, cpu_vendor
638 movl %edx, cpu_vendor+4
639 movl %ecx, cpu_vendor+8
640
641 /*
642 * early cyrix cpus are somewhat strange and need to be
643 * probed in curious ways to determine their identity
644 */
645
646 leal cpu_vendor, %esi
647 leal CyrixInstead, %edi
648 movl $12, %ecx
649 repz
650 cmpsb
651 je vendor_is_cyrix
652
653 / let mlsetup()/cpuid_pass1() handle everything else in C
654
655 jmp cpu_done
656
657 is486:
658 /*
659 * test to see if a useful cpuid
660 */
661 testl %eax, %eax
662 jz isa486
663
664 movl $1, %eax
665 cpuid
666
667 movl %eax, %ebx
668 andl $0xF00, %ebx
669 cmpl $0x400, %ebx
670 je isa486
671
672 rep; ret /* use 2 byte return instruction */
673 /* AMD Software Optimization Guide - Section 6.2 */
674 isa486:
675 /*
676 * lose the return address
677 */
678 popl %eax
679 jmp cpu_486
680
681 vendor_is_cyrix:
682 call is486
683
684 /*
685 * Processor signature and feature flags for Cyrix are insane.
686 * BIOS can play with semi-documented registers, so cpuid must be used
687 * cautiously. Since we are a Cyrix that has cpuid, we have DIR0 and DIR1.
688 * Keep the family in %ebx and the feature flags in %edx until no longer needed.
689 */
690
691 /*
692 * read DIR0
693 */
694 movb $CYRIX_DIR0, %al
695 outb $CYRIX_CRI
696 inb $CYRIX_CRD
697
698 /*
699 * First we handle the cases where we are a 6x86 or 6x86L.
700 * The 6x86 is basically a 486; the only reliable bit in the
701 * feature flags is for the FPU. The 6x86L is better, but unfortunately
702 * there is no really good way to distinguish between these two
703 * cpu's. We are pessimistic and when in doubt assume a 6x86.
704 */
705
706 cmpb $0x40, %al
707 jae maybeGX
708
709 /*
710 * We are an M1, either a 6x86 or 6x86L.
711 */
712 cmpb $0x30, %al
713 je maybe6x86L
714 cmpb $0x31, %al
715 je maybe6x86L
716 cmpb $0x34, %al
717 je maybe6x86L
718 cmpb $0x35, %al
719 je maybe6x86L
720
721 /*
722 * although it is possible that we are a 6x86L, the cpu and
723 * documentation are so buggy, we just do not care.
724 */
725 jmp likely6x86
726
727 maybe6x86L:
728 /*
729 * read DIR1
730 */
731 movb $CYRIX_DIR1, %al
732 outb $CYRIX_CRI
733 inb $CYRIX_CRD
734 cmpb $0x22, %al
735 jb likely6x86
736
737 /*
738 * We are a 6x86L, or at least a 6x86 with honest cpuid feature flags
739 */
740 movl $X86_TYPE_CYRIX_6x86L, x86_type
741 jmp coma_bug
742
743 likely6x86:
744 /*
745 * We are likely a 6x86, or a 6x86L without a way of knowing
746 *
747 * The 6x86 has NO Pentium or Pentium Pro compatible features even
748 * though it claims to be a Pentium Pro compatible!
749 *
750 * The 6x86 core used in the 6x86 may have most of the Pentium system
751 * registers and largely conform to the Pentium System Programming
752 * Reference. Documentation on these parts is long gone. Treat it as
753 * a crippled Pentium and hope for the best.
754 */
755
756 movl $X86_TYPE_CYRIX_6x86, x86_type
757 jmp coma_bug
758
759 maybeGX:
760 /*
761 * Now we check whether we are a MediaGX or GXm. We have particular
762 * reason for concern here. Even though most of the GXm's
763 * report having TSC in the cpuid feature flags, the TSC may be
764 * horribly broken. What is worse is that MediaGX's are basically
765 * 486's while the good GXm's are more like Pentium Pro's!
766 */
767
768 cmpb $0x50, %al
769 jae maybeM2
770
771 /*
772 * We are either a MediaGX (sometimes called a Gx86) or GXm
773 */
774
775 cmpb $0x41, %al
776 je maybeMediaGX
777
778 cmpb $0x44, %al
779 jb maybeGXm
780
781 cmpb $0x47, %al
782 jbe maybeMediaGX
783
784 /*
785 * We do not honestly know what we are, so assume a MediaGX
786 */
787 jmp media_gx
788
789 maybeGXm:
790 /*
791 * It is still possible we are either a MediaGX or GXm, trust cpuid
792 * family should be 5 on a GXm
793 */
794 cmpl $0x500, %ebx
795 je GXm
796
797 /*
798 * BIOS/Cyrix might set family to 6 on a GXm
799 */
800 cmpl $0x600, %ebx
801 jne media_gx
802
803 GXm:
804 movl $X86_TYPE_CYRIX_GXm, x86_type
805 jmp cpu_done
806
807 maybeMediaGX:
808 /*
809 * read DIR1
810 */
811 movb $CYRIX_DIR1, %al
812 outb $CYRIX_CRI
813 inb $CYRIX_CRD
814
815 cmpb $0x30, %al
816 jae maybeGXm
817
818 /*
819 * we are a MediaGX for which we do not trust cpuid
820 */
821 media_gx:
822 movl $X86_TYPE_CYRIX_MediaGX, x86_type
823 jmp cpu_486
824
825 maybeM2:
826 /*
827 * Now we check whether we are a 6x86MX or MII. These cpu's are
828 * virtually identical, but we care because for the 6x86MX, we
829 * must work around the coma bug. Also for 6x86MX prior to revision
830 * 1.4, the TSC may have serious bugs.
831 */
832
833 cmpb $0x60, %al
834 jae maybeM3
835
836 /*
837 * family should be 6, but BIOS/Cyrix might set it to 5
838 */
839 cmpl $0x600, %ebx
840 ja cpu_486
841
842 /*
843 * read DIR1
844 */
845 movb $CYRIX_DIR1, %al
846 outb $CYRIX_CRI
847 inb $CYRIX_CRD
848
849 cmpb $0x8, %al
850 jb cyrix6x86MX
851 cmpb $0x80, %al
852 jb MII
853
854 cyrix6x86MX:
855 /*
856 * It is altogether unclear how the revision stamped on the cpu
857 * maps to the values in DIR0 and DIR1. Just assume TSC is broken.
858 */
859 movl $X86_TYPE_CYRIX_6x86MX, x86_type
860 jmp coma_bug
861
862 MII:
863 movl $X86_TYPE_CYRIX_MII, x86_type
864 likeMII:
865 jmp cpu_done
866
867 maybeM3:
868 /*
869 * We are some chip that we cannot identify yet, an MIII perhaps.
870 * We will be optimistic and hope that the chip is much like an MII,
871 * and that cpuid is sane. Cyrix seemed to have gotten it right in
872 * time for the MII; we can only hope it stayed that way.
873 * Maybe the BIOS or Cyrix is trying to hint at something.
874 */
875 cmpl $0x500, %ebx
876 je GXm
877
878 cmpb $0x80, %al
879 jae likelyM3
880
881 /*
882 * Just test for the features Cyrix is known for
883 */
884
885 jmp MII
886
887 likelyM3:
888 /*
889 * DIR0 with values from 0x80 to 0x8f indicates a VIA Cyrix III, aka
890 * the Cyrix MIII. There may be parts later that use the same ranges
891 * for DIR0 with special values in DIR1, maybe the VIA CIII, but for
892 * now we will call anything with a DIR0 of 0x80 or higher an MIII.
893 * The MIII is supposed to support large pages, but we will believe
894 * it when we see it. For now we just enable and test for MII features.
895 */
896 movl $X86_TYPE_VIA_CYRIX_III, x86_type
897 jmp likeMII
898
899 coma_bug:
900
901 /*
902 * With NO_LOCK set to 0 in CCR1, the usual state that BIOS enforces, some
903 * bus cycles are issued with LOCK# asserted. With NO_LOCK set to 1, all bus
904 * cycles except page table accesses and interrupt ACK cycles do not assert
905 * LOCK#. xchgl is an instruction that asserts LOCK# if NO_LOCK is set to 0.
906 * Due to a bug in the cpu core involving over-optimization of branch
907 * prediction, register renaming, and execution of instructions down both the
908 * X and Y pipes for the xchgl instruction, short loops can be written that
909 * never de-assert LOCK# from one invocation of the loop to the next, ad
910 * infinitum. The undesirable effect of this situation is that interrupts are
911 * not serviced. The ideal workaround to this bug would be to set NO_LOCK to
912 * 1. Unfortunately bus cycles that would otherwise have asserted LOCK# no
913 * longer do, unless they are page table accesses or interrupt ACK cycles.
914 * With LOCK# not asserted, these bus cycles are now cached. This can cause
915 * undesirable behaviour if the ARR's are not configured correctly. Solaris
916 * does not configure the ARR's, nor does it provide any useful mechanism for
917 * doing so, thus the ideal workaround is not viable. Fortunately, the only
918 * known exploits for this bug involve the xchgl instruction specifically.
919 * There is a group of undocumented registers on Cyrix 6x86, 6x86L, and
920 * 6x86MX cpu's which can be used to specify one instruction as a serializing
921 * instruction. With the xchgl instruction serialized, LOCK# is still
922 * asserted, but it is the sole instruction for which LOCK# is asserted.
923 * There is now some added penalty for the xchgl instruction, but the usual
924 * bus locking is preserved. This ingenious workaround was discovered by
925 * disassembling a binary provided by Cyrix as a workaround for this bug on
926 * Windows, but it's not documented anywhere by Cyrix, nor is the bug actually
927 * mentioned in any public errata! The only concern for this workaround is
928 * that there may be similar undiscovered bugs with other instructions that
929 * assert LOCK# that may be leveraged to similar ends. The fact that Cyrix
930 * fixed this bug sometime late in 1997 and that no exploits other than
931 * xchgl have been discovered is a good indication that this workaround is
932 * reasonable.
933 */
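/*
 * For illustration, the kind of loop this bug bites is an ordinary
 * xchg-based spinlock acquisition, e.g. (a sketch only):
 *
 *	while (atomic_swap_32(&lock, 1) != 0)	// compiles to a locked xchgl
 *		continue;
 *
 * On an affected 6x86 such a loop can keep LOCK# asserted from one iteration
 * to the next indefinitely, so interrupts are never serviced; the workaround
 * below makes xchgl a serializing instruction, which prevents that.
 */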
934
935 .set CYRIX_DBR0, 0x30 / Debug Register 0
936 .set CYRIX_DBR1, 0x31 / Debug Register 1
937 .set CYRIX_DBR2, 0x32 / Debug Register 2
938 .set CYRIX_DBR3, 0x33 / Debug Register 3
939 .set CYRIX_DOR, 0x3c / Debug Opcode Register
940
941 /*
942 * What is known about DBR1, DBR2, DBR3, and DOR is that for normal
943 * cpu execution DBR1, DBR2, and DBR3 are set to 0. To obtain opcode
944 * serialization, DBR1, DBR2, and DBR3 are loaded with 0xb8, 0x7f,
945 * and 0xff. Then, DOR is loaded with the one byte opcode.
946 */
947
948 /*
949 * select CCR3
950 */
951 movb $CYRIX_CCR3, %al
952 outb $CYRIX_CRI
953
954 /*
955 * read CCR3 and mask out MAPEN
956 */
957 inb $CYRIX_CRD
958 andb $0xf, %al
959
960 /*
961 * save masked CCR3 in %ah
962 */
963 movb %al, %ah
964
965 /*
966 * select CCR3
967 */
968 movb $CYRIX_CCR3, %al
969 outb $CYRIX_CRI
970
971 /*
972 * enable MAPEN
973 */
974 movb %ah, %al
975 orb $0x10, %al
976 outb $CYRIX_CRD
977
978 /*
979 * read DBR0
980 */
981 movb $CYRIX_DBR0, %al
982 outb $CYRIX_CRI
983 inb $CYRIX_CRD
984
985 /*
986 * disable MATCH and save in %bh
987 */
988 orb $0x80, %al
989 movb %al, %bh
990
991 /*
992 * write DBR0
993 */
994 movb $CYRIX_DBR0, %al
995 outb $CYRIX_CRI
996 movb %bh, %al
997 outb $CYRIX_CRD
998
999 /*
1000 * write DBR1
1001 */
1002 movb $CYRIX_DBR1, %al
1003 outb $CYRIX_CRI
1004 movb $0xf8, %al
1005 outb $CYRIX_CRD
1006
1007 /*
1008 * write DBR2
1009 */
1010 movb $CYRIX_DBR2, %al
1011 outb $CYRIX_CRI
1012 movb $0x7f, %al
1013 outb $CYRIX_CRD
1014
1015 /*
1016 * write DBR3
1017 */
1018 movb $CYRIX_DBR3, %al
1019 outb $CYRIX_CRI
1020 xorb %al, %al
1021 outb $CYRIX_CRD
1022
1023 /*
1024 * write DOR
1025 */
1026 movb $CYRIX_DOR, %al
1027 outb $CYRIX_CRI
1028 movb $0x87, %al
1029 outb $CYRIX_CRD
1030
1031 /*
1032 * enable MATCH
1033 */
1034 movb $CYRIX_DBR0, %al
1035 outb $CYRIX_CRI
1036 movb %bh, %al
1037 andb $0x7f, %al
1038 outb $CYRIX_CRD
1039
1040 /*
1041 * disable MAPEN
1042 */
1043 movb $0xc3, %al
1044 outb $CYRIX_CRI
1045 movb %ah, %al
1046 outb $CYRIX_CRD
1047
1048 jmp cpu_done
1049
1050 cpu_done:
1051
1052 popfl /* Restore original FLAGS */
1053 popal /* Restore all registers */
1054
1055 #endif /* !__xpv */
1056
1057 /*
1058 * mlsetup(%esp) gets called.
1059 */
1060 pushl %esp
1061 call mlsetup
1062 addl $4, %esp
1063
1064 /*
1065 * We change our appearance to look like the real thread 0.
1066 * (NOTE: making ourselves a real thread may be a no-op)
1067 * main() gets called. (NOTE: main() never returns).
1068 */
1069 call main
1070 /* NOTREACHED */
1071 pushl $__return_from_main
1072 call panic
1073
1074 /* NOTREACHED */
1075 cpu_486:
1076 pushl $__unsupported_cpu
1077 call panic
1078 SET_SIZE(_locore_start)
1079
1080 #endif /* __lint */
1081 #endif /* !__amd64 */
1082
1083
1084 /*
1085 * For stack layout, see privregs.h
1086 * When cmntrap gets called, the error code and trap number have been pushed.
1087 * When cmntrap_pushed gets called, the entire struct regs has been pushed.
1088 */
1089
1090 #if defined(__lint)
1091
1092 /* ARGSUSED */
1093 void
1094 cmntrap()
1095 {}
1096
1097 #else /* __lint */
1098
1099 .globl trap /* C handler called below */
1100
1101 #if defined(__amd64)
1102
1103 ENTRY_NP2(cmntrap, _cmntrap)
1104
1105 INTR_PUSH
1106
1107 ALTENTRY(cmntrap_pushed)
1108
1109 movq %rsp, %rbp
1110
1111 /*
1112 * - if this is a #pf i.e. T_PGFLT, %r15 is live
1113 * and contains the faulting address i.e. a copy of %cr2
1114 *
1115 * - if this is a #db i.e. T_SGLSTP, %r15 is live
1116 * and contains the value of %db6
1117 */
1118
1119 TRACE_PTR(%rdi, %rbx, %ebx, %rcx, $TT_TRAP) /* Uses labels 8 and 9 */
1120 TRACE_REGS(%rdi, %rsp, %rbx, %rcx) /* Uses label 9 */
1121 TRACE_STAMP(%rdi) /* Clobbers %eax, %edx, uses 9 */
1122
1123 /*
1124 * We must first check if DTrace has set its NOFAULT bit. This
1125 * regrettably must happen before the trap stack is recorded, because
1126 * this requires a call to getpcstack() and may induce recursion if an
1127 * fbt::getpcstack: enabling is inducing the bad load.
1128 */
1129 movl %gs:CPU_ID, %eax
1130 shlq $CPU_CORE_SHIFT, %rax
1131 leaq cpu_core(%rip), %r8
1132 addq %r8, %rax
1133 movw CPUC_DTRACE_FLAGS(%rax), %cx
1134 testw $CPU_DTRACE_NOFAULT, %cx
1135 jnz .dtrace_induced
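/*
 * The sequence above is the assembly form of (roughly):
 *
 *	cpu_core_t *cpup = &cpu_core[CPU->cpu_id];
 *	if (cpup->cpuc_dtrace_flags & CPU_DTRACE_NOFAULT)
 *		goto dtrace_induced;
 *
 * where each cpu_core[] entry is padded to 2^CPU_CORE_SHIFT bytes (hence the
 * shift-and-add indexing). A sketch, not the literal C.
 */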
1136
1137 TRACE_STACK(%rdi)
1138
1139 movq %rbp, %rdi
1140 movq %r15, %rsi
1141 movl %gs:CPU_ID, %edx
1142
1143 /*
1144 * We know that this isn't a DTrace non-faulting load; we can now safely
1145 * reenable interrupts. (In the case of pagefaults, we enter through an
1146 * interrupt gate.)
1147 */
1148 ENABLE_INTR_FLAGS
1149
1150 call trap /* trap(rp, addr, cpuid) handles all traps */
1151 jmp _sys_rtt
1152
1153 .dtrace_induced:
1154 cmpw $KCS_SEL, REGOFF_CS(%rbp) /* test CS for user-mode trap */
1155 jne 3f /* if from user, panic */
1156
1157 cmpl $T_PGFLT, REGOFF_TRAPNO(%rbp)
1158 je 1f
1159
1160 cmpl $T_GPFLT, REGOFF_TRAPNO(%rbp)
1161 je 0f
1162
1163 cmpl $T_ILLINST, REGOFF_TRAPNO(%rbp)
1164 je 0f
1165
1166 cmpl $T_ZERODIV, REGOFF_TRAPNO(%rbp)
1167 jne 4f /* if not PF/GP/UD/DE, panic */
1168
1169 orw $CPU_DTRACE_DIVZERO, %cx
1170 movw %cx, CPUC_DTRACE_FLAGS(%rax)
1171 jmp 2f
1172
1173 /*
1174 * If we've taken a GPF, we don't (unfortunately) have the address that
1175 * induced the fault. So instead of setting the fault to BADADDR,
1176 * we'll set the fault to ILLOP.
1177 */
1178 0:
1179 orw $CPU_DTRACE_ILLOP, %cx
1180 movw %cx, CPUC_DTRACE_FLAGS(%rax)
1181 jmp 2f
1182 1:
1183 orw $CPU_DTRACE_BADADDR, %cx
1184 movw %cx, CPUC_DTRACE_FLAGS(%rax) /* set fault to bad addr */
1185 movq %r15, CPUC_DTRACE_ILLVAL(%rax)
1186 /* fault addr is illegal value */
1187 2:
1188 movq REGOFF_RIP(%rbp), %rdi
1189 movq %rdi, %r12
1190 call dtrace_instr_size
1191 addq %rax, %r12
1192 movq %r12, REGOFF_RIP(%rbp)
1193 INTR_POP
1194 jmp tr_iret_auto
1195 /*NOTREACHED*/
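/*
 * Label 2 above advances the saved %rip past the faulting instruction before
 * returning, i.e. approximately:
 *
 *	rp->r_pc += dtrace_instr_size((uchar_t *)rp->r_pc);
 *
 * so that the probe context resumes after the instruction that faulted.
 */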
1196 3:
1197 leaq dtrace_badflags(%rip), %rdi
1198 xorl %eax, %eax
1199 call panic
1200 4:
1201 leaq dtrace_badtrap(%rip), %rdi
1202 xorl %eax, %eax
1203 call panic
1204 SET_SIZE(cmntrap_pushed)
1205 SET_SIZE(cmntrap)
1206 SET_SIZE(_cmntrap)
1207
1208 #elif defined(__i386)
1209
1210
1211 ENTRY_NP2(cmntrap, _cmntrap)
1212
1213 INTR_PUSH
1214
1215 ALTENTRY(cmntrap_pushed)
1216
1217 movl %esp, %ebp
1218
1219 /*
1220 * - if this is a #pf i.e. T_PGFLT, %esi is live
1221 * and contains the faulting address i.e. a copy of %cr2
1222 *
1223 * - if this is a #db i.e. T_SGLSTP, %esi is live
1224 * and contains the value of %db6
1225 */
1226
1227 TRACE_PTR(%edi, %ebx, %ebx, %ecx, $TT_TRAP) /* Uses labels 8 and 9 */
1228 TRACE_REGS(%edi, %esp, %ebx, %ecx) /* Uses label 9 */
1229 TRACE_STAMP(%edi) /* Clobbers %eax, %edx, uses 9 */
1230
1231 /*
1232 * We must first check if DTrace has set its NOFAULT bit. This
1233 * regrettably must happen before the trap stack is recorded, because
1234 * this requires a call to getpcstack() and may induce recursion if an
1235 * fbt::getpcstack: enabling is inducing the bad load.
1236 */
1237 movl %gs:CPU_ID, %eax
1238 shll $CPU_CORE_SHIFT, %eax
1239 addl $cpu_core, %eax
1240 movw CPUC_DTRACE_FLAGS(%eax), %cx
1241 testw $CPU_DTRACE_NOFAULT, %cx
1242 jnz .dtrace_induced
1243
1244 TRACE_STACK(%edi)
1245
1246 pushl %gs:CPU_ID
1247 pushl %esi /* fault address for PGFLTs */
1248 pushl %ebp /* &regs */
1249
1250 /*
1251 * We know that this isn't a DTrace non-faulting load; we can now safely
1252 * reenable interrupts. (In the case of pagefaults, we enter through an
1253 * interrupt gate.)
1254 */
1255 ENABLE_INTR_FLAGS
1256
1257 call trap /* trap(rp, addr, cpuid) handles all traps */
1258 addl $12, %esp /* get argument off stack */
1259 jmp _sys_rtt
1260
1261 .dtrace_induced:
1262 cmpw $KCS_SEL, REGOFF_CS(%ebp) /* test CS for user-mode trap */
1263 jne 3f /* if from user, panic */
1264
1265 cmpl $T_PGFLT, REGOFF_TRAPNO(%ebp)
1266 je 1f
1267
1268 cmpl $T_GPFLT, REGOFF_TRAPNO(%ebp)
1269 je 0f
1270
1271 cmpl $T_ZERODIV, REGOFF_TRAPNO(%ebp)
1272 jne 4f /* if not PF/GP/DE, panic */
1273
1274 orw $CPU_DTRACE_DIVZERO, %cx
1275 movw %cx, CPUC_DTRACE_FLAGS(%eax)
1276 jmp 2f
1277
1278 0:
1279 /*
1280 * If we've taken a GPF, we don't (unfortunately) have the address that
1281 * induced the fault. So instead of setting the fault to BADADDR,
1282 * we'll set the fault to ILLOP.
1283 */
1284 orw $CPU_DTRACE_ILLOP, %cx
1285 movw %cx, CPUC_DTRACE_FLAGS(%eax)
1286 jmp 2f
1287 1:
1288 orw $CPU_DTRACE_BADADDR, %cx
1289 movw %cx, CPUC_DTRACE_FLAGS(%eax) /* set fault to bad addr */
1290 movl %esi, CPUC_DTRACE_ILLVAL(%eax)
1291 /* fault addr is illegal value */
1292 2:
1293 pushl REGOFF_EIP(%ebp)
1294 call dtrace_instr_size
1295 addl $4, %esp
1296 movl REGOFF_EIP(%ebp), %ecx
1297 addl %eax, %ecx
1298 movl %ecx, REGOFF_EIP(%ebp)
1299 INTR_POP_KERNEL
1300 IRET
1301 /*NOTREACHED*/
1302 3:
1303 pushl $dtrace_badflags
1304 call panic
1305 4:
1306 pushl $dtrace_badtrap
1307 call panic
1308 SET_SIZE(cmntrap)
1309 SET_SIZE(_cmntrap)
1310
1311 #endif /* __i386 */
1312
1313 /*
1314 * Declare a uintptr_t which has the size of _cmntrap to enable stack
1315 * traceback code to know when a regs structure is on the stack.
1316 */
1317 .globl _cmntrap_size
1318 .align CLONGSIZE
1319 _cmntrap_size:
1320 .NWORD . - _cmntrap
1321 .type _cmntrap_size, @object
1322
1323 dtrace_badflags:
1324 .string "bad DTrace flags"
1325
1326 dtrace_badtrap:
1327 .string "bad DTrace trap"
1328
1329 #endif /* __lint */
1330
1331 #if defined(__lint)
1332
1333 /* ARGSUSED */
1334 void
1335 cmninttrap()
1336 {}
1337
1338 #if !defined(__xpv)
1339 void
1340 bop_trap_handler(void)
1341 {}
1342 #endif
1343
1344 #else /* __lint */
1345
1346 .globl trap /* C handler called below */
1347
1348 #if defined(__amd64)
1349
1350 ENTRY_NP(cmninttrap)
1351
1352 INTR_PUSH
1353 INTGATE_INIT_KERNEL_FLAGS
1354
1355 TRACE_PTR(%rdi, %rbx, %ebx, %rcx, $TT_TRAP) /* Uses labels 8 and 9 */
1356 TRACE_REGS(%rdi, %rsp, %rbx, %rcx) /* Uses label 9 */
1357 TRACE_STAMP(%rdi) /* Clobbers %eax, %edx, uses 9 */
1358
1359 movq %rsp, %rbp
1360
1361 movl %gs:CPU_ID, %edx
1362 xorl %esi, %esi
1363 movq %rsp, %rdi
1364 call trap /* trap(rp, addr, cpuid) handles all traps */
1365 jmp _sys_rtt
1366 SET_SIZE(cmninttrap)
1367
1368 #if !defined(__xpv)
1369 /*
1370 * Handle traps early in boot. This just revectors into C quickly, as
1371 * these are always fatal errors.
1372 *
1373 * Adjust %rsp to get the same stack layout as in 32-bit mode for bop_trap().
1374 */
1375 ENTRY(bop_trap_handler)
1376 movq %rsp, %rdi
1377 sub $8, %rsp
1378 call bop_trap
1379 SET_SIZE(bop_trap_handler)
1380 #endif
1381
1382 #elif defined(__i386)
1383
1384 ENTRY_NP(cmninttrap)
1385
1386 INTR_PUSH
1387 INTGATE_INIT_KERNEL_FLAGS
1388
1389 TRACE_PTR(%edi, %ebx, %ebx, %ecx, $TT_TRAP) /* Uses labels 8 and 9 */
1390 TRACE_REGS(%edi, %esp, %ebx, %ecx) /* Uses label 9 */
1391 TRACE_STAMP(%edi) /* Clobbers %eax, %edx, uses 9 */
1392
1393 movl %esp, %ebp
1394
1395 TRACE_STACK(%edi)
1396
1397 pushl %gs:CPU_ID
1398 pushl $0
1399 pushl %ebp
1400 call trap /* trap(rp, addr, cpuid) handles all traps */
1401 addl $12, %esp
1402 jmp _sys_rtt
1403 SET_SIZE(cmninttrap)
1404
1405 #if !defined(__xpv)
1406 /*
1407 * Handle traps early in boot. This just revectors into C quickly, as
1408 * these are always fatal errors.
1409 */
1410 ENTRY(bop_trap_handler)
1411 movl %esp, %eax
1412 pushl %eax
1413 call bop_trap
1414 SET_SIZE(bop_trap_handler)
1415 #endif
1416
1417 #endif /* __i386 */
1418
1419 #endif /* __lint */
1420
1421 #if defined(__lint)
1422
1423 /* ARGSUSED */
1424 void
1425 dtrace_trap()
1426 {}
1427
1428 #else /* __lint */
1429
1430 .globl dtrace_user_probe
1431
1432 #if defined(__amd64)
1433
1434 ENTRY_NP(dtrace_trap)
1435
1436 INTR_PUSH
1437
1438 TRACE_PTR(%rdi, %rbx, %ebx, %rcx, $TT_TRAP) /* Uses labels 8 and 9 */
1439 TRACE_REGS(%rdi, %rsp, %rbx, %rcx) /* Uses label 9 */
1440 TRACE_STAMP(%rdi) /* Clobbers %eax, %edx, uses 9 */
1441
1442 movq %rsp, %rbp
1443
1444 movl %gs:CPU_ID, %edx
1445 #if defined(__xpv)
1446 movq %gs:CPU_VCPU_INFO, %rsi
1447 movq VCPU_INFO_ARCH_CR2(%rsi), %rsi
1448 #else
1449 movq %cr2, %rsi
1450 #endif
1451 movq %rsp, %rdi
1452
1453 ENABLE_INTR_FLAGS
1454
1455 call dtrace_user_probe /* dtrace_user_probe(rp, addr, cpuid) */
1456 jmp _sys_rtt
1457
1458 SET_SIZE(dtrace_trap)
1459
1460 #elif defined(__i386)
1461
1462 ENTRY_NP(dtrace_trap)
1463
1464 INTR_PUSH
1465
1466 TRACE_PTR(%edi, %ebx, %ebx, %ecx, $TT_TRAP) /* Uses labels 8 and 9 */
1467 TRACE_REGS(%edi, %esp, %ebx, %ecx) /* Uses label 9 */
1468 TRACE_STAMP(%edi) /* Clobbers %eax, %edx, uses 9 */
1469
1470 movl %esp, %ebp
1471
1472 pushl %gs:CPU_ID
1473 #if defined(__xpv)
1474 movl %gs:CPU_VCPU_INFO, %eax
1475 movl VCPU_INFO_ARCH_CR2(%eax), %eax
1476 #else
1477 movl %cr2, %eax
1478 #endif
1479 pushl %eax
1480 pushl %ebp
1481
1482 ENABLE_INTR_FLAGS
1483
1484 call dtrace_user_probe /* dtrace_user_probe(rp, addr, cpuid) */
1485 addl $12, %esp /* get argument off stack */
1486
1487 jmp _sys_rtt
1488 SET_SIZE(dtrace_trap)
1489
1490 #endif /* __i386 */
1491
1492 #endif /* __lint */
1493
1494 /*
1495 * Return from _sys_trap routine.
1496 */
1497
1498 #if defined(__lint)
1499
1500 void
1501 lwp_rtt_initial(void)
1502 {}
1503
1504 void
1505 lwp_rtt(void)
1506 {}
1507
1508 void
1509 _sys_rtt(void)
1510 {}
1511
1512 #else /* __lint */
1513
1514 ENTRY_NP(lwp_rtt_initial)
1515 movq %gs:CPU_THREAD, %r15
1516 movq T_STACK(%r15), %rsp /* switch to the thread stack */
1517 movq %rsp, %rbp
1518 call __dtrace_probe___proc_start
1519 jmp _lwp_rtt
1520
1521 ENTRY_NP(lwp_rtt)
1522
1523 /*
1524 * r14 lwp
1525 * rdx lwp->lwp_procp
1526 * r15 curthread
1527 */
1528
1529 movq %gs:CPU_THREAD, %r15
1530 movq T_STACK(%r15), %rsp /* switch to the thread stack */
1531 movq %rsp, %rbp
1532 _lwp_rtt:
1533 call __dtrace_probe___proc_lwp__start
1534 movq %gs:CPU_LWP, %r14
1535 movq LWP_PROCP(%r14), %rdx
1536
1537 /*
1538 * XX64 Is the stack misaligned correctly at this point?
1539 * If not, we need to do a push before calling anything ..
1540 */
1541
1542 #if defined(DEBUG)
1543 /*
1544 * If we were to run lwp_savectx at this point -without-
1545 * pcb_rupdate being set to 1, we'd end up sampling the hardware
1546 * state left by the previous running lwp, rather than setting
1547 * the values requested by the lwp creator. Bad.
1548 */
1549 testb $0x1, PCB_RUPDATE(%r14)
1550 jne 1f
1551 leaq _no_pending_updates(%rip), %rdi
1552 movl $__LINE__, %esi
1553 movq %r14, %rdx
1554 xorl %eax, %eax
1555 call panic
1556 1:
1557 #endif
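/*
 * The DEBUG block above is the assembly form of an assertion along these
 * lines (a sketch):
 *
 *	if (lwp->lwp_pcb.pcb_rupdate != 1)
 *		panic("locore.s:%d lwp_rtt(lwp %p) but pcb_rupdate != 1",
 *		    __LINE__, (void *)lwp);
 */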
1558
1559 /*
1560 * If agent lwp, clear %fs and %gs
1561 */
1562 cmpq %r15, P_AGENTTP(%rdx)
1563 jne 1f
1564 xorl %ecx, %ecx
1565 movq %rcx, REGOFF_FS(%rsp)
1566 movq %rcx, REGOFF_GS(%rsp)
1567 movw %cx, LWP_PCB_FS(%r14)
1568 movw %cx, LWP_PCB_GS(%r14)
1569 1:
1570 call dtrace_systrace_rtt
1571 movq REGOFF_RDX(%rsp), %rsi
1572 movq REGOFF_RAX(%rsp), %rdi
1573 call post_syscall /* post_syscall(rval1, rval2) */
1574
1575 /*
1576 * XXX - may want a fast path that avoids sys_rtt_common in the
1577 * most common case.
1578 */
1579 ALTENTRY(_sys_rtt)
1580 CLI(%rax) /* disable interrupts */
1581 ALTENTRY(_sys_rtt_ints_disabled)
1582 movq %rsp, %rdi /* pass rp to sys_rtt_common */
1583 call sys_rtt_common /* do common sys_rtt tasks */
1584 testq %rax, %rax /* returning to userland? */
1585 jz sr_sup
1586
1587 /*
1588 * Return to user
1589 */
1590 ASSERT_UPCALL_MASK_IS_SET
1591 cmpw $UCS_SEL, REGOFF_CS(%rsp) /* test for native (64-bit) lwp? */
1592 je sys_rtt_syscall
1593
1594 /*
1595 * Return to 32-bit userland
1596 */
1597 ALTENTRY(sys_rtt_syscall32)
1598 USER32_POP
1599 jmp tr_iret_user
1600 /*NOTREACHED*/
1601
1602 ALTENTRY(sys_rtt_syscall)
1603 /*
1604 * Return to 64-bit userland
1605 */
1606 USER_POP
1607 ALTENTRY(nopop_sys_rtt_syscall)
1608 jmp tr_iret_user
1609 /*NOTREACHED*/
1610 SET_SIZE(nopop_sys_rtt_syscall)
1611
1612 /*
1613 * Return to supervisor
1614 * NOTE: to make the check in trap() that tests if we are executing
1615 * segment register fixup/restore code work properly, sr_sup MUST be
1616 * after _sys_rtt.
1617 */
1618 ALTENTRY(sr_sup)
1619 /*
1620 * Restore regs before doing iretq to kernel mode
1621 */
1622 INTR_POP
1623 jmp tr_iret_kernel
1624 .globl _sys_rtt_end
1625 _sys_rtt_end:
1626 /*NOTREACHED*/
1627 SET_SIZE(sr_sup)
1628 SET_SIZE(_sys_rtt_end)
1629 SET_SIZE(lwp_rtt)
1630 SET_SIZE(lwp_rtt_initial)
1631 SET_SIZE(_sys_rtt_ints_disabled)
1632 SET_SIZE(_sys_rtt)
1633 SET_SIZE(sys_rtt_syscall)
1634 SET_SIZE(sys_rtt_syscall32)
1635
1636 #endif /* __lint */
1637
1638 #if defined(__lint)
1639
1640 /*
1641 * So why do we have to deal with all this crud in the world of ia32?
1642 *
1643 * Basically there are four classes of ia32 implementations, those that do not
1644 * have a TSC, those that have a marginal TSC that is broken to the extent
1645 * that it is useless, those that have a marginal TSC that is not quite so
1646 * horribly broken and can be used with some care, and those that have a
1647 * reliable TSC. This crud has to be here in order to sift through all the
1648 * variants.
1649 */
1650
1651 /*ARGSUSED*/
1652 uint64_t
1653 freq_tsc(uint32_t *pit_counter)
1654 {
1655 return (0);
1656 }
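/*
 * On return *pit_counter holds the number of PIT ticks that elapsed while the
 * returned number of TSC ticks accumulated, so a caller can estimate the cpu
 * clock rate roughly as follows (a sketch; PIT_HZ stands for the ~1.193182 MHz
 * PIT input clock, and a zero return means the measurement failed):
 *
 *	uint32_t pit_ticks;
 *	uint64_t tsc_ticks = freq_tsc(&pit_ticks);
 *	uint64_t cpu_hz = (tsc_ticks * PIT_HZ) / pit_ticks;
 */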
1657
1658 #else /* __lint */
1659
1660 #if defined(__amd64)
1661
1662 /*
1663 * XX64 quick and dirty port from the i386 version. Since we
1664 * believe the amd64 tsc is more reliable, could this code be
1665 * simpler?
1666 */
1667 ENTRY_NP(freq_tsc)
1668 pushq %rbp
1669 movq %rsp, %rbp
1670 movq %rdi, %r9 /* save pit_counter */
1671 pushq %rbx
1672
1673 / We have a TSC, but we have no way in general to know how reliable it is.
1674 / Usually a marginal TSC behaves appropriately unless not enough time
1675 / elapses between reads. A reliable TSC can be read as often and as rapidly
1676 / as desired. The simplistic approach of reading the TSC counter and
1677 / correlating to the PIT counter cannot be naively followed. Instead estimates
1678 / have to be taken to successively refine a guess at the speed of the cpu
1679 / and then the TSC and PIT counter are correlated. In practice very rarely
1680 / is more than one quick loop required for an estimate. Measures have to be
1681 / taken to prevent the PIT counter from wrapping beyond its resolution and for
1682 / measuring the clock rate of very fast processors.
1683 /
1684 / The following constant can be tuned. It should be such that the loop does
1685 / not take too many nor too few PIT counts to execute. If this value is too
1686 / large, then on slow machines the loop will take a long time, or the PIT
1687 / counter may even wrap. If this value is too small, then on fast machines
1688 / the PIT counter may count so few ticks that the resolution of the PIT
1689 / itself causes a bad guess. Because this code is used in machines with
1690 / marginal TSC's and/or IO, if this value is too small on those, it may
1691 / cause the calculated cpu frequency to vary slightly from boot to boot.
1692 /
1693 / In all cases even if this constant is set inappropriately, the algorithm
1694 / will still work and the caller should be able to handle variances in the
1695 / calculation of cpu frequency, but the calculation will be inefficient and
1696 / take a disproportionate amount of time relative to a well selected value.
1697 / As the slowest supported cpu becomes faster, this constant should be
1698 / carefully increased.
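/
/ A hedged C-style sketch of the control flow implemented below; every name
/ here is illustrative, not the real code:
/
/	loop_count = 0x8000;
/	for (;;) {
/		program_pit_mode0(0xffff);		/ start the PIT count down
/		t0 = rdtsc();
/		spin(loop_count);			/ the perf loop
/		t1 = rdtsc();
/		pit = latch_and_read_pit();
/		if (pit.count_never_loaded)		/ loop far too short
/			loop_count *= 2;
/		else if (pit.wrapped)			/ loop far too long
/			loop_count /= 2;
/		else if (pit.elapsed < threshold)	/ refine toward ~0xf000 ticks
/			loop_count = loop_count * 0xf000 / pit.elapsed;
/		else {
/			*pit_counter = pit.elapsed;
/			return (t1 - t0);		/ TSC ticks elapsed
/		}
/	}
/
/ If loop_count overflows along the way the routine gives up and returns 0.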
1699
1700 movl $0x8000, %ecx
1701
1702 / to make sure the instruction cache has been warmed
1703 clc
1704
1705 jmp freq_tsc_loop
1706
1707 / The following block of code up to and including the latching of the PIT
1708 / counter after freq_tsc_perf_loop is very critical and very carefully
1709 / written, it should only be modified with great care. freq_tsc_loop to
1710 / freq_tsc_perf_loop fits exactly in 16 bytes as do the instructions in
1711 / freq_tsc_perf_loop up to the unlatching of the PIT counter.
1712
1713 .align 32
1714 freq_tsc_loop:
1715 / save the loop count in %ebx
1716 movl %ecx, %ebx
1717
1718 / initialize the PIT counter and start a count down
1719 movb $PIT_LOADMODE, %al
1720 outb $PITCTL_PORT
1721 movb $0xff, %al
1722 outb $PITCTR0_PORT
1723 outb $PITCTR0_PORT
1724
1725 / read the TSC and store the TS in %edi:%esi
1726 rdtsc
1727 movl %eax, %esi
1728
1729 freq_tsc_perf_loop:
1730 movl %edx, %edi
1731 movl %eax, %esi
1732 movl %edx, %edi
1733 loop freq_tsc_perf_loop
1734
1735 / read the TSC and store the LSW in %ecx
1736 rdtsc
1737 movl %eax, %ecx
1738
1739 / latch the PIT counter and status
1740 movb $_CONST(PIT_READBACK|PIT_READBACKC0), %al
1741 outb $PITCTL_PORT
1742
1743 / remember if the icache has been warmed
1744 setc %ah
1745
1746 / read the PIT status
1747 inb $PITCTR0_PORT
1748 shll $8, %eax
1749
1750 / read PIT count
1751 inb $PITCTR0_PORT
1752 shll $8, %eax
1753 inb $PITCTR0_PORT
1754 bswap %eax
1755
1756 / check to see if the PIT count was loaded into the CE
1757 btw $_CONST(PITSTAT_NULLCNT+8), %ax
1758 jc freq_tsc_increase_count
1759
1760 / check to see if PIT counter wrapped
1761 btw $_CONST(PITSTAT_OUTPUT+8), %ax
1762 jnc freq_tsc_pit_did_not_wrap
1763
1764 / halve count
1765 shrl $1, %ebx
1766 movl %ebx, %ecx
1767
1768 / the instruction cache has been warmed
1769 stc
1770
1771 jmp freq_tsc_loop
1772
1773 freq_tsc_increase_count:
1774 shll $1, %ebx
1775 jc freq_tsc_too_fast
1776
1777 movl %ebx, %ecx
1778
1779 / the instruction cache has been warmed
1780 stc
1781
1782 jmp freq_tsc_loop
1783
1784 freq_tsc_pit_did_not_wrap:
1785 roll $16, %eax
1786
1787 cmpw $0x2000, %ax
1788 notw %ax
1789 jb freq_tsc_sufficient_duration
1790
1791 freq_tsc_calculate:
1792 / in mode 0, the PIT loads the count into the CE on the first CLK pulse,
1793 / then on the second CLK pulse the CE is decremented, therefore mode 0
1794 / is really a (count + 1) counter, ugh
1795 xorl %esi, %esi
1796 movw %ax, %si
1797 incl %esi
1798
1799 movl $0xf000, %eax
1800 mull %ebx
1801
1802 / tuck away (target_pit_count * loop_count)
1803 movl %edx, %ecx
1804 movl %eax, %ebx
1805
1806 movl %esi, %eax
1807 movl $0xffffffff, %edx
1808 mull %edx
1809
1810 addl %esi, %eax
1811 adcl $0, %edx
1812
1813 cmpl %ecx, %edx
1814 ja freq_tsc_div_safe
1815 jb freq_tsc_too_fast
1816
1817 cmpl %ebx, %eax
1818 jbe freq_tsc_too_fast
1819
1820 freq_tsc_div_safe:
1821 movl %ecx, %edx
1822 movl %ebx, %eax
1823
1824 movl %esi, %ecx
1825 divl %ecx
1826
1827 movl %eax, %ecx
1828
1829 / the instruction cache has been warmed
1830 stc
1831
1832 jmp freq_tsc_loop
1833
1834 freq_tsc_sufficient_duration:
1835 / test to see if the icache has been warmed
1836 btl $16, %eax
1837 jnc freq_tsc_calculate
1838
1839 / recall mode 0 is a (count + 1) counter
1840 andl $0xffff, %eax
1841 incl %eax
1842
1843 / save the number of PIT counts
1844 movl %eax, (%r9)
1845
1846 / calculate the number of TS's that elapsed
1847 movl %ecx, %eax
1848 subl %esi, %eax
1849 sbbl %edi, %edx
1850
1851 jmp freq_tsc_end
1852
1853 freq_tsc_too_fast:
1854 / return 0 as a 64 bit quantity
1855 xorl %eax, %eax
1856 xorl %edx, %edx
1857
1858 freq_tsc_end:
1859 shlq $32, %rdx
1860 orq %rdx, %rax
1861
1862 popq %rbx
1863 leaveq
1864 ret
1865 SET_SIZE(freq_tsc)
1866
1867 #elif defined(__i386)
1868
1869 ENTRY_NP(freq_tsc)
1870 pushl %ebp
1871 movl %esp, %ebp
1872 pushl %edi
1873 pushl %esi
1874 pushl %ebx
1875
1876 / We have a TSC, but we have no way in general to know how reliable it is.
1877 / Usually a marginal TSC behaves appropriately unless not enough time
1878 / elapses between reads. A reliable TSC can be read as often and as rapidly
1879 / as desired. The simplistic approach of reading the TSC counter and
1880 / correlating to the PIT counter cannot be naively followed. Instead estimates
1881 / have to be taken to successively refine a guess at the speed of the cpu
1882 / and then the TSC and PIT counter are correlated. In practice very rarely
1883 / is more than one quick loop required for an estimate. Measures have to be
1884 / taken to prevent the PIT counter from wrapping beyond its resolution and for
1885 / measuring the clock rate of very fast processors.
1886 /
1887 / The following constant can be tuned. It should be such that the loop does
1888 / not take too many nor too few PIT counts to execute. If this value is too
1889 / large, then on slow machines the loop will take a long time, or the PIT
1890 / counter may even wrap. If this value is too small, then on fast machines
1891 / the PIT counter may count so few ticks that the resolution of the PIT
1892 / itself causes a bad guess. Because this code is used in machines with
1893 / marginal TSC's and/or IO, if this value is too small on those, it may
1894 / cause the calculated cpu frequency to vary slightly from boot to boot.
1895 /
1896 / In all cases even if this constant is set inappropriately, the algorithm
1897 / will still work and the caller should be able to handle variances in the
1898 / calculation of cpu frequency, but the calculation will be inefficient and
1899 / take a disproportionate amount of time relative to a well selected value.
1900 / As the slowest supported cpu becomes faster, this constant should be
1901 / carefully increased.
1902
1903 movl $0x8000, %ecx
1904
1905 / to make sure the instruction cache has been warmed
1906 clc
1907
1908 jmp freq_tsc_loop
1909
1910 / The following block of code up to and including the latching of the PIT
1911 / counter after freq_tsc_perf_loop is very critical and very carefully
1912 / written, it should only be modified with great care. freq_tsc_loop to
1913 / freq_tsc_perf_loop fits exactly in 16 bytes as do the instructions in
1914 / freq_tsc_perf_loop up to the unlatching of the PIT counter.
1915
1916 .align 32
1917 freq_tsc_loop:
1918 / save the loop count in %ebx
1919 movl %ecx, %ebx
1920
1921 / initialize the PIT counter and start a count down
1922 movb $PIT_LOADMODE, %al
1923 outb $PITCTL_PORT
1924 movb $0xff, %al
1925 outb $PITCTR0_PORT
1926 outb $PITCTR0_PORT
1927
1928 / read the TSC and store the TS in %edi:%esi
1929 rdtsc
1930 movl %eax, %esi
1931
1932 freq_tsc_perf_loop:
1933 movl %edx, %edi
1934 movl %eax, %esi
1935 movl %edx, %edi
1936 loop freq_tsc_perf_loop
1937
1938 / read the TSC and store the LSW in %ecx
1939 rdtsc
1940 movl %eax, %ecx
1941
1942 / latch the PIT counter and status
1943 movb $_CONST(PIT_READBACK|PIT_READBACKC0), %al
1944 outb $PITCTL_PORT
1945
1946 / remember if the icache has been warmed
1947 setc %ah
1948
1949 / read the PIT status
1950 inb $PITCTR0_PORT
1951 shll $8, %eax
1952
1953 / read PIT count
1954 inb $PITCTR0_PORT
1955 shll $8, %eax
1956 inb $PITCTR0_PORT
1957 bswap %eax
1958
1959 / check to see if the PIT count was loaded into the CE
1960 btw $_CONST(PITSTAT_NULLCNT+8), %ax
1961 jc freq_tsc_increase_count
1962
1963 / check to see if PIT counter wrapped
1964 btw $_CONST(PITSTAT_OUTPUT+8), %ax
1965 jnc freq_tsc_pit_did_not_wrap
1966
1967 / halve count
1968 shrl $1, %ebx
1969 movl %ebx, %ecx
1970
1971 / the instruction cache has been warmed
1972 stc
1973
1974 jmp freq_tsc_loop
1975
1976 freq_tsc_increase_count:
1977 shll $1, %ebx
1978 jc freq_tsc_too_fast
1979
1980 movl %ebx, %ecx
1981
1982 / the instruction cache has been warmed
1983 stc
1984
1985 jmp freq_tsc_loop
1986
1987 freq_tsc_pit_did_not_wrap:
1988 roll $16, %eax
1989
1990 cmpw $0x2000, %ax
1991 notw %ax
1992 jb freq_tsc_sufficient_duration
1993
1994 freq_tsc_calculate:
1995 / in mode 0, the PIT loads the count into the CE on the first CLK pulse,
1996 / then on the second CLK pulse the CE is decremented, therefore mode 0
1997 / is really a (count + 1) counter, ugh
1998 xorl %esi, %esi
1999 movw %ax, %si
2000 incl %esi
2001
2002 movl $0xf000, %eax
2003 mull %ebx
2004
2005 / tuck away (target_pit_count * loop_count)
2006 movl %edx, %ecx
2007 movl %eax, %ebx
2008
2009 movl %esi, %eax
2010 movl $0xffffffff, %edx
2011 mull %edx
2012
2013 addl %esi, %eax
2014 adcl $0, %edx
2015
2016 cmpl %ecx, %edx
2017 ja freq_tsc_div_safe
2018 jb freq_tsc_too_fast
2019
2020 cmpl %ebx, %eax
2021 jbe freq_tsc_too_fast
2022
2023 freq_tsc_div_safe:
2024 movl %ecx, %edx
2025 movl %ebx, %eax
2026
2027 movl %esi, %ecx
2028 divl %ecx
2029
2030 movl %eax, %ecx
2031
2032 / the instruction cache has been warmed
2033 stc
2034
2035 jmp freq_tsc_loop
2036
2037 freq_tsc_sufficient_duration:
2038 / test to see if the icache has been warmed
2039 btl $16, %eax
2040 jnc freq_tsc_calculate
2041
2042 / recall mode 0 is a (count + 1) counter
2043 andl $0xffff, %eax
2044 incl %eax
2045
2046 / save the number of PIT counts
2047 movl 8(%ebp), %ebx
2048 movl %eax, (%ebx)
2049
2050 / calculate the number of TS's that elapsed
2051 movl %ecx, %eax
2052 subl %esi, %eax
2053 sbbl %edi, %edx
2054
2055 jmp freq_tsc_end
2056
2057 freq_tsc_too_fast:
2058 / return 0 as a 64 bit quantity
2059 xorl %eax, %eax
2060 xorl %edx, %edx
2061
2062 freq_tsc_end:
2063 popl %ebx
2064 popl %esi
2065 popl %edi
2066 popl %ebp
2067 ret
2068 SET_SIZE(freq_tsc)
2069
2070 #endif /* __i386 */
2071 #endif /* __lint */
2072
2073 #if !defined(__amd64)
2074 #if defined(__lint)
2075
2076 /*
2077 * We do not have a TSC so we use a block of instructions with well known
2078 * timings.
2079 */
2080
2081 /*ARGSUSED*/
2082 uint64_t
2083 freq_notsc(uint32_t *pit_counter)
2084 {
2085 return (0);
2086 }
2087
2088 #else /* __lint */
2089 ENTRY_NP(freq_notsc)
2090 pushl %ebp
2091 movl %esp, %ebp
2092 pushl %edi
2093 pushl %esi
2094 pushl %ebx
2095
2096 / initial count for the idivl loop
2097 movl $0x1000, %ecx
2098
2099 / load the divisor
2100 movl $1, %ebx
2101
2102 jmp freq_notsc_loop
2103
2104 .align 16
2105 freq_notsc_loop:
2106 / set high 32 bits of dividend to zero
2107 xorl %edx, %edx
2108
2109 / save the loop count in %edi
2110 movl %ecx, %edi
2111
2112 / initialize the PIT counter and start a count down
2113 movb $PIT_LOADMODE, %al
2114 outb $PITCTL_PORT
2115 movb $0xff, %al
2116 outb $PITCTR0_PORT
2117 outb $PITCTR0_PORT
2118
2119 / set low 32 bits of dividend to zero
2120 xorl %eax, %eax
2121
2122 / It is vital that the arguments to idivl be set appropriately because on some
2123 / cpu's this instruction takes more or less clock ticks depending on its
2124 / arguments.
2125 freq_notsc_perf_loop:
2126 idivl %ebx
2127 idivl %ebx
2128 idivl %ebx
2129 idivl %ebx
2130 idivl %ebx
2131 loop freq_notsc_perf_loop
2132
2133 / latch the PIT counter and status
2134 movb $_CONST(PIT_READBACK|PIT_READBACKC0), %al
2135 outb $PITCTL_PORT
2136
2137 / read the PIT status
2138 inb $PITCTR0_PORT
2139 shll $8, %eax
2140
2141 / read PIT count
2142 inb $PITCTR0_PORT
2143 shll $8, %eax
2144 inb $PITCTR0_PORT
2145 bswap %eax
2146
2147 / check to see if the PIT count was loaded into the CE
2148 btw $_CONST(PITSTAT_NULLCNT+8), %ax
2149 jc freq_notsc_increase_count
2150
2151 / check to see if PIT counter wrapped
2152 btw $_CONST(PITSTAT_OUTPUT+8), %ax
2153 jnc freq_notsc_pit_did_not_wrap
2154
2155 / halve count
2156 shrl $1, %edi
2157 movl %edi, %ecx
2158
2159 jmp freq_notsc_loop
2160
2161 freq_notsc_increase_count:
2162 shll $1, %edi
2163 jc freq_notsc_too_fast
2164
2165 movl %edi, %ecx
2166
2167 jmp freq_notsc_loop
2168
2169 freq_notsc_pit_did_not_wrap:
2170 shrl $16, %eax
2171
2172 cmpw $0x2000, %ax
2173 notw %ax
2174 jb freq_notsc_sufficient_duration
2175
2176 freq_notsc_calculate:
2177 / in mode 0, the PIT loads the count into the CE on the first CLK pulse,
2178 / then on the second CLK pulse the CE is decremented, therefore mode 0
2179 / is really a (count + 1) counter, ugh
2180 xorl %esi, %esi
2181 movw %ax, %si
2182 incl %esi
2183
2184 movl %edi, %eax
2185 movl $0xf000, %ecx
2186 mull %ecx
2187
2188 / tuck away (target_pit_count * loop_count)
2189 movl %edx, %edi
2190 movl %eax, %ecx
2191
2192 movl %esi, %eax
2193 movl $0xffffffff, %edx
2194 mull %edx
2195
2196 addl %esi, %eax
2197 adcl $0, %edx
2198
2199 cmpl %edi, %edx
2200 ja freq_notsc_div_safe
2201 jb freq_notsc_too_fast
2202
2203 cmpl %ecx, %eax
2204 jbe freq_notsc_too_fast
2205
2206 freq_notsc_div_safe:
2207 movl %edi, %edx
2208 movl %ecx, %eax
2209
2210 movl %esi, %ecx
2211 divl %ecx
2212
2213 movl %eax, %ecx
2214
2215 jmp freq_notsc_loop
2216
2217 freq_notsc_sufficient_duration:
2218 / recall mode 0 is a (count + 1) counter
2219 incl %eax
2220
2221 / save the number of PIT counts
2222 movl 8(%ebp), %ebx
2223 movl %eax, (%ebx)
2224
2225 / calculate the number of cpu clock ticks that elapsed
2226 cmpl $X86_VENDOR_Cyrix, x86_vendor
2227 jnz freq_notsc_notcyrix / not a Cyrix: use the Pentium timing
2228
2229 / freq_notsc_perf_loop takes 86 clock cycles on Cyrix 6x86 cores
2230 movl $86, %eax
2231 jmp freq_notsc_calculate_tsc
2232
2233 freq_notsc_notcyrix:
2234 / freq_notsc_perf_loop takes 237 clock cycles on Intel Pentiums
2235 movl $237, %eax
2236
2237 freq_notsc_calculate_tsc:
2238 mull %edi
2239
2240 jmp freq_notsc_end
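/
/ In C terms the calculation above is roughly (a sketch; 86 and 237 are the
/ per-iteration costs quoted in the comments above):
/
/	cycles_per_loop = (x86_vendor == X86_VENDOR_Cyrix) ? 86 : 237;
/	return ((uint64_t)cycles_per_loop * loop_count);
/
/ The caller turns this cycle count and *pit_counter into a clock rate the
/ same way as described for freq_tsc().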
2241
2242 freq_notsc_too_fast:
2243 / return 0 as a 64 bit quantity
2244 xorl %eax, %eax
2245 xorl %edx, %edx
2246
2247 freq_notsc_end:
2248 popl %ebx
2249 popl %esi
2250 popl %edi
2251 popl %ebp
2252
2253 ret
2254 SET_SIZE(freq_notsc)
2255
2256 #endif /* __lint */
2257 #endif /* !__amd64 */
2258
2259 #if !defined(__lint)
2260 .data
2261 #if !defined(__amd64)
2262 .align 4
2263 cpu_vendor:
2264 .long 0, 0, 0 /* Vendor ID string returned */
2265
2266 .globl CyrixInstead
2267
2268 .globl x86_featureset
2269 .globl x86_type
2270 .globl x86_vendor
2271 #endif
2272
2273 #endif /* __lint */