/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
 */
/*
 * Copyright (c) 2010, Intel Corporation.
 * All rights reserved.
 *
 * Copyright 2018 Joyent, Inc.
 */

#include <sys/asm_linkage.h>
#include <sys/asm_misc.h>
#include <sys/regset.h>
#include <sys/privregs.h>
#include <sys/x86_archext.h>

#if !defined(__lint)
#include <sys/segments.h>
#include "assym.h"
#endif

/*
 * Our assumptions:
 * - We are running in real mode.
 * - Interrupts are disabled.
 * - Selectors are equal (cs == ds == ss) for all real mode code
 * - The GDT, IDT, ktss and page directory have been built for us
 *
 * Our actions:
 * Start CPU:
 * - We start using our GDT by loading correct values in the
 *   selector registers (cs=KCS_SEL, ds=es=ss=KDS_SEL, fs=KFS_SEL,
 *   gs=KGS_SEL).
 * - We change over to using our IDT.
 * - We load the default LDT into the hardware LDT register.
 * - We load the default TSS into the hardware task register.
 * - call mp_startup(void) indirectly through the T_PC
 * Stop CPU:
 * - Put CPU into halted state with interrupts disabled
 *
 */

#if defined(__lint)

void
real_mode_start_cpu(void)
{}

void
real_mode_stop_cpu_stage1(void)
{}

void
real_mode_stop_cpu_stage2(void)
{}

#else	/* __lint */

#if defined(__amd64)

	ENTRY_NP(real_mode_start_cpu)

	/*
	 * NOTE: The GNU assembler automatically does the right thing to
	 * generate data size operand prefixes based on the code size
	 * generation mode (e.g. .code16, .code32, .code64) and as such
	 * prefixes need not be used on instructions EXCEPT in the case
	 * of address prefixes for code for which the reference is not
	 * automatically of the default operand size.
	 */
	.code16
	cli
	movw	%cs, %ax
	movw	%ax, %ds	/* load cs into ds */
	movw	%ax, %ss	/* and into ss */

	/*
	 * Helps in debugging by giving us the fault address.
	 *
	 * Remember to patch a hlt (0xf4) at cmntrap to get a good stack.
	 */
	movl	$0xffc, %esp
	movl	%cr0, %eax

	/*
	 * Enable protected-mode, write protect, and alignment mask
	 */
	orl	$(CR0_PE|CR0_WP|CR0_AM), %eax
	movl	%eax, %cr0

	/*
	 * Do a jmp immediately after writing to cr0 when enabling protected
	 * mode to clear the real mode prefetch queue (per Intel's docs)
	 */
	jmp	pestart

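	/*
	 * Note: the various *OFF symbols used below (CR4OFF, CR3OFF,
	 * TEMPGDTOFF, TEMPIDTOFF, LM64OFF, GDTROFF, IDTROFF, CPUNOFF)
	 * are presumably byte offsets of fields within the real-mode
	 * platter (struct rm_platter) that the kernel sets up in low
	 * memory, alongside the copy of this code, before starting the
	 * CPU.
	 */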
pestart:
	/*
	 * 16-bit protected mode is now active, so prepare to turn on long
	 * mode.
	 *
	 * Note that we currently assume that if we're attempting to run a
	 * kernel compiled with (__amd64) #defined, the target CPU has long
	 * mode support.
	 */

#if 0
	/*
	 * If there's a chance this might not be true, the following test
	 * should be done, with the no_long_mode branch then doing something
	 * appropriate:
	 */

	movl	$0x80000000, %eax	/* get largest extended CPUID */
	cpuid
	cmpl	$0x80000000, %eax	/* check if > 0x80000000 */
	jbe	no_long_mode		/* nope, no long mode */
	movl	$0x80000001, %eax
	cpuid				/* get extended feature flags */
	btl	$29, %edx		/* check for long mode */
	jnc	no_long_mode		/* long mode not supported */
#endif

	/*
	 * Add any initial cr4 bits
	 */
	movl	%cr4, %eax
	addr32 orl	CR4OFF, %eax

	/*
	 * Enable PAE mode (CR4.PAE)
	 */
	orl	$CR4_PAE, %eax
	movl	%eax, %cr4

	/*
	 * Point cr3 to the 64-bit long mode page tables.
	 *
	 * Note that these MUST exist in 32-bit space, as we don't have
	 * a way to load %cr3 with a 64-bit base address for the page tables
	 * until the CPU is actually executing in 64-bit long mode.
	 */
	addr32 movl	CR3OFF, %eax
	movl	%eax, %cr3

	/*
	 * Set long mode enable in EFER (EFER.LME = 1)
	 */
	movl	$MSR_AMD_EFER, %ecx
	rdmsr
	orl	$AMD_EFER_LME, %eax
	wrmsr

	/*
	 * Finally, turn on paging (CR0.PG = 1) to activate long mode.
	 */
	movl	%cr0, %eax
	orl	$CR0_PG, %eax
	movl	%eax, %cr0

	/*
	 * The instruction after enabling paging in CR0 MUST be a branch.
	 */
	jmp	long_mode_active

long_mode_active:
	/*
	 * Long mode is now active but since we're still running with the
	 * original 16-bit CS we're actually in 16-bit compatibility mode.
	 *
	 * We have to load an intermediate GDT and IDT here that we know are
	 * in 32-bit space before we can use the kernel's GDT and IDT, which
	 * may be in the 64-bit address space, and since we're in
	 * compatibility mode, we only have access to 16- and 32-bit
	 * instructions at the moment.
	 */
	addr32 lgdtl	TEMPGDTOFF	/* load temporary GDT */
	addr32 lidtl	TEMPIDTOFF	/* load temporary IDT */

	/*
	 * Do a far transfer to 64-bit mode.  Set the CS selector to a 64-bit
	 * long mode selector (CS.L=1) in the temporary 32-bit GDT and jump
	 * to the real mode platter address of long_mode_64, as until the
	 * 64-bit CS is in place we don't have access to 64-bit instructions
	 * and thus can't reference a 64-bit %rip.
	 */
	pushl	$TEMP_CS64_SEL
	addr32 pushl	LM64OFF
	lretl

	.globl	long_mode_64
long_mode_64:
	.code64
	/*
	 * We are now running in long mode with a 64-bit CS (EFER.LMA=1,
	 * CS.L=1) so we now have access to 64-bit instructions.
	 *
	 * First, set the 64-bit GDT base.
	 */
	.globl	rm_platter_pa
	movl	rm_platter_pa, %eax
	lgdtq	GDTROFF(%rax)		/* load 64-bit GDT */

	/*
	 * Save the CPU number in %r11; get the value here since it's saved
	 * in the real mode platter.
	 */
	movl	CPUNOFF(%rax), %r11d

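	/*
	 * Note: rm_platter_pa is presumably the physical address of the
	 * real-mode platter page; it must reside below 1MB so the CPU can
	 * begin executing it in real mode, and it is expected to be
	 * identity-mapped in these page tables so that execution can
	 * continue across the switch into paged long mode.
	 */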
	/*
	 * Add rm_platter_pa to %rsp to point it to the same location as seen
	 * from 64-bit mode.
	 */
	addq	%rax, %rsp

	/*
	 * Now do an lretq to load CS with the appropriate selector for the
	 * kernel's 64-bit GDT and to start executing 64-bit setup code at
	 * the virtual address where boot originally loaded this code rather
	 * than the copy in the real mode platter's rm_code array as we've
	 * been doing so far.
	 */
	pushq	$KCS_SEL
	pushq	$kernel_cs_code
	lretq
	.globl real_mode_start_cpu_end
real_mode_start_cpu_end:
	nop

kernel_cs_code:
	/*
	 * Complete the balance of the setup we need before executing
	 * 64-bit kernel code (namely init rsp, TSS, IDT, LDT, FS and GS).
	 */
	.globl	rm_platter_va
	movq	rm_platter_va, %rax
	lidtq	IDTROFF(%rax)

	movw	$KDS_SEL, %ax
	movw	%ax, %ds
	movw	%ax, %es
	movw	%ax, %ss

	movw	$KTSS_SEL, %ax		/* setup kernel TSS */
	ltr	%ax

	xorw	%ax, %ax		/* clear LDTR */
	lldt	%ax

	/*
	 * Set GS to the address of the per-cpu structure as contained in
	 * cpu[cpu_number].
	 *
	 * Unfortunately there's no way to set the 64-bit gsbase with a mov,
	 * so we have to stuff the low 32 bits in %eax and the high 32 bits
	 * in %edx, then call wrmsr.
	 */
	leaq	cpu(%rip), %rdi
	movl	(%rdi, %r11, 8), %eax
	movl	4(%rdi, %r11, 8), %edx
	movl	$MSR_AMD_GSBASE, %ecx
	wrmsr

	/*
	 * Init FS and KernelGSBase.
	 *
	 * Based on code in mlsetup(), set them both to 8G (which shouldn't
	 * be valid until some 64-bit processes run); this will then cause an
	 * exception in any code that tries to index off them before they are
	 * properly set up.
	 */
	xorl	%eax, %eax		/* low 32 bits = 0 */
	movl	$2, %edx		/* high 32 bits = 2 */
	movl	$MSR_AMD_FSBASE, %ecx
	wrmsr

	movl	$MSR_AMD_KGSBASE, %ecx
	wrmsr

	/*
	 * Init %rsp to the exception stack set in tss_ist1 and create a
	 * legal AMD64 ABI stack frame
	 */
	movq	%gs:CPU_TSS, %rax
	movq	TSS_IST1(%rax), %rsp
	pushq	$0		/* null return address */
	pushq	$0		/* null frame pointer terminates stack trace */
	movq	%rsp, %rbp	/* stack aligned on 16-byte boundary */

	movq	%cr0, %rax
	andq	$~(CR0_TS|CR0_EM), %rax	/* clear emulate math chip bit */
	orq	$(CR0_MP|CR0_NE), %rax
	movq	%rax, %cr0		/* set machine status word */

	/*
	 * Before going any further, enable usage of page table NX bit if
	 * that's how our page tables are set up.
	 */
	bt	$X86FSET_NX, x86_featureset(%rip)
	jnc	1f
	movl	$MSR_AMD_EFER, %ecx
	rdmsr
	orl	$AMD_EFER_NXE, %eax
	wrmsr
1:

	/*
	 * Complete the rest of the setup and call mp_startup().
	 */
	movq	%gs:CPU_THREAD, %rax	/* get thread ptr */
	call	*T_PC(%rax)		/* call mp_startup_boot */
	/* not reached */
	int	$20			/* whoops, returned somehow! */

	SET_SIZE(real_mode_start_cpu)

#elif defined(__i386)

	ENTRY_NP(real_mode_start_cpu)

#if !defined(__GNUC_AS__)

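	/*
	 * Note: the D16 and A16 macros used below most likely just emit
	 * the raw 0x66 (operand-size) and 0x67 (address-size) override
	 * prefix bytes.  Since this code executes in 16-bit real mode,
	 * those prefixes give the prefixed instruction 32-bit operand
	 * and address size, which the non-GNU assembler won't generate
	 * on its own.
	 */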
	cli
	D16 movw	%cs, %eax
	movw	%eax, %ds	/* load cs into ds */
	movw	%eax, %ss	/* and into ss */

	/*
	 * Helps in debugging by giving us the fault address.
	 *
	 * Remember to patch a hlt (0xf4) at cmntrap to get a good stack.
	 */
	D16 movl	$0xffc, %esp

	D16 A16 lgdt	%cs:GDTROFF
	D16 A16 lidt	%cs:IDTROFF
	D16 A16 movl	%cs:CR4OFF, %eax	/* set up CR4, if desired */
	D16 andl	%eax, %eax
	D16 A16 je	no_cr4

	D16 movl	%eax, %ecx
	D16 movl	%cr4, %eax
	D16 orl	%ecx, %eax
	D16 movl	%eax, %cr4
no_cr4:
	D16 A16 movl	%cs:CR3OFF, %eax
	A16 movl	%eax, %cr3
	movl	%cr0, %eax

	/*
	 * Enable protected-mode, paging, write protect, and alignment mask
	 */
	D16 orl	$[CR0_PG|CR0_PE|CR0_WP|CR0_AM], %eax
	movl	%eax, %cr0
	jmp	pestart

pestart:
	D16 pushl	$KCS_SEL
	D16 pushl	$kernel_cs_code
	D16 lret
	.globl real_mode_start_cpu_end
real_mode_start_cpu_end:
	nop

	.globl	kernel_cs_code
kernel_cs_code:
	/*
	 * At this point we are running with the kernel's %cs and a proper
	 * %eip.
	 *
	 * We will be executing not from the copy in the real mode platter,
	 * but from the original code where boot loaded us.
	 *
	 * By this time GDT and IDT are loaded, as is cr3.
	 */
	movw	$KFS_SEL,%eax
	movw	%eax,%fs
	movw	$KGS_SEL,%eax
	movw	%eax,%gs
	movw	$KDS_SEL,%eax
	movw	%eax,%ds
	movw	%eax,%es
	movl	%gs:CPU_TSS,%esi
	movw	%eax,%ss
	movl	TSS_ESP0(%esi),%esp
	movw	$KTSS_SEL,%ax
	ltr	%ax
	xorw	%ax, %ax		/* clear LDTR */
	lldt	%ax
	movl	%cr0,%edx
	andl	$-1![CR0_TS|CR0_EM],%edx /* clear emulate math chip bit */
	orl	$[CR0_MP|CR0_NE],%edx
	movl	%edx,%cr0		/* set machine status word */

	/*
	 * Before going any further, enable usage of page table NX bit if
	 * that's how our page tables are set up.
	 */
	bt	$X86FSET_NX, x86_featureset
	jnc	1f
	movl	%cr4, %ecx
	andl	$CR4_PAE, %ecx
	jz	1f
	movl	$MSR_AMD_EFER, %ecx
	rdmsr
	orl	$AMD_EFER_NXE, %eax
	wrmsr
1:
	movl	%gs:CPU_THREAD, %eax	/* get thread ptr */
	call	*T_PC(%eax)		/* call mp_startup */
	/* not reached */
	int	$20			/* whoops, returned somehow! */

#else

	cli
	mov	%cs, %ax
	mov	%eax, %ds	/* load cs into ds */
	mov	%eax, %ss	/* and into ss */

	/*
	 * Helps in debugging by giving us the fault address.
	 *
	 * Remember to patch a hlt (0xf4) at cmntrap to get a good stack.
	 */
	D16 mov	$0xffc, %esp

	D16 A16 lgdtl	%cs:GDTROFF
	D16 A16 lidtl	%cs:IDTROFF
	D16 A16 mov	%cs:CR4OFF, %eax	/* set up CR4, if desired */
	D16 and	%eax, %eax
	D16 A16 je	no_cr4

	D16 mov	%eax, %ecx
	D16 mov	%cr4, %eax
	D16 or	%ecx, %eax
	D16 mov	%eax, %cr4
no_cr4:
	D16 A16 mov	%cs:CR3OFF, %eax
	A16 mov	%eax, %cr3
	mov	%cr0, %eax

	/*
	 * Enable protected-mode, paging, write protect, and alignment mask
	 */
	D16 or	$(CR0_PG|CR0_PE|CR0_WP|CR0_AM), %eax
	mov	%eax, %cr0
	jmp	pestart

pestart:
	D16 pushl	$KCS_SEL
	D16 pushl	$kernel_cs_code
	D16 lret
	.globl real_mode_start_cpu_end
real_mode_start_cpu_end:
	nop
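	/*
	 * Note: real_mode_start_cpu_end presumably exists so that the
	 * code which populates the real-mode platter can compute the
	 * size of this stub when copying it into the platter's rm_code
	 * array.
	 */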
	.globl kernel_cs_code
kernel_cs_code:
	/*
	 * At this point we are running with the kernel's %cs and a proper
	 * %eip.
	 *
	 * We will be executing not from the copy in the real mode platter,
	 * but from the original code where boot loaded us.
	 *
	 * By this time GDT and IDT are loaded, as is cr3.
	 */
	mov	$KFS_SEL, %ax
	mov	%eax, %fs
	mov	$KGS_SEL, %ax
	mov	%eax, %gs
	mov	$KDS_SEL, %ax
	mov	%eax, %ds
	mov	%eax, %es
	mov	%gs:CPU_TSS, %esi
	mov	%eax, %ss
	mov	TSS_ESP0(%esi), %esp
	mov	$(KTSS_SEL), %ax
	ltr	%ax
	xorw	%ax, %ax		/* clear LDTR */
	lldt	%ax
	mov	%cr0, %edx
	and	$~(CR0_TS|CR0_EM), %edx	/* clear emulate math chip bit */
	or	$(CR0_MP|CR0_NE), %edx
	mov	%edx, %cr0		/* set machine status word */

	/*
	 * Before going any further, enable usage of page table NX bit if
	 * that's how our page tables are set up.  (PCIDE is enabled later
	 * on).
	 */
	bt	$X86FSET_NX, x86_featureset
	jnc	1f
	movl	%cr4, %ecx
	andl	$CR4_PAE, %ecx
	jz	1f
	movl	$MSR_AMD_EFER, %ecx
	rdmsr
	orl	$AMD_EFER_NXE, %eax
	wrmsr
1:
	mov	%gs:CPU_THREAD, %eax	/* get thread ptr */
	call	*T_PC(%eax)		/* call mp_startup */
	/* not reached */
	int	$20			/* whoops, returned somehow! */
#endif

	SET_SIZE(real_mode_start_cpu)

#endif	/* __amd64 */

#if defined(__amd64)

	ENTRY_NP(real_mode_stop_cpu_stage1)

#if !defined(__GNUC_AS__)

	/*
	 * For vulcan as we need to do a .code32 and mentally invert the
	 * meaning of the addr16 and data16 prefixes to get 32-bit access
	 * when generating code to be executed in 16-bit mode (sigh...)
	 */
	.code32
	cli
	movw	%cs, %ax
	movw	%ax, %ds	/* load cs into ds */
	movw	%ax, %ss	/* and into ss */

	/*
	 * Jump to the stage 2 code in the rm_platter_va->rm_cpu_halt_code
	 */
	movw	$CPUHALTCODEOFF, %ax
	.byte	0xff, 0xe0	/* jmp *%ax */

#else	/* __GNUC_AS__ */

	/*
	 * NOTE: The GNU assembler automatically does the right thing to
	 * generate data size operand prefixes based on the code size
	 * generation mode (e.g. .code16, .code32, .code64) and as such
	 * prefixes need not be used on instructions EXCEPT in the case
	 * of address prefixes for code for which the reference is not
	 * automatically of the default operand size.
	 */
	.code16
	cli
	movw	%cs, %ax
	movw	%ax, %ds	/* load cs into ds */
	movw	%ax, %ss	/* and into ss */

	/*
	 * Jump to the stage 2 code in the rm_platter_va->rm_cpu_halt_code
	 */
	movw	$CPUHALTCODEOFF, %ax
	jmp	*%ax

#endif	/* !__GNUC_AS__ */

	.globl real_mode_stop_cpu_stage1_end
real_mode_stop_cpu_stage1_end:
	nop

	SET_SIZE(real_mode_stop_cpu_stage1)

#elif defined(__i386)

	ENTRY_NP(real_mode_stop_cpu_stage1)

#if !defined(__GNUC_AS__)

	cli
	D16 movw	%cs, %eax
	movw	%eax, %ds	/* load cs into ds */
	movw	%eax, %ss	/* and into ss */

	/*
	 * Jump to the stage 2 code in the rm_platter_va->rm_cpu_halt_code
	 */
	movw	$CPUHALTCODEOFF, %ax
	.byte	0xff, 0xe0	/* jmp *%ax */

#else	/* __GNUC_AS__ */

	cli
	mov	%cs, %ax
	mov	%eax, %ds	/* load cs into ds */
	mov	%eax, %ss	/* and into ss */

	/*
	 * Jump to the stage 2 code in the rm_platter_va->rm_cpu_halt_code
	 */
	movw	$CPUHALTCODEOFF, %ax
	jmp	*%ax

#endif	/* !__GNUC_AS__ */

	.globl real_mode_stop_cpu_stage1_end
real_mode_stop_cpu_stage1_end:
	nop

	SET_SIZE(real_mode_stop_cpu_stage1)

#endif	/* __amd64 */

	ENTRY_NP(real_mode_stop_cpu_stage2)

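	/*
	 * Note: writing 0xdead at CPUHALTEDOFF presumably marks the
	 * platter's "CPU halted" field so the rest of the system can
	 * tell that this CPU has reached the halt code before it parks
	 * itself in the hlt loop below.
	 */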
	movw	$0xdead, %ax
	movw	%ax, CPUHALTEDOFF

real_mode_stop_cpu_loop:
	/*
	 * Put CPU into halted state.
	 * Only INIT, SMI, or NMI can break the loop.
	 */
	hlt
	jmp	real_mode_stop_cpu_loop

	.globl real_mode_stop_cpu_stage2_end
real_mode_stop_cpu_stage2_end:
	nop

	SET_SIZE(real_mode_stop_cpu_stage2)

#endif	/* __lint */