/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
 */
/*
 * Copyright (c) 2010, Intel Corporation.
 * All rights reserved.
 *
 * Copyright 2019 Joyent, Inc.
 */

#include <sys/asm_linkage.h>
#include <sys/asm_misc.h>
#include <sys/regset.h>
#include <sys/privregs.h>
#include <sys/x86_archext.h>

#if !defined(__lint)
#include <sys/segments.h>
#include "assym.h"
#endif

/*
 * Our assumptions:
 * - We are running in real mode.
 * - Interrupts are disabled.
 * - Selectors are equal (cs == ds == ss) for all real mode code.
 * - The GDT, IDT, ktss and page directory have been built for us.
 *
 * Our actions:
 * Start CPU:
 * - We start using our GDT by loading correct values in the
 *   selector registers (cs=KCS_SEL, ds=es=ss=KDS_SEL, fs=KFS_SEL,
 *   gs=KGS_SEL).
 * - We change over to using our IDT.
 * - We load the default LDT into the hardware LDT register.
 * - We load the default TSS into the hardware task register.
 * - We call mp_startup(void) indirectly through the thread's T_PC.
 * Stop CPU:
 * - Put CPU into halted state with interrupts disabled.
 */

#if defined(__lint)

void
real_mode_start_cpu(void)
{}

void
real_mode_stop_cpu_stage1(void)
{}

void
real_mode_stop_cpu_stage2(void)
{}

#else	/* __lint */

#if defined(__amd64)

	ENTRY_NP(real_mode_start_cpu)

	/*
	 * NOTE: The GNU assembler automatically does the right thing to
	 * generate data size operand prefixes based on the code size
	 * generation mode (e.g. .code16, .code32, .code64) and as such
	 * prefixes need not be used on instructions EXCEPT in the case
	 * of address prefixes for code for which the reference is not
	 * automatically of the default operand size.
	 */
	.code16
	cli
	movw		%cs, %ax
	movw		%ax, %ds	/* load cs into ds */
	movw		%ax, %ss	/* and into ss */

	/*
	 * Helps in debugging by giving us the fault address.
	 *
	 * Remember to patch a hlt (0xf4) at cmntrap to get a good stack.
	 */
	movl		$0xffc, %esp
	movl		%cr0, %eax

	/*
	 * Enable protected-mode, write protect, and alignment mask
	 */
	orl		$(CR0_PE|CR0_WP|CR0_AM), %eax
	movl		%eax, %cr0

	/*
	 * Do a jmp immediately after writing to cr0 when enabling protected
	 * mode to clear the real mode prefetch queue (per Intel's docs)
	 */
	jmp		pestart

pestart:
	/*
	 * 16-bit protected mode is now active, so prepare to turn on long
	 * mode.
	 *
	 * Note that we currently assume that if we're attempting to run a
	 * kernel compiled with (__amd64) #defined, the target CPU has long
	 * mode support.
	 */
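
	/*
	 * For reference, the canonical long mode entry sequence (per the
	 * AMD64 Architecture Programmer's Manual, Volume 2) is exactly
	 * the one carried out below:
	 *
	 *  1. Enable PAE (CR4.PAE = 1); long mode requires PAE paging.
	 *  2. Load %cr3 with the physical base of the long mode page tables.
	 *  3. Set EFER.LME = 1 to enable (but not yet activate) long mode.
	 *  4. Enable paging (CR0.PG = 1); the CPU then sets EFER.LMA = 1
	 *     and long mode is active, initially in compatibility mode.
	 */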

#if 0
	/*
	 * If there's a chance this might not be true, the following test
	 * should be done, with the no_long_mode branch then doing something
	 * appropriate:
	 */

	movl		$0x80000000, %eax	/* get largest extended CPUID */
	cpuid
	cmpl		$0x80000000, %eax	/* check if > 0x80000000 */
	jbe		no_long_mode		/* nope, no long mode */
	movl		$0x80000001, %eax
	cpuid					/* get extended feature flags */
	btl		$29, %edx		/* check for long mode */
	jnc		no_long_mode		/* long mode not supported */
#endif

	/*
	 * Add any initial cr4 bits
	 */
	movl		%cr4, %eax
	addr32 orl	CR4OFF, %eax

	/*
	 * Enable PAE mode (CR4.PAE)
	 */
	orl		$CR4_PAE, %eax
	movl		%eax, %cr4

	/*
	 * Point cr3 to the 64-bit long mode page tables.
	 *
	 * Note that these MUST exist in 32-bit space, as we don't have
	 * a way to load %cr3 with a 64-bit base address for the page tables
	 * until the CPU is actually executing in 64-bit long mode.
	 */
	addr32 movl	CR3OFF, %eax
	movl		%eax, %cr3

	/*
	 * Set long mode enable in EFER (EFER.LME = 1)
	 */
	movl		$MSR_AMD_EFER, %ecx
	rdmsr
	orl		$AMD_EFER_LME, %eax
	wrmsr

	/*
	 * Finally, turn on paging (CR0.PG = 1) to activate long mode.
	 */
	movl		%cr0, %eax
	orl		$CR0_PG, %eax
	movl		%eax, %cr0

	/*
	 * The instruction after enabling paging in CR0 MUST be a branch.
	 */
	jmp		long_mode_active

long_mode_active:
	/*
	 * Long mode is now active, but since we're still running with the
	 * original 16-bit CS we're actually in 16-bit compatibility mode.
	 *
	 * We have to load an intermediate GDT and IDT here that we know are
	 * in 32-bit space before we can use the kernel's GDT and IDT, which
	 * may be in the 64-bit address space, and since we're in
	 * compatibility mode, we only have access to 16 and 32-bit
	 * instructions at the moment.
	 */
	addr32 lgdtl	TEMPGDTOFF	/* load temporary GDT */
	addr32 lidtl	TEMPIDTOFF	/* load temporary IDT */

	/*
	 * Do a far transfer to 64-bit mode.  Set the CS selector to a 64-bit
	 * long mode selector (CS.L=1) in the temporary 32-bit GDT and jump
	 * to the real mode platter address of long_mode_64, as until the
	 * 64-bit CS is in place we don't have access to 64-bit instructions
	 * and thus can't reference a 64-bit %rip.
	 */
	pushl		$TEMP_CS64_SEL
	addr32 pushl	LM64OFF
	lretl

	.globl	long_mode_64
long_mode_64:
	.code64
	/*
	 * We are now running in long mode with a 64-bit CS (EFER.LMA=1,
	 * CS.L=1) so we now have access to 64-bit instructions.
	 *
	 * First, set the 64-bit GDT base.
	 */
	.globl	rm_platter_pa
	movl		rm_platter_pa, %eax
	lgdtq		GDTROFF(%rax)		/* load 64-bit GDT */

	/*
	 * Save the CPU number in %r11; get the value here since it's saved
	 * in the real mode platter.
	 */
	movl		CPUNOFF(%rax), %r11d
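
	/*
	 * NOTE: %r11 is used to park the CPU number because %rax is about
	 * to be repurposed and the value must survive the far return and
	 * segment reloads below until the per-cpu GSBASE setup in
	 * kernel_cs_code consumes it (wrmsr itself only touches %eax,
	 * %ecx and %edx).
	 */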

	/*
	 * Add rm_platter_pa to %rsp to point it to the same location as seen
	 * from 64-bit mode.
	 */
	addq		%rax, %rsp

	/*
	 * Now do an lretq to load CS with the appropriate selector for the
	 * kernel's 64-bit GDT and to start executing 64-bit setup code at the
	 * virtual address where boot originally loaded this code rather than
	 * the copy in the real mode platter's rm_code array as we've been
	 * doing so far.
	 */
	pushq		$KCS_SEL
	pushq		$kernel_cs_code
	lretq
	.globl real_mode_start_cpu_end
real_mode_start_cpu_end:
	nop

kernel_cs_code:
	/*
	 * Complete the balance of the setup we need to before executing
	 * 64-bit kernel code (namely init rsp, TSS, IDT, FS and GS).
	 */
	.globl	rm_platter_va
	movq		rm_platter_va, %rax
	lidtq		IDTROFF(%rax)

	movw		$KDS_SEL, %ax
	movw		%ax, %ds
	movw		%ax, %es
	movw		%ax, %ss

	movw		$KTSS_SEL, %ax		/* setup kernel TSS */
	ltr		%ax

	xorw		%ax, %ax		/* clear LDTR */
	lldt		%ax

	/*
	 * Set GS to the address of the per-cpu structure as contained in
	 * cpu[cpu_number].
	 *
	 * Unfortunately there's no way to set the 64-bit gsbase with a mov,
	 * so we have to stuff the low 32 bits in %eax and the high 32 bits
	 * in %edx, then call wrmsr.
	 */
	leaq		cpu(%rip), %rdi
	movl		(%rdi, %r11, 8), %eax
	movl		4(%rdi, %r11, 8), %edx
	movl		$MSR_AMD_GSBASE, %ecx
	wrmsr

	/*
	 * Init FS and KernelGSBase.
	 *
	 * Based on code in mlsetup(), set them both to 8G (which shouldn't
	 * be valid until some 64-bit processes run); this will then cause an
	 * exception in any code that tries to index off them before they are
	 * properly set up.
	 */
	xorl		%eax, %eax	/* low 32 bits = 0 */
	movl		$2, %edx	/* high 32 bits = 2; 2 << 32 = 8G */
	movl		$MSR_AMD_FSBASE, %ecx
	wrmsr

	movl		$MSR_AMD_KGSBASE, %ecx
	wrmsr

	/*
	 * Init %rsp to the exception stack set in tss_ist1 and create a
	 * legal AMD64 ABI stack frame
	 */
	movq		%gs:CPU_TSS, %rax
	movq		TSS_IST1(%rax), %rsp
	pushq		$0	/* null return address */
	pushq		$0	/* null frame pointer terminates stack trace */
	movq		%rsp, %rbp	/* stack aligned on 16-byte boundary */

	movq		%cr0, %rax
	andq		$~(CR0_TS|CR0_EM), %rax	/* clear emulate math chip bit */
	orq		$(CR0_MP|CR0_NE), %rax
	movq		%rax, %cr0		/* set machine status word */

	/*
	 * Before going any further, enable usage of page table NX bit if
	 * that's how our page tables are set up.
	 */
	bt		$X86FSET_NX, x86_featureset(%rip)
	jnc		1f
	movl		$MSR_AMD_EFER, %ecx
	rdmsr
	orl		$AMD_EFER_NXE, %eax
	wrmsr
1:

	/*
	 * Complete the rest of the setup and call mp_startup().
	 */
	movq		%gs:CPU_THREAD, %rax	/* get thread ptr */
	movq		T_PC(%rax), %rax
	INDIRECT_CALL_REG(rax)		/* call mp_startup_boot */
	/* not reached */
	int		$20	/* whoops, returned somehow! */

	SET_SIZE(real_mode_start_cpu)
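
#if 0
	/*
	 * Roughly equivalent C for the MSR setup in kernel_cs_code above --
	 * a sketch only, assuming a hypothetical wrmsr64(msr, value) helper
	 * and with cpu_number being the value recovered from the real mode
	 * platter.  FSBASE and KernelGSBase both get the 8G sentinel
	 * (0x200000000) described above:
	 *
	 *	wrmsr64(MSR_AMD_GSBASE, (uint64_t)cpu[cpu_number]);
	 *	wrmsr64(MSR_AMD_FSBASE, 0x200000000ULL);
	 *	wrmsr64(MSR_AMD_KGSBASE, 0x200000000ULL);
	 */
#endif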

#elif defined(__i386)

	ENTRY_NP(real_mode_start_cpu)

#if !defined(__GNUC_AS__)

	cli
	D16 movw	%cs, %eax
	movw		%eax, %ds	/* load cs into ds */
	movw		%eax, %ss	/* and into ss */

	/*
	 * Helps in debugging by giving us the fault address.
	 *
	 * Remember to patch a hlt (0xf4) at cmntrap to get a good stack.
	 */
	D16 movl	$0xffc, %esp

	D16 A16 lgdt	%cs:GDTROFF
	D16 A16 lidt	%cs:IDTROFF
	D16 A16 movl	%cs:CR4OFF, %eax	/* set up CR4, if desired */
	D16 andl	%eax, %eax
	D16 A16 je	no_cr4

	D16 movl	%eax, %ecx
	D16 movl	%cr4, %eax
	D16 orl		%ecx, %eax
	D16 movl	%eax, %cr4
no_cr4:
	D16 A16 movl	%cs:CR3OFF, %eax
	A16 movl	%eax, %cr3
	movl		%cr0, %eax

	/*
	 * Enable protected-mode, paging, write protect, and alignment mask
	 */
	D16 orl		$[CR0_PG|CR0_PE|CR0_WP|CR0_AM], %eax
	movl		%eax, %cr0
	jmp		pestart

pestart:
	D16 pushl	$KCS_SEL
	D16 pushl	$kernel_cs_code
	D16 lret
	.globl real_mode_start_cpu_end
real_mode_start_cpu_end:
	nop

	.globl	kernel_cs_code
kernel_cs_code:
	/*
	 * At this point we are running with the kernel's %cs and a proper
	 * %eip.
	 *
	 * We will be executing not from the copy in the real mode platter,
	 * but from the original code where boot loaded us.
	 *
	 * By this time GDT and IDT are loaded, as is cr3.
	 */
	movw		$KFS_SEL, %eax
	movw		%eax, %fs
	movw		$KGS_SEL, %eax
	movw		%eax, %gs
	movw		$KDS_SEL, %eax
	movw		%eax, %ds
	movw		%eax, %es
	movl		%gs:CPU_TSS, %esi
	movw		%eax, %ss
	movl		TSS_ESP0(%esi), %esp
	movw		$KTSS_SEL, %ax
	ltr		%ax
	xorw		%ax, %ax		/* clear LDTR */
	lldt		%ax
	movl		%cr0, %edx
	andl		$-1![CR0_TS|CR0_EM], %edx /* clear emulate math chip bit */
	orl		$[CR0_MP|CR0_NE], %edx
	movl		%edx, %cr0		/* set machine status word */

	/*
	 * Before going any further, enable usage of page table NX bit if
	 * that's how our page tables are set up.
	 */
	bt		$X86FSET_NX, x86_featureset
	jnc		1f
	movl		%cr4, %ecx
	andl		$CR4_PAE, %ecx
	jz		1f
	movl		$MSR_AMD_EFER, %ecx
	rdmsr
	orl		$AMD_EFER_NXE, %eax
	wrmsr
1:
	movl		%gs:CPU_THREAD, %eax	/* get thread ptr */
	call		*T_PC(%eax)		/* call mp_startup */
	/* not reached */
	int		$20	/* whoops, returned somehow! */

#else

	cli
	mov		%cs, %ax
	mov		%eax, %ds	/* load cs into ds */
	mov		%eax, %ss	/* and into ss */

	/*
	 * Helps in debugging by giving us the fault address.
	 *
	 * Remember to patch a hlt (0xf4) at cmntrap to get a good stack.
	 */
	D16 mov		$0xffc, %esp

	D16 A16 lgdtl	%cs:GDTROFF
	D16 A16 lidtl	%cs:IDTROFF
	D16 A16 mov	%cs:CR4OFF, %eax	/* set up CR4, if desired */
	D16 and		%eax, %eax
	D16 A16 je	no_cr4

	D16 mov		%eax, %ecx
	D16 mov		%cr4, %eax
	D16 or		%ecx, %eax
	D16 mov		%eax, %cr4
no_cr4:
	D16 A16 mov	%cs:CR3OFF, %eax
	A16 mov		%eax, %cr3
	mov		%cr0, %eax

	/*
	 * Enable protected-mode, paging, write protect, and alignment mask
	 */
	D16 or		$(CR0_PG|CR0_PE|CR0_WP|CR0_AM), %eax
	mov		%eax, %cr0
	jmp		pestart

pestart:
	D16 pushl	$KCS_SEL
	D16 pushl	$kernel_cs_code
	D16 lret
	.globl real_mode_start_cpu_end
real_mode_start_cpu_end:
	nop
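
	/*
	 * NOTE: everything from real_mode_start_cpu up to the
	 * real_mode_start_cpu_end label above is what the CPU startup
	 * machinery copies into the real mode platter's rm_code array for
	 * the target CPU to execute; kernel_cs_code below is reached via
	 * the far return and runs from the kernel's normal text mapping.
	 */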
	.globl	kernel_cs_code
kernel_cs_code:
	/*
	 * At this point we are running with the kernel's %cs and a proper
	 * %eip.
	 *
	 * We will be executing not from the copy in the real mode platter,
	 * but from the original code where boot loaded us.
	 *
	 * By this time GDT and IDT are loaded, as is cr3.
	 */
	mov		$KFS_SEL, %ax
	mov		%eax, %fs
	mov		$KGS_SEL, %ax
	mov		%eax, %gs
	mov		$KDS_SEL, %ax
	mov		%eax, %ds
	mov		%eax, %es
	mov		%gs:CPU_TSS, %esi
	mov		%eax, %ss
	mov		TSS_ESP0(%esi), %esp
	mov		$(KTSS_SEL), %ax
	ltr		%ax
	xorw		%ax, %ax		/* clear LDTR */
	lldt		%ax
	mov		%cr0, %edx
	and		$~(CR0_TS|CR0_EM), %edx	/* clear emulate math chip bit */
	or		$(CR0_MP|CR0_NE), %edx
	mov		%edx, %cr0		/* set machine status word */

	/*
	 * Before going any further, enable usage of page table NX bit if
	 * that's how our page tables are set up.  (PCIDE is enabled later
	 * on.)
	 */
	bt		$X86FSET_NX, x86_featureset
	jnc		1f
	movl		%cr4, %ecx
	andl		$CR4_PAE, %ecx
	jz		1f
	movl		$MSR_AMD_EFER, %ecx
	rdmsr
	orl		$AMD_EFER_NXE, %eax
	wrmsr
1:
	mov		%gs:CPU_THREAD, %eax	/* get thread ptr */
	call		*T_PC(%eax)		/* call mp_startup */
	/* not reached */
	int		$20	/* whoops, returned somehow! */
#endif

	SET_SIZE(real_mode_start_cpu)

#endif	/* __amd64 */

#if defined(__amd64)

	ENTRY_NP(real_mode_stop_cpu_stage1)

#if !defined(__GNUC_AS__)

	/*
	 * For the vulcan assembler we need to do a .code32 and mentally
	 * invert the meaning of the addr16 and data16 prefixes to get
	 * 32-bit access when generating code to be executed in 16-bit mode
	 * (sigh...)
	 */
	.code32
	cli
	movw		%cs, %ax
	movw		%ax, %ds	/* load cs into ds */
	movw		%ax, %ss	/* and into ss */

	/*
	 * Jump to the stage 2 code in the rm_platter_va->rm_cpu_halt_code
	 */
	movw		$CPUHALTCODEOFF, %ax
	.byte		0xff, 0xe0	/* jmp *%ax */

#else	/* __GNUC_AS__ */

	/*
	 * NOTE: The GNU assembler automatically does the right thing to
	 * generate data size operand prefixes based on the code size
	 * generation mode (e.g. .code16, .code32, .code64) and as such
	 * prefixes need not be used on instructions EXCEPT in the case
	 * of address prefixes for code for which the reference is not
	 * automatically of the default operand size.
	 */
	.code16
	cli
	movw		%cs, %ax
	movw		%ax, %ds	/* load cs into ds */
	movw		%ax, %ss	/* and into ss */

	/*
	 * Jump to the stage 2 code in the rm_platter_va->rm_cpu_halt_code
	 */
	movw		$CPUHALTCODEOFF, %ax
	jmp		*%ax

#endif	/* !__GNUC_AS__ */

	.globl real_mode_stop_cpu_stage1_end
real_mode_stop_cpu_stage1_end:
	nop

	SET_SIZE(real_mode_stop_cpu_stage1)
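
	/*
	 * The stage 1/stage 2 split: stage 1 above runs from the real mode
	 * platter copy and does nothing but transfer control to the stage 2
	 * halt code staged at rm_platter_va->rm_cpu_halt_code, which parks
	 * the CPU (see real_mode_stop_cpu_stage2 below).
	 */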

#elif defined(__i386)

	ENTRY_NP(real_mode_stop_cpu_stage1)

#if !defined(__GNUC_AS__)

	cli
	D16 movw	%cs, %eax
	movw		%eax, %ds	/* load cs into ds */
	movw		%eax, %ss	/* and into ss */

	/*
	 * Jump to the stage 2 code in the rm_platter_va->rm_cpu_halt_code
	 */
	movw		$CPUHALTCODEOFF, %ax
	.byte		0xff, 0xe0	/* jmp *%ax */

#else	/* __GNUC_AS__ */

	cli
	mov		%cs, %ax
	mov		%eax, %ds	/* load cs into ds */
	mov		%eax, %ss	/* and into ss */

	/*
	 * Jump to the stage 2 code in the rm_platter_va->rm_cpu_halt_code
	 */
	movw		$CPUHALTCODEOFF, %ax
	/*
	 * The following indirect jump is executed as part of stopping a CPU.
	 * As such nothing else should be running on it or executing in the
	 * system such that it is a viable Spectre v2 branch target injection
	 * location.  At least, in theory.
	 */
	jmp		*%ax

#endif	/* !__GNUC_AS__ */

	.globl real_mode_stop_cpu_stage1_end
real_mode_stop_cpu_stage1_end:
	nop

	SET_SIZE(real_mode_stop_cpu_stage1)

#endif	/* __amd64 */

	ENTRY_NP(real_mode_stop_cpu_stage2)

	movw		$0xdead, %ax
	movw		%ax, CPUHALTEDOFF	/* flag this CPU as halted */

real_mode_stop_cpu_loop:
	/*
	 * Put CPU into halted state.
	 * Only INIT, SMI, NMI could break the loop.
	 */
	hlt
	jmp		real_mode_stop_cpu_loop

	.globl real_mode_stop_cpu_stage2_end
real_mode_stop_cpu_stage2_end:
	nop

	SET_SIZE(real_mode_stop_cpu_stage2)

#endif	/* __lint */