1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
  23  */
  24 /*
  25  * Copyright (c) 2010, Intel Corporation.
  26  * All rights reserved.
  27  *
  28  * Copyright 2019 Joyent, Inc.
  29  */
  30 
  31 #include <sys/asm_linkage.h>
  32 #include <sys/asm_misc.h>
  33 #include <sys/regset.h>
  34 #include <sys/privregs.h>
  35 #include <sys/x86_archext.h>
  36 
  37 #if !defined(__lint)
  38 #include <sys/segments.h>
  39 #include "assym.h"
  40 #endif
  41 
  42 /*
  43  *      Our assumptions:
  44  *              - We are running in real mode.
  45  *              - Interrupts are disabled.
  46  *              - Selectors are equal (cs == ds == ss) for all real mode code
  47  *              - The GDT, IDT, ktss and page directory have been built for us
  48  *
  49  *      Our actions:
  50  *      Start CPU:
  51  *              - We start using our GDT by loading correct values in the
  52  *                selector registers (cs=KCS_SEL, ds=es=ss=KDS_SEL, fs=KFS_SEL,
  53  *                gs=KGS_SEL).
  54  *              - We change over to using our IDT.
  55  *              - We clear the hardware LDT register (load a null selector).
  56  *              - We load the default TSS into the hardware task register.
  57  *              - call mp_startup(void) indirectly through the T_PC
  58  *      Stop CPU:
  59  *              - Put CPU into halted state with interrupts disabled
  60  *
  61  */
  62 
  63 #if defined(__lint)
  64 
  65 void
  66 real_mode_start_cpu(void)
  67 {}      /* empty lint-only stub; the assembly version is in the #else branch */
  68 
  69 void
  70 real_mode_stop_cpu_stage1(void)
  71 {}      /* empty lint-only stub; the assembly version is in the #else branch */
  72 
  73 void
  74 real_mode_stop_cpu_stage2(void)
  75 {}      /* empty lint-only stub; the assembly version is in the #else branch */
  76 
  77 #else   /* __lint */
  78 
  79 #if defined(__amd64)
  80 
  81         ENTRY_NP(real_mode_start_cpu)
  82 
  83         /*
              * amd64 CPU start trampoline.  Execution begins in 16-bit real mode
              * (.code16).  The code enables protected mode, then PAE, EFER.LME
              * and paging to activate long mode, far-returns into the 64-bit
              * code at long_mode_64, and far-returns once more to
              * kernel_cs_code, which finishes the segment, TSS, MSR, stack and
              * %cr0 setup and calls the routine stored in T_PC of the thread
              * found through %gs:CPU_THREAD.  That call is not expected to
              * return.
              *
  84          * NOTE:  The GNU assembler automatically does the right thing to
  85          *        generate data size operand prefixes based on the code size
  86          *        generation mode (e.g. .code16, .code32, .code64) and as such
  87          *        prefixes need not be used on instructions EXCEPT in the case
  88          *        of address prefixes for code for which the reference is not
  89          *        automatically of the default operand size.
  90          */
  91         .code16
  92         cli
  93         movw            %cs, %ax
  94         movw            %ax, %ds        /* load cs into ds */
  95         movw            %ax, %ss        /* and into ss */
  96 
  97         /*
  98          * Helps in debugging by giving us the fault address.
  99          *
 100          * Remember to patch a hlt (0xf4) at cmntrap to get a good stack.
 101          */
 102         movl            $0xffc, %esp
 103         movl            %cr0, %eax
 104 
 105         /*
 106          * Enable protected-mode, write protect, and alignment mask
 107          */
 108         orl             $(CR0_PE|CR0_WP|CR0_AM), %eax
 109         movl            %eax, %cr0
 110 
 111         /*
 112          * Do a jmp immediately after writing to cr0 when enabling protected
 113          * mode to clear the real mode prefetch queue (per Intel's docs)
 114          */
 115         jmp             pestart
 116 
 117 pestart:
 118         /*
 119          * 16-bit protected mode is now active, so prepare to turn on long
 120          * mode.
 121          *
 122          * Note that we currently assume that if we're attempting to run a
 123          * kernel compiled with (__amd64) #defined, the target CPU has long
 124          * mode support.
 125          */
 126 
 127 #if 0
 128         /*
 129          * If there's a chance this might not be true, the following test should
 130          * be done, with the no_long_mode branch then doing something
 131          * appropriate:
 132          */
 133 
 134         movl            $0x80000000, %eax       /* get largest extended CPUID */
 135         cpuid
 136         cmpl            $0x80000000, %eax       /* check if > 0x80000000 */
 137         jbe             no_long_mode            /* nope, no long mode */
 138         movl            $0x80000001, %eax
 139         cpuid                                   /* get extended feature flags */
 140         btl             $29, %edx               /* check for long mode */
 141         jnc             no_long_mode            /* long mode not supported */
 142 #endif
 143 
 144         /*
 145          * Add any initial cr4 bits
 146          */
 147         movl            %cr4, %eax
 148         addr32 orl      CR4OFF, %eax
 149 
 150         /*
 151          * Enable PAE mode (CR4.PAE)
 152          */
 153         orl             $CR4_PAE, %eax
 154         movl            %eax, %cr4
 155 
 156         /*
 157          * Point cr3 to the 64-bit long mode page tables.
 158          *
 159          * Note that these MUST exist in 32-bit space, as we don't have
 160          * a way to load %cr3 with a 64-bit base address for the page tables
 161          * until the CPU is actually executing in 64-bit long mode.
 162          */
 163         addr32 movl     CR3OFF, %eax
 164         movl            %eax, %cr3
 165 
 166         /*
 167          * Set long mode enable in EFER (EFER.LME = 1)
 168          */
 169         movl    $MSR_AMD_EFER, %ecx
 170         rdmsr
 171         orl     $AMD_EFER_LME, %eax
 172         wrmsr
 173 
 174         /*
 175          * Finally, turn on paging (CR0.PG = 1) to activate long mode.
 176          */
 177         movl    %cr0, %eax
 178         orl     $CR0_PG, %eax
 179         movl    %eax, %cr0
 180 
 181         /*
 182          * The instruction after enabling paging in CR0 MUST be a branch.
 183          */
 184         jmp     long_mode_active
 185 
 186 long_mode_active:
 187         /*
 188          * Long mode is now active but since we're still running with the
 189          * original 16-bit CS we're actually in 16-bit compatibility mode.
 190          *
 191          * We have to load an intermediate GDT and IDT here that we know are
 192          * in 32-bit space before we can use the kernel's GDT and IDT, which
 193          * may be in the 64-bit address space, and since we're in compatibility
 194          * mode, we only have access to 16 and 32-bit instructions at the
 195          * moment.
 196          */
 197         addr32 lgdtl    TEMPGDTOFF      /* load temporary GDT */
 198         addr32 lidtl    TEMPIDTOFF      /* load temporary IDT */
 199 
 200         /*
 201          * Do a far transfer to 64-bit mode.  Set the CS selector to a 64-bit
 202          * long mode selector (CS.L=1) in the temporary 32-bit GDT and jump
 203          * to the real mode platter address of long_mode_64 as until the 64-bit
 204          * CS is in place we don't have access to 64-bit instructions and thus
 205          * can't reference a 64-bit %rip.
 206          */
 207         pushl           $TEMP_CS64_SEL
 208         addr32 pushl    LM64OFF
 209         lretl
 210 
 211         .globl  long_mode_64
 212 long_mode_64:
 213         .code64
 214         /*
 215          * We are now running in long mode with a 64-bit CS (EFER.LMA=1,
 216          * CS.L=1) so we now have access to 64-bit instructions.
 217          *
 218          * First, set the 64-bit GDT base.
 219          */
 220         .globl  rm_platter_pa
 221         movl    rm_platter_pa, %eax
 222         lgdtq   GDTROFF(%rax)           /* load 64-bit GDT */
 223 
 224         /*
 225          * Save the CPU number in %r11; get the value here since it's saved in
 226          * the real mode platter.
 227          */
 228         movl    CPUNOFF(%rax), %r11d
 229 
 230         /*
 231          * Add rm_platter_pa to %rsp to point it to the same location as seen
 232          * from 64-bit mode.
 233          */
 234         addq    %rax, %rsp
 235 
 236         /*
 237          * Now do an lretq to load CS with the appropriate selector for the
 238          * kernel's 64-bit GDT and to start executing 64-bit setup code at the
 239          * virtual address where boot originally loaded this code rather than
 240          * the copy in the real mode platter's rm_code array as we've been
 241          * doing so far.
 242          */
 243         pushq   $KCS_SEL
 244         pushq   $kernel_cs_code
 245         lretq
 246         .globl real_mode_start_cpu_end
 247 real_mode_start_cpu_end:
 248         nop
 249 
 250 kernel_cs_code:
 251         /*
 252          * Complete the balance of the setup we need to do before executing
 253          * 64-bit kernel code (namely the IDT, data segment selectors, TSS,
              * LDT, GS/FS base MSRs, %rsp, %cr0 and EFER.NXE).
 254          */
 255         .globl  rm_platter_va
 256         movq    rm_platter_va, %rax
 257         lidtq   IDTROFF(%rax)
 258 
 259         movw    $KDS_SEL, %ax
 260         movw    %ax, %ds
 261         movw    %ax, %es
 262         movw    %ax, %ss
 263 
 264         movw    $KTSS_SEL, %ax          /* setup kernel TSS */
 265         ltr     %ax
 266 
 267         xorw    %ax, %ax                /* clear LDTR */
 268         lldt    %ax
 269 
 270         /*
 271          * Set GS to the address of the per-cpu structure as contained in
 272          * cpu[cpu_number].
 273          *
 274          * Unfortunately there's no way to set the 64-bit gsbase with a mov,
 275          * so we have to stuff the low 32 bits in %eax and the high 32 bits in
 276          * %edx, then call wrmsr.
 277          */
 278         leaq    cpu(%rip), %rdi
 279         movl    (%rdi, %r11, 8), %eax   /* low 32 bits of cpu[%r11] */
 280         movl    4(%rdi, %r11, 8), %edx  /* high 32 bits of cpu[%r11] */
 281         movl    $MSR_AMD_GSBASE, %ecx
 282         wrmsr
 283 
 284         /*
 285          * Init FS and KernelGSBase.
 286          *
 287          * Based on code in mlsetup(), set them both to 8G (which shouldn't be
 288          * valid until some 64-bit processes run); this will then cause an
 289          * exception in any code that tries to index off them before they are
 290          * properly setup.
 291          */
 292         xorl    %eax, %eax              /* low 32 bits = 0 */
 293         movl    $2, %edx                /* high 32 bits = 2 */
 294         movl    $MSR_AMD_FSBASE, %ecx
 295         wrmsr
 296 
 297         movl    $MSR_AMD_KGSBASE, %ecx  /* %edx:%eax still hold 8G */
 298         wrmsr
 299 
 300         /*
 301          * Init %rsp to the exception stack set in tss_ist1 and create a legal
 302          * AMD64 ABI stack frame
 303          */
 304         movq    %gs:CPU_TSS, %rax
 305         movq    TSS_IST1(%rax), %rsp
 306         pushq   $0              /* null return address */
 307         pushq   $0              /* null frame pointer terminates stack trace */
 308         movq    %rsp, %rbp      /* stack aligned on 16-byte boundary */
 309 
 310         movq    %cr0, %rax
 311         andq    $~(CR0_TS|CR0_EM), %rax /* clear emulate math chip bit */
 312         orq     $(CR0_MP|CR0_NE), %rax
 313         movq    %rax, %cr0              /* set machine status word */
 314 
 315         /*
 316          * Before going any further, enable usage of page table NX bit if
 317          * that's how our page tables are set up.
 318          */
 319         bt      $X86FSET_NX, x86_featureset(%rip)
 320         jnc     1f                      /* NX feature bit clear: skip */
 321         movl    $MSR_AMD_EFER, %ecx
 322         rdmsr
 323         orl     $AMD_EFER_NXE, %eax
 324         wrmsr
 325 1:
 326 
 327         /*
 328          * Complete the rest of the setup and call mp_startup().
 329          */
 330         movq    %gs:CPU_THREAD, %rax    /* get thread ptr */
 331         movq    T_PC(%rax), %rax
 332         INDIRECT_CALL_REG(rax)          /* call mp_startup_boot */
 333         /* not reached */
 334         int     $20                     /* whoops, returned somehow! */
 335 
 336         SET_SIZE(real_mode_start_cpu)
 337 
 338 #elif defined(__i386)
 339 
 340         ENTRY_NP(real_mode_start_cpu)
             /*
              * i386 CPU start trampoline, provided in two variants selected by
              * __GNUC_AS__; they differ in assembler syntax only (explicit
              * size suffixes and $[...] / -1![...] expressions versus
              * suffix-less mnemonics and $(...) / ~(...) expressions).
              *
              * Both variants, starting in real mode: load the GDT and IDT from
              * %cs:GDTROFF and %cs:IDTROFF, OR the value at %cs:CR4OFF into
              * %cr4 when it is non-zero, load %cr3 from %cs:CR3OFF, enable
              * protected mode and paging with a single %cr0 write, and
              * far-return to kernel_cs_code.  kernel_cs_code then loads the
              * kernel selectors, stack pointer and TSS, clears LDTR, fixes up
              * %cr0, optionally enables EFER.NXE and calls through T_PC of the
              * thread found via %gs:CPU_THREAD.
              */
 341 
 342 #if !defined(__GNUC_AS__)
 343 
 344         cli
 345         D16 movw        %cs, %eax
 346         movw            %eax, %ds       /* load cs into ds */
 347         movw            %eax, %ss       /* and into ss */
 348 
 349         /*
 350          * Helps in debugging by giving us the fault address.
 351          *
 352          * Remember to patch a hlt (0xf4) at cmntrap to get a good stack.
 353          */
 354         D16 movl        $0xffc, %esp
 355 
 356         D16 A16 lgdt    %cs:GDTROFF
 357         D16 A16 lidt    %cs:IDTROFF
 358         D16 A16 movl    %cs:CR4OFF, %eax        /* set up CR4, if desired */
 359         D16 andl        %eax, %eax
 360         D16 A16 je      no_cr4                  /* zero: leave %cr4 untouched */
 361 
 362         D16 movl        %eax, %ecx
 363         D16 movl        %cr4, %eax
 364         D16 orl         %ecx, %eax
 365         D16 movl        %eax, %cr4
 366 no_cr4:
 367         D16 A16 movl    %cs:CR3OFF, %eax
 368         A16 movl        %eax, %cr3
 369         movl            %cr0, %eax
 370 
 371         /*
 372          * Enable protected-mode, paging, write protect, and alignment mask
 373          */
 374         D16 orl         $[CR0_PG|CR0_PE|CR0_WP|CR0_AM], %eax
 375         movl            %eax, %cr0
 376         jmp             pestart
 377 
 378 pestart:
 379         D16 pushl       $KCS_SEL
 380         D16 pushl       $kernel_cs_code
 381         D16 lret
 382         .globl real_mode_start_cpu_end
 383 real_mode_start_cpu_end:
 384         nop
 385 
 386         .globl  kernel_cs_code
 387 kernel_cs_code:
 388         /*
 389          * At this point we are with kernel's cs and proper eip.
 390          *
 391          * We will be executing not from the copy in real mode platter,
 392          * but from the original code where boot loaded us.
 393          *
 394          * By this time GDT and IDT are loaded as is cr3.
 395          */
 396         movw    $KFS_SEL,%eax
 397         movw    %eax,%fs
 398         movw    $KGS_SEL,%eax
 399         movw    %eax,%gs
 400         movw    $KDS_SEL,%eax
 401         movw    %eax,%ds
 402         movw    %eax,%es
 403         movl    %gs:CPU_TSS,%esi
 404         movw    %eax,%ss
 405         movl    TSS_ESP0(%esi),%esp
 406         movw    $KTSS_SEL,%ax
 407         ltr     %ax
 408         xorw    %ax, %ax                /* clear LDTR */
 409         lldt    %ax
 410         movl    %cr0,%edx
 411         andl    $-1![CR0_TS|CR0_EM],%edx  /* clear emulate math chip bit */
 412         orl     $[CR0_MP|CR0_NE],%edx
 413         movl    %edx,%cr0                 /* set machine status word */
 414 
 415         /*
 416          * Before going any further, enable usage of page table NX bit if
 417          * that's how our page tables are set up.
 418          */
 419         bt      $X86FSET_NX, x86_featureset
 420         jnc     1f                      /* NX feature bit clear: skip */
 421         movl    %cr4, %ecx
 422         andl    $CR4_PAE, %ecx
 423         jz      1f                      /* PAE not enabled: skip */
 424         movl    $MSR_AMD_EFER, %ecx
 425         rdmsr
 426         orl     $AMD_EFER_NXE, %eax
 427         wrmsr
 428 1:
 429         movl    %gs:CPU_THREAD, %eax    /* get thread ptr */
 430         call    *T_PC(%eax)             /* call mp_startup */
 431         /* not reached */
 432         int     $20                     /* whoops, returned somehow! */
 433 
 434 #else
 435 
 436         cli
 437         mov             %cs, %ax
 438         mov             %eax, %ds       /* load cs into ds */
 439         mov             %eax, %ss       /* and into ss */
 440 
 441         /*
 442          * Helps in debugging by giving us the fault address.
 443          *
 444          * Remember to patch a hlt (0xf4) at cmntrap to get a good stack.
 445          */
 446         D16 mov         $0xffc, %esp
 447 
 448         D16 A16 lgdtl   %cs:GDTROFF
 449         D16 A16 lidtl   %cs:IDTROFF
 450         D16 A16 mov     %cs:CR4OFF, %eax        /* set up CR4, if desired */
 451         D16 and         %eax, %eax
 452         D16 A16 je      no_cr4                  /* zero: leave %cr4 untouched */
 453 
 454         D16 mov         %eax, %ecx
 455         D16 mov         %cr4, %eax
 456         D16 or          %ecx, %eax
 457         D16 mov         %eax, %cr4
 458 no_cr4:
 459         D16 A16 mov     %cs:CR3OFF, %eax
 460         A16 mov         %eax, %cr3
 461         mov             %cr0, %eax
 462 
 463         /*
 464          * Enable protected-mode, paging, write protect, and alignment mask
 465          */
 466         D16 or          $(CR0_PG|CR0_PE|CR0_WP|CR0_AM), %eax
 467         mov             %eax, %cr0
 468         jmp             pestart
 469 
 470 pestart:
 471         D16 pushl       $KCS_SEL
 472         D16 pushl       $kernel_cs_code
 473         D16 lret
 474         .globl real_mode_start_cpu_end
 475 real_mode_start_cpu_end:
 476         nop
 477         .globl  kernel_cs_code
 478 kernel_cs_code:
 479         /*
 480          * At this point we are with kernel's cs and proper eip.
 481          *
 482          * We will be executing not from the copy in real mode platter,
 483          * but from the original code where boot loaded us.
 484          *
 485          * By this time GDT and IDT are loaded as is cr3.
 486          */
 487         mov     $KFS_SEL, %ax
 488         mov     %eax, %fs
 489         mov     $KGS_SEL, %ax
 490         mov     %eax, %gs
 491         mov     $KDS_SEL, %ax
 492         mov     %eax, %ds
 493         mov     %eax, %es
 494         mov     %gs:CPU_TSS, %esi
 495         mov     %eax, %ss
 496         mov     TSS_ESP0(%esi), %esp
 497         mov     $(KTSS_SEL), %ax
 498         ltr     %ax
 499         xorw    %ax, %ax                /* clear LDTR */
 500         lldt    %ax
 501         mov     %cr0, %edx
 502         and     $~(CR0_TS|CR0_EM), %edx /* clear emulate math chip bit */
 503         or      $(CR0_MP|CR0_NE), %edx
 504         mov     %edx, %cr0              /* set machine status word */
 505 
 506         /*
 507          * Before going any farther, enable usage of page table NX bit if
 508          * that's how our page tables are set up.  (PCIDE is enabled later on).
 509          */
 510         bt      $X86FSET_NX, x86_featureset
 511         jnc     1f                      /* NX feature bit clear: skip */
 512         movl    %cr4, %ecx
 513         andl    $CR4_PAE, %ecx
 514         jz      1f                      /* PAE not enabled: skip */
 515         movl    $MSR_AMD_EFER, %ecx
 516         rdmsr
 517         orl     $AMD_EFER_NXE, %eax
 518         wrmsr
 519 1:
 520         mov     %gs:CPU_THREAD, %eax    /* get thread ptr */
 521         call    *T_PC(%eax)             /* call mp_startup */
 522         /* not reached */
 523         int     $20                     /* whoops, returned somehow! */
 524 #endif
 525 
 526         SET_SIZE(real_mode_start_cpu)
 527 
 528 #endif  /* __amd64 */
 529 
 530 #if defined(__amd64)
 531 
 532         ENTRY_NP(real_mode_stop_cpu_stage1)
             /*
              * amd64 build, stage 1 of stopping a CPU: disable interrupts, copy
              * %cs into %ds and %ss, then jump through %ax to offset
              * CPUHALTCODEOFF, where the stage 2 code is expected to live.
              * Two variants follow, selected by __GNUC_AS__; they differ only
              * in how the assembler is told to emit 16-bit code.
              */
 533 
 534 #if !defined(__GNUC_AS__)
 535 
 536         /*
 537          * For vulcan as we need to do a .code32 and mentally invert the
 538          * meaning of the addr16 and data16 prefixes to get 32-bit access when
 539          * generating code to be executed in 16-bit mode (sigh...)
 540          */
 541         .code32
 542         cli
 543         movw            %cs, %ax
 544         movw            %ax, %ds        /* load cs into ds */
 545         movw            %ax, %ss        /* and into ss */
 546 
 547         /*
 548          * Jump to the stage 2 code in the rm_platter_va->rm_cpu_halt_code
 549          */
 550         movw            $CPUHALTCODEOFF, %ax
 551         .byte           0xff, 0xe0      /* jmp *%ax */
 552 
 553 #else   /* __GNUC_AS__ */
 554 
 555         /*
 556          * NOTE:  The GNU assembler automatically does the right thing to
 557          *        generate data size operand prefixes based on the code size
 558          *        generation mode (e.g. .code16, .code32, .code64) and as such
 559          *        prefixes need not be used on instructions EXCEPT in the case
 560          *        of address prefixes for code for which the reference is not
 561          *        automatically of the default operand size.
 562          */
 563         .code16
 564         cli
 565         movw            %cs, %ax
 566         movw            %ax, %ds        /* load cs into ds */
 567         movw            %ax, %ss        /* and into ss */
 568 
 569         /*
 570          * Jump to the stage 2 code in the rm_platter_va->rm_cpu_halt_code
 571          */
 572         movw            $CPUHALTCODEOFF, %ax
 573         jmp             *%ax
 574 
 575 #endif  /* !__GNUC_AS__ */
 576 
 577         .globl real_mode_stop_cpu_stage1_end
 578 real_mode_stop_cpu_stage1_end:
 579         nop
 580 
 581         SET_SIZE(real_mode_stop_cpu_stage1)
 582 
 583 #elif defined(__i386)
 584 
 585         ENTRY_NP(real_mode_stop_cpu_stage1)
             /*
              * i386 build, stage 1 of stopping a CPU: disable interrupts, copy
              * %cs into %ds and %ss, then jump through %ax to offset
              * CPUHALTCODEOFF, where the stage 2 code is expected to live.
              * Two variants follow, selected by __GNUC_AS__.
              */
 586 
 587 #if !defined(__GNUC_AS__)
 588 
 589         cli
 590         D16 movw        %cs, %eax
 591         movw            %eax, %ds       /* load cs into ds */
 592         movw            %eax, %ss       /* and into ss */
 593 
 594         /*
 595          * Jump to the stage 2 code in the rm_platter_va->rm_cpu_halt_code
 596          */
 597         movw            $CPUHALTCODEOFF, %ax
 598         .byte           0xff, 0xe0      /* jmp *%ax */
 599 
 600 #else   /* __GNUC_AS__ */
 601 
 602         cli
 603         mov             %cs, %ax
 604         mov             %eax, %ds       /* load cs into ds */
 605         mov             %eax, %ss       /* and into ss */
 606 
 607         /*
 608          * Jump to the stage 2 code in the rm_platter_va->rm_cpu_halt_code
 609          */
 610         movw            $CPUHALTCODEOFF, %ax
 611         /*
 612          * The following indirect jump is executed as part of stopping a CPU
 613          * (this is stage 1 of the stop path).  As such nothing else should be
 614          * running on it or executing in the system such that it is a viable
 615          * Spectre v2 branch target injection location. At least, in theory.
 616          */
 617         jmp             *%ax
 618 
 619 #endif  /* !__GNUC_AS__ */
 620 
 621         .globl real_mode_stop_cpu_stage1_end
 622 real_mode_stop_cpu_stage1_end:
 623         nop
 624 
 625         SET_SIZE(real_mode_stop_cpu_stage1)
 626 
 627 #endif  /* __amd64 */
 628 
 629         ENTRY_NP(real_mode_stop_cpu_stage2)
             /*
              * Stage 2 of stopping a CPU, common to the amd64 and i386 builds:
              * store the marker value 0xdead at CPUHALTEDOFF and then halt in
              * a loop that never exits.
              */
 630 
 631         movw            $0xdead, %ax
 632         movw            %ax, CPUHALTEDOFF       /* presumably the "halted" flag polled by the stopping CPU -- confirm */
 633 
 634 real_mode_stop_cpu_loop:
 635         /*
 636          * Put CPU into halted state.
 637          * Only INIT, SMI, NMI could break the loop.
              * If hlt ever returns, the jmp below simply halts the CPU again.
 638          */
 639         hlt
 640         jmp             real_mode_stop_cpu_loop
 641 
 642         .globl real_mode_stop_cpu_stage2_end
 643 real_mode_stop_cpu_stage2_end:
 644         nop
 645 
 646         SET_SIZE(real_mode_stop_cpu_stage2)
 647 
 648 #endif  /* __lint */