/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
 */
/*
 * Copyright (c) 2010, Intel Corporation.
 * All rights reserved.
 *
 * Copyright 2018 Joyent, Inc.
 */

#include <sys/asm_linkage.h>
#include <sys/asm_misc.h>
#include <sys/regset.h>
#include <sys/privregs.h>
#include <sys/x86_archext.h>

#if !defined(__lint)
#include <sys/segments.h>
#include "assym.h"
#endif

/*
 *	Our assumptions:
 *		- We are running in real mode.
 *		- Interrupts are disabled.
 *		- Selectors are equal (cs == ds == ss) for all real mode code.
 *		- The GDT, IDT, ktss and page directory have been built for us.
 *
 *	Our actions:
 *	Start CPU:
 *		- We start using our GDT by loading correct values in the
 *		  selector registers (cs=KCS_SEL, ds=es=ss=KDS_SEL, fs=KFS_SEL,
 *		  gs=KGS_SEL).
 *		- We change over to using our IDT.
 *		- We load the default LDT into the hardware LDT register.
 *		- We load the default TSS into the hardware task register.
 *		- We call mp_startup(void) indirectly through the T_PC of the
 *		  current thread.
 *	Stop CPU:
 *		- We put the CPU into a halted state with interrupts disabled.
 */

#if defined(__lint)

void
real_mode_start_cpu(void)
{}

void
real_mode_stop_cpu_stage1(void)
{}

void
real_mode_stop_cpu_stage2(void)
{}

#else	/* __lint */

#if defined(__amd64)

	ENTRY_NP(real_mode_start_cpu)

	/*
	 * NOTE:  The GNU assembler automatically does the right thing to
	 *	  generate data size operand prefixes based on the code size
	 *	  generation mode (e.g. .code16, .code32, .code64) and as such
	 *	  prefixes need not be used on instructions EXCEPT in the case
	 *	  of address prefixes for code for which the reference is not
	 *	  automatically of the default operand size.
	 */
	.code16
	cli
	movw		%cs, %ax
	movw		%ax, %ds	/* load cs into ds */
	movw		%ax, %ss	/* and into ss */

	/*
	 * Helps in debugging by giving us the fault address.
	 *
	 * Remember to patch a hlt (0xf4) at cmntrap to get a good stack.
	 */
	movl		$0xffc, %esp
	movl		%cr0, %eax

	/*
	 * Enable protected-mode, write protect, and alignment mask
	 */
	orl		$(CR0_PE|CR0_WP|CR0_AM), %eax
	movl		%eax, %cr0

	/*
	 * Do a jmp immediately after writing to cr0 when enabling protected
	 * mode to clear the real mode prefetch queue (per Intel's docs)
	 */
	jmp		pestart

pestart:
	/*
	 * 16-bit protected mode is now active, so prepare to turn on long
	 * mode.
	 *
	 * Note that we currently assume that if we're attempting to run a
	 * kernel compiled with (__amd64) #defined, the target CPU has long
	 * mode support.
	 */

#if 0
	/*
	 * If there's a chance this might not be true, the following test should
	 * be done, with the no_long_mode branch then doing something
	 * appropriate:
	 */

	movl		$0x80000000, %eax	/* get largest extended CPUID */
	cpuid
	cmpl		$0x80000000, %eax	/* check if > 0x80000000 */
	jbe		no_long_mode		/* nope, no long mode */
	movl		$0x80000001, %eax
	cpuid					/* get extended feature flags */
	btl		$29, %edx		/* check for long mode */
	jnc		no_long_mode		/* long mode not supported */
#endif

	/*
	 * Add any initial cr4 bits
	 */
	movl		%cr4, %eax
	addr32 orl	CR4OFF, %eax

	/*
	 * Enable PAE mode (CR4.PAE)
	 */
	orl		$CR4_PAE, %eax
	movl		%eax, %cr4
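	/*
	 * (Long mode requires PAE paging: with EFER.LME set, enabling
	 * CR0.PG below would fault if CR4.PAE were still clear.)
	 */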

	/*
	 * Point cr3 to the 64-bit long mode page tables.
	 *
	 * Note that these MUST exist in 32-bit space, as we don't have
	 * a way to load %cr3 with a 64-bit base address for the page tables
	 * until the CPU is actually executing in 64-bit long mode.
	 */
	addr32 movl	CR3OFF, %eax
	movl		%eax, %cr3

	/*
	 * Set long mode enable in EFER (EFER.LME = 1)
	 */
	movl	$MSR_AMD_EFER, %ecx
	rdmsr
	orl	$AMD_EFER_LME, %eax
	wrmsr

	/*
	 * Finally, turn on paging (CR0.PG = 1) to activate long mode.
	 */
	movl	%cr0, %eax
	orl	$CR0_PG, %eax
	movl	%eax, %cr0

	/*
	 * The instruction after enabling paging in CR0 MUST be a branch.
	 */
	jmp	long_mode_active

long_mode_active:
	/*
	 * Long mode is now active, but since we're still running with the
	 * original 16-bit CS we're actually in 16-bit compatibility mode.
	 *
	 * We have to load an intermediate GDT and IDT here that we know are
	 * in 32-bit space before we can use the kernel's GDT and IDT, which
	 * may be in the 64-bit address space, and since we're in compatibility
	 * mode, we only have access to 16 and 32-bit instructions at the
	 * moment.
	 */
	addr32 lgdtl	TEMPGDTOFF	/* load temporary GDT */
	addr32 lidtl	TEMPIDTOFF	/* load temporary IDT */

	/*
	 * Do a far transfer to 64-bit mode.  Set the CS selector to a 64-bit
	 * long mode selector (CS.L=1) in the temporary 32-bit GDT and jump
	 * to the real mode platter address of long_mode_64, since until the
	 * 64-bit CS is in place we don't have access to 64-bit instructions
	 * and thus can't reference a 64-bit %rip.
	 */
	pushl		$TEMP_CS64_SEL
	addr32 pushl	LM64OFF
	lretl
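	/*
	 * (lretl pops a 32-bit %eip and then a %cs selector, so the two
	 * pushes above form the far pointer that switches us into the
	 * 64-bit code segment.)
	 */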

	.globl	long_mode_64
long_mode_64:
	.code64
	/*
	 * We are now running in long mode with a 64-bit CS (EFER.LMA=1,
	 * CS.L=1) so we now have access to 64-bit instructions.
	 *
	 * First, set the 64-bit GDT base.
	 */
	.globl	rm_platter_pa
	movl	rm_platter_pa, %eax
	lgdtq	GDTROFF(%rax)		/* load 64-bit GDT */

	/*
	 * Save the CPU number in %r11; get the value here since it's saved in
	 * the real mode platter.
	 */
	movl	CPUNOFF(%rax), %r11d

	/*
	 * Add rm_platter_pa to %rsp to point it to the same location as seen
	 * from 64-bit mode.
	 */
	addq	%rax, %rsp
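	/*
	 * (In 64-bit mode the CS/DS/ES/SS segment bases are treated as zero,
	 * so the platter base must be folded into %rsp itself.)
	 */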

	/*
	 * Now do an lretq to load CS with the appropriate selector for the
	 * kernel's 64-bit GDT and to start executing 64-bit setup code at
	 * the virtual address where boot originally loaded this code, rather
	 * than from the copy in the real mode platter's rm_code array as
	 * we've been doing so far.
	 */
	pushq	$KCS_SEL
	pushq	$kernel_cs_code
	lretq
	.globl real_mode_start_cpu_end
real_mode_start_cpu_end:
	nop

kernel_cs_code:
	/*
	 * Complete the balance of the setup we need before executing
	 * 64-bit kernel code: init %rsp, load the IDT, TSS and LDT, and
	 * set up FS and GS.
	 */
	.globl	rm_platter_va
	movq	rm_platter_va, %rax
	lidtq	IDTROFF(%rax)

	movw	$KDS_SEL, %ax
	movw	%ax, %ds
	movw	%ax, %es
	movw	%ax, %ss

	movw	$KTSS_SEL, %ax		/* setup kernel TSS */
	ltr	%ax

	xorw	%ax, %ax		/* clear LDTR */
	lldt	%ax
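	/*
	 * (A null selector in LDTR marks the LDT as unusable; any reference
	 * through it will fault until a real LDT is installed.)
	 */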

	/*
	 * Set GS to the address of the per-cpu structure as contained in
	 * cpu[cpu_number].
	 *
	 * Unfortunately there's no way to set the 64-bit gsbase with a mov,
	 * so we have to stuff the low 32 bits in %eax and the high 32 bits in
	 * %edx, then call wrmsr.
	 */
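	/*
	 * (cpu[] is an array of 8-byte cpu_t pointers, hence the
	 * (%rdi, %r11, 8) addressing: %r11 still holds the CPU number
	 * saved from the platter above.)
	 */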
	leaq	cpu(%rip), %rdi
	movl	(%rdi, %r11, 8), %eax
	movl	4(%rdi, %r11, 8), %edx
	movl	$MSR_AMD_GSBASE, %ecx
	wrmsr

	/*
	 * Init FS and KernelGSBase.
	 *
	 * Based on code in mlsetup(), set them both to 8G (which shouldn't be
	 * valid until some 64-bit processes run); this will then cause an
	 * exception in any code that tries to index off them before they are
	 * properly set up.
	 */
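	/*
	 * (8G == 0x200000000, so the MSR value is %edx:%eax == 2:0.)
	 */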
	xorl	%eax, %eax		/* low 32 bits = 0 */
	movl	$2, %edx		/* high 32 bits = 2 */
	movl	$MSR_AMD_FSBASE, %ecx
	wrmsr

	movl	$MSR_AMD_KGSBASE, %ecx
	wrmsr
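	/*
	 * (wrmsr doesn't modify %edx:%eax, so KernelGSBase receives the same
	 * 8G value just written to FSBase.)
	 */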

	/*
	 * Init %rsp to the exception stack set in tss_ist1 and create a legal
	 * AMD64 ABI stack frame
	 */
	movq	%gs:CPU_TSS, %rax
	movq	TSS_IST1(%rax), %rsp
	pushq	$0		/* null return address */
	pushq	$0		/* null frame pointer terminates stack trace */
	movq	%rsp, %rbp	/* stack aligned on 16-byte boundary */

	movq	%cr0, %rax
	andq	$~(CR0_TS|CR0_EM), %rax	/* clear emulate math chip bit */
	orq	$(CR0_MP|CR0_NE), %rax
	movq	%rax, %cr0		/* set machine status word */
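	/*
	 * (Clearing TS/EM and setting MP/NE disables x87 emulation and
	 * selects native FPU exception reporting via #MF.)
	 */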

	/*
	 * Before going any further, enable usage of page table NX bit if
	 * that's how our page tables are set up.
	 */
	bt	$X86FSET_NX, x86_featureset(%rip)
	jnc	1f
	movl	$MSR_AMD_EFER, %ecx
	rdmsr
	orl	$AMD_EFER_NXE, %eax
	wrmsr
1:

	/*
	 * Complete the rest of the setup and call mp_startup().
	 */
	movq	%gs:CPU_THREAD, %rax	/* get thread ptr */
	call	*T_PC(%rax)		/* call mp_startup_boot */
	/* not reached */
	int	$20			/* whoops, returned somehow! */

	SET_SIZE(real_mode_start_cpu)

#elif defined(__i386)

	ENTRY_NP(real_mode_start_cpu)

#if !defined(__GNUC_AS__)

	cli
	D16 movw	%cs, %eax
	movw		%eax, %ds	/* load cs into ds */
	movw		%eax, %ss	/* and into ss */

	/*
	 * Helps in debugging by giving us the fault address.
	 *
	 * Remember to patch a hlt (0xf4) at cmntrap to get a good stack.
	 */
	D16 movl	$0xffc, %esp

	D16 A16 lgdt	%cs:GDTROFF
	D16 A16 lidt	%cs:IDTROFF
	D16 A16 movl	%cs:CR4OFF, %eax	/* set up CR4, if desired */
	D16 andl	%eax, %eax
	D16 A16 je	no_cr4

	D16 movl	%eax, %ecx
	D16 movl	%cr4, %eax
	D16 orl		%ecx, %eax
	D16 movl	%eax, %cr4
no_cr4:
	D16 A16 movl	%cs:CR3OFF, %eax
	A16 movl	%eax, %cr3
	movl		%cr0, %eax

	/*
	 * Enable protected-mode, paging, write protect, and alignment mask
	 */
	D16 orl		$[CR0_PG|CR0_PE|CR0_WP|CR0_AM], %eax
	movl		%eax, %cr0
	jmp		pestart

pestart:
	D16 pushl	$KCS_SEL
	D16 pushl	$kernel_cs_code
	D16 lret
	.globl real_mode_start_cpu_end
real_mode_start_cpu_end:
	nop

	.globl	kernel_cs_code
kernel_cs_code:
	/*
	 * At this point we are running with the kernel's %cs and a proper
	 * %eip.
	 *
	 * We are no longer executing from the copy in the real mode platter,
	 * but from the original location where boot loaded us.
	 *
	 * By this time the GDT and IDT are loaded, as is %cr3.
	 */
	movw	$KFS_SEL,%eax
	movw	%eax,%fs
	movw	$KGS_SEL,%eax
	movw	%eax,%gs
	movw	$KDS_SEL,%eax
	movw	%eax,%ds
	movw	%eax,%es
	movl	%gs:CPU_TSS,%esi
	movw	%eax,%ss
	movl	TSS_ESP0(%esi),%esp
	movw	$KTSS_SEL,%ax
	ltr	%ax
	xorw	%ax, %ax		/* clear LDTR */
	lldt	%ax
	movl	%cr0,%edx
	andl	$-1![CR0_TS|CR0_EM],%edx  /* clear emulate math chip bit */
	orl	$[CR0_MP|CR0_NE],%edx
	movl	%edx,%cr0		  /* set machine status word */

	/*
	 * Before going any further, enable usage of page table NX bit if
	 * that's how our page tables are set up.
	 */
	bt	$X86FSET_NX, x86_featureset
	jnc	1f
	movl	%cr4, %ecx
	andl	$CR4_PAE, %ecx
	jz	1f
	movl	$MSR_AMD_EFER, %ecx
	rdmsr
	orl	$AMD_EFER_NXE, %eax
	wrmsr
1:
	movl	%gs:CPU_THREAD, %eax	/* get thread ptr */
	call	*T_PC(%eax)		/* call mp_startup */
	/* not reached */
	int	$20			/* whoops, returned somehow! */

#else

	cli
	mov		%cs, %ax
	mov		%eax, %ds	/* load cs into ds */
	mov		%eax, %ss	/* and into ss */

	/*
	 * Helps in debugging by giving us the fault address.
	 *
	 * Remember to patch a hlt (0xf4) at cmntrap to get a good stack.
	 */
	D16 mov		$0xffc, %esp

	D16 A16 lgdtl	%cs:GDTROFF
	D16 A16 lidtl	%cs:IDTROFF
	D16 A16 mov	%cs:CR4OFF, %eax	/* set up CR4, if desired */
	D16 and		%eax, %eax
	D16 A16 je	no_cr4

	D16 mov		%eax, %ecx
	D16 mov		%cr4, %eax
	D16 or		%ecx, %eax
	D16 mov		%eax, %cr4
no_cr4:
	D16 A16 mov	%cs:CR3OFF, %eax
	A16 mov		%eax, %cr3
	mov		%cr0, %eax

	/*
	 * Enable protected-mode, paging, write protect, and alignment mask
	 */
	D16 or		$(CR0_PG|CR0_PE|CR0_WP|CR0_AM), %eax
	mov		%eax, %cr0
	jmp		pestart

pestart:
	D16 pushl	$KCS_SEL
	D16 pushl	$kernel_cs_code
	D16 lret
	.globl real_mode_start_cpu_end
real_mode_start_cpu_end:
	nop
	.globl	kernel_cs_code
 478         /*
 479          * At this point we are with kernel's cs and proper eip.
 480          *
 481          * We will be executing not from the copy in real mode platter,
 482          * but from the original code where boot loaded us.
 483          *
 484          * By this time GDT and IDT are loaded as is cr3.
 485          */
 486         mov     $KFS_SEL, %ax
 487         mov     %eax, %fs
 488         mov     $KGS_SEL, %ax
 489         mov     %eax, %gs
 490         mov     $KDS_SEL, %ax
 491         mov     %eax, %ds
 492         mov     %eax, %es
 493         mov     %gs:CPU_TSS, %esi
 494         mov     %eax, %ss
 495         mov     TSS_ESP0(%esi), %esp
 496         mov     $(KTSS_SEL), %ax
 497         ltr     %ax
 498         xorw    %ax, %ax                /* clear LDTR */
 499         lldt    %ax
 500         mov     %cr0, %edx
 501         and     $~(CR0_TS|CR0_EM), %edx /* clear emulate math chip bit */
 502         or      $(CR0_MP|CR0_NE), %edx
 503         mov     %edx, %cr0              /* set machine status word */
 504 
 505         /*
 506          * Before going any farther, enable usage of page table NX bit if 
 507          * that's how our page tables are set up.  (PCIDE is enabled later on).
 508          */
 509         bt      $X86FSET_NX, x86_featureset
 510         jnc     1f
 511         movl    %cr4, %ecx
 512         andl    $CR4_PAE, %ecx
 513         jz      1f
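	/*
	 * (The NX bit exists only in the PAE page table format, hence the
	 * CR4.PAE check above.)
	 */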
	movl	$MSR_AMD_EFER, %ecx
	rdmsr
	orl	$AMD_EFER_NXE, %eax
	wrmsr
1:
	mov	%gs:CPU_THREAD, %eax	/* get thread ptr */
	call	*T_PC(%eax)		/* call mp_startup */
	/* not reached */
	int	$20			/* whoops, returned somehow! */
#endif

	SET_SIZE(real_mode_start_cpu)

#endif	/* __amd64 */

#if defined(__amd64)

	ENTRY_NP(real_mode_stop_cpu_stage1)

#if !defined(__GNUC_AS__)

	/*
	 * For vulcan as we need to do a .code32 and mentally invert the
	 * meaning of the addr16 and data16 prefixes to get 32-bit access when
	 * generating code to be executed in 16-bit mode (sigh...)
	 */
	.code32
	cli
	movw		%cs, %ax
	movw		%ax, %ds	/* load cs into ds */
	movw		%ax, %ss	/* and into ss */

	/*
	 * Jump to the stage 2 code in the rm_platter_va->rm_cpu_halt_code
	 */
	movw		$CPUHALTCODEOFF, %ax
	.byte		0xff, 0xe0	/* jmp *%ax */
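	/*
	 * (The jmp is hand-assembled: opcode 0xff with ModRM 0xe0 is the
	 * register-indirect "jmp *%ax" encoding.)
	 */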

#else	/* __GNUC_AS__ */

	/*
	 * NOTE:  The GNU assembler automatically does the right thing to
	 *	  generate data size operand prefixes based on the code size
	 *	  generation mode (e.g. .code16, .code32, .code64) and as such
	 *	  prefixes need not be used on instructions EXCEPT in the case
	 *	  of address prefixes for code for which the reference is not
	 *	  automatically of the default operand size.
	 */
	.code16
	cli
	movw		%cs, %ax
	movw		%ax, %ds	/* load cs into ds */
	movw		%ax, %ss	/* and into ss */

	/*
	 * Jump to the stage 2 code in the rm_platter_va->rm_cpu_halt_code
	 */
	movw		$CPUHALTCODEOFF, %ax
	jmp		*%ax

#endif	/* !__GNUC_AS__ */

	.globl real_mode_stop_cpu_stage1_end
real_mode_stop_cpu_stage1_end:
	nop

	SET_SIZE(real_mode_stop_cpu_stage1)

#elif defined(__i386)

	ENTRY_NP(real_mode_stop_cpu_stage1)

#if !defined(__GNUC_AS__)

	cli
	D16 movw	%cs, %eax
	movw		%eax, %ds	/* load cs into ds */
	movw		%eax, %ss	/* and into ss */

	/*
	 * Jump to the stage 2 code in the rm_platter_va->rm_cpu_halt_code
	 */
	movw		$CPUHALTCODEOFF, %ax
	.byte		0xff, 0xe0	/* jmp *%ax */

#else	/* __GNUC_AS__ */

	cli
	mov		%cs, %ax
	mov		%eax, %ds	/* load cs into ds */
	mov		%eax, %ss	/* and into ss */

	/*
	 * Jump to the stage 2 code in the rm_platter_va->rm_cpu_halt_code
	 */
	movw		$CPUHALTCODEOFF, %ax
	jmp		*%ax

#endif	/* !__GNUC_AS__ */

	.globl real_mode_stop_cpu_stage1_end
real_mode_stop_cpu_stage1_end:
	nop

	SET_SIZE(real_mode_stop_cpu_stage1)

#endif	/* __amd64 */
	ENTRY_NP(real_mode_stop_cpu_stage2)

	movw		$0xdead, %ax
	movw		%ax, CPUHALTEDOFF
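	/*
	 * (0xdead in the platter's halted-flag word marks that this CPU has
	 * reached the stage 2 halt code; presumably the CPU driving the
	 * offline operation polls for it.)
	 */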

real_mode_stop_cpu_loop:
	/*
	 * Put the CPU into a halted state.
	 * Only INIT, SMI, or NMI can break the loop.
	 */
	hlt
	jmp		real_mode_stop_cpu_loop

	.globl real_mode_stop_cpu_stage2_end
real_mode_stop_cpu_stage2_end:
	nop

	SET_SIZE(real_mode_stop_cpu_stage2)

#endif	/* __lint */