1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
  23  */
  24 /*
  25  * Copyright (c) 2010, Intel Corporation.
  26  * All rights reserved.
  27  *
  28  * Copyright 2019 Joyent, Inc.
  29  */
  30 
  31 #include <sys/asm_linkage.h>
  32 #include <sys/asm_misc.h>
  33 #include <sys/regset.h>
  34 #include <sys/privregs.h>
  35 #include <sys/x86_archext.h>
  36 
  37 #include <sys/segments.h>
  38 #include "assym.h"
  39 
  40 /*
  41  *      Our assumptions:
  42  *              - We are running in real mode.
  43  *              - Interrupts are disabled.
  44  *              - Selectors are equal (cs == ds == ss) for all real mode code
 *		- The GDT, IDT, ktss and page directory have been built for us
  46  *
  47  *      Our actions:
  48  *      Start CPU:
  49  *              - We start using our GDT by loading correct values in the
  50  *                selector registers (cs=KCS_SEL, ds=es=ss=KDS_SEL, fs=KFS_SEL,
  51  *                gs=KGS_SEL).
  52  *              - We change over to using our IDT.
  53  *              - We load the default LDT into the hardware LDT register.
  54  *              - We load the default TSS into the hardware task register.
  55  *              - call mp_startup(void) indirectly through the T_PC
  56  *      Stop CPU:
  57  *              - Put CPU into halted state with interrupts disabled
  58  *
  59  */
  60 
	ENTRY_NP(real_mode_start_cpu)

	/*
	 * NOTE:  The GNU assembler automatically does the right thing to
	 *        generate data size operand prefixes based on the code size
	 *        generation mode (e.g. .code16, .code32, .code64) and as such
	 *        prefixes need not be used on instructions EXCEPT in the case
	 *        of address prefixes for code for which the reference is not
	 *        automatically of the default operand size.
	 */
	.code16
	cli
	movw		%cs, %ax
	movw		%ax, %ds	/* load cs into ds */
	movw		%ax, %ss	/* and into ss */

	/*
	 * Helps in debugging by giving us the fault address.
	 *
	 * Remember to patch a hlt (0xf4) at cmntrap to get a good stack.
	 */
	movl		$0xffc, %esp	/* temporary stack: top of first 4K */
	movl		%cr0, %eax

	/*
	 * Enable protected-mode, write protect, and alignment mask
	 */
	orl		$(CR0_PE|CR0_WP|CR0_AM), %eax
	movl		%eax, %cr0

	/*
	 * Do a jmp immediately after writing to cr0 when enabling protected
	 * mode to clear the real mode prefetch queue (per Intel's docs)
	 */
	jmp		pestart

pestart:
	/*
	 * 16-bit protected mode is now active, so prepare to turn on long
	 * mode.
	 */

	/*
	 * Add any initial cr4 bits.
	 *
	 * CR4OFF (like the other *OFF symbols below) is an offset into the
	 * real mode platter, which this code is executing from (see assym.h
	 * for the generated offsets), so a plain absolute reference with an
	 * addr32 prefix reaches the field from 16-bit code.
	 */
	movl		%cr4, %eax
	addr32 orl	CR4OFF, %eax

	/*
	 * Enable PAE mode (CR4.PAE) -- a prerequisite for long mode.
	 */
	orl		$CR4_PAE, %eax
	movl		%eax, %cr4

	/*
	 * Point cr3 to the 64-bit long mode page tables.
	 *
	 * Note that these MUST exist in 32-bit space, as we don't have
	 * a way to load %cr3 with a 64-bit base address for the page tables
	 * until the CPU is actually executing in 64-bit long mode.
	 */
	addr32 movl	CR3OFF, %eax
	movl		%eax, %cr3

	/*
	 * Set long mode enable in EFER (EFER.LME = 1)
	 */
	movl	$MSR_AMD_EFER, %ecx
	rdmsr
	orl	$AMD_EFER_LME, %eax
	wrmsr				/* writes %edx:%eax back to EFER */

	/*
	 * Finally, turn on paging (CR0.PG = 1) to activate long mode.
	 */
	movl	%cr0, %eax
	orl	$CR0_PG, %eax
	movl	%eax, %cr0

	/*
	 * The instruction after enabling paging in CR0 MUST be a branch.
	 */
	jmp	long_mode_active

long_mode_active:
	/*
	 * Long mode is now active but since we're still running with the
	 * original 16-bit CS we're actually in 16-bit compatibility mode.
	 *
	 * We have to load an intermediate GDT and IDT here that we know are
	 * in 32-bit space before we can use the kernel's GDT and IDT, which
	 * may be in the 64-bit address space, and since we're in compatibility
	 * mode, we only have access to 16 and 32-bit instructions at the
	 * moment.
	 */
	addr32 lgdtl	TEMPGDTOFF	/* load temporary GDT */
	addr32 lidtl	TEMPIDTOFF	/* load temporary IDT */

	/*
	 * Do a far transfer to 64-bit mode.  Set the CS selector to a 64-bit
	 * long mode selector (CS.L=1) in the temporary 32-bit GDT and jump
	 * to the real mode platter address of long_mode_64 as until the 64-bit
	 * CS is in place we don't have access to 64-bit instructions and thus
	 * can't reference a 64-bit %rip.
	 */
	pushl		$TEMP_CS64_SEL	/* new CS for the far return */
	addr32 pushl	LM64OFF		/* platter-relative addr of long_mode_64 */
	lretl				/* "return" into 64-bit CS */

	.globl	long_mode_64
long_mode_64:
	.code64
	/*
	 * We are now running in long mode with a 64-bit CS (EFER.LMA=1,
	 * CS.L=1) so we now have access to 64-bit instructions.
	 *
	 * First, set the 64-bit GDT base.
	 */
	.globl	rm_platter_pa
	movl	rm_platter_pa, %eax	/* physical address of the platter */
	lgdtq	GDTROFF(%rax)		/* load 64-bit GDT */

	/*
	 * Save the CPU number in %r11; get the value here since it's saved in
	 * the real mode platter.
	 */
	movl	CPUNOFF(%rax), %r11d

	/*
	 * Add rm_platter_pa to %rsp to point it to the same location as seen
	 * from 64-bit mode.
	 */
	addq	%rax, %rsp

	/*
	 * Now do an lretq to load CS with the appropriate selector for the
	 * kernel's 64-bit GDT and to start executing 64-bit setup code at the
	 * virtual address where boot originally loaded this code rather than
	 * the copy in the real mode platter's rm_code array as we've been
	 * doing so far.
	 */
	pushq	$KCS_SEL
	pushq	$kernel_cs_code
	lretq
	.globl real_mode_start_cpu_end
real_mode_start_cpu_end:
	nop

kernel_cs_code:
	/*
	 * Complete the balance of the setup we need to before executing
	 * 64-bit kernel code (namely init rsp, TSS, LGDT, FS and GS).
	 */
	.globl	rm_platter_va
	movq	rm_platter_va, %rax	/* kernel virtual addr of the platter */
	lidtq	IDTROFF(%rax)		/* switch to the kernel's IDT */

	movw	$KDS_SEL, %ax		/* kernel data selector for ds/es/ss */
	movw	%ax, %ds
	movw	%ax, %es
	movw	%ax, %ss

	movw	$KTSS_SEL, %ax		/* setup kernel TSS */
	ltr	%ax

	xorw	%ax, %ax		/* clear LDTR */
	lldt	%ax

	/*
	 * Set GS to the address of the per-cpu structure as contained in
	 * cpu[cpu_number].
	 *
	 * Unfortunately there's no way to set the 64-bit gsbase with a mov,
	 * so we have to stuff the low 32 bits in %eax and the high 32 bits in
	 * %edx, then call wrmsr.
	 */
	leaq	cpu(%rip), %rdi		/* cpu[]: 8-byte entries, index %r11 */
	movl	(%rdi, %r11, 8), %eax	/* low 32 bits of cpu[cpu_number] */
	movl	4(%rdi, %r11, 8), %edx	/* high 32 bits of cpu[cpu_number] */
	movl	$MSR_AMD_GSBASE, %ecx
	wrmsr

	/*
	 * Init FS and KernelGSBase.
	 *
	 * Based on code in mlsetup(), set them both to 8G (which shouldn't be
	 * valid until some 64-bit processes run); this will then cause an
	 * exception in any code that tries to index off them before they are
	 * properly setup.
	 */
	xorl	%eax, %eax		/* low 32 bits = 0 */
	movl	$2, %edx		/* high 32 bits = 2 (i.e. base = 8G) */
	movl	$MSR_AMD_FSBASE, %ecx
	wrmsr

	movl	$MSR_AMD_KGSBASE, %ecx	/* %edx:%eax still hold 8G */
	wrmsr

	/*
	 * Init %rsp to the exception stack set in tss_ist1 and create a legal
	 * AMD64 ABI stack frame
	 */
	movq	%gs:CPU_TSS, %rax
	movq	TSS_IST1(%rax), %rsp
	pushq	$0		/* null return address */
	pushq	$0		/* null frame pointer terminates stack trace */
	movq	%rsp, %rbp	/* stack aligned on 16-byte boundary */

	movq	%cr0, %rax
	andq	$~(CR0_TS|CR0_EM), %rax	/* clear emulate math chip bit */
	orq	$(CR0_MP|CR0_NE), %rax
	movq	%rax, %cr0		/* set machine status word */

	/*
	 * Before going any further, enable usage of page table NX bit if
	 * that's how our page tables are set up.  (EFER.NXE must be set
	 * before any NX-flagged page table entries are used, or a reserved
	 * bit fault results.)
	 */
	bt	$X86FSET_NX, x86_featureset(%rip)
	jnc	1f			/* feature bit clear: skip */
	movl	$MSR_AMD_EFER, %ecx
	rdmsr
	orl	$AMD_EFER_NXE, %eax
	wrmsr
1:

	/*
	 * Complete the rest of the setup and call mp_startup().
	 */
	movq	%gs:CPU_THREAD, %rax	/* get thread ptr */
	movq	T_PC(%rax), %rax	/* thread's start pc */
	INDIRECT_CALL_REG(rax)		/* call mp_startup_boot */
	/* not reached */
	int	$20			/* whoops, returned somehow! */

	SET_SIZE(real_mode_start_cpu)
 296 
	ENTRY_NP(real_mode_stop_cpu_stage1)

	/*
	 * Stage 1 of stopping a CPU: running in 16-bit real mode out of the
	 * real mode platter, set up %ds/%ss and trampoline into the stage 2
	 * halt code located at rm_platter_va->rm_cpu_halt_code.  Two variants
	 * follow, selected by which assembler built this file.
	 */
#if !defined(__GNUC_AS__)

	/*
	 * For vulcan as we need to do a .code32 and mentally invert the
	 * meaning of the addr16 and data16 prefixes to get 32-bit access when
	 * generating code to be executed in 16-bit mode (sigh...)
	 */
	.code32
	cli
	movw		%cs, %ax
	movw		%ax, %ds	/* load cs into ds */
	movw		%ax, %ss	/* and into ss */

	/*
	 * Jump to the stage 2 code in the rm_platter_va->rm_cpu_halt_code
	 */
	movw		$CPUHALTCODEOFF, %ax
	.byte		0xff, 0xe0	/* jmp *%ax, hand-encoded for vulcan as */

#else	/* __GNUC_AS__ */

	/*
	 * NOTE:  The GNU assembler automatically does the right thing to
	 *        generate data size operand prefixes based on the code size
	 *        generation mode (e.g. .code16, .code32, .code64) and as such
	 *        prefixes need not be used on instructions EXCEPT in the case
	 *        of address prefixes for code for which the reference is not
	 *        automatically of the default operand size.
	 */
	.code16
	cli
	movw		%cs, %ax
	movw		%ax, %ds	/* load cs into ds */
	movw		%ax, %ss	/* and into ss */

	/*
	 * Jump to the stage 2 code in the rm_platter_va->rm_cpu_halt_code
	 */
	movw		$CPUHALTCODEOFF, %ax
	jmp		*%ax

#endif	/* !__GNUC_AS__ */

	.globl real_mode_stop_cpu_stage1_end
real_mode_stop_cpu_stage1_end:
	nop

	SET_SIZE(real_mode_stop_cpu_stage1)
 347 
	ENTRY_NP(real_mode_stop_cpu_stage2)

	/*
	 * Stage 2 of stopping a CPU: store the 0xdead marker at the
	 * CPUHALTEDOFF platter offset to advertise that this CPU has reached
	 * the halt code (NOTE(review): presumably polled by the CPU doing the
	 * offlining -- confirm against the platter consumers), then spin in
	 * hlt with interrupts still disabled.
	 */
	movw		$0xdead, %ax
	movw		%ax, CPUHALTEDOFF	/* flag: this CPU is halted */

real_mode_stop_cpu_loop:
	/*
	 * Put CPU into halted state.
	 * Only INIT, SMI, NMI could break the loop.
	 */
	hlt
	jmp		real_mode_stop_cpu_loop

	.globl real_mode_stop_cpu_stage2_end
real_mode_stop_cpu_stage2_end:
	nop

	SET_SIZE(real_mode_stop_cpu_stage2)
 366