8956 Implement KPTI
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>

          --- old/usr/src/uts/i86pc/os/mp_startup.c
          +++ new/usr/src/uts/i86pc/os/mp_startup.c
[ 19 lines elided ]
  20   20   */
  21   21  
  22   22  /*
  23   23   * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
  24   24   */
  25   25  /*
  26   26   * Copyright (c) 2010, Intel Corporation.
  27   27   * All rights reserved.
  28   28   */
  29   29  /*
  30      - * Copyright 2016 Joyent, Inc.
       30 + * Copyright 2018 Joyent, Inc.
  31   31   * Copyright 2013 Nexenta Systems, Inc.  All rights reserved.
  32   32   */
  33   33  
  34   34  #include <sys/types.h>
  35   35  #include <sys/thread.h>
  36   36  #include <sys/cpuvar.h>
  37   37  #include <sys/cpu.h>
  38   38  #include <sys/t_lock.h>
  39   39  #include <sys/param.h>
  40   40  #include <sys/proc.h>
[ 32 lines elided ]
  73   73  #include <sys/memnode.h>
  74   74  #include <sys/pci_cfgspace.h>
  75   75  #include <sys/mach_mmu.h>
  76   76  #include <sys/sysmacros.h>
  77   77  #if defined(__xpv)
  78   78  #include <sys/hypervisor.h>
  79   79  #endif
  80   80  #include <sys/cpu_module.h>
  81   81  #include <sys/ontrap.h>
  82   82  
  83      -struct cpu      cpus[1];                        /* CPU data */
  84      -struct cpu      *cpu[NCPU] = {&cpus[0]};        /* pointers to all CPUs */
  85      -struct cpu      *cpu_free_list;                 /* list for released CPUs */
  86      -cpu_core_t      cpu_core[NCPU];                 /* cpu_core structures */
       83 +struct cpu      cpus[1] __aligned(MMU_PAGESIZE);
       84 +struct cpu      *cpu[NCPU] = {&cpus[0]};
       85 +struct cpu      *cpu_free_list;
       86 +cpu_core_t      cpu_core[NCPU];
  87   87  
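
The cpu_t for the boot CPU is now page-aligned. Under KPTI, each CPU's cpu_t is among the few kernel structures that must remain mapped in the user (trampoline) page tables, and MMU_PAGESIZE alignment keeps unrelated kernel data off those exposed pages. A minimal sketch of the idea, assuming a GCC-style compiler and a 4 KiB base page (MMU_PAGESIZE and the __aligned wrapper below are stand-ins for the kernel's own definitions):

    /*
     * Sketch: page-align a per-CPU structure so that exactly its own
     * pages can be mapped into a restricted address space without
     * pulling in adjacent kernel data.
     */
    #define MMU_PAGESIZE    4096
    #define __aligned(x)    __attribute__((__aligned__(x)))

    struct cpu {
            int     cpu_id;
            /* ... remainder of the per-CPU state ... */
    };

    /* The boot CPU's cpu_t starts on a page boundary. */
    struct cpu cpus[1] __aligned(MMU_PAGESIZE);
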
  88   88  #define cpu_next_free   cpu_prev
  89   89  
  90   90  /*
  91   91   * Useful for disabling MP bring-up on an MP capable system.
  92   92   */
  93   93  int use_mp = 1;
  94   94  
  95   95  /*
  96   96   * to be set by a PSM to indicate what cpus
[ 64 lines elided ]
 161  161  
 162  162  /*
 163  163   * Configure syscall support on this CPU.
 164  164   */
 165  165  /*ARGSUSED*/
 166  166  void
 167  167  init_cpu_syscall(struct cpu *cp)
 168  168  {
 169  169          kpreempt_disable();
 170  170  
 171      -#if defined(__amd64)
 172  171          if (is_x86_feature(x86_featureset, X86FSET_MSR) &&
 173  172              is_x86_feature(x86_featureset, X86FSET_ASYSC)) {
 174  173                  uint64_t flags;
 175  174  
 176      -#if !defined(__lint)
      175 +#if !defined(__xpv)
 177  176                  /*
 178  177                   * The syscall instruction imposes a certain ordering on
 179  178                   * segment selectors, so we double-check that ordering
 180  179                   * here.
 181  180                   */
 182      -                ASSERT(KDS_SEL == KCS_SEL + 8);
 183      -                ASSERT(UDS_SEL == U32CS_SEL + 8);
 184      -                ASSERT(UCS_SEL == U32CS_SEL + 16);
      181 +                CTASSERT(KDS_SEL == KCS_SEL + 8);
      182 +                CTASSERT(UDS_SEL == U32CS_SEL + 8);
      183 +                CTASSERT(UCS_SEL == U32CS_SEL + 16);
 185  184  #endif
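
The selector-ordering checks move from runtime ASSERTs to CTASSERTs: the GDT layout is fixed at build time, so a violation can fail the compile rather than only tripping on a DEBUG kernel (the guard also becomes !__xpv, presumably because the selector layout differs under the Xen hypervisor). A compile-time assertion of this style can be built from an array type whose size goes negative when the predicate is false; a minimal sketch, where the macro name and selector values are illustrative rather than the kernel's definitions:

    /*
     * The typedef is ill-formed when the predicate is false, so a
     * wrong selector layout breaks the build instead of panicking.
     */
    #define MY_CTASSERT(x)  typedef char my_ctassert_t[(x) ? 1 : -1]

    #define KCS_SEL 0x30    /* illustrative kernel %cs selector */
    #define KDS_SEL 0x38    /* illustrative kernel %ds selector */

    MY_CTASSERT(KDS_SEL == KCS_SEL + 8);
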
      185 +
 186  186                  /*
 187  187                   * Turn syscall/sysret extensions on.
 188  188                   */
 189  189                  cpu_asysc_enable();
 190  190  
 191  191                  /*
 192  192                   * Program the magic registers ..
 193  193                   */
 194  194                  wrmsr(MSR_AMD_STAR,
 195  195                      ((uint64_t)(U32CS_SEL << 16 | KCS_SEL)) << 32);
 196      -                wrmsr(MSR_AMD_LSTAR, (uint64_t)(uintptr_t)sys_syscall);
 197      -                wrmsr(MSR_AMD_CSTAR, (uint64_t)(uintptr_t)sys_syscall32);
      196 +                if (kpti_enable == 1) {
      197 +                        wrmsr(MSR_AMD_LSTAR,
      198 +                            (uint64_t)(uintptr_t)tr_sys_syscall);
      199 +                        wrmsr(MSR_AMD_CSTAR,
      200 +                            (uint64_t)(uintptr_t)tr_sys_syscall32);
      201 +                } else {
      202 +                        wrmsr(MSR_AMD_LSTAR,
      203 +                            (uint64_t)(uintptr_t)sys_syscall);
      204 +                        wrmsr(MSR_AMD_CSTAR,
      205 +                            (uint64_t)(uintptr_t)sys_syscall32);
      206 +                }
 198  207  
 199  208                  /*
 200  209                   * This list of flags is masked off the incoming
 201  210                   * %rfl when we enter the kernel.
 202  211                   */
 203  212                  flags = PS_IE | PS_T;
 204  213                  if (is_x86_feature(x86_featureset, X86FSET_SMAP) == B_TRUE)
 205  214                          flags |= PS_ACHK;
 206  215                  wrmsr(MSR_AMD_SFMASK, flags);
 207  216          }
 208      -#endif
 209  217  
 210  218          /*
 211      -         * On 32-bit kernels, we use sysenter/sysexit because it's too
 212      -         * hard to use syscall/sysret, and it is more portable anyway.
 213      -         *
 214  219           * On 64-bit kernels on Nocona machines, the 32-bit syscall
 215  220           * variant isn't available to 32-bit applications, but sysenter is.
 216  221           */
 217  222          if (is_x86_feature(x86_featureset, X86FSET_MSR) &&
 218  223              is_x86_feature(x86_featureset, X86FSET_SEP)) {
 219  224  
 220      -#if !defined(__lint)
      225 +#if !defined(__xpv)
 221  226                  /*
 222  227                   * The sysenter instruction imposes a certain ordering on
 223  228                   * segment selectors, so we double-check that ordering
 224  229                   * here. See "sysenter" in Intel document 245471-012, "IA-32
 225  230                   * Intel Architecture Software Developer's Manual Volume 2:
 226  231                   * Instruction Set Reference"
 227  232                   */
 228      -                ASSERT(KDS_SEL == KCS_SEL + 8);
      233 +                CTASSERT(KDS_SEL == KCS_SEL + 8);
 229  234  
 230      -                ASSERT32(UCS_SEL == ((KCS_SEL + 16) | 3));
 231      -                ASSERT32(UDS_SEL == UCS_SEL + 8);
 232      -
 233      -                ASSERT64(U32CS_SEL == ((KCS_SEL + 16) | 3));
 234      -                ASSERT64(UDS_SEL == U32CS_SEL + 8);
      235 +                CTASSERT(U32CS_SEL == ((KCS_SEL + 16) | 3));
      236 +                CTASSERT(UDS_SEL == U32CS_SEL + 8);
 235  237  #endif
 236  238  
 237  239                  cpu_sep_enable();
 238  240  
 239  241                  /*
  240  242                   * resume() sets this value to the base of the thread's stack
 241  243                   * via a context handler.
 242  244                   */
 243  245                  wrmsr(MSR_INTC_SEP_ESP, 0);
 244      -                wrmsr(MSR_INTC_SEP_EIP, (uint64_t)(uintptr_t)sys_sysenter);
      246 +
      247 +                if (kpti_enable == 1) {
      248 +                        wrmsr(MSR_INTC_SEP_EIP,
      249 +                            (uint64_t)(uintptr_t)tr_sys_sysenter);
      250 +                } else {
      251 +                        wrmsr(MSR_INTC_SEP_EIP,
      252 +                            (uint64_t)(uintptr_t)sys_sysenter);
      253 +                }
 245  254          }
 246  255  
 247  256          kpreempt_enable();
 248  257  }
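
Both entry paths now key off kpti_enable: with KPTI active, MSR_AMD_LSTAR/MSR_AMD_CSTAR and MSR_INTC_SEP_EIP must point at trampoline entry points (tr_sys_syscall, tr_sys_syscall32, tr_sys_sysenter) that live in text mapped in the user page tables and switch %cr3 to the kernel page tables before reaching the normal handlers. The selection reduces to the sketch below; the helper function is hypothetical, while kpti_enable and the entry symbols are the ones programmed above:

    #include <stdint.h>

    extern int kpti_enable;
    extern void sys_syscall(void), tr_sys_syscall(void);

    /* Pick the syscall entry point the MSR should be programmed with. */
    static uintptr_t
    syscall_entry(void)
    {
            /* KPTI on: enter via the trampoline, which swaps %cr3. */
            return (kpti_enable == 1 ?
                (uintptr_t)tr_sys_syscall : (uintptr_t)sys_syscall);
    }
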
 249  258  
 250  259  #if !defined(__xpv)
 251  260  /*
 252  261   * Configure per-cpu ID GDT
 253  262   */
 254  263  static void
[ 156 lines elided ]
 411  420  
 412  421  #if defined(__i386)
 413  422          /*
 414  423           * setup kernel %gs.
 415  424           */
 416  425          set_usegd(&cp->cpu_gdt[GDT_GS], cp, sizeof (struct cpu) -1, SDT_MEMRWA,
 417  426              SEL_KPL, 0, 1);
 418  427  #endif
 419  428  
 420  429          /*
 421      -         * If we have more than one node, each cpu gets a copy of IDT
 422      -         * local to its node. If this is a Pentium box, we use cpu 0's
 423      -         * IDT. cpu 0's IDT has been made read-only to workaround the
 424      -         * cmpxchgl register bug
      430 +         * Allocate pages for the CPU LDT.
 425  431           */
 426      -        if (system_hardware.hd_nodes && x86_type != X86_TYPE_P5) {
      432 +        cp->cpu_m.mcpu_ldt = kmem_zalloc(LDT_CPU_SIZE, KM_SLEEP);
      433 +        cp->cpu_m.mcpu_ldt_len = 0;
      434 +
      435 +        /*
       436 +         * Allocate a per-CPU IDT and initialize it from the currently
       437 +         * running CPU's IDT.
      438 +         */
 427  439  #if !defined(__lint)
 428      -                ASSERT((sizeof (*CPU->cpu_idt) * NIDT) <= PAGESIZE);
      440 +        ASSERT((sizeof (*CPU->cpu_idt) * NIDT) <= PAGESIZE);
 429  441  #endif
 430      -                cp->cpu_idt = kmem_zalloc(PAGESIZE, KM_SLEEP);
 431      -                bcopy(CPU->cpu_idt, cp->cpu_idt, PAGESIZE);
 432      -        } else {
 433      -                cp->cpu_idt = CPU->cpu_idt;
 434      -        }
      442 +        cp->cpu_idt = kmem_alloc(PAGESIZE, KM_SLEEP);
      443 +        bcopy(CPU->cpu_idt, cp->cpu_idt, PAGESIZE);
 435  444  
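
The old node-count and Pentium (cmpxchgl workaround) special cases are gone: every CPU now gets its own IDT, seeded from the running CPU's, plus pages for its own LDT. Under KPTI the IDT in particular must be visible to the hardware at interrupt delivery time, before any page-table switch, so these descriptor tables become per-CPU structures. A userland sketch of the allocate-and-copy step above (in the kernel this is the kmem_alloc()/bcopy() pair; the gate size here assumes 64-bit IDT entries):

    #include <stdlib.h>
    #include <string.h>

    #define NIDT            256     /* x86 interrupt vectors */
    #define GATE_SIZE       16      /* bytes per 64-bit IDT gate */

    /* Allocate a new IDT and seed it from an existing one. */
    static void *
    clone_idt(const void *src_idt)
    {
            void *idt = malloc(NIDT * GATE_SIZE);
            if (idt != NULL)
                    memcpy(idt, src_idt, NIDT * GATE_SIZE);
            return (idt);
    }
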
 436  445          /*
 437  446           * alloc space for cpuid info
 438  447           */
 439  448          cpuid_alloc_space(cp);
 440  449  #if !defined(__xpv)
 441  450          if (is_x86_feature(x86_featureset, X86FSET_MWAIT) &&
 442  451              idle_cpu_prefer_mwait) {
 443  452                  cp->cpu_m.mcpu_mwait = cpuid_mwait_alloc(cp);
 444  453                  cp->cpu_m.mcpu_idle_cpu = cpu_idle_mwait;
[ 117 lines elided ]
 562  571                  cpuid_mwait_free(cp);
 563  572                  cp->cpu_m.mcpu_mwait = NULL;
 564  573          }
 565  574  #endif
 566  575          cpuid_free_space(cp);
 567  576  
 568  577          if (cp->cpu_idt != CPU->cpu_idt)
 569  578                  kmem_free(cp->cpu_idt, PAGESIZE);
 570  579          cp->cpu_idt = NULL;
 571  580  
      581 +        kmem_free(cp->cpu_m.mcpu_ldt, LDT_CPU_SIZE);
      582 +        cp->cpu_m.mcpu_ldt = NULL;
      583 +        cp->cpu_m.mcpu_ldt_len = 0;
      584 +
 572  585          kmem_free(cp->cpu_gdt, PAGESIZE);
 573  586          cp->cpu_gdt = NULL;
 574  587  
 575  588          if (cp->cpu_supp_freqs != NULL) {
 576  589                  size_t len = strlen(cp->cpu_supp_freqs) + 1;
 577  590                  kmem_free(cp->cpu_supp_freqs, len);
 578  591                  cp->cpu_supp_freqs = NULL;
 579  592          }
 580  593  
 581  594          teardown_vaddr_for_ppcopy(cp);
[ 1194 lines elided ]
1776 1789           * because the cpu_lock is held by the control CPU which is running
1777 1790           * mp_start_cpu_common().
1778 1791           * Need to clear CPU_QUIESCED flag before calling any function which
1779 1792           * may cause thread context switching, such as kmem_alloc() etc.
1780 1793           * The idle thread checks for CPU_QUIESCED flag and loops for ever if
1781 1794           * it's set. So the startup thread may have no chance to switch back
1782 1795           * again if it's switched away with CPU_QUIESCED set.
1783 1796           */
1784 1797          cp->cpu_flags &= ~(CPU_POWEROFF | CPU_QUIESCED);
1785 1798  
     1799 +        enable_pcid();
     1800 +
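
enable_pcid() is new in the startup path for secondary CPUs. KPTI swaps %cr3 on every kernel entry and exit; when the processor supports PCID, the kernel and user address spaces get distinct context IDs so those swaps need not flush the TLB. Enabling the feature reduces to a CPUID check plus setting CR4.PCIDE, roughly as sketched below; getcr4()/setcr4() mirror the kernel's accessors, and the feature test is a hypothetical stand-in:

    #define CR4_PCIDE       0x20000         /* CR4 bit 17: enable PCID */

    extern unsigned long getcr4(void);      /* read %cr4 */
    extern void setcr4(unsigned long);      /* write %cr4 */
    extern int cpuid_has_pcid(void);        /* hypothetical feature check */

    static void
    pcid_enable_sketch(void)
    {
            if (cpuid_has_pcid())
                    setcr4(getcr4() | CR4_PCIDE);
    }
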
1786 1801          /*
1787 1802           * Setup this processor for XSAVE.
1788 1803           */
1789 1804          if (fp_save_mech == FP_XSAVE) {
1790 1805                  xsave_setup_msr(cp);
1791 1806          }
1792 1807  
1793 1808          cpuid_pass2(cp);
1794 1809          cpuid_pass3(cp);
1795 1810          cpuid_pass4(cp, NULL);
↓ open down ↓ 301 lines elided ↑ open up ↑