8956 Implement KPTI
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>

          --- old/usr/src/uts/i86pc/os/mp_pc.c
          +++ new/usr/src/uts/i86pc/os/mp_pc.c
[ 18 lines elided ]
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
  23   23   */
  24   24  /*
  25   25   * Copyright (c) 2010, Intel Corporation.
  26   26   * All rights reserved.
  27   27   */
  28   28  /*
  29      - * Copyright 2011 Joyent, Inc. All rights reserved.
       29 + * Copyright 2018 Joyent, Inc
  30   30   */
  31   31  
  32   32  /*
  33   33   * Welcome to the world of the "real mode platter".
  34   34   * See also startup.c, mpcore.s and apic.c for related routines.
  35   35   */
  36   36  
  37   37  #include <sys/types.h>
  38   38  #include <sys/systm.h>
  39   39  #include <sys/cpuvar.h>
[ 86 lines elided ]
 126  126  extern void *long_mode_64(void);
 127  127  #endif  /* __amd64 */
 128  128  
 129  129  /*ARGSUSED*/
 130  130  void
 131  131  rmp_gdt_init(rm_platter_t *rm)
 132  132  {
 133  133  
 134  134  #if defined(__amd64)
 135  135          /* Use the kas address space for the CPU startup thread. */
 136      -        if (MAKECR3(kas.a_hat->hat_htable->ht_pfn) > 0xffffffffUL)
      136 +        if (mmu_ptob(kas.a_hat->hat_htable->ht_pfn) > 0xffffffffUL) {
 137  137                  panic("Cannot initialize CPUs; kernel's 64-bit page tables\n"
 138  138                      "located above 4G in physical memory (@ 0x%lx)",
 139      -                    MAKECR3(kas.a_hat->hat_htable->ht_pfn));
      139 +                    mmu_ptob(kas.a_hat->hat_htable->ht_pfn));
      140 +        }
 140  141  
 141  142          /*
 142  143           * Setup pseudo-descriptors for temporary GDT and IDT for use ONLY
 143  144           * by code in real_mode_start_cpu():
 144  145           *
 145  146           * GDT[0]:  NULL selector
 146  147           * GDT[1]:  64-bit CS: Long = 1, Present = 1, bits 12, 11 = 1
 147  148           *
 148  149           * Clear the IDT as interrupts will be off and a limit of 0 will cause
 149  150           * the CPU to triple fault and reset on an NMI, seemingly as reasonable
[ 17 lines elided ]
 167  168              (uint32_t)((uintptr_t)long_mode_64 -
 168  169              (uintptr_t)real_mode_start_cpu);
 169  170  #endif  /* __amd64 */
 170  171  }
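
Note on the change above: the 4G check now compares the raw physical address of kas's
top-level page table, via mmu_ptob() on its pfn, rather than MAKECR3(), since MAKECR3()
takes a PCID argument after this change (see rm_pdbr later in this file) and is no longer
just a bare physical address. Below is a standalone sketch of the arithmetic; the page
shift and the two-argument MAKECR3() shape shown here are assumptions for illustration,
not the illumos definitions.

        /*
         * Standalone sketch (not illumos source).  sketch_ptob() turns the
         * top-level page table's pfn into a physical byte address, which
         * must fit the 32-bit CR3 slot used by real-mode CPU startup.
         */
        #include <stdint.h>
        #include <stdio.h>

        #define SKETCH_PAGESHIFT        12      /* 4K pages, as on x86 */
        #define sketch_ptob(pfn)        ((uint64_t)(pfn) << SKETCH_PAGESHIFT)
        #define SKETCH_PCID_NONE        0
        /* Assumed shape of a two-argument MAKECR3(): physical address | PCID. */
        #define sketch_makecr3(pfn, pcid)       (sketch_ptob(pfn) | (pcid))

        int
        main(void)
        {
                uint64_t pfn = 0x1234;  /* hypothetical pfn of kas's root table */

                if (sketch_ptob(pfn) > 0xffffffffULL) {
                        printf("table at %#llx is out of reach of real-mode startup\n",
                            (unsigned long long)sketch_ptob(pfn));
                } else {
                        printf("startup CR3: %#llx\n",
                            (unsigned long long)sketch_makecr3(pfn, SKETCH_PCID_NONE));
                }
                return (0);
        }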
 171  172  
 172  173  static void *
 173  174  mach_cpucontext_alloc_tables(struct cpu *cp)
 174  175  {
 175  176          tss_t *ntss;
 176  177          struct cpu_tables *ct;
      178 +        size_t ctsize;
 177  179  
 178  180          /*
 179  181           * Allocate space for stack, tss, gdt and idt. We round the size
 180  182           * allotted for cpu_tables up, so that the TSS is on a unique page.
 181  183           * This is more efficient when running in virtual machines.
 182  184           */
 183      -        ct = kmem_zalloc(P2ROUNDUP(sizeof (*ct), PAGESIZE), KM_SLEEP);
      185 +        ctsize = P2ROUNDUP(sizeof (*ct), PAGESIZE);
      186 +        ct = kmem_zalloc(ctsize, KM_SLEEP);
 184  187          if ((uintptr_t)ct & PAGEOFFSET)
 185  188                  panic("mach_cpucontext_alloc_tables: cpu%d misaligned tables",
 186  189                      cp->cpu_id);
 187  190  
 188  191          ntss = cp->cpu_tss = &ct->ct_tss;
 189  192  
 190  193  #if defined(__amd64)
      194 +        uintptr_t va;
      195 +        size_t len;
 191  196  
 192  197          /*
 193  198           * #DF (double fault).
 194  199           */
 195      -        ntss->tss_ist1 = (uint64_t)&ct->ct_stack[sizeof (ct->ct_stack)];
      200 +        ntss->tss_ist1 = (uintptr_t)&ct->ct_stack1[sizeof (ct->ct_stack1)];
 196  201  
      202 +        /*
       203 +         * NMI (non-maskable interrupt)
      204 +         */
      205 +        ntss->tss_ist2 = (uintptr_t)&ct->ct_stack2[sizeof (ct->ct_stack2)];
      206 +
      207 +        /*
      208 +         * #MC (machine check exception / hardware error)
      209 +         */
      210 +        ntss->tss_ist3 = (uintptr_t)&ct->ct_stack3[sizeof (ct->ct_stack3)];
      211 +
      212 +        /*
      213 +         * #DB, #BP debug interrupts and KDI/kmdb
      214 +         */
      215 +        ntss->tss_ist4 = (uintptr_t)&cp->cpu_m.mcpu_kpti_dbg.kf_tr_rsp;
      216 +
      217 +        if (kpti_enable == 1) {
      218 +                /*
      219 +                 * #GP, #PF, #SS fault interrupts
      220 +                 */
      221 +                ntss->tss_ist5 = (uintptr_t)&cp->cpu_m.mcpu_kpti_flt.kf_tr_rsp;
      222 +
      223 +                /*
      224 +                 * Used by all other interrupts
      225 +                 */
      226 +                ntss->tss_ist6 = (uint64_t)&cp->cpu_m.mcpu_kpti.kf_tr_rsp;
      227 +
      228 +                /*
      229 +                 * On AMD64 we need to make sure that all of the pages of the
      230 +                 * struct cpu_tables are punched through onto the user CPU for
      231 +                 * kpti.
      232 +                 *
      233 +                 * The final page will always be the TSS, so treat that
      234 +                 * separately.
      235 +                 */
      236 +                for (va = (uintptr_t)ct, len = ctsize - MMU_PAGESIZE;
      237 +                    len >= MMU_PAGESIZE;
      238 +                    len -= MMU_PAGESIZE, va += MMU_PAGESIZE) {
      239 +                        /* The doublefault stack must be RW */
      240 +                        hati_cpu_punchin(cp, va, PROT_READ | PROT_WRITE);
      241 +                }
      242 +                ASSERT3U((uintptr_t)ntss, ==, va);
      243 +                hati_cpu_punchin(cp, (uintptr_t)ntss, PROT_READ);
      244 +        }
      245 +
 197  246  #elif defined(__i386)
 198  247  
 199  248          ntss->tss_esp0 = ntss->tss_esp1 = ntss->tss_esp2 = ntss->tss_esp =
 200      -            (uint32_t)&ct->ct_stack[sizeof (ct->ct_stack)];
      249 +            (uint32_t)&ct->ct_stack1[sizeof (ct->ct_stack1)];
 201  250  
 202  251          ntss->tss_ss0 = ntss->tss_ss1 = ntss->tss_ss2 = ntss->tss_ss = KDS_SEL;
 203  252  
 204  253          ntss->tss_eip = (uint32_t)cp->cpu_thread->t_pc;
 205  254  
 206  255          ntss->tss_cs = KCS_SEL;
 207  256          ntss->tss_ds = ntss->tss_es = KDS_SEL;
 208  257          ntss->tss_fs = KFS_SEL;
 209  258          ntss->tss_gs = KGS_SEL;
 210  259  
[ 90 lines elided ]
 301  350           * Now copy all that we've set up onto the real mode platter
 302  351           * for the real mode code to digest as part of starting the cpu.
 303  352           */
 304  353          rm->rm_idt_base = cp->cpu_idt;
 305  354          rm->rm_idt_lim = sizeof (*cp->cpu_idt) * NIDT - 1;
 306  355          rm->rm_gdt_base = cp->cpu_gdt;
 307  356          rm->rm_gdt_lim = sizeof (*cp->cpu_gdt) * NGDT - 1;
 308  357  
 309  358          /*
 310  359           * CPU needs to access kernel address space after powering on.
 311      -         * When hot-adding CPU at runtime, directly use top level page table
 312      -         * of kas other than the return value of getcr3(). getcr3() returns
 313      -         * current process's top level page table, which may be different from
 314      -         * the one of kas.
 315  360           */
 316      -        rm->rm_pdbr = MAKECR3(kas.a_hat->hat_htable->ht_pfn);
      361 +        rm->rm_pdbr = MAKECR3(kas.a_hat->hat_htable->ht_pfn, PCID_NONE);
 317  362          rm->rm_cpu = cp->cpu_id;
 318  363  
 319  364          /*
 320      -         * For hot-adding CPU at runtime, Machine Check and Performance Counter
 321      -         * should be disabled. They will be enabled on demand after CPU powers
 322      -         * on successfully
      365 +         * We need to mask off any bits set on our boot CPU that can't apply
      366 +         * while the subject CPU is initializing.  If appropriate, they are
      367 +         * enabled later on.
 323  368           */
 324  369          rm->rm_cr4 = getcr4();
 325      -        rm->rm_cr4 &= ~(CR4_MCE | CR4_PCE);
      370 +        rm->rm_cr4 &= ~(CR4_MCE | CR4_PCE | CR4_PCIDE);
 326  371  
 327  372          rmp_gdt_init(rm);
 328  373  
 329  374          return (ct);
 330  375  }
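
For reference, here is a standalone sketch of the KPTI "punch-in" walk added to
mach_cpucontext_alloc_tables() above: the cpu_tables allocation is page-aligned and
rounded up to whole pages, every page except the last holds IST stack space and is
exposed to the per-CPU user page tables read-write, and the final page holds only the
TSS and is exposed read-only. The struct layout, stack size, and punchin stub below
are illustrative assumptions; only the loop arithmetic mirrors the diff.

        #include <assert.h>
        #include <stdint.h>
        #include <stdio.h>

        #define SK_PAGESIZE     4096UL
        #define SK_STKSZ        (5 * SK_PAGESIZE)       /* assumed IST stack size */
        #define SK_P2ROUNDUP(x, a)      (((x) + (a) - 1) & ~((a) - 1))

        #define SK_PROT_READ    0x1
        #define SK_PROT_WRITE   0x2

        /* Assumed layout: IST stacks first, 64-bit TSS (104 bytes) on the last page. */
        struct sk_cpu_tables {
                char    ct_stack1[SK_STKSZ];    /* #DF stack */
                char    ct_stack2[SK_STKSZ];    /* NMI stack */
                char    ct_stack3[SK_STKSZ];    /* #MC stack */
                char    ct_tss[104];
        };

        /* Stand-in for hati_cpu_punchin(): just report the mapping it would create. */
        static void
        sk_punchin(uintptr_t va, uint32_t prot)
        {
                printf("map %#lx %s\n", (unsigned long)va,
                    (prot & SK_PROT_WRITE) ? "rw" : "ro");
        }

        int
        main(void)
        {
                static struct sk_cpu_tables ct __attribute__((aligned(4096)));
                size_t ctsize = SK_P2ROUNDUP(sizeof (ct), SK_PAGESIZE);
                uintptr_t va = (uintptr_t)&ct;
                size_t len;

                /* Every page except the last is stack space and must be writable. */
                for (len = ctsize - SK_PAGESIZE; len >= SK_PAGESIZE;
                    len -= SK_PAGESIZE, va += SK_PAGESIZE)
                        sk_punchin(va, SK_PROT_READ | SK_PROT_WRITE);

                /* The loop leaves va on the final page, which holds only the TSS. */
                assert((uintptr_t)&ct.ct_tss == va);
                sk_punchin(va, SK_PROT_READ);
                return (0);
        }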
 331  376  
 332  377  void
 333  378  mach_cpucontext_xfree(struct cpu *cp, void *arg, int err, int optype)
 334  379  {
 335  380          struct cpu_tables *ct = arg;
[ 324 lines elided ]
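
Closing note on the PCID-related pieces of this file: rm_pdbr is built with
MAKECR3(..., PCID_NONE) and CR4_PCIDE is stripped from rm_cr4 because, once CR4.PCIDE
is set, the low 12 bits of CR3 carry a PCID, and PCIDE cannot be enabled while CR3
holds a nonzero PCID. A standalone sketch of that encoding follows; the helper and
the specific PCID values are hypothetical, not the illumos definitions.

        #include <stdint.h>
        #include <stdio.h>

        #define SK_PAGESHIFT    12
        #define SK_PCID_NONE    0x0ULL
        #define SK_PCID_KERNEL  0x1ULL          /* hypothetical kernel PCID */
        #define SK_PCID_USER    0x2ULL          /* hypothetical user PCID */
        #define SK_CR4_PCIDE    (1ULL << 17)    /* CR4.PCIDE */

        /* With PCIDs enabled, CR3 = top-level table physical address | 12-bit PCID. */
        static uint64_t
        sk_makecr3(uint64_t pfn, uint64_t pcid)
        {
                return ((pfn << SK_PAGESHIFT) | pcid);
        }

        int
        main(void)
        {
                uint64_t cr4 = SK_CR4_PCIDE | 0x6f0;    /* pretend boot-CPU CR4 image */
                uint64_t pfn = 0x1234;                  /* hypothetical kas root pfn */

                /*
                 * A freshly started CPU comes up with PCIDE clear, and PCIDE cannot
                 * be set while CR3 carries a nonzero PCID, so the real-mode platter
                 * gets a plain CR3 and a CR4 image with the bit masked off.
                 */
                cr4 &= ~SK_CR4_PCIDE;
                printf("startup: cr4 %#llx, cr3 %#llx\n", (unsigned long long)cr4,
                    (unsigned long long)sk_makecr3(pfn, SK_PCID_NONE));

                /* Once PCIDE is on, kernel and user CR3 values carry distinct PCIDs. */
                printf("later:   kernel cr3 %#llx, user cr3 %#llx\n",
                    (unsigned long long)sk_makecr3(pfn, SK_PCID_KERNEL),
                    (unsigned long long)sk_makecr3(pfn, SK_PCID_USER));
                return (0);
        }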