Print this page
8956 Implement KPTI
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>

Split Close
Expand all
Collapse all
          --- old/usr/src/uts/i86pc/os/startup.c
          +++ new/usr/src/uts/i86pc/os/startup.c
↓ open down ↓ 15 lines elided ↑ open up ↑
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
  23   23   * Copyright (c) 1993, 2010, Oracle and/or its affiliates. All rights reserved.
  24   24   * Copyright 2012 DEY Storage Systems, Inc.  All rights reserved.
  25   25   * Copyright 2017 Nexenta Systems, Inc.
  26      - * Copyright 2015 Joyent, Inc.
       26 + * Copyright (c) 2018 Joyent, Inc.
  27   27   * Copyright (c) 2015 by Delphix. All rights reserved.
  28   28   */
  29   29  /*
  30   30   * Copyright (c) 2010, Intel Corporation.
  31   31   * All rights reserved.
  32   32   */
  33   33  
  34   34  #include <sys/types.h>
  35   35  #include <sys/t_lock.h>
  36   36  #include <sys/param.h>
↓ open down ↓ 402 lines elided ↑ open up ↑
 439  439   *                      |                       |
 440  440   * 0xFFFFFFFF.FFC00000  |-----------------------|- ARGSBASE
 441  441   *                      |       debugger (?)    |
 442  442   * 0xFFFFFFFF.FF800000  |-----------------------|- SEGDEBUGBASE
 443  443   *                      |      unused           |
 444  444   *                      +-----------------------+
 445  445   *                      |      Kernel Data      |
 446  446   * 0xFFFFFFFF.FBC00000  |-----------------------|
 447  447   *                      |      Kernel Text      |
 448  448   * 0xFFFFFFFF.FB800000  |-----------------------|- KERNEL_TEXT
 449      - *                      |---       GDT       ---|- GDT page (GDT_VA)
 450  449   *                      |---    debug info   ---|- debug info (DEBUG_INFO_VA)
      450 + *                      |---       GDT       ---|- GDT page (GDT_VA)
      451 + *                      |---       IDT       ---|- IDT page (IDT_VA)
      452 + *                      |---       LDT       ---|- LDT pages (LDT_VA)
 451  453   *                      |                       |
 452  454   *                      |      Core heap        | (used for loadable modules)
 453  455   * 0xFFFFFFFF.C0000000  |-----------------------|- core_base / ekernelheap
 454  456   *                      |        Kernel         |
 455  457   *                      |         heap          |
 456  458   * 0xFFFFFXXX.XXX00000  |-----------------------|- kernelheap (floating)
 457  459   *                      |        segmap         |
 458  460   * 0xFFFFFXXX.XXX00000  |-----------------------|- segmap_start (floating)
 459  461   *                      |    device mappings    |
 460  462   * 0xFFFFFXXX.XXX00000  |-----------------------|- toxic_addr (floating)
↓ open down ↓ 483 lines elided ↑ open up ↑
 944  946          if (seg_attach(&kas, kpm_vbase, kpm_size, segkpm) < 0)
 945  947                  panic("cannot attach segkpm");
 946  948  
 947  949          b.prot = PROT_READ | PROT_WRITE;
 948  950          b.nvcolors = 1;
 949  951  
 950  952          if (segkpm_create(segkpm, (caddr_t)&b) != 0)
 951  953                  panic("segkpm_create segkpm");
 952  954  
 953  955          rw_exit(&kas.a_lock);
      956 +
      957 +        kpm_enable = 1;
      958 +
      959 +        /*
      960 +         * As the KPM was disabled while setting up the system, go back and fix
      961 +         * CPU zero's access to its user page table. This is a bit gross, but
      962 +         * we have a chicken and egg problem otherwise.
      963 +         */
      964 +        ASSERT(CPU->cpu_hat_info->hci_user_l3ptes == NULL);
      965 +        CPU->cpu_hat_info->hci_user_l3ptes =
      966 +            (x86pte_t *)hat_kpm_mapin_pfn(CPU->cpu_hat_info->hci_user_l3pfn);
 954  967  }
 955  968  
 956  969  /*
 957  970   * The debug info page provides enough information to allow external
 958  971   * inspectors (e.g. when running under a hypervisor) to bootstrap
 959  972   * themselves into allowing full-blown kernel debugging.
 960  973   */
 961  974  static void
 962  975  init_debug_info(void)
 963  976  {
↓ open down ↓ 443 lines elided ↑ open up ↑
1407 1420  #endif
1408 1421  }
1409 1422  
1410 1423  /*
1411 1424   * Lay out the kernel's part of the address space and initialize the kmem
1411 1424   * allocator.
1412 1425   */
1413 1426  static void
1414 1427  startup_kmem(void)
1415 1428  {
1416 1429          extern void page_set_colorequiv_arr(void);
     1430 +#if !defined(__xpv)
     1431 +        extern uint64_t kpti_kbase;
     1432 +#endif
1417 1433  
1418 1434          PRM_POINT("startup_kmem() starting...");
1419 1435  
1420 1436  #if defined(__amd64)
1421 1437          if (eprom_kernelbase && eprom_kernelbase != KERNELBASE)
1422 1438                  cmn_err(CE_NOTE, "!kernelbase cannot be changed on 64-bit "
1423 1439                      "systems.");
1424 1440          kernelbase = segkpm_base - KERNEL_REDZONE_SIZE;
1425 1441          core_base = (uintptr_t)COREHEAP_BASE;
1426 1442          core_size = (size_t)MISC_VA_BASE - COREHEAP_BASE;
↓ open down ↓ 42 lines elided ↑ open up ↑
1469 1485           * XXX  The problem with this sort of hackery is that the
1470 1486           *      compiler just may feel like putting the const declarations
1471 1487           *      (in param.c) into the .text section.  Perhaps they should
1472 1488           *      just be declared as variables there?
1473 1489           */
1474 1490  
1475 1491          *(uintptr_t *)&_kernelbase = kernelbase;
1476 1492          *(uintptr_t *)&_userlimit = kernelbase;
1477 1493  #if defined(__amd64)
1478 1494          *(uintptr_t *)&_userlimit -= KERNELBASE - USERLIMIT;
     1495 +#if !defined(__xpv)
     1496 +        kpti_kbase = kernelbase;
     1497 +#endif
1479 1498  #else
1480 1499          *(uintptr_t *)&_userlimit32 = _userlimit;
1481 1500  #endif
1482 1501          PRM_DEBUG(_kernelbase);
1483 1502          PRM_DEBUG(_userlimit);
1484 1503          PRM_DEBUG(_userlimit32);
1485 1504  
     1505 +        /* We have to re-do this now that we've modified _userlimit. */
     1506 +        mmu_calc_user_slots();
     1507 +
1486 1508          layout_kernel_va();
1487 1509  
1488 1510  #if defined(__i386)
1489 1511          /*
1490 1512           * If segmap is too large we can push the bottom of the kernel heap
1491 1513           * higher than the base.  Or worse, it could exceed the top of the
1492 1514           * VA space entirely, causing it to wrap around.
1493 1515           */
1494 1516          if (kernelheap >= ekernelheap || (uintptr_t)kernelheap < kernelbase)
1495 1517                  panic("too little address space available for kernelheap,"
↓ open down ↓ 618 lines elided ↑ open up ↑
2114 2136  #endif  /* __i386 */
2115 2137  
2116 2138  
2117 2139          /*
2118 2140           * Now that we've got more VA, as well as the ability to allocate from
2119 2141           * it, tell the debugger.
2120 2142           */
2121 2143          if (boothowto & RB_DEBUG)
2122 2144                  kdi_dvec_memavail();
2123 2145  
2124      -        /*
2125      -         * The following code installs a special page fault handler (#pf)
2126      -         * to work around a pentium bug.
2127      -         */
2128      -#if !defined(__amd64) && !defined(__xpv)
2129      -        if (x86_type == X86_TYPE_P5) {
2130      -                desctbr_t idtr;
2131      -                gate_desc_t *newidt;
2132      -
2133      -                if ((newidt = kmem_zalloc(MMU_PAGESIZE, KM_NOSLEEP)) == NULL)
2134      -                        panic("failed to install pentium_pftrap");
2135      -
2136      -                bcopy(idt0, newidt, NIDT * sizeof (*idt0));
2137      -                set_gatesegd(&newidt[T_PGFLT], &pentium_pftrap,
2138      -                    KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
2139      -
2140      -                (void) as_setprot(&kas, (caddr_t)newidt, MMU_PAGESIZE,
2141      -                    PROT_READ | PROT_EXEC);
2142      -
2143      -                CPU->cpu_idt = newidt;
2144      -                idtr.dtr_base = (uintptr_t)CPU->cpu_idt;
2145      -                idtr.dtr_limit = (NIDT * sizeof (*idt0)) - 1;
2146      -                wr_idtr(&idtr);
2147      -        }
2148      -#endif  /* !__amd64 */
2149      -
2150 2146  #if !defined(__xpv)
2151 2147          /*
2152 2148           * Map page pfn=0 for drivers, such as kd, that need to pick up
2153 2149           * parameters left there by controllers/BIOS.
2154 2150           */
2155 2151          PRM_POINT("setup up p0_va");
2156 2152          p0_va = i86devmap(0, 1, PROT_READ);
2157 2153          PRM_DEBUG(p0_va);
2158 2154  #endif
2159 2155  
↓ open down ↓ 42 lines elided ↑ open up ↑
2202 2198                  panic("startup: segkp_create failed");
2203 2199                  /*NOTREACHED*/
2204 2200          }
2205 2201          PRM_DEBUG(segkp);
2206 2202          rw_exit(&kas.a_lock);
2207 2203  
2208 2204          /*
2209 2205           * kpm segment
2210 2206           */
2211 2207          segmap_kpm = 0;
2212      -        if (kpm_desired) {
     2208 +        if (kpm_desired)
2213 2209                  kpm_init();
2214      -                kpm_enable = 1;
2215      -        }
2216 2210  
2217 2211          /*
2218 2212           * Now create segmap segment.
2219 2213           */
2220 2214          rw_enter(&kas.a_lock, RW_WRITER);
2221 2215          if (seg_attach(&kas, (caddr_t)segmap_start, segmapsize, segmap) < 0) {
2222 2216                  panic("cannot attach segmap");
2223 2217                  /*NOTREACHED*/
2224 2218          }
2225 2219          PRM_DEBUG(segmap);
↓ open down ↓ 102 lines elided ↑ open up ↑
2328 2322           */
2329 2323          PRM_POINT("NULLing out bootops");
2330 2324          *bootopsp = (struct bootops *)NULL;
2331 2325          bootops = (struct bootops *)NULL;
2332 2326  
2333 2327  #if defined(__xpv)
2334 2328          ec_init_debug_irq();
2335 2329          xs_domu_init();
2336 2330  #endif
2337 2331  
2338      -#if defined(__amd64) && !defined(__xpv)
     2332 +#if !defined(__xpv)
2339 2333          /*
2340 2334           * Intel IOMMU has been set up/initialized in ddi_impl.c
2341 2335           * Start it up now.
2342 2336           */
2343 2337          immu_startup();
     2338 +
     2339 +        /*
     2340 +         * Now that we're no longer going to drop into real mode for a BIOS call
     2341 +         * via bootops, we can enable PCID (which requires CR0.PG).
     2342 +         */
     2343 +        enable_pcid();
2344 2344  #endif
2345 2345  
2346 2346          PRM_POINT("Enabling interrupts");
2347 2347          (*picinitf)();
2348 2348          sti();
2349 2349  #if defined(__xpv)
2350 2350          ASSERT(CPU->cpu_m.mcpu_vcpu_info->evtchn_upcall_mask == 0);
2351 2351          xen_late_startup();
2352 2352  #endif
2353 2353  
↓ open down ↓ 1038 lines elided ↑ open up ↑
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX