Print this page
8956 Implement KPTI
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>


   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 1993, 2010, Oracle and/or its affiliates. All rights reserved.
  24  * Copyright 2012 DEY Storage Systems, Inc.  All rights reserved.
  25  * Copyright 2017 Nexenta Systems, Inc.
  26  * Copyright 2015 Joyent, Inc.
  27  * Copyright (c) 2015 by Delphix. All rights reserved.
  28  */
  29 /*
  30  * Copyright (c) 2010, Intel Corporation.
  31  * All rights reserved.
  32  */
  33 
  34 #include <sys/types.h>
  35 #include <sys/t_lock.h>
  36 #include <sys/param.h>
  37 #include <sys/sysmacros.h>
  38 #include <sys/signal.h>
  39 #include <sys/systm.h>
  40 #include <sys/user.h>
  41 #include <sys/mman.h>
  42 #include <sys/vm.h>
  43 #include <sys/conf.h>
  44 #include <sys/avintr.h>
  45 #include <sys/autoconf.h>
  46 #include <sys/disp.h>


 429  *              |       user text       |
 430  * 0x08048000  -|-----------------------|
 431  *              |       user stack      |
 432  *              :                       :
 433  *              |       invalid         |
 434  * 0x00000000   +-----------------------+
 435  *
 436  *
 437  *              64-bit Kernel's Virtual memory layout. (assuming 64 bit app)
 438  *                      +-----------------------+
 439  *                      |                       |
 440  * 0xFFFFFFFF.FFC00000  |-----------------------|- ARGSBASE
 441  *                      |       debugger (?)    |
 442  * 0xFFFFFFFF.FF800000  |-----------------------|- SEGDEBUGBASE
 443  *                      |      unused           |
 444  *                      +-----------------------+
 445  *                      |      Kernel Data      |
 446  * 0xFFFFFFFF.FBC00000  |-----------------------|
 447  *                      |      Kernel Text      |
 448  * 0xFFFFFFFF.FB800000  |-----------------------|- KERNEL_TEXT
 449  *                      |---       GDT       ---|- GDT page (GDT_VA)
 450  *                      |---    debug info   ---|- debug info (DEBUG_INFO_VA)



 451  *                      |                       |
 452  *                      |      Core heap        | (used for loadable modules)
 453  * 0xFFFFFFFF.C0000000  |-----------------------|- core_base / ekernelheap
 454  *                      |        Kernel         |
 455  *                      |         heap          |
 456  * 0xFFFFFXXX.XXX00000  |-----------------------|- kernelheap (floating)
 457  *                      |        segmap         |
 458  * 0xFFFFFXXX.XXX00000  |-----------------------|- segmap_start (floating)
 459  *                      |    device mappings    |
 460  * 0xFFFFFXXX.XXX00000  |-----------------------|- toxic_addr (floating)
 461  *                      |         segzio        |
 462  * 0xFFFFFXXX.XXX00000  |-----------------------|- segzio_base (floating)
 463  *                      |         segkp         |
 464  * ---                  |-----------------------|- segkp_base (floating)
 465  *                      |   page_t structures   |  valloc_base + valloc_sz
 466  *                      |   memsegs, memlists,  |
 467  *                      |   page hash, etc.     |
 468  * 0xFFFFFF00.00000000  |-----------------------|- valloc_base (lower if >256GB)
 469  *                      |        segkpm         |
 470  * 0xFFFFFE00.00000000  |-----------------------|


 934         kpm_pgshft = MMU_PAGESHIFT;
 935         kpm_pgsz =  MMU_PAGESIZE;
 936         kpm_pgoff = MMU_PAGEOFFSET;
 937         kpmp2pshft = 0;
 938         kpmpnpgs = 1;
 939         ASSERT(((uintptr_t)kpm_vbase & (kpm_pgsz - 1)) == 0);
 940 
 941         PRM_POINT("about to create segkpm");
 942         rw_enter(&kas.a_lock, RW_WRITER);
 943 
 944         if (seg_attach(&kas, kpm_vbase, kpm_size, segkpm) < 0)
 945                 panic("cannot attach segkpm");
 946 
 947         b.prot = PROT_READ | PROT_WRITE;
 948         b.nvcolors = 1;
 949 
 950         if (segkpm_create(segkpm, (caddr_t)&b) != 0)
 951                 panic("segkpm_create segkpm");
 952 
 953         rw_exit(&kas.a_lock);











 954 }
 955 
 956 /*
 957  * The debug info page provides enough information to allow external
 958  * inspectors (e.g. when running under a hypervisor) to bootstrap
 959  * themselves into allowing full-blown kernel debugging.
 960  */
 961 static void
 962 init_debug_info(void)
 963 {
 964         caddr_t mem;
 965         debug_info_t *di;
 966 
 967 #ifndef __lint
 968         ASSERT(sizeof (debug_info_t) < MMU_PAGESIZE);
 969 #endif
 970 
 971         mem = BOP_ALLOC(bootops, (caddr_t)DEBUG_INFO_VA, MMU_PAGESIZE,
 972             MMU_PAGESIZE);
 973 


1397         PRM_POINT("startup_memlist() done");
1398 
1399         PRM_DEBUG(valloc_sz);
1400 
1401 #if defined(__amd64)
1402         if ((availrmem >> (30 - MMU_PAGESHIFT)) >=
1403             textrepl_min_gb && l2cache_sz <= 2 << 20) {
1404                 extern size_t textrepl_size_thresh;
1405                 textrepl_size_thresh = (16 << 20) - 1;
1406         }
1407 #endif
1408 }
1409 
1410 /*
1411  * Lay out the kernel's part of address space and initialize kmem allocator.
1412  */
1413 static void
1414 startup_kmem(void)
1415 {
1416         extern void page_set_colorequiv_arr(void);



1417 
1418         PRM_POINT("startup_kmem() starting...");
1419 
1420 #if defined(__amd64)
1421         if (eprom_kernelbase && eprom_kernelbase != KERNELBASE)
1422                 cmn_err(CE_NOTE, "!kernelbase cannot be changed on 64-bit "
1423                     "systems.");
1424         kernelbase = segkpm_base - KERNEL_REDZONE_SIZE;
1425         core_base = (uintptr_t)COREHEAP_BASE;
1426         core_size = (size_t)MISC_VA_BASE - COREHEAP_BASE;
1427 #else   /* __i386 */
1428         /*
1429          * We configure kernelbase based on:
1430          *
1431          * 1. user specified kernelbase via eeprom command. Value cannot exceed
1432          *    KERNELBASE_MAX. we large page align eprom_kernelbase
1433          *
1434          * 2. Default to KERNELBASE and adjust to 2X less the size for page_t.
1435          *    On large memory systems we must lower kernelbase to allow
1436          *    enough room for page_t's for all of memory.


1459 #endif  /* __i386 */
1460 
1461         ekernelheap = (char *)core_base;
1462         PRM_DEBUG(ekernelheap);
1463 
1464         /*
1465          * Now that we know the real value of kernelbase,
1466          * update variables that were initialized with a value of
1467          * KERNELBASE (in common/conf/param.c).
1468          *
1469          * XXX  The problem with this sort of hackery is that the
1470          *      compiler just may feel like putting the const declarations
1471          *      (in param.c) into the .text section.  Perhaps they should
1472          *      just be declared as variables there?
1473          */
1474 
1475         *(uintptr_t *)&_kernelbase = kernelbase;
1476         *(uintptr_t *)&_userlimit = kernelbase;
1477 #if defined(__amd64)
1478         *(uintptr_t *)&_userlimit -= KERNELBASE - USERLIMIT;



1479 #else
1480         *(uintptr_t *)&_userlimit32 = _userlimit;
1481 #endif
1482         PRM_DEBUG(_kernelbase);
1483         PRM_DEBUG(_userlimit);
1484         PRM_DEBUG(_userlimit32);
1485 



1486         layout_kernel_va();
1487 
1488 #if defined(__i386)
1489         /*
1490          * If segmap is too large we can push the bottom of the kernel heap
1491          * higher than the base.  Or worse, it could exceed the top of the
1492          * VA space entirely, causing it to wrap around.
1493          */
1494         if (kernelheap >= ekernelheap || (uintptr_t)kernelheap < kernelbase)
1495                 panic("too little address space available for kernelheap,"
1496                     " use eeprom for lower kernelbase or smaller segmapsize");
1497 #endif  /* __i386 */
1498 
1499         /*
1500          * Initialize the kernel heap. Note 3rd argument must be > 1st.
1501          */
1502         kernelheap_init(kernelheap, ekernelheap,
1503             kernelheap + MMU_PAGESIZE,
1504             (void *)core_base, (void *)(core_base + core_size));
1505 


2104         /*
2105          * allocate the bit map that tracks toxic pages
2106          */
2107         toxic_bit_map_len = btop((ulong_t)(valloc_base - kernelbase));
2108         PRM_DEBUG(toxic_bit_map_len);
2109         toxic_bit_map =
2110             kmem_zalloc(BT_SIZEOFMAP(toxic_bit_map_len), KM_NOSLEEP);
2111         ASSERT(toxic_bit_map != NULL);
2112         PRM_DEBUG(toxic_bit_map);
2113 
2114 #endif  /* __i386 */
2115 
2116 
2117         /*
2118          * Now that we've got more VA, as well as the ability to allocate from
2119          * it, tell the debugger.
2120          */
2121         if (boothowto & RB_DEBUG)
2122                 kdi_dvec_memavail();
2123 
2124         /*
2125          * The following code installs a special page fault handler (#pf)
2126          * to work around a pentium bug.
2127          */
2128 #if !defined(__amd64) && !defined(__xpv)
2129         if (x86_type == X86_TYPE_P5) {
2130                 desctbr_t idtr;
2131                 gate_desc_t *newidt;
2132 
2133                 if ((newidt = kmem_zalloc(MMU_PAGESIZE, KM_NOSLEEP)) == NULL)
2134                         panic("failed to install pentium_pftrap");
2135 
2136                 bcopy(idt0, newidt, NIDT * sizeof (*idt0));
2137                 set_gatesegd(&newidt[T_PGFLT], &pentium_pftrap,
2138                     KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
2139 
2140                 (void) as_setprot(&kas, (caddr_t)newidt, MMU_PAGESIZE,
2141                     PROT_READ | PROT_EXEC);
2142 
2143                 CPU->cpu_idt = newidt;
2144                 idtr.dtr_base = (uintptr_t)CPU->cpu_idt;
2145                 idtr.dtr_limit = (NIDT * sizeof (*idt0)) - 1;
2146                 wr_idtr(&idtr);
2147         }
2148 #endif  /* !__amd64 */
2149 
2150 #if !defined(__xpv)
2151         /*
2152          * Map page pfn=0 for drivers, such as kd, that need to pick up
2153          * parameters left there by controllers/BIOS.
2154          */
2155         PRM_POINT("setup up p0_va");
2156         p0_va = i86devmap(0, 1, PROT_READ);
2157         PRM_DEBUG(p0_va);
2158 #endif
2159 
2160         cmn_err(CE_CONT, "?mem = %luK (0x%lx)\n",
2161             physinstalled << (MMU_PAGESHIFT - 10), ptob(physinstalled));
2162 
2163         /*
2164          * disable automatic large pages for small memory systems or
2165          * when the disable flag is set.
2166          *
2167          * Do not yet consider page sizes larger than 2m/4m.
2168          */
2169         if (!auto_lpg_disable && mmu.max_page_level > 0) {


2192         PRM_POINT("Attaching segkp");
2193         if (segkp_fromheap) {
2194                 segkp->s_as = &kas;
2195         } else if (seg_attach(&kas, (caddr_t)segkp_base, mmu_ptob(segkpsize),
2196             segkp) < 0) {
2197                 panic("startup: cannot attach segkp");
2198                 /*NOTREACHED*/
2199         }
2200         PRM_POINT("Doing segkp_create()");
2201         if (segkp_create(segkp) != 0) {
2202                 panic("startup: segkp_create failed");
2203                 /*NOTREACHED*/
2204         }
2205         PRM_DEBUG(segkp);
2206         rw_exit(&kas.a_lock);
2207 
2208         /*
2209          * kpm segment
2210          */
2211         segmap_kpm = 0;
2212         if (kpm_desired) {
2213                 kpm_init();
2214                 kpm_enable = 1;
2215         }
2216 
2217         /*
2218          * Now create segmap segment.
2219          */
2220         rw_enter(&kas.a_lock, RW_WRITER);
2221         if (seg_attach(&kas, (caddr_t)segmap_start, segmapsize, segmap) < 0) {
2222                 panic("cannot attach segmap");
2223                 /*NOTREACHED*/
2224         }
2225         PRM_DEBUG(segmap);
2226 
2227         a.prot = PROT_READ | PROT_WRITE;
2228         a.shmsize = 0;
2229         a.nfreelist = segmapfreelists;
2230 
2231         if (segmap_create(segmap, (caddr_t)&a) != 0)
2232                 panic("segmap_create segmap");
2233         rw_exit(&kas.a_lock);
2234 
2235         setup_vaddr_for_ppcopy(CPU);


2318          * Set the isa_list string to the defined instruction sets we
2319          * support.
2320          */
2321         setx86isalist();
2322         cpu_intr_alloc(CPU, NINTR_THREADS);
2323         psm_install();
2324 
2325         /*
2326          * We're done with bootops.  We don't unmap the bootstrap yet because
2327          * we're still using bootsvcs.
2328          */
2329         PRM_POINT("NULLing out bootops");
2330         *bootopsp = (struct bootops *)NULL;
2331         bootops = (struct bootops *)NULL;
2332 
2333 #if defined(__xpv)
2334         ec_init_debug_irq();
2335         xs_domu_init();
2336 #endif
2337 
2338 #if defined(__amd64) && !defined(__xpv)
2339         /*
2340          * Intel IOMMU has been set up/initialized in ddi_impl.c
2341          * Start it up now.
2342          */
2343         immu_startup();






2344 #endif
2345 
2346         PRM_POINT("Enabling interrupts");
2347         (*picinitf)();
2348         sti();
2349 #if defined(__xpv)
2350         ASSERT(CPU->cpu_m.mcpu_vcpu_info->evtchn_upcall_mask == 0);
2351         xen_late_startup();
2352 #endif
2353 
2354         (void) add_avsoftintr((void *)&softlevel1_hdl, 1, softlevel1,
2355             "softlevel1", NULL, NULL); /* XXX to be moved later */
2356 
2357         /*
2358          * Register software interrupt handlers for ddi_periodic_add(9F).
2359          * Software interrupts up to the level 10 are supported.
2360          */
2361         for (i = DDI_IPL_1; i <= DDI_IPL_10; i++) {
2362                 (void) add_avsoftintr((void *)&softlevel_hdl[i-1], i,
2363                     (avfunc)ddi_periodic_softintr, "ddi_periodic",




   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 1993, 2010, Oracle and/or its affiliates. All rights reserved.
  24  * Copyright 2012 DEY Storage Systems, Inc.  All rights reserved.
  25  * Copyright 2017 Nexenta Systems, Inc.
  26  * Copyright (c) 2018 Joyent, Inc.
  27  * Copyright (c) 2015 by Delphix. All rights reserved.
  28  */
  29 /*
  30  * Copyright (c) 2010, Intel Corporation.
  31  * All rights reserved.
  32  */
  33 
  34 #include <sys/types.h>
  35 #include <sys/t_lock.h>
  36 #include <sys/param.h>
  37 #include <sys/sysmacros.h>
  38 #include <sys/signal.h>
  39 #include <sys/systm.h>
  40 #include <sys/user.h>
  41 #include <sys/mman.h>
  42 #include <sys/vm.h>
  43 #include <sys/conf.h>
  44 #include <sys/avintr.h>
  45 #include <sys/autoconf.h>
  46 #include <sys/disp.h>


 429  *              |       user text       |
 430  * 0x08048000  -|-----------------------|
 431  *              |       user stack      |
 432  *              :                       :
 433  *              |       invalid         |
 434  * 0x00000000   +-----------------------+
 435  *
 436  *
 437  *              64-bit Kernel's Virtual memory layout. (assuming 64 bit app)
 438  *                      +-----------------------+
 439  *                      |                       |
 440  * 0xFFFFFFFF.FFC00000  |-----------------------|- ARGSBASE
 441  *                      |       debugger (?)    |
 442  * 0xFFFFFFFF.FF800000  |-----------------------|- SEGDEBUGBASE
 443  *                      |      unused           |
 444  *                      +-----------------------+
 445  *                      |      Kernel Data      |
 446  * 0xFFFFFFFF.FBC00000  |-----------------------|
 447  *                      |      Kernel Text      |
 448  * 0xFFFFFFFF.FB800000  |-----------------------|- KERNEL_TEXT

 449  *                      |---    debug info   ---|- debug info (DEBUG_INFO_VA)
 450  *                      |---       GDT       ---|- GDT page (GDT_VA)
 451  *                      |---       IDT       ---|- IDT page (IDT_VA)
 452  *                      |---       LDT       ---|- LDT pages (LDT_VA)
 453  *                      |                       |
 454  *                      |      Core heap        | (used for loadable modules)
 455  * 0xFFFFFFFF.C0000000  |-----------------------|- core_base / ekernelheap
 456  *                      |        Kernel         |
 457  *                      |         heap          |
 458  * 0xFFFFFXXX.XXX00000  |-----------------------|- kernelheap (floating)
 459  *                      |        segmap         |
 460  * 0xFFFFFXXX.XXX00000  |-----------------------|- segmap_start (floating)
 461  *                      |    device mappings    |
 462  * 0xFFFFFXXX.XXX00000  |-----------------------|- toxic_addr (floating)
 463  *                      |         segzio        |
 464  * 0xFFFFFXXX.XXX00000  |-----------------------|- segzio_base (floating)
 465  *                      |         segkp         |
 466  * ---                  |-----------------------|- segkp_base (floating)
 467  *                      |   page_t structures   |  valloc_base + valloc_sz
 468  *                      |   memsegs, memlists,  |
 469  *                      |   page hash, etc.     |
 470  * 0xFFFFFF00.00000000  |-----------------------|- valloc_base (lower if >256GB)
 471  *                      |        segkpm         |
 472  * 0xFFFFFE00.00000000  |-----------------------|


 936         kpm_pgshft = MMU_PAGESHIFT;
 937         kpm_pgsz =  MMU_PAGESIZE;
 938         kpm_pgoff = MMU_PAGEOFFSET;
 939         kpmp2pshft = 0;
 940         kpmpnpgs = 1;
 941         ASSERT(((uintptr_t)kpm_vbase & (kpm_pgsz - 1)) == 0);
 942 
 943         PRM_POINT("about to create segkpm");
 944         rw_enter(&kas.a_lock, RW_WRITER);
 945 
 946         if (seg_attach(&kas, kpm_vbase, kpm_size, segkpm) < 0)
 947                 panic("cannot attach segkpm");
 948 
 949         b.prot = PROT_READ | PROT_WRITE;
 950         b.nvcolors = 1;
 951 
 952         if (segkpm_create(segkpm, (caddr_t)&b) != 0)
 953                 panic("segkpm_create segkpm");
 954 
 955         rw_exit(&kas.a_lock);
 956 
 957         kpm_enable = 1;
 958 
 959         /*
 960          * As the KPM was disabled while setting up the system, go back and fix
 961          * CPU zero's access to its user page table. This is a bit gross, but
 962          * we have a chicken and egg problem otherwise.
 963          */
 964         ASSERT(CPU->cpu_hat_info->hci_user_l3ptes == NULL);
 965         CPU->cpu_hat_info->hci_user_l3ptes =
 966             (x86pte_t *)hat_kpm_mapin_pfn(CPU->cpu_hat_info->hci_user_l3pfn);
 967 }
 968 
 969 /*
 970  * The debug info page provides enough information to allow external
 971  * inspectors (e.g. when running under a hypervisor) to bootstrap
 972  * themselves into allowing full-blown kernel debugging.
 973  */
 974 static void
 975 init_debug_info(void)
 976 {
 977         caddr_t mem;
 978         debug_info_t *di;
 979 
 980 #ifndef __lint
 981         ASSERT(sizeof (debug_info_t) < MMU_PAGESIZE);
 982 #endif
 983 
 984         mem = BOP_ALLOC(bootops, (caddr_t)DEBUG_INFO_VA, MMU_PAGESIZE,
 985             MMU_PAGESIZE);
 986 


1410         PRM_POINT("startup_memlist() done");
1411 
1412         PRM_DEBUG(valloc_sz);
1413 
1414 #if defined(__amd64)
1415         if ((availrmem >> (30 - MMU_PAGESHIFT)) >=
1416             textrepl_min_gb && l2cache_sz <= 2 << 20) {
1417                 extern size_t textrepl_size_thresh;
1418                 textrepl_size_thresh = (16 << 20) - 1;
1419         }
1420 #endif
1421 }
1422 
1423 /*
1424  * Lay out the kernel's part of address space and initialize kmem allocator.
1425  */
1426 static void
1427 startup_kmem(void)
1428 {
1429         extern void page_set_colorequiv_arr(void);
1430 #if !defined(__xpv)
1431         extern uint64_t kpti_kbase;
1432 #endif
1433 
1434         PRM_POINT("startup_kmem() starting...");
1435 
1436 #if defined(__amd64)
1437         if (eprom_kernelbase && eprom_kernelbase != KERNELBASE)
1438                 cmn_err(CE_NOTE, "!kernelbase cannot be changed on 64-bit "
1439                     "systems.");
1440         kernelbase = segkpm_base - KERNEL_REDZONE_SIZE;
1441         core_base = (uintptr_t)COREHEAP_BASE;
1442         core_size = (size_t)MISC_VA_BASE - COREHEAP_BASE;
1443 #else   /* __i386 */
1444         /*
1445          * We configure kernelbase based on:
1446          *
1447          * 1. user specified kernelbase via eeprom command. Value cannot exceed
1448          *    KERNELBASE_MAX. we large page align eprom_kernelbase
1449          *
1450          * 2. Default to KERNELBASE and adjust to 2X less the size for page_t.
1451          *    On large memory systems we must lower kernelbase to allow
1452          *    enough room for page_t's for all of memory.


1475 #endif  /* __i386 */
1476 
1477         ekernelheap = (char *)core_base;
1478         PRM_DEBUG(ekernelheap);
1479 
1480         /*
1481          * Now that we know the real value of kernelbase,
1482          * update variables that were initialized with a value of
1483          * KERNELBASE (in common/conf/param.c).
1484          *
1485          * XXX  The problem with this sort of hackery is that the
1486          *      compiler just may feel like putting the const declarations
1487          *      (in param.c) into the .text section.  Perhaps they should
1488          *      just be declared as variables there?
1489          */
1490 
1491         *(uintptr_t *)&_kernelbase = kernelbase;
1492         *(uintptr_t *)&_userlimit = kernelbase;
1493 #if defined(__amd64)
1494         *(uintptr_t *)&_userlimit -= KERNELBASE - USERLIMIT;
1495 #if !defined(__xpv)
1496         kpti_kbase = kernelbase;
1497 #endif
1498 #else
1499         *(uintptr_t *)&_userlimit32 = _userlimit;
1500 #endif
1501         PRM_DEBUG(_kernelbase);
1502         PRM_DEBUG(_userlimit);
1503         PRM_DEBUG(_userlimit32);
1504 
1505         /* We have to re-do this now that we've modified _userlimit. */
1506         mmu_calc_user_slots();
1507 
1508         layout_kernel_va();
1509 
1510 #if defined(__i386)
1511         /*
1512          * If segmap is too large we can push the bottom of the kernel heap
1513          * higher than the base.  Or worse, it could exceed the top of the
1514          * VA space entirely, causing it to wrap around.
1515          */
1516         if (kernelheap >= ekernelheap || (uintptr_t)kernelheap < kernelbase)
1517                 panic("too little address space available for kernelheap,"
1518                     " use eeprom for lower kernelbase or smaller segmapsize");
1519 #endif  /* __i386 */
1520 
1521         /*
1522          * Initialize the kernel heap. Note 3rd argument must be > 1st.
1523          */
1524         kernelheap_init(kernelheap, ekernelheap,
1525             kernelheap + MMU_PAGESIZE,
1526             (void *)core_base, (void *)(core_base + core_size));
1527 


2126         /*
2127          * allocate the bit map that tracks toxic pages
2128          */
2129         toxic_bit_map_len = btop((ulong_t)(valloc_base - kernelbase));
2130         PRM_DEBUG(toxic_bit_map_len);
2131         toxic_bit_map =
2132             kmem_zalloc(BT_SIZEOFMAP(toxic_bit_map_len), KM_NOSLEEP);
2133         ASSERT(toxic_bit_map != NULL);
2134         PRM_DEBUG(toxic_bit_map);
2135 
2136 #endif  /* __i386 */
2137 
2138 
2139         /*
2140          * Now that we've got more VA, as well as the ability to allocate from
2141          * it, tell the debugger.
2142          */
2143         if (boothowto & RB_DEBUG)
2144                 kdi_dvec_memavail();
2145 


























2146 #if !defined(__xpv)
2147         /*
2148          * Map page pfn=0 for drivers, such as kd, that need to pick up
2149          * parameters left there by controllers/BIOS.
2150          */
2151         PRM_POINT("setup up p0_va");
2152         p0_va = i86devmap(0, 1, PROT_READ);
2153         PRM_DEBUG(p0_va);
2154 #endif
2155 
2156         cmn_err(CE_CONT, "?mem = %luK (0x%lx)\n",
2157             physinstalled << (MMU_PAGESHIFT - 10), ptob(physinstalled));
2158 
2159         /*
2160          * disable automatic large pages for small memory systems or
2161          * when the disable flag is set.
2162          *
2163          * Do not yet consider page sizes larger than 2m/4m.
2164          */
2165         if (!auto_lpg_disable && mmu.max_page_level > 0) {


2188         PRM_POINT("Attaching segkp");
2189         if (segkp_fromheap) {
2190                 segkp->s_as = &kas;
2191         } else if (seg_attach(&kas, (caddr_t)segkp_base, mmu_ptob(segkpsize),
2192             segkp) < 0) {
2193                 panic("startup: cannot attach segkp");
2194                 /*NOTREACHED*/
2195         }
2196         PRM_POINT("Doing segkp_create()");
2197         if (segkp_create(segkp) != 0) {
2198                 panic("startup: segkp_create failed");
2199                 /*NOTREACHED*/
2200         }
2201         PRM_DEBUG(segkp);
2202         rw_exit(&kas.a_lock);
2203 
2204         /*
2205          * kpm segment
2206          */
2207         segmap_kpm = 0;
2208         if (kpm_desired)
2209                 kpm_init();


2210 
2211         /*
2212          * Now create segmap segment.
2213          */
2214         rw_enter(&kas.a_lock, RW_WRITER);
2215         if (seg_attach(&kas, (caddr_t)segmap_start, segmapsize, segmap) < 0) {
2216                 panic("cannot attach segmap");
2217                 /*NOTREACHED*/
2218         }
2219         PRM_DEBUG(segmap);
2220 
2221         a.prot = PROT_READ | PROT_WRITE;
2222         a.shmsize = 0;
2223         a.nfreelist = segmapfreelists;
2224 
2225         if (segmap_create(segmap, (caddr_t)&a) != 0)
2226                 panic("segmap_create segmap");
2227         rw_exit(&kas.a_lock);
2228 
2229         setup_vaddr_for_ppcopy(CPU);


2312          * Set the isa_list string to the defined instruction sets we
2313          * support.
2314          */
2315         setx86isalist();
2316         cpu_intr_alloc(CPU, NINTR_THREADS);
2317         psm_install();
2318 
2319         /*
2320          * We're done with bootops.  We don't unmap the bootstrap yet because
2321          * we're still using bootsvcs.
2322          */
2323         PRM_POINT("NULLing out bootops");
2324         *bootopsp = (struct bootops *)NULL;
2325         bootops = (struct bootops *)NULL;
2326 
2327 #if defined(__xpv)
2328         ec_init_debug_irq();
2329         xs_domu_init();
2330 #endif
2331 
2332 #if !defined(__xpv)
2333         /*
2334          * Intel IOMMU has been set up/initialized in ddi_impl.c
2335          * Start it up now.
2336          */
2337         immu_startup();
2338 
2339         /*
2340          * Now that we're no longer going to drop into real mode for a BIOS call
2341          * via bootops, we can enable PCID (which requires CR0.PG).
2342          */
2343         enable_pcid();
2344 #endif
2345 
2346         PRM_POINT("Enabling interrupts");
2347         (*picinitf)();
2348         sti();
2349 #if defined(__xpv)
2350         ASSERT(CPU->cpu_m.mcpu_vcpu_info->evtchn_upcall_mask == 0);
2351         xen_late_startup();
2352 #endif
2353 
2354         (void) add_avsoftintr((void *)&softlevel1_hdl, 1, softlevel1,
2355             "softlevel1", NULL, NULL); /* XXX to be moved later */
2356 
2357         /*
2358          * Register software interrupt handlers for ddi_periodic_add(9F).
2359          * Software interrupts up to the level 10 are supported.
2360          */
2361         for (i = DDI_IPL_1; i <= DDI_IPL_10; i++) {
2362                 (void) add_avsoftintr((void *)&softlevel_hdl[i-1], i,
2363                     (avfunc)ddi_periodic_softintr, "ddi_periodic",