1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  *
  26  * Copyright 2020 Joyent, Inc.
  27  */
  28 
  29 
  30 #include <sys/types.h>
  31 #include <sys/machparam.h>
  32 #include <sys/x86_archext.h>
  33 #include <sys/systm.h>
  34 #include <sys/mach_mmu.h>
  35 #include <sys/multiboot.h>
  36 #include <sys/multiboot2.h>
  37 #include <sys/multiboot2_impl.h>
  38 #include <sys/sysmacros.h>
  39 #include <sys/framebuffer.h>
  40 #include <sys/sha1.h>
  41 #include <util/string.h>
  42 #include <util/strtolctype.h>
  43 #include <sys/efi.h>
  44 
/*
 * Compile-time debug knob.  dboot is the earliest code we run, so there is
 * no earlier mechanism that could set this for us, and we do not want it
 * switched on by default.
 */
int dboot_debug = 0;
  51 
#if defined(__xpv)

#include <sys/hypervisor.h>
uintptr_t xen_virt_start;
/* Indexed by machine frame number; ma_to_pa() reads the matching pfn here. */
pfn_t *mfn_to_pfn_mapping;

#else /* !__xpv */

/* Declared for the non-hypervisor build only; defined outside this file. */
extern multiboot_header_t mb_header;
extern uint32_t mb2_load_addr;
extern int have_cpuid(void);

#endif /* !__xpv */
  65 
  66 #include <sys/inttypes.h>
  67 #include <sys/bootinfo.h>
  68 #include <sys/mach_mmu.h>
  69 #include <sys/boot_console.h>
  70 
  71 #include "dboot_asm.h"
  72 #include "dboot_printf.h"
  73 #include "dboot_xboot.h"
  74 #include "dboot_elfload.h"
  75 
/* Two characters per digest byte (presumably the hex text form -- confirm). */
#define SHA1_ASCII_LENGTH       (SHA1_DIGEST_LENGTH * 2)
  77 
  78 /*
  79  * This file contains code that runs to transition us from either a multiboot
  80  * compliant loader (32 bit non-paging) or a XPV domain loader to
  81  * regular kernel execution. Its task is to setup the kernel memory image
  82  * and page tables.
  83  *
  84  * The code executes as:
  85  *      - 32 bits under GRUB (for 32 or 64 bit Solaris)
  86  *      - a 32 bit program for the 32-bit PV hypervisor
  87  *      - a 64 bit program for the 64-bit PV hypervisor (at least for now)
  88  *
  89  * Under the PV hypervisor, we must create mappings for any memory beyond the
  90  * initial start of day allocation (such as the kernel itself).
  91  *
  92  * When on the metal, the mapping between maddr_t and paddr_t is 1:1.
 * Since we are running in real mode, all such memory is accessible.
  94  */
  95 
/*
 * Standard bits used in PTE (page level) and PTP (internal levels).
 * make_ptable() ORs ptp_bits into entries that point at a new page table;
 * map_ma_at_va() ORs pte_bits into the entries that map pages.
 */
x86pte_t ptp_bits = PT_VALID | PT_REF | PT_WRITABLE | PT_USER;
x86pte_t pte_bits = PT_VALID | PT_REF | PT_WRITABLE | PT_MOD | PT_NOCONSIST;
 101 
/*
 * This is the target address (physical) where the kernel text and data
 * nucleus pages will be unpacked. On the hypervisor this is actually a
 * virtual address.
 */
paddr_t ktext_phys;
uint32_t ksize = 2 * FOUR_MEG;  /* kernel nucleus is 8Meg */

/*
 * Value to use for KERNEL_TEXT.  map_ma_at_va() marks mappings at or above
 * this virtual address PT_GLOBAL when pge_support is set.
 */
static uint64_t target_kernel_text;
 111 
/*
 * The stack is set up in assembler before entering startup_kernel()
 */
char stack_space[STACK_SIZE];

/*
 * Used to track physical memory allocation.  check_higher() only ever moves
 * this upward, to the page boundary above the highest address it is shown.
 */
static paddr_t next_avail_addr = 0;
 121 
#if defined(__xpv)
/*
 * Additional information needed for hypervisor memory allocation.
 * Only memory up to scratch_end is mapped by page tables.
 * mfn_base is the start of the hypervisor virtual image. It's ONE_GIG, so
 * to derive a pfn from a pointer, you subtract mfn_base.
 */

static paddr_t scratch_end = 0; /* we can't write all of mem here */
static paddr_t mfn_base;                /* addr corresponding to mfn_list[0] */
/* Start info; ma_to_pa()/pa_to_ma() read nr_pages and mfn_list from it. */
start_info_t *xen_info;

#else   /* !__xpv */

/*
 * If on the metal, then we have a multiboot loader.
 */
uint32_t mb_magic;                      /* magic from boot loader */
uint32_t mb_addr;                       /* multiboot info package from loader */
/* 1 or 2; the dboot_loader_mmap_*() accessors switch on this value. */
int multiboot_version;
multiboot_info_t *mb_info;              /* used when multiboot_version is 1 */
multiboot2_info_header_t *mb2_info;     /* used when multiboot_version is 2 */
multiboot_tag_mmap_t *mb2_mmap_tagp;    /* passed along with mb2_info */
int num_entries;                        /* mmap entry count */
boolean_t num_entries_set;              /* is mmap entry count set */
uintptr_t load_addr;
static boot_framebuffer_t framebuffer __aligned(16);
static boot_framebuffer_t *fb;

/* can not be automatic variables because of alignment */
static efi_guid_t smbios3 = SMBIOS3_TABLE_GUID;
static efi_guid_t smbios = SMBIOS_TABLE_GUID;
static efi_guid_t acpi2 = EFI_ACPI_TABLE_GUID;
static efi_guid_t acpi1 = ACPI_10_TABLE_GUID;
#endif  /* !__xpv */
 157 
/*
 * This contains information passed to the kernel.  The memlist builders in
 * this file publish their results through bi (bi_phys_install, bi_rsvdmem,
 * bi_pcimem).
 * NOTE(review): bi presumably points at boot_info -- the assignment is not
 * in this part of the file; confirm.
 */
struct xboot_info boot_info __aligned(16);
struct xboot_info *bi;
 163 
/*
 * Page table and memory stuff.
 */
static paddr_t max_mem;                 /* maximum memory address */

/*
 * Information about processor MMU.  All flags start out as 0.
 * pae_support selects x86pte_t rather than x86pte32_t entries wherever page
 * tables are read or written; pge_support lets map_ma_at_va() set PT_GLOBAL.
 */
int amd64_support = 0;
int largepage_support = 0;
int pae_support = 0;
int pge_support = 0;
int NX_support = 0;
int PAT_support = 0;
 178 
/*
 * Low 32 bits of the kernel entry address, passed back to the assembler.
 * When running a 64 bit kernel, the high 32 bits are 0xffffffff.
 */
uint32_t entry_addr_low;
 184 
/*
 * Memlists for the kernel. We shouldn't need a lot of these.
 * Each array is paired with a count of the entries currently in use.
 */
#define MAX_MEMLIST (50)
/* Installed physical memory; published as bi->bi_phys_install. */
struct boot_memlist memlists[MAX_MEMLIST];
uint_t memlists_used = 0;
/* Address ranges left over for PCI; published as bi->bi_pcimem. */
struct boot_memlist pcimemlists[MAX_MEMLIST];
uint_t pcimemlists_used = 0;
/* BIOS reserved memory; published as bi->bi_rsvdmem. */
struct boot_memlist rsvdmemlists[MAX_MEMLIST];
uint_t rsvdmemlists_used = 0;

/*
 * This should match what's in the bootloader.  It's arbitrary, but GRUB
 * in particular has limitations on how much space it can use before it
 * stops working properly.  This should be enough.
 */
struct boot_modules modules[MAX_BOOT_MODULES];
uint_t modules_used = 0;
 203 
#ifdef __xpv
/*
 * Xen strips the size field out of the mb_memory_map_t, see struct e820entry
 * definition in Xen source.
 */
typedef struct {
        uint32_t        base_addr_low;
        uint32_t        base_addr_high;
        uint32_t        length_low;
        uint32_t        length_high;
        uint32_t        type;
} mmap_t;

/*
 * There is 512KB of scratch area after the boot stack page.
 * We'll use that for everything except the kernel nucleus pages which are too
 * big to fit there and are allocated last anyway.
 */
/*
 * Fixed-size memory map table.  In this build dboot_loader_mmap_entries()
 * reports MAXMAPS and the dboot_loader_mmap_get_*() accessors index
 * map_buffer[] directly.
 */
#define MAXMAPS 100
static mmap_t map_buffer[MAXMAPS];
#else
/* On bare metal a memory map entry is the multiboot structure itself. */
typedef mb_memory_map_t mmap_t;
#endif
 227 
/*
 * Debugging knobs (run-time variables, both off by default):
 *   prom_debug - print memlist and memory map details as they are built
 *   map_debug  - print each new page table and each mapping installed
 */
uint_t prom_debug = 0;
uint_t map_debug = 0;

/* Placeholder "-" string; its users are outside this part of the file. */
static char noname[2] = "-";
 235 
 236 /*
 237  * Either hypervisor-specific or grub-specific code builds the initial
 238  * memlists. This code does the sort/merge/link for final use.
 239  */
 240 static void
 241 sort_physinstall(void)
 242 {
 243         int i;
 244 #if !defined(__xpv)
 245         int j;
 246         struct boot_memlist tmp;
 247 
 248         /*
 249          * Now sort the memlists, in case they weren't in order.
 250          * Yeah, this is a bubble sort; small, simple and easy to get right.
 251          */
 252         DBG_MSG("Sorting phys-installed list\n");
 253         for (j = memlists_used - 1; j > 0; --j) {
 254                 for (i = 0; i < j; ++i) {
 255                         if (memlists[i].addr < memlists[i + 1].addr)
 256                                 continue;
 257                         tmp = memlists[i];
 258                         memlists[i] = memlists[i + 1];
 259                         memlists[i + 1] = tmp;
 260                 }
 261         }
 262 
 263         /*
 264          * Merge any memlists that don't have holes between them.
 265          */
 266         for (i = 0; i <= memlists_used - 1; ++i) {
 267                 if (memlists[i].addr + memlists[i].size != memlists[i + 1].addr)
 268                         continue;
 269 
 270                 if (prom_debug)
 271                         dboot_printf(
 272                             "merging mem segs %" PRIx64 "...%" PRIx64
 273                             " w/ %" PRIx64 "...%" PRIx64 "\n",
 274                             memlists[i].addr,
 275                             memlists[i].addr + memlists[i].size,
 276                             memlists[i + 1].addr,
 277                             memlists[i + 1].addr + memlists[i + 1].size);
 278 
 279                 memlists[i].size += memlists[i + 1].size;
 280                 for (j = i + 1; j < memlists_used - 1; ++j)
 281                         memlists[j] = memlists[j + 1];
 282                 --memlists_used;
 283                 DBG(memlists_used);
 284                 --i;    /* after merging we need to reexamine, so do this */
 285         }
 286 #endif  /* __xpv */
 287 
 288         if (prom_debug) {
 289                 dboot_printf("\nFinal memlists:\n");
 290                 for (i = 0; i < memlists_used; ++i) {
 291                         dboot_printf("\t%d: addr=%" PRIx64 " size=%"
 292                             PRIx64 "\n", i, memlists[i].addr, memlists[i].size);
 293                 }
 294         }
 295 
 296         /*
 297          * link together the memlists with native size pointers
 298          */
 299         memlists[0].next = 0;
 300         memlists[0].prev = 0;
 301         for (i = 1; i < memlists_used; ++i) {
 302                 memlists[i].prev = (native_ptr_t)(uintptr_t)(memlists + i - 1);
 303                 memlists[i].next = 0;
 304                 memlists[i - 1].next = (native_ptr_t)(uintptr_t)(memlists + i);
 305         }
 306         bi->bi_phys_install = (native_ptr_t)(uintptr_t)memlists;
 307         DBG(bi->bi_phys_install);
 308 }
 309 
 310 /*
 311  * build bios reserved memlists
 312  */
 313 static void
 314 build_rsvdmemlists(void)
 315 {
 316         int i;
 317 
 318         rsvdmemlists[0].next = 0;
 319         rsvdmemlists[0].prev = 0;
 320         for (i = 1; i < rsvdmemlists_used; ++i) {
 321                 rsvdmemlists[i].prev =
 322                     (native_ptr_t)(uintptr_t)(rsvdmemlists + i - 1);
 323                 rsvdmemlists[i].next = 0;
 324                 rsvdmemlists[i - 1].next =
 325                     (native_ptr_t)(uintptr_t)(rsvdmemlists + i);
 326         }
 327         bi->bi_rsvdmem = (native_ptr_t)(uintptr_t)rsvdmemlists;
 328         DBG(bi->bi_rsvdmem);
 329 }
 330 
 331 #if defined(__xpv)
 332 
 333 /*
 334  * halt on the hypervisor after a delay to drain console output
 335  */
 336 void
 337 dboot_halt(void)
 338 {
 339         uint_t i = 10000;
 340 
 341         while (--i)
 342                 (void) HYPERVISOR_yield();
 343         (void) HYPERVISOR_shutdown(SHUTDOWN_poweroff);
 344 }
 345 
 346 /*
 347  * From a machine address, find the corresponding pseudo-physical address.
 348  * Pseudo-physical address are contiguous and run from mfn_base in each VM.
 349  * Machine addresses are the real underlying hardware addresses.
 350  * These are needed for page table entries. Note that this routine is
 351  * poorly protected. A bad value of "ma" will cause a page fault.
 352  */
 353 paddr_t
 354 ma_to_pa(maddr_t ma)
 355 {
 356         ulong_t pgoff = ma & MMU_PAGEOFFSET;
 357         ulong_t pfn = mfn_to_pfn_mapping[mmu_btop(ma)];
 358         paddr_t pa;
 359 
 360         if (pfn >= xen_info->nr_pages)
 361                 return (-(paddr_t)1);
 362         pa = mfn_base + mmu_ptob((paddr_t)pfn) + pgoff;
 363 #ifdef DEBUG
 364         if (ma != pa_to_ma(pa))
 365                 dboot_printf("ma_to_pa(%" PRIx64 ") got %" PRIx64 ", "
 366                     "pa_to_ma() says %" PRIx64 "\n", ma, pa, pa_to_ma(pa));
 367 #endif
 368         return (pa);
 369 }
 370 
 371 /*
 372  * From a pseudo-physical address, find the corresponding machine address.
 373  */
 374 maddr_t
 375 pa_to_ma(paddr_t pa)
 376 {
 377         pfn_t pfn;
 378         ulong_t mfn;
 379 
 380         pfn = mmu_btop(pa - mfn_base);
 381         if (pa < mfn_base || pfn >= xen_info->nr_pages)
 382                 dboot_panic("pa_to_ma(): illegal address 0x%lx", (ulong_t)pa);
 383         mfn = ((ulong_t *)xen_info->mfn_list)[pfn];
 384 #ifdef DEBUG
 385         if (mfn_to_pfn_mapping[mfn] != pfn)
 386                 dboot_printf("pa_to_ma(pfn=%lx) got %lx ma_to_pa() says %lx\n",
 387                     pfn, mfn, mfn_to_pfn_mapping[mfn]);
 388 #endif
 389         return (mfn_to_ma(mfn) | (pa & MMU_PAGEOFFSET));
 390 }
 391 
 392 #endif  /* __xpv */
 393 
 394 x86pte_t
 395 get_pteval(paddr_t table, uint_t index)
 396 {
 397         if (pae_support)
 398                 return (((x86pte_t *)(uintptr_t)table)[index]);
 399         return (((x86pte32_t *)(uintptr_t)table)[index]);
 400 }
 401 
 402 /*ARGSUSED*/
 403 void
 404 set_pteval(paddr_t table, uint_t index, uint_t level, x86pte_t pteval)
 405 {
 406 #ifdef __xpv
 407         mmu_update_t t;
 408         maddr_t mtable = pa_to_ma(table);
 409         int retcnt;
 410 
 411         t.ptr = (mtable + index * pte_size) | MMU_NORMAL_PT_UPDATE;
 412         t.val = pteval;
 413         if (HYPERVISOR_mmu_update(&t, 1, &retcnt, DOMID_SELF) || retcnt != 1)
 414                 dboot_panic("HYPERVISOR_mmu_update() failed");
 415 #else /* __xpv */
 416         uintptr_t tab_addr = (uintptr_t)table;
 417 
 418         if (pae_support)
 419                 ((x86pte_t *)tab_addr)[index] = pteval;
 420         else
 421                 ((x86pte32_t *)tab_addr)[index] = (x86pte32_t)pteval;
 422         if (level == top_level && level == 2)
 423                 reload_cr3();
 424 #endif /* __xpv */
 425 }
 426 
 427 paddr_t
 428 make_ptable(x86pte_t *pteval, uint_t level)
 429 {
 430         paddr_t new_table = (paddr_t)(uintptr_t)mem_alloc(MMU_PAGESIZE);
 431 
 432         if (level == top_level && level == 2)
 433                 *pteval = pa_to_ma((uintptr_t)new_table) | PT_VALID;
 434         else
 435                 *pteval = pa_to_ma((uintptr_t)new_table) | ptp_bits;
 436 
 437 #ifdef __xpv
 438         /* Remove write permission to the new page table. */
 439         if (HYPERVISOR_update_va_mapping(new_table,
 440             *pteval & ~(x86pte_t)PT_WRITABLE, UVMF_INVLPG | UVMF_LOCAL))
 441                 dboot_panic("HYP_update_va_mapping error");
 442 #endif
 443 
 444         if (map_debug)
 445                 dboot_printf("new page table lvl=%d paddr=0x%lx ptp=0x%"
 446                     PRIx64 "\n", level, (ulong_t)new_table, *pteval);
 447         return (new_table);
 448 }
 449 
 450 x86pte_t *
 451 map_pte(paddr_t table, uint_t index)
 452 {
 453         return ((x86pte_t *)(uintptr_t)(table + index * pte_size));
 454 }
 455 
/*
 * Dump out the contents of the page tables, starting at top_page_table.
 *
 * The walk is depth first but written without real recursion: the current
 * table and index are saved in save_table[]/save_index[] (indexed by level)
 * before descending, and restored once the lower-level table has been
 * fully scanned.  Runs of entries that map consecutive physical ranges are
 * abbreviated with a "..." line.
 */
static void
dump_tables(void)
{
        uint_t save_index[4];   /* resume index, per level */
        char *save_table[4];    /* table being scanned, per level */
        uint_t  l;              /* level of the table being scanned */
        uint64_t va;            /* virtual address covered by current entry */
        uint64_t pgsize;        /* bytes covered by one entry at level l */
        int index;
        int i;
        x86pte_t pteval;
        char *table;
        static char *tablist = "\t\t\t";
        /* tabs + l is an empty string at top_level, one more tab per level */
        char *tabs = tablist + 3 - top_level;
        /*
         * NOTE(review): these are uint_t, so values returned by ma_to_pa()
         * are narrowed to that width before being printed or compared.
         */
        uint_t pa, pa1;
        /*
         * Without the hypervisor, maddr_t is made an alias for paddr_t.
         * The macro is not #undef'd in this function, and map_ma_at_va()
         * further down takes a maddr_t parameter.
         */
#if !defined(__xpv)
#define maddr_t paddr_t
#endif /* !__xpv */

        dboot_printf("Finished pagetables:\n");
        table = (char *)(uintptr_t)top_page_table;
        l = top_level;
        va = 0;
        for (index = 0; index < ptes_per_table; ++index) {
                pgsize = 1ull << shift_amt[l];
                /* entry width depends on pae_support */
                if (pae_support)
                        pteval = ((x86pte_t *)table)[index];
                else
                        pteval = ((x86pte32_t *)table)[index];
                /* empty entry: nothing to print, just account for its va */
                if (pteval == 0)
                        goto next_entry;

                dboot_printf("%s %p[0x%x] = %" PRIx64 ", va=%" PRIx64,
                    tabs + l, (void *)table, index, (uint64_t)pteval, va);
                pa = ma_to_pa(pteval & MMU_PAGEMASK);
                dboot_printf(" physaddr=%x\n", pa);

                /*
                 * Don't try to walk hypervisor private pagetables
                 *
                 * Entries above level 1, and level 1 entries without
                 * PT_PAGESIZE, are followed down into the table they
                 * reference.  index is set to -1 so that the loop's ++index
                 * starts the lower table at entry 0.
                 */
                if ((l > 1 || (l == 1 && (pteval & PT_PAGESIZE) == 0))) {
                        save_table[l] = table;
                        save_index[l] = index;
                        --l;
                        index = -1;
                        table = (char *)(uintptr_t)
                            ma_to_pa(pteval & MMU_PAGEMASK);
                        goto recursion;
                }

                /*
                 * shorten dump for consecutive mappings
                 *
                 * i counts this entry plus the following entries whose
                 * physical address continues the run started at pa.
                 */
                for (i = 1; index + i < ptes_per_table; ++i) {
                        if (pae_support)
                                pteval = ((x86pte_t *)table)[index + i];
                        else
                                pteval = ((x86pte32_t *)table)[index + i];
                        if (pteval == 0)
                                break;
                        pa1 = ma_to_pa(pteval & MMU_PAGEMASK);
                        if (pa1 != pa + i * pgsize)
                                break;
                }
                /*
                 * For a run longer than two entries, print "..." and skip
                 * ahead so that the last entry of the run is the next one
                 * examined.
                 */
                if (i > 2) {
                        dboot_printf("%s...\n", tabs + l);
                        va += pgsize * (i - 2);
                        index += i - 2;
                }
next_entry:
                va += pgsize;
                if (l == 3 && index == 256)     /* VA hole */
                        va = 0xffff800000000000ull;
recursion:
                /*
                 * Reached both when descending and when resuming a saved
                 * table; in neither case is va advanced here.
                 */
                ;
        }
        /*
         * Finished a lower-level table: pop back to the saved table and
         * jump into the loop body so that ++index moves past the entry
         * that was just walked.
         */
        if (l < top_level) {
                ++l;
                index = save_index[l];
                table = save_table[l];
                goto recursion;
        }
}
 542 
 543 /*
 544  * Add a mapping for the machine page at the given virtual address.
 545  */
 546 static void
 547 map_ma_at_va(maddr_t ma, native_ptr_t va, uint_t level)
 548 {
 549         x86pte_t *ptep;
 550         x86pte_t pteval;
 551 
 552         pteval = ma | pte_bits;
 553         if (level > 0)
 554                 pteval |= PT_PAGESIZE;
 555         if (va >= target_kernel_text && pge_support)
 556                 pteval |= PT_GLOBAL;
 557 
 558         if (map_debug && ma != va)
 559                 dboot_printf("mapping ma=0x%" PRIx64 " va=0x%" PRIx64
 560                     " pte=0x%" PRIx64 " l=%d\n",
 561                     (uint64_t)ma, (uint64_t)va, pteval, level);
 562 
 563 #if defined(__xpv)
 564         /*
 565          * see if we can avoid find_pte() on the hypervisor
 566          */
 567         if (HYPERVISOR_update_va_mapping(va, pteval,
 568             UVMF_INVLPG | UVMF_LOCAL) == 0)
 569                 return;
 570 #endif
 571 
 572         /*
 573          * Find the pte that will map this address. This creates any
 574          * missing intermediate level page tables
 575          */
 576         ptep = find_pte(va, NULL, level, 0);
 577 
 578         /*
 579          * When paravirtualized, we must use hypervisor calls to modify the
 580          * PTE, since paging is active. On real hardware we just write to
 581          * the pagetables which aren't in use yet.
 582          */
 583 #if defined(__xpv)
 584         ptep = ptep;    /* shut lint up */
 585         if (HYPERVISOR_update_va_mapping(va, pteval, UVMF_INVLPG | UVMF_LOCAL))
 586                 dboot_panic("mmu_update failed-map_pa_at_va va=0x%" PRIx64
 587                     " l=%d ma=0x%" PRIx64 ", pte=0x%" PRIx64 "",
 588                     (uint64_t)va, level, (uint64_t)ma, pteval);
 589 #else
 590         if (va < 1024 * 1024)
 591                 pteval |= PT_NOCACHE;           /* for video RAM */
 592         if (pae_support)
 593                 *ptep = pteval;
 594         else
 595                 *((x86pte32_t *)ptep) = (x86pte32_t)pteval;
 596 #endif
 597 }
 598 
 599 /*
 600  * Add a mapping for the physical page at the given virtual address.
 601  */
 602 static void
 603 map_pa_at_va(paddr_t pa, native_ptr_t va, uint_t level)
 604 {
 605         map_ma_at_va(pa_to_ma(pa), va, level);
 606 }
 607 
 608 /*
 609  * This is called to remove start..end from the
 610  * possible range of PCI addresses.
 611  */
 612 const uint64_t pci_lo_limit = 0x00100000ul;
 613 const uint64_t pci_hi_limit = 0xfff00000ul;
 614 static void
 615 exclude_from_pci(uint64_t start, uint64_t end)
 616 {
 617         int i;
 618         int j;
 619         struct boot_memlist *ml;
 620 
 621         for (i = 0; i < pcimemlists_used; ++i) {
 622                 ml = &pcimemlists[i];
 623 
 624                 /* delete the entire range? */
 625                 if (start <= ml->addr && ml->addr + ml->size <= end) {
 626                         --pcimemlists_used;
 627                         for (j = i; j < pcimemlists_used; ++j)
 628                                 pcimemlists[j] = pcimemlists[j + 1];
 629                         --i;    /* to revisit the new one at this index */
 630                 }
 631 
 632                 /* split a range? */
 633                 else if (ml->addr < start && end < ml->addr + ml->size) {
 634 
 635                         ++pcimemlists_used;
 636                         if (pcimemlists_used > MAX_MEMLIST)
 637                                 dboot_panic("too many pcimemlists");
 638 
 639                         for (j = pcimemlists_used - 1; j > i; --j)
 640                                 pcimemlists[j] = pcimemlists[j - 1];
 641                         ml->size = start - ml->addr;
 642 
 643                         ++ml;
 644                         ml->size = (ml->addr + ml->size) - end;
 645                         ml->addr = end;
 646                         ++i;    /* skip on to next one */
 647                 }
 648 
 649                 /* cut memory off the start? */
 650                 else if (ml->addr < end && end < ml->addr + ml->size) {
 651                         ml->size -= end - ml->addr;
 652                         ml->addr = end;
 653                 }
 654 
 655                 /* cut memory off the end? */
 656                 else if (ml->addr <= start && start < ml->addr + ml->size) {
 657                         ml->size = start - ml->addr;
 658                 }
 659         }
 660 }
 661 
 662 /*
 663  * During memory allocation, find the highest address not used yet.
 664  */
 665 static void
 666 check_higher(paddr_t a)
 667 {
 668         if (a < next_avail_addr)
 669                 return;
 670         next_avail_addr = RNDUP(a + 1, MMU_PAGESIZE);
 671         DBG(next_avail_addr);
 672 }
 673 
 674 static int
 675 dboot_loader_mmap_entries(void)
 676 {
 677 #if !defined(__xpv)
 678         if (num_entries_set == B_TRUE)
 679                 return (num_entries);
 680 
 681         switch (multiboot_version) {
 682         case 1:
 683                 DBG(mb_info->flags);
 684                 if (mb_info->flags & 0x40) {
 685                         mb_memory_map_t *mmap;
 686                         caddr32_t mmap_addr;
 687 
 688                         DBG(mb_info->mmap_addr);
 689                         DBG(mb_info->mmap_length);
 690                         check_higher(mb_info->mmap_addr + mb_info->mmap_length);
 691 
 692                         for (mmap_addr = mb_info->mmap_addr;
 693                             mmap_addr < mb_info->mmap_addr +
 694                             mb_info->mmap_length;
 695                             mmap_addr += mmap->size + sizeof (mmap->size)) {
 696                                 mmap = (mb_memory_map_t *)(uintptr_t)mmap_addr;
 697                                 ++num_entries;
 698                         }
 699 
 700                         num_entries_set = B_TRUE;
 701                 }
 702                 break;
 703         case 2:
 704                 num_entries_set = B_TRUE;
 705                 num_entries = dboot_multiboot2_mmap_nentries(mb2_info,
 706                     mb2_mmap_tagp);
 707                 break;
 708         default:
 709                 dboot_panic("Unknown multiboot version: %d\n",
 710                     multiboot_version);
 711                 break;
 712         }
 713         return (num_entries);
 714 #else
 715         return (MAXMAPS);
 716 #endif
 717 }
 718 
 719 static uint32_t
 720 dboot_loader_mmap_get_type(int index)
 721 {
 722 #if !defined(__xpv)
 723         mb_memory_map_t *mp, *mpend;
 724         caddr32_t mmap_addr;
 725         int i;
 726 
 727         switch (multiboot_version) {
 728         case 1:
 729                 mp = (mb_memory_map_t *)(uintptr_t)mb_info->mmap_addr;
 730                 mpend = (mb_memory_map_t *)(uintptr_t)
 731                     (mb_info->mmap_addr + mb_info->mmap_length);
 732 
 733                 for (i = 0; mp < mpend && i != index; i++)
 734                         mp = (mb_memory_map_t *)((uintptr_t)mp + mp->size +
 735                             sizeof (mp->size));
 736                 if (mp >= mpend) {
 737                         dboot_panic("dboot_loader_mmap_get_type(): index "
 738                             "out of bounds: %d\n", index);
 739                 }
 740                 return (mp->type);
 741 
 742         case 2:
 743                 return (dboot_multiboot2_mmap_get_type(mb2_info,
 744                     mb2_mmap_tagp, index));
 745 
 746         default:
 747                 dboot_panic("Unknown multiboot version: %d\n",
 748                     multiboot_version);
 749                 break;
 750         }
 751         return (0);
 752 #else
 753         return (map_buffer[index].type);
 754 #endif
 755 }
 756 
 757 static uint64_t
 758 dboot_loader_mmap_get_base(int index)
 759 {
 760 #if !defined(__xpv)
 761         mb_memory_map_t *mp, *mpend;
 762         int i;
 763 
 764         switch (multiboot_version) {
 765         case 1:
 766                 mp = (mb_memory_map_t *)mb_info->mmap_addr;
 767                 mpend = (mb_memory_map_t *)
 768                     (mb_info->mmap_addr + mb_info->mmap_length);
 769 
 770                 for (i = 0; mp < mpend && i != index; i++)
 771                         mp = (mb_memory_map_t *)((uintptr_t)mp + mp->size +
 772                             sizeof (mp->size));
 773                 if (mp >= mpend) {
 774                         dboot_panic("dboot_loader_mmap_get_base(): index "
 775                             "out of bounds: %d\n", index);
 776                 }
 777                 return (((uint64_t)mp->base_addr_high << 32) +
 778                     (uint64_t)mp->base_addr_low);
 779 
 780         case 2:
 781                 return (dboot_multiboot2_mmap_get_base(mb2_info,
 782                     mb2_mmap_tagp, index));
 783 
 784         default:
 785                 dboot_panic("Unknown multiboot version: %d\n",
 786                     multiboot_version);
 787                 break;
 788         }
 789         return (0);
 790 #else
 791         return (((uint64_t)map_buffer[index].base_addr_high << 32) +
 792             (uint64_t)map_buffer[index].base_addr_low);
 793 #endif
 794 }
 795 
 796 static uint64_t
 797 dboot_loader_mmap_get_length(int index)
 798 {
 799 #if !defined(__xpv)
 800         mb_memory_map_t *mp, *mpend;
 801         int i;
 802 
 803         switch (multiboot_version) {
 804         case 1:
 805                 mp = (mb_memory_map_t *)mb_info->mmap_addr;
 806                 mpend = (mb_memory_map_t *)
 807                     (mb_info->mmap_addr + mb_info->mmap_length);
 808 
 809                 for (i = 0; mp < mpend && i != index; i++)
 810                         mp = (mb_memory_map_t *)((uintptr_t)mp + mp->size +
 811                             sizeof (mp->size));
 812                 if (mp >= mpend) {
 813                         dboot_panic("dboot_loader_mmap_get_length(): index "
 814                             "out of bounds: %d\n", index);
 815                 }
 816                 return (((uint64_t)mp->length_high << 32) +
 817                     (uint64_t)mp->length_low);
 818 
 819         case 2:
 820                 return (dboot_multiboot2_mmap_get_length(mb2_info,
 821                     mb2_mmap_tagp, index));
 822 
 823         default:
 824                 dboot_panic("Unknown multiboot version: %d\n",
 825                     multiboot_version);
 826                 break;
 827         }
 828         return (0);
 829 #else
 830         return (((uint64_t)map_buffer[index].length_high << 32) +
 831             (uint64_t)map_buffer[index].length_low);
 832 #endif
 833 }
 834 
 835 static void
 836 build_pcimemlists(void)
 837 {
 838         uint64_t page_offset = MMU_PAGEOFFSET;  /* needs to be 64 bits */
 839         uint64_t start;
 840         uint64_t end;
 841         int i, num;
 842 
 843         /*
 844          * initialize
 845          */
 846         pcimemlists[0].addr = pci_lo_limit;
 847         pcimemlists[0].size = pci_hi_limit - pci_lo_limit;
 848         pcimemlists_used = 1;
 849 
 850         num = dboot_loader_mmap_entries();
 851         /*
 852          * Fill in PCI memlists.
 853          */
 854         for (i = 0; i < num; ++i) {
 855                 start = dboot_loader_mmap_get_base(i);
 856                 end = start + dboot_loader_mmap_get_length(i);
 857 
 858                 if (prom_debug)
 859                         dboot_printf("\ttype: %d %" PRIx64 "..%"
 860                             PRIx64 "\n", dboot_loader_mmap_get_type(i),
 861                             start, end);
 862 
 863                 /*
 864                  * page align start and end
 865                  */
 866                 start = (start + page_offset) & ~page_offset;
 867                 end &= ~page_offset;
 868                 if (end <= start)
 869                         continue;
 870 
 871                 exclude_from_pci(start, end);
 872         }
 873 
 874         /*
 875          * Finish off the pcimemlist
 876          */
 877         if (prom_debug) {
 878                 for (i = 0; i < pcimemlists_used; ++i) {
 879                         dboot_printf("pcimemlist entry 0x%" PRIx64 "..0x%"
 880                             PRIx64 "\n", pcimemlists[i].addr,
 881                             pcimemlists[i].addr + pcimemlists[i].size);
 882                 }
 883         }
 884         pcimemlists[0].next = 0;
 885         pcimemlists[0].prev = 0;
 886         for (i = 1; i < pcimemlists_used; ++i) {
 887                 pcimemlists[i].prev =
 888                     (native_ptr_t)(uintptr_t)(pcimemlists + i - 1);
 889                 pcimemlists[i].next = 0;
 890                 pcimemlists[i - 1].next =
 891                     (native_ptr_t)(uintptr_t)(pcimemlists + i);
 892         }
 893         bi->bi_pcimem = (native_ptr_t)(uintptr_t)pcimemlists;
 894         DBG(bi->bi_pcimem);
 895 }
 896 
 897 #if defined(__xpv)
/*
 * Initialize memory allocator stuff from hypervisor-supplied start info.
 *
 * This is the __xpv (Xen paravirtualized) variant.  It:
 *  - derives the scratch region and next_avail_addr from the current stack
 *    address,
 *  - records the (at most one) module described by xen_info,
 *  - sets max_mem from xen_info->nr_pages and builds a single physinstall
 *    memlist covering [0, max_mem),
 *  - builds the reserved memlists, and
 *  - for the initial domain only, fetches the machine memory map from the
 *    hypervisor into map_buffer and builds the PCI memlists from it.
 */
static void
init_mem_alloc(void)
{
        int     local;  /* only its address is used: it marks the stack */
        paddr_t scratch_start;
        xen_memory_map_t map;

        DBG_MSG("Entered init_mem_alloc()\n");

        /*
         * Free memory follows the stack. There's at least 512KB of scratch
         * space, rounded up to at least 2Mb alignment.  That should be enough
         * for the page tables we'll need to build.  The nucleus memory is
         * allocated last and will be outside the addressable range.  We'll
         * switch to new page tables before we unpack the kernel
         */
        scratch_start = RNDUP((paddr_t)(uintptr_t)&local, MMU_PAGESIZE);
        DBG(scratch_start);
        scratch_end = RNDUP((paddr_t)scratch_start + 512 * 1024, TWO_MEG);
        DBG(scratch_end);

        /*
         * For paranoia, leave some space between hypervisor data and ours.
         * Use 500 instead of 512.
         */
        next_avail_addr = scratch_end - 500 * 1024;
        DBG(next_avail_addr);

        /*
         * The domain builder gives us at most 1 module
         */
        DBG(xen_info->mod_len);
        if (xen_info->mod_len > 0) {
                DBG(xen_info->mod_start);
                modules[0].bm_addr =
                    (native_ptr_t)(uintptr_t)xen_info->mod_start;
                modules[0].bm_size = xen_info->mod_len;
                bi->bi_module_cnt = 1;
                bi->bi_modules = (native_ptr_t)(uintptr_t)modules;
        } else {
                /* no module supplied: publish an empty module list */
                bi->bi_module_cnt = 0;
                bi->bi_modules = (native_ptr_t)(uintptr_t)NULL;
        }
        DBG(bi->bi_module_cnt);
        DBG(bi->bi_modules);

        /*
         * Total memory is the page count from the start info, in bytes.
         */
        DBG(xen_info->mfn_list);
        DBG(xen_info->nr_pages);
        max_mem = (paddr_t)xen_info->nr_pages << MMU_PAGESHIFT;
        DBG(max_mem);

        /*
         * Using pseudo-physical addresses, so only 1 memlist element
         */
        memlists[0].addr = 0;
        DBG(memlists[0].addr);
        memlists[0].size = max_mem;
        DBG(memlists[0].size);
        memlists_used = 1;
        DBG(memlists_used);

        /*
         * finish building physinstall list
         */
        sort_physinstall();

        /*
         * build bios reserved memlists
         */
        build_rsvdmemlists();

        if (DOMAIN_IS_INITDOMAIN(xen_info)) {
                /*
                 * build PCI Memory list
                 *
                 * The hypervisor fills map_buffer (up to MAXMAPS entries)
                 * with the machine memory map; build_pcimemlists() is
                 * called only after that request has succeeded.
                 */
                map.nr_entries = MAXMAPS;
                /*LINTED: constant in conditional context*/
                set_xen_guest_handle(map.buffer, map_buffer);
                if (HYPERVISOR_memory_op(XENMEM_machine_memory_map, &map) != 0)
                        dboot_panic("getting XENMEM_machine_memory_map failed");
                build_pcimemlists();
        }
}
 984 
 985 #else   /* !__xpv */
 986 
/*
 * Multiboot 1 console information: clears fb->framebuffer.  Unlike the
 * Multiboot 2 variant, no framebuffer tag is looked up here.
 */
static void
dboot_multiboot1_xboot_consinfo(void)
{
        fb->framebuffer = 0;
}
 992 
 993 static void
 994 dboot_multiboot2_xboot_consinfo(void)
 995 {
 996         multiboot_tag_framebuffer_t *fbtag;
 997         fbtag = dboot_multiboot2_find_tag(mb2_info,
 998             MULTIBOOT_TAG_TYPE_FRAMEBUFFER);
 999         fb->framebuffer = (uint64_t)(uintptr_t)fbtag;
1000 }
1001 
1002 static int
1003 dboot_multiboot_modcount(void)
1004 {
1005         switch (multiboot_version) {
1006         case 1:
1007                 return (mb_info->mods_count);
1008 
1009         case 2:
1010                 return (dboot_multiboot2_modcount(mb2_info));
1011 
1012         default:
1013                 dboot_panic("Unknown multiboot version: %d\n",
1014                     multiboot_version);
1015                 break;
1016         }
1017         return (0);
1018 }
1019 
1020 static uint32_t
1021 dboot_multiboot_modstart(int index)
1022 {
1023         switch (multiboot_version) {
1024         case 1:
1025                 return (((mb_module_t *)mb_info->mods_addr)[index].mod_start);
1026 
1027         case 2:
1028                 return (dboot_multiboot2_modstart(mb2_info, index));
1029 
1030         default:
1031                 dboot_panic("Unknown multiboot version: %d\n",
1032                     multiboot_version);
1033                 break;
1034         }
1035         return (0);
1036 }
1037 
1038 static uint32_t
1039 dboot_multiboot_modend(int index)
1040 {
1041         switch (multiboot_version) {
1042         case 1:
1043                 return (((mb_module_t *)mb_info->mods_addr)[index].mod_end);
1044 
1045         case 2:
1046                 return (dboot_multiboot2_modend(mb2_info, index));
1047 
1048         default:
1049                 dboot_panic("Unknown multiboot version: %d\n",
1050                     multiboot_version);
1051                 break;
1052         }
1053         return (0);
1054 }
1055 
1056 static char *
1057 dboot_multiboot_modcmdline(int index)
1058 {
1059         switch (multiboot_version) {
1060         case 1:
1061                 return ((char *)((mb_module_t *)
1062                     mb_info->mods_addr)[index].mod_name);
1063 
1064         case 2:
1065                 return (dboot_multiboot2_modcmdline(mb2_info, index));
1066 
1067         default:
1068                 dboot_panic("Unknown multiboot version: %d\n",
1069                     multiboot_version);
1070                 break;
1071         }
1072         return (0);
1073 }
1074 
1075 /*
1076  * Find the modules used by console setup.
1077  * Since we need the console to print early boot messages, the console is set up
1078  * before anything else and therefore we need to pick up the needed modules.
1079  *
1080  * Note, we just will search for and if found, will pass the modules
1081  * to console setup, the proper module list processing will happen later.
1082  * Currently used modules are boot environment and console font.
1083  */
1084 static void
1085 dboot_find_console_modules(void)
1086 {
1087         int i, modcount;
1088         uint32_t mod_start, mod_end;
1089         char *cmdline;
1090 
1091         modcount = dboot_multiboot_modcount();
1092         bi->bi_module_cnt = 0;
1093         for (i = 0; i < modcount; ++i) {
1094                 cmdline = dboot_multiboot_modcmdline(i);
1095                 if (cmdline == NULL)
1096                         continue;
1097 
1098                 if (strstr(cmdline, "type=console-font") != NULL)
1099                         modules[bi->bi_module_cnt].bm_type = BMT_FONT;
1100                 else if (strstr(cmdline, "type=environment") != NULL)
1101                         modules[bi->bi_module_cnt].bm_type = BMT_ENV;
1102                 else
1103                         continue;
1104 
1105                 mod_start = dboot_multiboot_modstart(i);
1106                 mod_end = dboot_multiboot_modend(i);
1107                 modules[bi->bi_module_cnt].bm_addr =
1108                     (native_ptr_t)(uintptr_t)mod_start;
1109                 modules[bi->bi_module_cnt].bm_size = mod_end - mod_start;
1110                 modules[bi->bi_module_cnt].bm_name =
1111                     (native_ptr_t)(uintptr_t)NULL;
1112                 modules[bi->bi_module_cnt].bm_hash =
1113                     (native_ptr_t)(uintptr_t)NULL;
1114                 bi->bi_module_cnt++;
1115         }
1116         if (bi->bi_module_cnt != 0)
1117                 bi->bi_modules = (native_ptr_t)(uintptr_t)modules;
1118 }
1119 
1120 static boolean_t
1121 dboot_multiboot_basicmeminfo(uint32_t *lower, uint32_t *upper)
1122 {
1123         boolean_t rv = B_FALSE;
1124 
1125         switch (multiboot_version) {
1126         case 1:
1127                 if (mb_info->flags & 0x01) {
1128                         *lower = mb_info->mem_lower;
1129                         *upper = mb_info->mem_upper;
1130                         rv = B_TRUE;
1131                 }
1132                 break;
1133 
1134         case 2:
1135                 return (dboot_multiboot2_basicmeminfo(mb2_info, lower, upper));
1136 
1137         default:
1138                 dboot_panic("Unknown multiboot version: %d\n",
1139                     multiboot_version);
1140                 break;
1141         }
1142         return (rv);
1143 }
1144 
/*
 * Convert one ASCII hexadecimal digit ('0'-'9', 'a'-'f' or 'A'-'F') to its
 * numeric value 0x0-0xf.
 *
 * Any other character panics.  Both ends of every range are checked:
 * testing only the lower bound would let characters such as 'g' or ':'
 * through and yield values above 0xf.
 */
static uint8_t
dboot_a2h(char v)
{
        if (v >= '0' && v <= '9')
                return (v - '0');
        if (v >= 'a' && v <= 'f')
                return (v - 'a' + 0xa);
        if (v >= 'A' && v <= 'F')
                return (v - 'A' + 0xa);

        dboot_panic("bad ASCII hex character %c\n", v);

        return (0);
}
1159 
1160 static void
1161 digest_a2h(const char *ascii, uint8_t *digest)
1162 {
1163         unsigned int i;
1164 
1165         for (i = 0; i < SHA1_DIGEST_LENGTH; i++) {
1166                 digest[i] = dboot_a2h(ascii[i * 2]) << 4;
1167                 digest[i] |= dboot_a2h(ascii[i * 2 + 1]);
1168         }
1169 }
1170 
1171 /*
1172  * Generate a SHA-1 hash of the first len bytes of image, and compare it with
1173  * the ASCII-format hash found in the 40-byte buffer at ascii.  If they
1174  * match, return 0, otherwise -1.  This works only for images smaller than
1175  * 4 GB, which should not be a problem.
1176  */
1177 static int
1178 check_image_hash(uint_t midx)
1179 {
1180         const char *ascii;
1181         const void *image;
1182         size_t len;
1183         SHA1_CTX ctx;
1184         uint8_t digest[SHA1_DIGEST_LENGTH];
1185         uint8_t baseline[SHA1_DIGEST_LENGTH];
1186         unsigned int i;
1187 
1188         ascii = (const char *)(uintptr_t)modules[midx].bm_hash;
1189         image = (const void *)(uintptr_t)modules[midx].bm_addr;
1190         len = (size_t)modules[midx].bm_size;
1191 
1192         digest_a2h(ascii, baseline);
1193 
1194         SHA1Init(&ctx);
1195         SHA1Update(&ctx, image, len);
1196         SHA1Final(digest, &ctx);
1197 
1198         for (i = 0; i < SHA1_DIGEST_LENGTH; i++) {
1199                 if (digest[i] != baseline[i])
1200                         return (-1);
1201         }
1202 
1203         return (0);
1204 }
1205 
1206 static const char *
1207 type_to_str(boot_module_type_t type)
1208 {
1209         switch (type) {
1210         case BMT_ROOTFS:
1211                 return ("rootfs");
1212         case BMT_FILE:
1213                 return ("file");
1214         case BMT_HASH:
1215                 return ("hash");
1216         case BMT_ENV:
1217                 return ("environment");
1218         case BMT_FONT:
1219                 return ("console-font");
1220         default:
1221                 return ("unknown");
1222         }
1223 }
1224 
1225 static void
1226 check_images(void)
1227 {
1228         uint_t i;
1229         char displayhash[SHA1_ASCII_LENGTH + 1];
1230 
1231         for (i = 0; i < modules_used; i++) {
1232                 if (prom_debug) {
1233                         dboot_printf("module #%d: name %s type %s "
1234                             "addr %lx size %lx\n",
1235                             i, (char *)(uintptr_t)modules[i].bm_name,
1236                             type_to_str(modules[i].bm_type),
1237                             (ulong_t)modules[i].bm_addr,
1238                             (ulong_t)modules[i].bm_size);
1239                 }
1240 
1241                 if (modules[i].bm_type == BMT_HASH ||
1242                     modules[i].bm_hash == (native_ptr_t)(uintptr_t)NULL) {
1243                         DBG_MSG("module has no hash; skipping check\n");
1244                         continue;
1245                 }
1246                 (void) memcpy(displayhash,
1247                     (void *)(uintptr_t)modules[i].bm_hash,
1248                     SHA1_ASCII_LENGTH);
1249                 displayhash[SHA1_ASCII_LENGTH] = '\0';
1250                 if (prom_debug) {
1251                         dboot_printf("checking expected hash [%s]: ",
1252                             displayhash);
1253                 }
1254 
1255                 if (check_image_hash(i) != 0)
1256                         dboot_panic("hash mismatch!\n");
1257                 else
1258                         DBG_MSG("OK\n");
1259         }
1260 }
1261 
1262 /*
1263  * Determine the module's starting address, size, name, and type, and fill the
1264  * boot_modules structure.  This structure is used by the bop code, except for
1265  * hashes which are checked prior to transferring control to the kernel.
1266  */
1267 static void
1268 process_module(int midx)
1269 {
1270         uint32_t mod_start = dboot_multiboot_modstart(midx);
1271         uint32_t mod_end = dboot_multiboot_modend(midx);
1272         char *cmdline = dboot_multiboot_modcmdline(midx);
1273         char *p, *q;
1274 
1275         check_higher(mod_end);
1276         if (prom_debug) {
1277                 dboot_printf("\tmodule #%d: '%s' at 0x%lx, end 0x%lx\n",
1278                     midx, cmdline, (ulong_t)mod_start, (ulong_t)mod_end);
1279         }
1280 
1281         if (mod_start > mod_end) {
1282                 dboot_panic("module #%d: module start address 0x%lx greater "
1283                     "than end address 0x%lx", midx,
1284                     (ulong_t)mod_start, (ulong_t)mod_end);
1285         }
1286 
1287         /*
1288          * A brief note on lengths and sizes: GRUB, for reasons unknown, passes
1289          * the address of the last valid byte in a module plus 1 as mod_end.
1290          * This is of course a bug; the multiboot specification simply states
1291          * that mod_start and mod_end "contain the start and end addresses of
1292          * the boot module itself" which is pretty obviously not what GRUB is
1293          * doing.  However, fixing it requires that not only this code be
1294          * changed but also that other code consuming this value and values
1295          * derived from it be fixed, and that the kernel and GRUB must either
1296          * both have the bug or neither.  While there are a lot of combinations
1297          * that will work, there are also some that won't, so for simplicity
1298          * we'll just cope with the bug.  That means we won't actually hash the
1299          * byte at mod_end, and we will expect that mod_end for the hash file
1300          * itself is one greater than some multiple of 41 (40 bytes of ASCII
1301          * hash plus a newline for each module).  We set bm_size to the true
1302          * correct number of bytes in each module, achieving exactly this.
1303          */
1304 
1305         modules[midx].bm_addr = (native_ptr_t)(uintptr_t)mod_start;
1306         modules[midx].bm_size = mod_end - mod_start;
1307         modules[midx].bm_name = (native_ptr_t)(uintptr_t)cmdline;
1308         modules[midx].bm_hash = (native_ptr_t)(uintptr_t)NULL;
1309         modules[midx].bm_type = BMT_FILE;
1310 
1311         if (cmdline == NULL) {
1312                 modules[midx].bm_name = (native_ptr_t)(uintptr_t)noname;
1313                 return;
1314         }
1315 
1316         p = cmdline;
1317         modules[midx].bm_name =
1318             (native_ptr_t)(uintptr_t)strsep(&p, " \t\f\n\r");
1319 
1320         while (p != NULL) {
1321                 q = strsep(&p, " \t\f\n\r");
1322                 if (strncmp(q, "name=", 5) == 0) {
1323                         if (q[5] != '\0' && !isspace(q[5])) {
1324                                 modules[midx].bm_name =
1325                                     (native_ptr_t)(uintptr_t)(q + 5);
1326                         }
1327                         continue;
1328                 }
1329 
1330                 if (strncmp(q, "type=", 5) == 0) {
1331                         if (q[5] == '\0' || isspace(q[5]))
1332                                 continue;
1333                         q += 5;
1334                         if (strcmp(q, "rootfs") == 0) {
1335                                 modules[midx].bm_type = BMT_ROOTFS;
1336                         } else if (strcmp(q, "hash") == 0) {
1337                                 modules[midx].bm_type = BMT_HASH;
1338                         } else if (strcmp(q, "environment") == 0) {
1339                                 modules[midx].bm_type = BMT_ENV;
1340                         } else if (strcmp(q, "console-font") == 0) {
1341                                 modules[midx].bm_type = BMT_FONT;
1342                         } else if (strcmp(q, "file") != 0) {
1343                                 dboot_printf("\tmodule #%d: unknown module "
1344                                     "type '%s'; defaulting to 'file'\n",
1345                                     midx, q);
1346                         }
1347                         continue;
1348                 }
1349 
1350                 if (strncmp(q, "hash=", 5) == 0) {
1351                         if (q[5] != '\0' && !isspace(q[5])) {
1352                                 modules[midx].bm_hash =
1353                                     (native_ptr_t)(uintptr_t)(q + 5);
1354                         }
1355                         continue;
1356                 }
1357 
1358                 dboot_printf("ignoring unknown option '%s'\n", q);
1359         }
1360 }
1361 
1362 /*
1363  * Backward compatibility: if there are exactly one or two modules, both
1364  * of type 'file' and neither with an embedded hash value, we have been
1365  * given the legacy style modules.  In this case we need to treat the first
1366  * module as a rootfs and the second as a hash referencing that module.
1367  * Otherwise, even if the configuration is invalid, we assume that the
1368  * operator knows what he's doing or at least isn't being bitten by this
1369  * interface change.
1370  */
1371 static void
1372 fixup_modules(void)
1373 {
1374         if (modules_used == 0 || modules_used > 2)
1375                 return;
1376 
1377         if (modules[0].bm_type != BMT_FILE ||
1378             modules_used > 1 && modules[1].bm_type != BMT_FILE) {
1379                 return;
1380         }
1381 
1382         if (modules[0].bm_hash != (native_ptr_t)(uintptr_t)NULL ||
1383             modules_used > 1 &&
1384             modules[1].bm_hash != (native_ptr_t)(uintptr_t)NULL) {
1385                 return;
1386         }
1387 
1388         modules[0].bm_type = BMT_ROOTFS;
1389         if (modules_used > 1) {
1390                 modules[1].bm_type = BMT_HASH;
1391                 modules[1].bm_name = modules[0].bm_name;
1392         }
1393 }
1394 
1395 /*
1396  * For modules that do not have assigned hashes but have a separate hash module,
1397  * find the assigned hash module and set the primary module's bm_hash to point
1398  * to the hash data from that module.  We will then ignore modules of type
1399  * BMT_HASH from this point forward.
1400  */
1401 static void
1402 assign_module_hashes(void)
1403 {
1404         uint_t i, j;
1405 
1406         for (i = 0; i < modules_used; i++) {
1407                 if (modules[i].bm_type == BMT_HASH ||
1408                     modules[i].bm_hash != (native_ptr_t)(uintptr_t)NULL) {
1409                         continue;
1410                 }
1411 
1412                 for (j = 0; j < modules_used; j++) {
1413                         if (modules[j].bm_type != BMT_HASH ||
1414                             strcmp((char *)(uintptr_t)modules[j].bm_name,
1415                             (char *)(uintptr_t)modules[i].bm_name) != 0) {
1416                                 continue;
1417                         }
1418 
1419                         if (modules[j].bm_size < SHA1_ASCII_LENGTH) {
1420                                 dboot_printf("Short hash module of length "
1421                                     "0x%lx bytes; ignoring\n",
1422                                     (ulong_t)modules[j].bm_size);
1423                         } else {
1424                                 modules[i].bm_hash = modules[j].bm_addr;
1425                         }
1426                         break;
1427                 }
1428         }
1429 }
1430 
1431 /*
1432  * Walk through the module information finding the last used address.
1433  * The first available address will become the top level page table.
1434  */
1435 static void
1436 dboot_process_modules(void)
1437 {
1438         int i, modcount;
1439         extern char _end[];
1440 
1441         DBG_MSG("\nFinding Modules\n");
1442         modcount = dboot_multiboot_modcount();
1443         if (modcount > MAX_BOOT_MODULES) {
1444                 dboot_panic("Too many modules (%d) -- the maximum is %d.",
1445                     modcount, MAX_BOOT_MODULES);
1446         }
1447         /*
1448          * search the modules to find the last used address
1449          * we'll build the module list while we're walking through here
1450          */
1451         check_higher((paddr_t)(uintptr_t)&_end);
1452         for (i = 0; i < modcount; ++i) {
1453                 process_module(i);
1454                 modules_used++;
1455         }
1456         bi->bi_modules = (native_ptr_t)(uintptr_t)modules;
1457         DBG(bi->bi_modules);
1458         bi->bi_module_cnt = modcount;
1459         DBG(bi->bi_module_cnt);
1460 
1461         fixup_modules();
1462         assign_module_hashes();
1463         check_images();
1464 }
1465 
1466 /*
1467  * We then build the phys_install memlist from the multiboot information.
1468  */
1469 static void
1470 dboot_process_mmap(void)
1471 {
1472         uint64_t start;
1473         uint64_t end;
1474         uint64_t page_offset = MMU_PAGEOFFSET;  /* needs to be 64 bits */
1475         uint32_t lower, upper;
1476         int i, mmap_entries;
1477 
1478         /*
1479          * Walk through the memory map from multiboot and build our memlist
1480          * structures. Note these will have native format pointers.
1481          */
1482         DBG_MSG("\nFinding Memory Map\n");
1483         num_entries = 0;
1484         num_entries_set = B_FALSE;
1485         max_mem = 0;
1486         if ((mmap_entries = dboot_loader_mmap_entries()) > 0) {
1487                 for (i = 0; i < mmap_entries; i++) {
1488                         uint32_t type = dboot_loader_mmap_get_type(i);
1489                         start = dboot_loader_mmap_get_base(i);
1490                         end = start + dboot_loader_mmap_get_length(i);
1491 
1492                         if (prom_debug)
1493                                 dboot_printf("\ttype: %d %" PRIx64 "..%"
1494                                     PRIx64 "\n", type, start, end);
1495 
1496                         /*
1497                          * page align start and end
1498                          */
1499                         start = (start + page_offset) & ~page_offset;
1500                         end &= ~page_offset;
1501                         if (end <= start)
1502                                 continue;
1503 
1504                         /*
1505                          * only type 1 is usable RAM
1506                          */
1507                         switch (type) {
1508                         case 1:
1509                                 if (end > max_mem)
1510                                         max_mem = end;
1511                                 memlists[memlists_used].addr = start;
1512                                 memlists[memlists_used].size = end - start;
1513                                 ++memlists_used;
1514                                 if (memlists_used > MAX_MEMLIST)
1515                                         dboot_panic("too many memlists");
1516                                 break;
1517                         case 2:
1518                                 rsvdmemlists[rsvdmemlists_used].addr = start;
1519                                 rsvdmemlists[rsvdmemlists_used].size =
1520                                     end - start;
1521                                 ++rsvdmemlists_used;
1522                                 if (rsvdmemlists_used > MAX_MEMLIST)
1523                                         dboot_panic("too many rsvdmemlists");
1524                                 break;
1525                         default:
1526                                 continue;
1527                         }
1528                 }
1529                 build_pcimemlists();
1530         } else if (dboot_multiboot_basicmeminfo(&lower, &upper)) {
1531                 DBG(lower);
1532                 memlists[memlists_used].addr = 0;
1533                 memlists[memlists_used].size = lower * 1024;
1534                 ++memlists_used;
1535                 DBG(upper);
1536                 memlists[memlists_used].addr = 1024 * 1024;
1537                 memlists[memlists_used].size = upper * 1024;
1538                 ++memlists_used;
1539 
1540                 /*
1541                  * Old platform - assume I/O space at the end of memory.
1542                  */
1543                 pcimemlists[0].addr = (upper * 1024) + (1024 * 1024);
1544                 pcimemlists[0].size = pci_hi_limit - pcimemlists[0].addr;
1545                 pcimemlists[0].next = 0;
1546                 pcimemlists[0].prev = 0;
1547                 bi->bi_pcimem = (native_ptr_t)(uintptr_t)pcimemlists;
1548                 DBG(bi->bi_pcimem);
1549         } else {
1550                 dboot_panic("No memory info from boot loader!!!");
1551         }
1552 
1553         /*
1554          * finish processing the physinstall list
1555          */
1556         sort_physinstall();
1557 
1558         /*
1559          * build bios reserved mem lists
1560          */
1561         build_rsvdmemlists();
1562 }
1563 
1564 /*
1565  * The highest address is used as the starting point for dboot's simple
1566  * memory allocator.
1567  *
1568  * Finding the highest address in case of Multiboot 1 protocol is
1569  * quite painful in the sense that some information provided by
1570  * the multiboot info structure points to BIOS data, and some to RAM.
1571  *
1572  * The module list was processed and checked already by dboot_process_modules(),
1573  * so we will check the command line string and the memory map.
1574  *
1575  * This list of to be checked items is based on our current knowledge of
1576  * allocations made by grub1 and will need to be reviewed if there
1577  * are updates about the information provided by Multiboot 1.
1578  *
1579  * In the case of the Multiboot 2, our life is much simpler, as the MB2
1580  * information tag list is one contiguous chunk of memory.
1581  */
1582 static paddr_t
1583 dboot_multiboot1_highest_addr(void)
1584 {
1585         paddr_t addr = (paddr_t)(uintptr_t)NULL;
1586         char *cmdl = (char *)mb_info->cmdline;
1587 
1588         if (mb_info->flags & MB_INFO_CMDLINE)
1589                 addr = ((paddr_t)((uintptr_t)cmdl + strlen(cmdl) + 1));
1590 
1591         if (mb_info->flags & MB_INFO_MEM_MAP)
1592                 addr = MAX(addr,
1593                     ((paddr_t)(mb_info->mmap_addr + mb_info->mmap_length)));
1594         return (addr);
1595 }
1596 
1597 static void
1598 dboot_multiboot_highest_addr(void)
1599 {
1600         paddr_t addr;
1601 
1602         switch (multiboot_version) {
1603         case 1:
1604                 addr = dboot_multiboot1_highest_addr();
1605                 if (addr != (paddr_t)(uintptr_t)NULL)
1606                         check_higher(addr);
1607                 break;
1608         case 2:
1609                 addr = dboot_multiboot2_highest_addr(mb2_info);
1610                 if (addr != (paddr_t)(uintptr_t)NULL)
1611                         check_higher(addr);
1612                 break;
1613         default:
1614                 dboot_panic("Unknown multiboot version: %d\n",
1615                     multiboot_version);
1616                 break;
1617         }
1618 }
1619 
/*
 * Walk the boot loader provided information and find the highest free address.
 *
 * This is the non-__xpv variant.  The work is done in three steps: the
 * modules are processed first, then the memory map, and finally the
 * remaining boot loader data (see dboot_multiboot_highest_addr()) is
 * checked for the highest address in use.
 */
static void
init_mem_alloc(void)
{
        DBG_MSG("Entered init_mem_alloc()\n");
        dboot_process_modules();
        dboot_process_mmap();
        dboot_multiboot_highest_addr();
}
1631 
1632 static int
1633 dboot_same_guids(efi_guid_t *g1, efi_guid_t *g2)
1634 {
1635         int i;
1636 
1637         if (g1->time_low != g2->time_low)
1638                 return (0);
1639         if (g1->time_mid != g2->time_mid)
1640                 return (0);
1641         if (g1->time_hi_and_version != g2->time_hi_and_version)
1642                 return (0);
1643         if (g1->clock_seq_hi_and_reserved != g2->clock_seq_hi_and_reserved)
1644                 return (0);
1645         if (g1->clock_seq_low != g2->clock_seq_low)
1646                 return (0);
1647 
1648         for (i = 0; i < 6; i++) {
1649                 if (g1->node_addr[i] != g2->node_addr[i])
1650                         return (0);
1651         }
1652         return (1);
1653 }
1654 
1655 static void
1656 process_efi32(EFI_SYSTEM_TABLE32 *efi)
1657 {
1658         uint32_t entries;
1659         EFI_CONFIGURATION_TABLE32 *config;
1660         efi_guid_t VendorGuid;
1661         int i;
1662 
1663         entries = efi->NumberOfTableEntries;
1664         config = (EFI_CONFIGURATION_TABLE32 *)(uintptr_t)
1665             efi->ConfigurationTable;
1666 
1667         for (i = 0; i < entries; i++) {
1668                 (void) memcpy(&VendorGuid, &config[i].VendorGuid,
1669                     sizeof (VendorGuid));
1670                 if (dboot_same_guids(&VendorGuid, &smbios3)) {
1671                         bi->bi_smbios = (native_ptr_t)(uintptr_t)
1672                             config[i].VendorTable;
1673                 }
1674                 if (bi->bi_smbios == 0 &&
1675                     dboot_same_guids(&VendorGuid, &smbios)) {
1676                         bi->bi_smbios = (native_ptr_t)(uintptr_t)
1677                             config[i].VendorTable;
1678                 }
1679                 if (dboot_same_guids(&VendorGuid, &acpi2)) {
1680                         bi->bi_acpi_rsdp = (native_ptr_t)(uintptr_t)
1681                             config[i].VendorTable;
1682                 }
1683                 if (bi->bi_acpi_rsdp == 0 &&
1684                     dboot_same_guids(&VendorGuid, &acpi1)) {
1685                         bi->bi_acpi_rsdp = (native_ptr_t)(uintptr_t)
1686                             config[i].VendorTable;
1687                 }
1688         }
1689 }
1690 
1691 static void
1692 process_efi64(EFI_SYSTEM_TABLE64 *efi)
1693 {
1694         uint64_t entries;
1695         EFI_CONFIGURATION_TABLE64 *config;
1696         efi_guid_t VendorGuid;
1697         int i;
1698 
1699         entries = efi->NumberOfTableEntries;
1700         config = (EFI_CONFIGURATION_TABLE64 *)(uintptr_t)
1701             efi->ConfigurationTable;
1702 
1703         for (i = 0; i < entries; i++) {
1704                 (void) memcpy(&VendorGuid, &config[i].VendorGuid,
1705                     sizeof (VendorGuid));
1706                 if (dboot_same_guids(&VendorGuid, &smbios3)) {
1707                         bi->bi_smbios = (native_ptr_t)(uintptr_t)
1708                             config[i].VendorTable;
1709                 }
1710                 if (bi->bi_smbios == 0 &&
1711                     dboot_same_guids(&VendorGuid, &smbios)) {
1712                         bi->bi_smbios = (native_ptr_t)(uintptr_t)
1713                             config[i].VendorTable;
1714                 }
1715                 /* Prefer acpi v2+ over v1. */
1716                 if (dboot_same_guids(&VendorGuid, &acpi2)) {
1717                         bi->bi_acpi_rsdp = (native_ptr_t)(uintptr_t)
1718                             config[i].VendorTable;
1719                 }
1720                 if (bi->bi_acpi_rsdp == 0 &&
1721                     dboot_same_guids(&VendorGuid, &acpi1)) {
1722                         bi->bi_acpi_rsdp = (native_ptr_t)(uintptr_t)
1723                             config[i].VendorTable;
1724                 }
1725         }
1726 }
1727 
1728 static void
1729 dboot_multiboot_get_fwtables(void)
1730 {
1731         multiboot_tag_new_acpi_t *nacpitagp;
1732         multiboot_tag_old_acpi_t *oacpitagp;
1733         multiboot_tag_efi64_t *efi64tagp = NULL;
1734         multiboot_tag_efi32_t *efi32tagp = NULL;
1735 
1736         /* no fw tables from multiboot 1 */
1737         if (multiboot_version != 2)
1738                 return;
1739 
1740         efi64tagp = (multiboot_tag_efi64_t *)
1741             dboot_multiboot2_find_tag(mb2_info, MULTIBOOT_TAG_TYPE_EFI64);
1742         if (efi64tagp != NULL) {
1743                 bi->bi_uefi_arch = XBI_UEFI_ARCH_64;
1744                 bi->bi_uefi_systab = (native_ptr_t)(uintptr_t)
1745                     efi64tagp->mb_pointer;
1746                 process_efi64((EFI_SYSTEM_TABLE64 *)(uintptr_t)
1747                     efi64tagp->mb_pointer);
1748         } else {
1749                 efi32tagp = (multiboot_tag_efi32_t *)
1750                     dboot_multiboot2_find_tag(mb2_info,
1751                     MULTIBOOT_TAG_TYPE_EFI32);
1752                 if (efi32tagp != NULL) {
1753                         bi->bi_uefi_arch = XBI_UEFI_ARCH_32;
1754                         bi->bi_uefi_systab = (native_ptr_t)(uintptr_t)
1755                             efi32tagp->mb_pointer;
1756                         process_efi32((EFI_SYSTEM_TABLE32 *)(uintptr_t)
1757                             efi32tagp->mb_pointer);
1758                 }
1759         }
1760 
1761         /*
1762          * The multiboot2 info contains a copy of the RSDP; stash a pointer to
1763          * it (see find_rsdp() in fakebop).
1764          */
1765         nacpitagp = (multiboot_tag_new_acpi_t *)
1766             dboot_multiboot2_find_tag(mb2_info, MULTIBOOT_TAG_TYPE_ACPI_NEW);
1767         oacpitagp = (multiboot_tag_old_acpi_t *)
1768             dboot_multiboot2_find_tag(mb2_info, MULTIBOOT_TAG_TYPE_ACPI_OLD);
1769 
1770         if (nacpitagp != NULL) {
1771                 bi->bi_acpi_rsdp_copy = (native_ptr_t)(uintptr_t)
1772                     &nacpitagp->mb_rsdp[0];
1773         } else if (oacpitagp != NULL) {
1774                 bi->bi_acpi_rsdp_copy = (native_ptr_t)(uintptr_t)
1775                     &oacpitagp->mb_rsdp[0];
1776         }
1777 }
1778 
/*
 * Print an EFI revision value followed by a newline.
 *
 * The output is "<major>.<minor / 10>", with ".<minor % 10>" appended only
 * when that last digit is non-zero.
 */
static void
dboot_print_efi_version(uint32_t ver)
{
        int minor = EFI_REV_MINOR(ver);
        int upper = minor / 10;
        int lower = minor % 10;

        dboot_printf("%d.", EFI_REV_MAJOR(ver));

        if (lower == 0) {
                dboot_printf("%d\n", upper);
                return;
        }
        dboot_printf("%d.%d\n", upper, lower);
}
1794 
1795 static void
1796 print_efi32(EFI_SYSTEM_TABLE32 *efi)
1797 {
1798         uint16_t *data;
1799         EFI_CONFIGURATION_TABLE32 *conf;
1800         int i;
1801 
1802         dboot_printf("EFI32 signature: %llx\n",
1803             (unsigned long long)efi->Hdr.Signature);
1804         dboot_printf("EFI system version: ");
1805         dboot_print_efi_version(efi->Hdr.Revision);
1806         dboot_printf("EFI system vendor: ");
1807         data = (uint16_t *)(uintptr_t)efi->FirmwareVendor;
1808         for (i = 0; data[i] != 0; i++)
1809                 dboot_printf("%c", (char)data[i]);
1810         dboot_printf("\nEFI firmware revision: ");
1811         dboot_print_efi_version(efi->FirmwareRevision);
1812         dboot_printf("EFI system table number of entries: %d\n",
1813             efi->NumberOfTableEntries);
1814         conf = (EFI_CONFIGURATION_TABLE32 *)(uintptr_t)
1815             efi->ConfigurationTable;
1816         for (i = 0; i < (int)efi->NumberOfTableEntries; i++) {
1817                 dboot_printf("%d: 0x%x 0x%x 0x%x 0x%x 0x%x", i,
1818                     conf[i].VendorGuid.time_low,
1819                     conf[i].VendorGuid.time_mid,
1820                     conf[i].VendorGuid.time_hi_and_version,
1821                     conf[i].VendorGuid.clock_seq_hi_and_reserved,
1822                     conf[i].VendorGuid.clock_seq_low);
1823                 dboot_printf(" 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x\n",
1824                     conf[i].VendorGuid.node_addr[0],
1825                     conf[i].VendorGuid.node_addr[1],
1826                     conf[i].VendorGuid.node_addr[2],
1827                     conf[i].VendorGuid.node_addr[3],
1828                     conf[i].VendorGuid.node_addr[4],
1829                     conf[i].VendorGuid.node_addr[5]);
1830         }
1831 }
1832 
1833 static void
1834 print_efi64(EFI_SYSTEM_TABLE64 *efi)
1835 {
1836         uint16_t *data;
1837         EFI_CONFIGURATION_TABLE64 *conf;
1838         int i;
1839 
1840         dboot_printf("EFI64 signature: %llx\n",
1841             (unsigned long long)efi->Hdr.Signature);
1842         dboot_printf("EFI system version: ");
1843         dboot_print_efi_version(efi->Hdr.Revision);
1844         dboot_printf("EFI system vendor: ");
1845         data = (uint16_t *)(uintptr_t)efi->FirmwareVendor;
1846         for (i = 0; data[i] != 0; i++)
1847                 dboot_printf("%c", (char)data[i]);
1848         dboot_printf("\nEFI firmware revision: ");
1849         dboot_print_efi_version(efi->FirmwareRevision);
1850         dboot_printf("EFI system table number of entries: %" PRIu64 "\n",
1851             efi->NumberOfTableEntries);
1852         conf = (EFI_CONFIGURATION_TABLE64 *)(uintptr_t)
1853             efi->ConfigurationTable;
1854         for (i = 0; i < (int)efi->NumberOfTableEntries; i++) {
1855                 dboot_printf("%d: 0x%x 0x%x 0x%x 0x%x 0x%x", i,
1856                     conf[i].VendorGuid.time_low,
1857                     conf[i].VendorGuid.time_mid,
1858                     conf[i].VendorGuid.time_hi_and_version,
1859                     conf[i].VendorGuid.clock_seq_hi_and_reserved,
1860                     conf[i].VendorGuid.clock_seq_low);
1861                 dboot_printf(" 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x\n",
1862                     conf[i].VendorGuid.node_addr[0],
1863                     conf[i].VendorGuid.node_addr[1],
1864                     conf[i].VendorGuid.node_addr[2],
1865                     conf[i].VendorGuid.node_addr[3],
1866                     conf[i].VendorGuid.node_addr[4],
1867                     conf[i].VendorGuid.node_addr[5]);
1868         }
1869 }
1870 #endif /* !__xpv */
1871 
1872 /*
1873  * Simple memory allocator, allocates aligned physical memory.
1874  * Note that startup_kernel() only allocates memory, never frees.
1875  * Memory usage just grows in an upward direction.
1876  */
1877 static void *
1878 do_mem_alloc(uint32_t size, uint32_t align)
1879 {
1880         uint_t i;
1881         uint64_t best;
1882         uint64_t start;
1883         uint64_t end;
1884 
1885         /*
1886          * make sure size is a multiple of pagesize
1887          */
1888         size = RNDUP(size, MMU_PAGESIZE);
1889         next_avail_addr = RNDUP(next_avail_addr, align);
1890 
1891         /*
1892          * XXPV fixme joe
1893          *
1894          * a really large bootarchive that causes you to run out of memory
1895          * may cause this to blow up
1896          */
1897         /* LINTED E_UNEXPECTED_UINT_PROMOTION */
1898         best = (uint64_t)-size;
1899         for (i = 0; i < memlists_used; ++i) {
1900                 start = memlists[i].addr;
1901 #if defined(__xpv)
1902                 start += mfn_base;
1903 #endif
1904                 end = start + memlists[i].size;
1905 
1906                 /*
1907                  * did we find the desired address?
1908                  */
1909                 if (start <= next_avail_addr && next_avail_addr + size <= end) {
1910                         best = next_avail_addr;
1911                         goto done;
1912                 }
1913 
1914                 /*
1915                  * if not is this address the best so far?
1916                  */
1917                 if (start > next_avail_addr && start < best &&
1918                     RNDUP(start, align) + size <= end)
1919                         best = RNDUP(start, align);
1920         }
1921 
1922         /*
1923          * We didn't find exactly the address we wanted, due to going off the
1924          * end of a memory region. Return the best found memory address.
1925          */
1926 done:
1927         next_avail_addr = best + size;
1928 #if defined(__xpv)
1929         if (next_avail_addr > scratch_end)
1930                 dboot_panic("Out of mem next_avail: 0x%lx, scratch_end: "
1931                     "0x%lx", (ulong_t)next_avail_addr,
1932                     (ulong_t)scratch_end);
1933 #endif
1934         (void) memset((void *)(uintptr_t)best, 0, size);
1935         return ((void *)(uintptr_t)best);
1936 }
1937 
1938 void *
1939 mem_alloc(uint32_t size)
1940 {
1941         return (do_mem_alloc(size, MMU_PAGESIZE));
1942 }
1943 
1944 
1945 /*
1946  * Build page tables to map all of memory used so far as well as the kernel.
1947  */
1948 static void
1949 build_page_tables(void)
1950 {
1951         uint32_t psize;
1952         uint32_t level;
1953         uint32_t off;
1954         uint64_t start;
1955 #if !defined(__xpv)
1956         uint32_t i;
1957         uint64_t end;
1958 #endif  /* __xpv */
1959 
1960         /*
1961          * If we're on metal, we need to create the top level pagetable.
1962          */
1963 #if defined(__xpv)
1964         top_page_table = (paddr_t)(uintptr_t)xen_info->pt_base;
1965 #else /* __xpv */
1966         top_page_table = (paddr_t)(uintptr_t)mem_alloc(MMU_PAGESIZE);
1967 #endif /* __xpv */
1968         DBG((uintptr_t)top_page_table);
1969 
1970         /*
1971          * Determine if we'll use large mappings for kernel, then map it.
1972          */
1973         if (largepage_support) {
1974                 psize = lpagesize;
1975                 level = 1;
1976         } else {
1977                 psize = MMU_PAGESIZE;
1978                 level = 0;
1979         }
1980 
1981         DBG_MSG("Mapping kernel\n");
1982         DBG(ktext_phys);
1983         DBG(target_kernel_text);
1984         DBG(ksize);
1985         DBG(psize);
1986         for (off = 0; off < ksize; off += psize)
1987                 map_pa_at_va(ktext_phys + off, target_kernel_text + off, level);
1988 
1989         /*
1990          * The kernel will need a 1 page window to work with page tables
1991          */
1992         bi->bi_pt_window = (native_ptr_t)(uintptr_t)mem_alloc(MMU_PAGESIZE);
1993         DBG(bi->bi_pt_window);
1994         bi->bi_pte_to_pt_window =
1995             (native_ptr_t)(uintptr_t)find_pte(bi->bi_pt_window, NULL, 0, 0);
1996         DBG(bi->bi_pte_to_pt_window);
1997 
1998 #if defined(__xpv)
1999         if (!DOMAIN_IS_INITDOMAIN(xen_info)) {
2000                 /* If this is a domU we're done. */
2001                 DBG_MSG("\nPage tables constructed\n");
2002                 return;
2003         }
2004 #endif /* __xpv */
2005 
2006         /*
2007          * We need 1:1 mappings for the lower 1M of memory to access
2008          * BIOS tables used by a couple of drivers during boot.
2009          *
2010          * The following code works because our simple memory allocator
2011          * only grows usage in an upwards direction.
2012          *
2013          * Note that by this point in boot some mappings for low memory
2014          * may already exist because we've already accessed device in low
2015          * memory.  (Specifically the video frame buffer and keyboard
2016          * status ports.)  If we're booting on raw hardware then GRUB
2017          * created these mappings for us.  If we're booting under a
2018          * hypervisor then we went ahead and remapped these devices into
2019          * memory allocated within dboot itself.
2020          */
2021         if (map_debug)
2022                 dboot_printf("1:1 map pa=0..1Meg\n");
2023         for (start = 0; start < 1024 * 1024; start += MMU_PAGESIZE) {
2024 #if defined(__xpv)
2025                 map_ma_at_va(start, start, 0);
2026 #else /* __xpv */
2027                 map_pa_at_va(start, start, 0);
2028 #endif /* __xpv */
2029         }
2030 
2031 #if !defined(__xpv)
2032 
2033         for (i = 0; i < memlists_used; ++i) {
2034                 start = memlists[i].addr;
2035                 end = start + memlists[i].size;
2036 
2037                 if (map_debug)
2038                         dboot_printf("1:1 map pa=%" PRIx64 "..%" PRIx64 "\n",
2039                             start, end);
2040                 while (start < end && start < next_avail_addr) {
2041                         map_pa_at_va(start, start, 0);
2042                         start += MMU_PAGESIZE;
2043                 }
2044                 if (start >= next_avail_addr)
2045                         break;
2046         }
2047 
2048         /*
2049          * Map framebuffer memory as PT_NOCACHE as this is memory from a
2050          * device and therefore must not be cached.
2051          */
2052         if (fb != NULL && fb->framebuffer != 0) {
2053                 multiboot_tag_framebuffer_t *fb_tagp;
2054                 fb_tagp = (multiboot_tag_framebuffer_t *)(uintptr_t)
2055                     fb->framebuffer;
2056 
2057                 start = fb_tagp->framebuffer_common.framebuffer_addr;
2058                 end = start + fb_tagp->framebuffer_common.framebuffer_height *
2059                     fb_tagp->framebuffer_common.framebuffer_pitch;
2060 
2061                 if (map_debug)
2062                         dboot_printf("FB 1:1 map pa=%" PRIx64 "..%" PRIx64 "\n",
2063                             start, end);
2064                 pte_bits |= PT_NOCACHE;
2065                 if (PAT_support != 0)
2066                         pte_bits |= PT_PAT_4K;
2067 
2068                 while (start < end) {
2069                         map_pa_at_va(start, start, 0);
2070                         start += MMU_PAGESIZE;
2071                 }
2072                 pte_bits &= ~PT_NOCACHE;
2073                 if (PAT_support != 0)
2074                         pte_bits &= ~PT_PAT_4K;
2075         }
2076 #endif /* !__xpv */
2077 
2078         DBG_MSG("\nPage tables constructed\n");
2079 }
2080 
/*
 * Message shown when the kernel command line still mentions "multiboot".
 */
#define NO_MULTIBOOT    \
        "multiboot is no longer used to boot the Solaris Operating System.\n" \
        "The grub entry should be changed to:\n" \
        "kernel$ /platform/i86pc/kernel/$ISADIR/unix\n" \
        "module$ /platform/i86pc/$ISADIR/boot_archive\n" \
        "See http://illumos.org/msg/SUNOS-8000-AK for details.\n"
2087 
2088 static void
2089 dboot_init_xboot_consinfo(void)
2090 {
2091         bi = &boot_info;
2092 
2093 #if !defined(__xpv)
2094         fb = &framebuffer;
2095         bi->bi_framebuffer = (native_ptr_t)(uintptr_t)fb;
2096 
2097         switch (multiboot_version) {
2098         case 1:
2099                 dboot_multiboot1_xboot_consinfo();
2100                 break;
2101         case 2:
2102                 dboot_multiboot2_xboot_consinfo();
2103                 break;
2104         default:
2105                 dboot_panic("Unknown multiboot version: %d\n",
2106                     multiboot_version);
2107                 break;
2108         }
2109         dboot_find_console_modules();
2110 #endif
2111 }
2112 
2113 /*
2114  * Set up basic data from the boot loader.
2115  * The load_addr is part of AOUT kludge setup in dboot_grub.s, to support
2116  * 32-bit dboot code setup used to set up and start 64-bit kernel.
2117  * AOUT kludge does allow 32-bit boot loader, such as grub1, to load and
2118  * start 64-bit illumos kernel.
2119  */
2120 static void
2121 dboot_loader_init(void)
2122 {
2123 #if !defined(__xpv)
2124         mb_info = NULL;
2125         mb2_info = NULL;
2126 
2127         switch (mb_magic) {
2128         case MB_BOOTLOADER_MAGIC:
2129                 multiboot_version = 1;
2130                 mb_info = (multiboot_info_t *)(uintptr_t)mb_addr;
2131 #if defined(_BOOT_TARGET_amd64)
2132                 load_addr = mb_header.load_addr;
2133 #endif
2134                 break;
2135 
2136         case MULTIBOOT2_BOOTLOADER_MAGIC:
2137                 multiboot_version = 2;
2138                 mb2_info = (multiboot2_info_header_t *)(uintptr_t)mb_addr;
2139                 mb2_mmap_tagp = dboot_multiboot2_get_mmap_tagp(mb2_info);
2140 #if defined(_BOOT_TARGET_amd64)
2141                 load_addr = mb2_load_addr;
2142 #endif
2143                 break;
2144 
2145         default:
2146                 dboot_panic("Unknown bootloader magic: 0x%x\n", mb_magic);
2147                 break;
2148         }
2149 #endif  /* !defined(__xpv) */
2150 }
2151 
2152 /* Extract the kernel command line from [multi]boot information. */
2153 static char *
2154 dboot_loader_cmdline(void)
2155 {
2156         char *line = NULL;
2157 
2158 #if defined(__xpv)
2159         line = (char *)xen_info->cmd_line;
2160 #else /* __xpv */
2161 
2162         switch (multiboot_version) {
2163         case 1:
2164                 if (mb_info->flags & MB_INFO_CMDLINE)
2165                         line = (char *)mb_info->cmdline;
2166                 break;
2167 
2168         case 2:
2169                 line = dboot_multiboot2_cmdline(mb2_info);
2170                 break;
2171 
2172         default:
2173                 dboot_panic("Unknown multiboot version: %d\n",
2174                     multiboot_version);
2175                 break;
2176         }
2177 
2178 #endif /* __xpv */
2179 
2180         /*
2181          * Make sure we have valid pointer so the string operations
2182          * will not crash us.
2183          */
2184         if (line == NULL)
2185                 line = "";
2186 
2187         return (line);
2188 }
2189 
2190 static char *
2191 dboot_loader_name(void)
2192 {
2193 #if defined(__xpv)
2194         return (NULL);
2195 #else /* __xpv */
2196         multiboot_tag_string_t *tag;
2197 
2198         switch (multiboot_version) {
2199         case 1:
2200                 return ((char *)(uintptr_t)mb_info->boot_loader_name);
2201 
2202         case 2:
2203                 tag = dboot_multiboot2_find_tag(mb2_info,
2204                     MULTIBOOT_TAG_TYPE_BOOT_LOADER_NAME);
2205                 return (tag->mb_string);
2206         default:
2207                 dboot_panic("Unknown multiboot version: %d\n",
2208                     multiboot_version);
2209                 break;
2210         }
2211 
2212         return (NULL);
2213 #endif /* __xpv */
2214 }
2215 
2216 /*
2217  * startup_kernel has a pretty simple job. It builds pagetables which reflect
2218  * 1:1 mappings for all memory in use. It then also adds mappings for
2219  * the kernel nucleus at virtual address of target_kernel_text using large page
2220  * mappings. The page table pages are also accessible at 1:1 mapped
2221  * virtual addresses.
2222  */
2223 /*ARGSUSED*/
2224 void
2225 startup_kernel(void)
2226 {
2227         char *cmdline;
2228         char *bootloader;
2229 #if defined(__xpv)
2230         physdev_set_iopl_t set_iopl;
2231 #endif /* __xpv */
2232 
2233         if (dboot_debug == 1)
2234                 bcons_init(NULL);       /* Set very early console to ttya. */
2235         dboot_loader_init();
2236         /*
2237          * At this point we are executing in a 32 bit real mode.
2238          */
2239 
2240         bootloader = dboot_loader_name();
2241         cmdline = dboot_loader_cmdline();
2242 
2243 #if defined(__xpv)
2244         /*
2245          * For dom0, before we initialize the console subsystem we'll
2246          * need to enable io operations, so set I/O priveldge level to 1.
2247          */
2248         if (DOMAIN_IS_INITDOMAIN(xen_info)) {
2249                 set_iopl.iopl = 1;
2250                 (void) HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
2251         }
2252 #endif /* __xpv */
2253 
2254         dboot_init_xboot_consinfo();
2255         bi->bi_cmdline = (native_ptr_t)(uintptr_t)cmdline;
2256         bcons_init(bi);         /* Now we can set the real console. */
2257 
2258         prom_debug = (find_boot_prop("prom_debug") != NULL);
2259         map_debug = (find_boot_prop("map_debug") != NULL);
2260 
2261 #if !defined(__xpv)
2262         dboot_multiboot_get_fwtables();
2263 #endif
2264         DBG_MSG("\n\nillumos prekernel set: ");
2265         DBG_MSG(cmdline);
2266         DBG_MSG("\n");
2267 
2268         if (bootloader != NULL && prom_debug) {
2269                 dboot_printf("Kernel loaded by: %s\n", bootloader);
2270 #if !defined(__xpv)
2271                 dboot_printf("Using multiboot %d boot protocol.\n",
2272                     multiboot_version);
2273 #endif
2274         }
2275 
2276         if (strstr(cmdline, "multiboot") != NULL) {
2277                 dboot_panic(NO_MULTIBOOT);
2278         }
2279 
2280         DBG((uintptr_t)bi);
2281 #if !defined(__xpv)
2282         DBG((uintptr_t)mb_info);
2283         DBG((uintptr_t)mb2_info);
2284         if (mb2_info != NULL)
2285                 DBG(mb2_info->mbi_total_size);
2286         DBG(bi->bi_acpi_rsdp);
2287         DBG(bi->bi_acpi_rsdp_copy);
2288         DBG(bi->bi_smbios);
2289         DBG(bi->bi_uefi_arch);
2290         DBG(bi->bi_uefi_systab);
2291 
2292         if (bi->bi_uefi_systab && prom_debug) {
2293                 if (bi->bi_uefi_arch == XBI_UEFI_ARCH_64) {
2294                         print_efi64((EFI_SYSTEM_TABLE64 *)(uintptr_t)
2295                             bi->bi_uefi_systab);
2296                 } else {
2297                         print_efi32((EFI_SYSTEM_TABLE32 *)(uintptr_t)
2298                             bi->bi_uefi_systab);
2299                 }
2300         }
2301 #endif
2302 
2303         /*
2304          * Need correct target_kernel_text value
2305          */
2306 #if defined(_BOOT_TARGET_amd64)
2307         target_kernel_text = KERNEL_TEXT_amd64;
2308 #elif defined(__xpv)
2309         target_kernel_text = KERNEL_TEXT_i386_xpv;
2310 #else
2311         target_kernel_text = KERNEL_TEXT_i386;
2312 #endif
2313         DBG(target_kernel_text);
2314 
2315 #if defined(__xpv)
2316 
2317         /*
2318          * XXPV Derive this stuff from CPUID / what the hypervisor has enabled
2319          */
2320 
2321 #if defined(_BOOT_TARGET_amd64)
2322         /*
2323          * 64-bit hypervisor.
2324          */
2325         amd64_support = 1;
2326         pae_support = 1;
2327 
2328 #else   /* _BOOT_TARGET_amd64 */
2329 
2330         /*
2331          * See if we are running on a PAE Hypervisor
2332          */
2333         {
2334                 xen_capabilities_info_t caps;
2335 
2336                 if (HYPERVISOR_xen_version(XENVER_capabilities, &caps) != 0)
2337                         dboot_panic("HYPERVISOR_xen_version(caps) failed");
2338                 caps[sizeof (caps) - 1] = 0;
2339                 if (prom_debug)
2340                         dboot_printf("xen capabilities %s\n", caps);
2341                 if (strstr(caps, "x86_32p") != NULL)
2342                         pae_support = 1;
2343         }
2344 
2345 #endif  /* _BOOT_TARGET_amd64 */
2346         {
2347                 xen_platform_parameters_t p;
2348 
2349                 if (HYPERVISOR_xen_version(XENVER_platform_parameters, &p) != 0)
2350                         dboot_panic("HYPERVISOR_xen_version(parms) failed");
2351                 DBG(p.virt_start);
2352                 mfn_to_pfn_mapping = (pfn_t *)(xen_virt_start = p.virt_start);
2353         }
2354 
2355         /*
2356          * The hypervisor loads stuff starting at 1Gig
2357          */
2358         mfn_base = ONE_GIG;
2359         DBG(mfn_base);
2360 
2361         /*
2362          * enable writable page table mode for the hypervisor
2363          */
2364         if (HYPERVISOR_vm_assist(VMASST_CMD_enable,
2365             VMASST_TYPE_writable_pagetables) < 0)
2366                 dboot_panic("HYPERVISOR_vm_assist(writable_pagetables) failed");
2367 
2368         /*
2369          * check for NX support
2370          */
2371         if (pae_support) {
2372                 uint32_t eax = 0x80000000;
2373                 uint32_t edx = get_cpuid_edx(&eax);
2374 
2375                 if (eax >= 0x80000001) {
2376                         eax = 0x80000001;
2377                         edx = get_cpuid_edx(&eax);
2378                         if (edx & CPUID_AMD_EDX_NX)
2379                                 NX_support = 1;
2380                 }
2381         }
2382 
2383         /*
2384          * check for PAT support
2385          */
2386         {
2387                 uint32_t eax = 1;
2388                 uint32_t edx = get_cpuid_edx(&eax);
2389 
2390                 if (edx & CPUID_INTC_EDX_PAT)
2391                         PAT_support = 1;
2392         }
2393 #if !defined(_BOOT_TARGET_amd64)
2394 
2395         /*
2396          * The 32-bit hypervisor uses segmentation to protect itself from
2397          * guests. This means when a guest attempts to install a flat 4GB
2398          * code or data descriptor the 32-bit hypervisor will protect itself
2399          * by silently shrinking the segment such that if the guest attempts
2400          * any access where the hypervisor lives a #gp fault is generated.
2401          * The problem is that some applications expect a full 4GB flat
2402          * segment for their current thread pointer and will use negative
2403          * offset segment wrap around to access data. TLS support in linux
2404          * brand is one example of this.
2405          *
2406          * The 32-bit hypervisor can catch the #gp fault in these cases
2407          * and emulate the access without passing the #gp fault to the guest
2408          * but only if VMASST_TYPE_4gb_segments is explicitly turned on.
2409          * Seems like this should have been the default.
2410          * Either way, we want the hypervisor -- and not Solaris -- to deal
2411          * to deal with emulating these accesses.
2412          */
2413         if (HYPERVISOR_vm_assist(VMASST_CMD_enable,
2414             VMASST_TYPE_4gb_segments) < 0)
2415                 dboot_panic("HYPERVISOR_vm_assist(4gb_segments) failed");
2416 #endif  /* !_BOOT_TARGET_amd64 */
2417 
2418 #else   /* __xpv */
2419 
2420         /*
2421          * use cpuid to enable MMU features
2422          */
2423         if (have_cpuid()) {
2424                 uint32_t eax, edx;
2425 
2426                 eax = 1;
2427                 edx = get_cpuid_edx(&eax);
2428                 if (edx & CPUID_INTC_EDX_PSE)
2429                         largepage_support = 1;
2430                 if (edx & CPUID_INTC_EDX_PGE)
2431                         pge_support = 1;
2432                 if (edx & CPUID_INTC_EDX_PAE)
2433                         pae_support = 1;
2434                 if (edx & CPUID_INTC_EDX_PAT)
2435                         PAT_support = 1;
2436 
2437                 eax = 0x80000000;
2438                 edx = get_cpuid_edx(&eax);
2439                 if (eax >= 0x80000001) {
2440                         eax = 0x80000001;
2441                         edx = get_cpuid_edx(&eax);
2442                         if (edx & CPUID_AMD_EDX_LM)
2443                                 amd64_support = 1;
2444                         if (edx & CPUID_AMD_EDX_NX)
2445                                 NX_support = 1;
2446                 }
2447         } else {
2448                 dboot_printf("cpuid not supported\n");
2449         }
2450 #endif /* __xpv */
2451 
2452 
2453 #if defined(_BOOT_TARGET_amd64)
2454         if (amd64_support == 0)
2455                 dboot_panic("long mode not supported, rebooting");
2456         else if (pae_support == 0)
2457                 dboot_panic("long mode, but no PAE; rebooting");
2458 #else
2459         /*
2460          * Allow the command line to over-ride use of PAE for 32 bit.
2461          */
2462         if (strstr(cmdline, "disablePAE=true") != NULL) {
2463                 pae_support = 0;
2464                 NX_support = 0;
2465                 amd64_support = 0;
2466         }
2467 #endif
2468 
2469         /*
2470          * initialize the simple memory allocator
2471          */
2472         init_mem_alloc();
2473 
2474 #if !defined(__xpv) && !defined(_BOOT_TARGET_amd64)
2475         /*
2476          * disable PAE on 32 bit h/w w/o NX and < 4Gig of memory
2477          */
2478         if (max_mem < FOUR_GIG && NX_support == 0)
2479                 pae_support = 0;
2480 #endif
2481 
2482         /*
2483          * configure mmu information
2484          */
2485         if (pae_support) {
2486                 shift_amt = shift_amt_pae;
2487                 ptes_per_table = 512;
2488                 pte_size = 8;
2489                 lpagesize = TWO_MEG;
2490 #if defined(_BOOT_TARGET_amd64)
2491                 top_level = 3;
2492 #else
2493                 top_level = 2;
2494 #endif
2495         } else {
2496                 pae_support = 0;
2497                 NX_support = 0;
2498                 shift_amt = shift_amt_nopae;
2499                 ptes_per_table = 1024;
2500                 pte_size = 4;
2501                 lpagesize = FOUR_MEG;
2502                 top_level = 1;
2503         }
2504 
2505         DBG(PAT_support);
2506         DBG(pge_support);
2507         DBG(NX_support);
2508         DBG(largepage_support);
2509         DBG(amd64_support);
2510         DBG(top_level);
2511         DBG(pte_size);
2512         DBG(ptes_per_table);
2513         DBG(lpagesize);
2514 
2515 #if defined(__xpv)
2516         ktext_phys = ONE_GIG;           /* from UNIX Mapfile */
2517 #else
2518         ktext_phys = FOUR_MEG;          /* from UNIX Mapfile */
2519 #endif
2520 
2521 #if !defined(__xpv) && defined(_BOOT_TARGET_amd64)
2522         /*
2523          * For grub, copy kernel bits from the ELF64 file to final place.
2524          */
2525         DBG_MSG("\nAllocating nucleus pages.\n");
2526         ktext_phys = (uintptr_t)do_mem_alloc(ksize, FOUR_MEG);
2527 
2528         if (ktext_phys == 0)
2529                 dboot_panic("failed to allocate aligned kernel memory");
2530         DBG(load_addr);
2531         if (dboot_elfload64(load_addr) != 0)
2532                 dboot_panic("failed to parse kernel ELF image, rebooting");
2533 #endif
2534 
2535         DBG(ktext_phys);
2536 
2537         /*
2538          * Allocate page tables.
2539          */
2540         build_page_tables();
2541 
2542         /*
2543          * return to assembly code to switch to running kernel
2544          */
2545         entry_addr_low = (uint32_t)target_kernel_text;
2546         DBG(entry_addr_low);
2547         bi->bi_use_largepage = largepage_support;
2548         bi->bi_use_pae = pae_support;
2549         bi->bi_use_pge = pge_support;
2550         bi->bi_use_nx = NX_support;
2551 
2552 #if defined(__xpv)
2553 
2554         bi->bi_next_paddr = next_avail_addr - mfn_base;
2555         DBG(bi->bi_next_paddr);
2556         bi->bi_next_vaddr = (native_ptr_t)(uintptr_t)next_avail_addr;
2557         DBG(bi->bi_next_vaddr);
2558 
2559         /*
2560          * unmap unused pages in start area to make them available for DMA
2561          */
2562         while (next_avail_addr < scratch_end) {
2563                 (void) HYPERVISOR_update_va_mapping(next_avail_addr,
2564                     0, UVMF_INVLPG | UVMF_LOCAL);
2565                 next_avail_addr += MMU_PAGESIZE;
2566         }
2567 
2568         bi->bi_xen_start_info = (native_ptr_t)(uintptr_t)xen_info;
2569         DBG((uintptr_t)HYPERVISOR_shared_info);
2570         bi->bi_shared_info = (native_ptr_t)HYPERVISOR_shared_info;
2571         bi->bi_top_page_table = (uintptr_t)top_page_table - mfn_base;
2572 
2573 #else /* __xpv */
2574 
2575         bi->bi_next_paddr = next_avail_addr;
2576         DBG(bi->bi_next_paddr);
2577         bi->bi_next_vaddr = (native_ptr_t)(uintptr_t)next_avail_addr;
2578         DBG(bi->bi_next_vaddr);
2579         bi->bi_mb_version = multiboot_version;
2580 
2581         switch (multiboot_version) {
2582         case 1:
2583                 bi->bi_mb_info = (native_ptr_t)(uintptr_t)mb_info;
2584                 break;
2585         case 2:
2586                 bi->bi_mb_info = (native_ptr_t)(uintptr_t)mb2_info;
2587                 break;
2588         default:
2589                 dboot_panic("Unknown multiboot version: %d\n",
2590                     multiboot_version);
2591                 break;
2592         }
2593         bi->bi_top_page_table = (uintptr_t)top_page_table;
2594 
2595 #endif /* __xpv */
2596 
2597         bi->bi_kseg_size = FOUR_MEG;
2598         DBG(bi->bi_kseg_size);
2599 
2600 #ifndef __xpv
2601         if (map_debug)
2602                 dump_tables();
2603 #endif
2604 
2605         DBG_MSG("\n\n*** DBOOT DONE -- back to asm to jump to kernel\n\n");
2606 
2607 #ifndef __xpv
2608         /* Update boot info with FB data */
2609         fb->cursor.origin.x = fb_info.cursor.origin.x;
2610         fb->cursor.origin.y = fb_info.cursor.origin.y;
2611         fb->cursor.pos.x = fb_info.cursor.pos.x;
2612         fb->cursor.pos.y = fb_info.cursor.pos.y;
2613         fb->cursor.visible = fb_info.cursor.visible;
2614 #endif
2615 }