1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 28 #include <sys/types.h> 29 #include <sys/machparam.h> 30 #include <sys/x86_archext.h> 31 #include <sys/systm.h> 32 #include <sys/mach_mmu.h> 33 #include <sys/multiboot.h> 34 35 #if defined(__xpv) 36 37 #include <sys/hypervisor.h> 38 uintptr_t xen_virt_start; 39 pfn_t *mfn_to_pfn_mapping; 40 41 #else /* !__xpv */ 42 43 extern multiboot_header_t mb_header; 44 extern int have_cpuid(void); 45 46 #endif /* !__xpv */ 47 48 #include <sys/inttypes.h> 49 #include <sys/bootinfo.h> 50 #include <sys/mach_mmu.h> 51 #include <sys/boot_console.h> 52 53 #include "dboot_asm.h" 54 #include "dboot_printf.h" 55 #include "dboot_xboot.h" 56 #include "dboot_elfload.h" 57 58 /* 59 * This file contains code that runs to transition us from either a multiboot 60 * compliant loader (32 bit non-paging) or a XPV domain loader to 61 * regular kernel execution. Its task is to setup the kernel memory image 62 * and page tables. 63 * 64 * The code executes as: 65 * - 32 bits under GRUB (for 32 or 64 bit Solaris) 66 * - a 32 bit program for the 32-bit PV hypervisor 67 * - a 64 bit program for the 64-bit PV hypervisor (at least for now) 68 * 69 * Under the PV hypervisor, we must create mappings for any memory beyond the 70 * initial start of day allocation (such as the kernel itself). 71 * 72 * When on the metal, the mapping between maddr_t and paddr_t is 1:1. 73 * Since we are running in real mode, so all such memory is accessible. 74 */ 75 76 /* 77 * Standard bits used in PTE (page level) and PTP (internal levels) 78 */ 79 x86pte_t ptp_bits = PT_VALID | PT_REF | PT_WRITABLE | PT_USER; 80 x86pte_t pte_bits = PT_VALID | PT_REF | PT_WRITABLE | PT_MOD | PT_NOCONSIST; 81 82 /* 83 * This is the target addresses (physical) where the kernel text and data 84 * nucleus pages will be unpacked. On the hypervisor this is actually a 85 * virtual address. 86 */ 87 paddr_t ktext_phys; 88 uint32_t ksize = 2 * FOUR_MEG; /* kernel nucleus is 8Meg */ 89 90 static uint64_t target_kernel_text; /* value to use for KERNEL_TEXT */ 91 92 /* 93 * The stack is setup in assembler before entering startup_kernel() 94 */ 95 char stack_space[STACK_SIZE]; 96 97 /* 98 * Used to track physical memory allocation 99 */ 100 static paddr_t next_avail_addr = 0; 101 102 #if defined(__xpv) 103 /* 104 * Additional information needed for hypervisor memory allocation. 105 * Only memory up to scratch_end is mapped by page tables. 106 * mfn_base is the start of the hypervisor virtual image. It's ONE_GIG, so 107 * to derive a pfn from a pointer, you subtract mfn_base. 108 */ 109 110 static paddr_t scratch_end = 0; /* we can't write all of mem here */ 111 static paddr_t mfn_base; /* addr corresponding to mfn_list[0] */ 112 start_info_t *xen_info; 113 114 #else /* __xpv */ 115 116 /* 117 * If on the metal, then we have a multiboot loader. 118 */ 119 multiboot_info_t *mb_info; 120 121 #endif /* __xpv */ 122 123 /* 124 * This contains information passed to the kernel 125 */ 126 struct xboot_info boot_info[2]; /* extra space to fix alignement for amd64 */ 127 struct xboot_info *bi; 128 129 /* 130 * Page table and memory stuff. 131 */ 132 static paddr_t max_mem; /* maximum memory address */ 133 134 /* 135 * Information about processor MMU 136 */ 137 int amd64_support = 0; 138 int largepage_support = 0; 139 int pae_support = 0; 140 int pge_support = 0; 141 int NX_support = 0; 142 143 /* 144 * Low 32 bits of kernel entry address passed back to assembler. 145 * When running a 64 bit kernel, the high 32 bits are 0xffffffff. 146 */ 147 uint32_t entry_addr_low; 148 149 /* 150 * Memlists for the kernel. We shouldn't need a lot of these. 151 */ 152 #define MAX_MEMLIST (50) 153 struct boot_memlist memlists[MAX_MEMLIST]; 154 uint_t memlists_used = 0; 155 struct boot_memlist pcimemlists[MAX_MEMLIST]; 156 uint_t pcimemlists_used = 0; 157 struct boot_memlist rsvdmemlists[MAX_MEMLIST]; 158 uint_t rsvdmemlists_used = 0; 159 160 #define MAX_MODULES (10) 161 struct boot_modules modules[MAX_MODULES]; 162 uint_t modules_used = 0; 163 164 /* 165 * Debugging macros 166 */ 167 uint_t prom_debug = 0; 168 uint_t map_debug = 0; 169 170 /* 171 * Either hypervisor-specific or grub-specific code builds the initial 172 * memlists. This code does the sort/merge/link for final use. 173 */ 174 static void 175 sort_physinstall(void) 176 { 177 int i; 178 #if !defined(__xpv) 179 int j; 180 struct boot_memlist tmp; 181 182 /* 183 * Now sort the memlists, in case they weren't in order. 184 * Yeah, this is a bubble sort; small, simple and easy to get right. 185 */ 186 DBG_MSG("Sorting phys-installed list\n"); 187 for (j = memlists_used - 1; j > 0; --j) { 188 for (i = 0; i < j; ++i) { 189 if (memlists[i].addr < memlists[i + 1].addr) 190 continue; 191 tmp = memlists[i]; 192 memlists[i] = memlists[i + 1]; 193 memlists[i + 1] = tmp; 194 } 195 } 196 197 /* 198 * Merge any memlists that don't have holes between them. 199 */ 200 for (i = 0; i <= memlists_used - 1; ++i) { 201 if (memlists[i].addr + memlists[i].size != memlists[i + 1].addr) 202 continue; 203 204 if (prom_debug) 205 dboot_printf( 206 "merging mem segs %" PRIx64 "...%" PRIx64 207 " w/ %" PRIx64 "...%" PRIx64 "\n", 208 memlists[i].addr, 209 memlists[i].addr + memlists[i].size, 210 memlists[i + 1].addr, 211 memlists[i + 1].addr + memlists[i + 1].size); 212 213 memlists[i].size += memlists[i + 1].size; 214 for (j = i + 1; j < memlists_used - 1; ++j) 215 memlists[j] = memlists[j + 1]; 216 --memlists_used; 217 DBG(memlists_used); 218 --i; /* after merging we need to reexamine, so do this */ 219 } 220 #endif /* __xpv */ 221 222 if (prom_debug) { 223 dboot_printf("\nFinal memlists:\n"); 224 for (i = 0; i < memlists_used; ++i) { 225 dboot_printf("\t%d: addr=%" PRIx64 " size=%" 226 PRIx64 "\n", i, memlists[i].addr, memlists[i].size); 227 } 228 } 229 230 /* 231 * link together the memlists with native size pointers 232 */ 233 memlists[0].next = 0; 234 memlists[0].prev = 0; 235 for (i = 1; i < memlists_used; ++i) { 236 memlists[i].prev = (native_ptr_t)(uintptr_t)(memlists + i - 1); 237 memlists[i].next = 0; 238 memlists[i - 1].next = (native_ptr_t)(uintptr_t)(memlists + i); 239 } 240 bi->bi_phys_install = (native_ptr_t)(uintptr_t)memlists; 241 DBG(bi->bi_phys_install); 242 } 243 244 /* 245 * build bios reserved memlists 246 */ 247 static void 248 build_rsvdmemlists(void) 249 { 250 int i; 251 252 rsvdmemlists[0].next = 0; 253 rsvdmemlists[0].prev = 0; 254 for (i = 1; i < rsvdmemlists_used; ++i) { 255 rsvdmemlists[i].prev = 256 (native_ptr_t)(uintptr_t)(rsvdmemlists + i - 1); 257 rsvdmemlists[i].next = 0; 258 rsvdmemlists[i - 1].next = 259 (native_ptr_t)(uintptr_t)(rsvdmemlists + i); 260 } 261 bi->bi_rsvdmem = (native_ptr_t)(uintptr_t)rsvdmemlists; 262 DBG(bi->bi_rsvdmem); 263 } 264 265 #if defined(__xpv) 266 267 /* 268 * halt on the hypervisor after a delay to drain console output 269 */ 270 void 271 dboot_halt(void) 272 { 273 uint_t i = 10000; 274 275 while (--i) 276 (void) HYPERVISOR_yield(); 277 (void) HYPERVISOR_shutdown(SHUTDOWN_poweroff); 278 } 279 280 /* 281 * From a machine address, find the corresponding pseudo-physical address. 282 * Pseudo-physical address are contiguous and run from mfn_base in each VM. 283 * Machine addresses are the real underlying hardware addresses. 284 * These are needed for page table entries. Note that this routine is 285 * poorly protected. A bad value of "ma" will cause a page fault. 286 */ 287 paddr_t 288 ma_to_pa(maddr_t ma) 289 { 290 ulong_t pgoff = ma & MMU_PAGEOFFSET; 291 ulong_t pfn = mfn_to_pfn_mapping[mmu_btop(ma)]; 292 paddr_t pa; 293 294 if (pfn >= xen_info->nr_pages) 295 return (-(paddr_t)1); 296 pa = mfn_base + mmu_ptob((paddr_t)pfn) + pgoff; 297 #ifdef DEBUG 298 if (ma != pa_to_ma(pa)) 299 dboot_printf("ma_to_pa(%" PRIx64 ") got %" PRIx64 ", " 300 "pa_to_ma() says %" PRIx64 "\n", ma, pa, pa_to_ma(pa)); 301 #endif 302 return (pa); 303 } 304 305 /* 306 * From a pseudo-physical address, find the corresponding machine address. 307 */ 308 maddr_t 309 pa_to_ma(paddr_t pa) 310 { 311 pfn_t pfn; 312 ulong_t mfn; 313 314 pfn = mmu_btop(pa - mfn_base); 315 if (pa < mfn_base || pfn >= xen_info->nr_pages) 316 dboot_panic("pa_to_ma(): illegal address 0x%lx", (ulong_t)pa); 317 mfn = ((ulong_t *)xen_info->mfn_list)[pfn]; 318 #ifdef DEBUG 319 if (mfn_to_pfn_mapping[mfn] != pfn) 320 dboot_printf("pa_to_ma(pfn=%lx) got %lx ma_to_pa() says %lx\n", 321 pfn, mfn, mfn_to_pfn_mapping[mfn]); 322 #endif 323 return (mfn_to_ma(mfn) | (pa & MMU_PAGEOFFSET)); 324 } 325 326 #endif /* __xpv */ 327 328 x86pte_t 329 get_pteval(paddr_t table, uint_t index) 330 { 331 if (pae_support) 332 return (((x86pte_t *)(uintptr_t)table)[index]); 333 return (((x86pte32_t *)(uintptr_t)table)[index]); 334 } 335 336 /*ARGSUSED*/ 337 void 338 set_pteval(paddr_t table, uint_t index, uint_t level, x86pte_t pteval) 339 { 340 #ifdef __xpv 341 mmu_update_t t; 342 maddr_t mtable = pa_to_ma(table); 343 int retcnt; 344 345 t.ptr = (mtable + index * pte_size) | MMU_NORMAL_PT_UPDATE; 346 t.val = pteval; 347 if (HYPERVISOR_mmu_update(&t, 1, &retcnt, DOMID_SELF) || retcnt != 1) 348 dboot_panic("HYPERVISOR_mmu_update() failed"); 349 #else /* __xpv */ 350 uintptr_t tab_addr = (uintptr_t)table; 351 352 if (pae_support) 353 ((x86pte_t *)tab_addr)[index] = pteval; 354 else 355 ((x86pte32_t *)tab_addr)[index] = (x86pte32_t)pteval; 356 if (level == top_level && level == 2) 357 reload_cr3(); 358 #endif /* __xpv */ 359 } 360 361 paddr_t 362 make_ptable(x86pte_t *pteval, uint_t level) 363 { 364 paddr_t new_table = (paddr_t)(uintptr_t)mem_alloc(MMU_PAGESIZE); 365 366 if (level == top_level && level == 2) 367 *pteval = pa_to_ma((uintptr_t)new_table) | PT_VALID; 368 else 369 *pteval = pa_to_ma((uintptr_t)new_table) | ptp_bits; 370 371 #ifdef __xpv 372 /* Remove write permission to the new page table. */ 373 if (HYPERVISOR_update_va_mapping(new_table, 374 *pteval & ~(x86pte_t)PT_WRITABLE, UVMF_INVLPG | UVMF_LOCAL)) 375 dboot_panic("HYP_update_va_mapping error"); 376 #endif 377 378 if (map_debug) 379 dboot_printf("new page table lvl=%d paddr=0x%lx ptp=0x%" 380 PRIx64 "\n", level, (ulong_t)new_table, *pteval); 381 return (new_table); 382 } 383 384 x86pte_t * 385 map_pte(paddr_t table, uint_t index) 386 { 387 return ((x86pte_t *)(uintptr_t)(table + index * pte_size)); 388 } 389 390 /* 391 * dump out the contents of page tables... 392 */ 393 static void 394 dump_tables(void) 395 { 396 uint_t save_index[4]; /* for recursion */ 397 char *save_table[4]; /* for recursion */ 398 uint_t l; 399 uint64_t va; 400 uint64_t pgsize; 401 int index; 402 int i; 403 x86pte_t pteval; 404 char *table; 405 static char *tablist = "\t\t\t"; 406 char *tabs = tablist + 3 - top_level; 407 uint_t pa, pa1; 408 #if !defined(__xpv) 409 #define maddr_t paddr_t 410 #endif /* !__xpv */ 411 412 dboot_printf("Finished pagetables:\n"); 413 table = (char *)(uintptr_t)top_page_table; 414 l = top_level; 415 va = 0; 416 for (index = 0; index < ptes_per_table; ++index) { 417 pgsize = 1ull << shift_amt[l]; 418 if (pae_support) 419 pteval = ((x86pte_t *)table)[index]; 420 else 421 pteval = ((x86pte32_t *)table)[index]; 422 if (pteval == 0) 423 goto next_entry; 424 425 dboot_printf("%s %p[0x%x] = %" PRIx64 ", va=%" PRIx64, 426 tabs + l, (void *)table, index, (uint64_t)pteval, va); 427 pa = ma_to_pa(pteval & MMU_PAGEMASK); 428 dboot_printf(" physaddr=%x\n", pa); 429 430 /* 431 * Don't try to walk hypervisor private pagetables 432 */ 433 if ((l > 1 || (l == 1 && (pteval & PT_PAGESIZE) == 0))) { 434 save_table[l] = table; 435 save_index[l] = index; 436 --l; 437 index = -1; 438 table = (char *)(uintptr_t) 439 ma_to_pa(pteval & MMU_PAGEMASK); 440 goto recursion; 441 } 442 443 /* 444 * shorten dump for consecutive mappings 445 */ 446 for (i = 1; index + i < ptes_per_table; ++i) { 447 if (pae_support) 448 pteval = ((x86pte_t *)table)[index + i]; 449 else 450 pteval = ((x86pte32_t *)table)[index + i]; 451 if (pteval == 0) 452 break; 453 pa1 = ma_to_pa(pteval & MMU_PAGEMASK); 454 if (pa1 != pa + i * pgsize) 455 break; 456 } 457 if (i > 2) { 458 dboot_printf("%s...\n", tabs + l); 459 va += pgsize * (i - 2); 460 index += i - 2; 461 } 462 next_entry: 463 va += pgsize; 464 if (l == 3 && index == 256) /* VA hole */ 465 va = 0xffff800000000000ull; 466 recursion: 467 ; 468 } 469 if (l < top_level) { 470 ++l; 471 index = save_index[l]; 472 table = save_table[l]; 473 goto recursion; 474 } 475 } 476 477 /* 478 * Add a mapping for the machine page at the given virtual address. 479 */ 480 static void 481 map_ma_at_va(maddr_t ma, native_ptr_t va, uint_t level) 482 { 483 x86pte_t *ptep; 484 x86pte_t pteval; 485 486 pteval = ma | pte_bits; 487 if (level > 0) 488 pteval |= PT_PAGESIZE; 489 if (va >= target_kernel_text && pge_support) 490 pteval |= PT_GLOBAL; 491 492 if (map_debug && ma != va) 493 dboot_printf("mapping ma=0x%" PRIx64 " va=0x%" PRIx64 494 " pte=0x%" PRIx64 " l=%d\n", 495 (uint64_t)ma, (uint64_t)va, pteval, level); 496 497 #if defined(__xpv) 498 /* 499 * see if we can avoid find_pte() on the hypervisor 500 */ 501 if (HYPERVISOR_update_va_mapping(va, pteval, 502 UVMF_INVLPG | UVMF_LOCAL) == 0) 503 return; 504 #endif 505 506 /* 507 * Find the pte that will map this address. This creates any 508 * missing intermediate level page tables 509 */ 510 ptep = find_pte(va, NULL, level, 0); 511 512 /* 513 * When paravirtualized, we must use hypervisor calls to modify the 514 * PTE, since paging is active. On real hardware we just write to 515 * the pagetables which aren't in use yet. 516 */ 517 #if defined(__xpv) 518 ptep = ptep; /* shut lint up */ 519 if (HYPERVISOR_update_va_mapping(va, pteval, UVMF_INVLPG | UVMF_LOCAL)) 520 dboot_panic("mmu_update failed-map_pa_at_va va=0x%" PRIx64 521 " l=%d ma=0x%" PRIx64 ", pte=0x%" PRIx64 "", 522 (uint64_t)va, level, (uint64_t)ma, pteval); 523 #else 524 if (va < 1024 * 1024) 525 pteval |= PT_NOCACHE; /* for video RAM */ 526 if (pae_support) 527 *ptep = pteval; 528 else 529 *((x86pte32_t *)ptep) = (x86pte32_t)pteval; 530 #endif 531 } 532 533 /* 534 * Add a mapping for the physical page at the given virtual address. 535 */ 536 static void 537 map_pa_at_va(paddr_t pa, native_ptr_t va, uint_t level) 538 { 539 map_ma_at_va(pa_to_ma(pa), va, level); 540 } 541 542 /* 543 * This is called to remove start..end from the 544 * possible range of PCI addresses. 545 */ 546 const uint64_t pci_lo_limit = 0x00100000ul; 547 const uint64_t pci_hi_limit = 0xfff00000ul; 548 static void 549 exclude_from_pci(uint64_t start, uint64_t end) 550 { 551 int i; 552 int j; 553 struct boot_memlist *ml; 554 555 for (i = 0; i < pcimemlists_used; ++i) { 556 ml = &pcimemlists[i]; 557 558 /* delete the entire range? */ 559 if (start <= ml->addr && ml->addr + ml->size <= end) { 560 --pcimemlists_used; 561 for (j = i; j < pcimemlists_used; ++j) 562 pcimemlists[j] = pcimemlists[j + 1]; 563 --i; /* to revisit the new one at this index */ 564 } 565 566 /* split a range? */ 567 else if (ml->addr < start && end < ml->addr + ml->size) { 568 569 ++pcimemlists_used; 570 if (pcimemlists_used > MAX_MEMLIST) 571 dboot_panic("too many pcimemlists"); 572 573 for (j = pcimemlists_used - 1; j > i; --j) 574 pcimemlists[j] = pcimemlists[j - 1]; 575 ml->size = start - ml->addr; 576 577 ++ml; 578 ml->size = (ml->addr + ml->size) - end; 579 ml->addr = end; 580 ++i; /* skip on to next one */ 581 } 582 583 /* cut memory off the start? */ 584 else if (ml->addr < end && end < ml->addr + ml->size) { 585 ml->size -= end - ml->addr; 586 ml->addr = end; 587 } 588 589 /* cut memory off the end? */ 590 else if (ml->addr <= start && start < ml->addr + ml->size) { 591 ml->size = start - ml->addr; 592 } 593 } 594 } 595 596 /* 597 * Xen strips the size field out of the mb_memory_map_t, see struct e820entry 598 * definition in Xen source. 599 */ 600 #ifdef __xpv 601 typedef struct { 602 uint32_t base_addr_low; 603 uint32_t base_addr_high; 604 uint32_t length_low; 605 uint32_t length_high; 606 uint32_t type; 607 } mmap_t; 608 #else 609 typedef mb_memory_map_t mmap_t; 610 #endif 611 612 static void 613 build_pcimemlists(mmap_t *mem, int num) 614 { 615 mmap_t *mmap; 616 uint64_t page_offset = MMU_PAGEOFFSET; /* needs to be 64 bits */ 617 uint64_t start; 618 uint64_t end; 619 int i; 620 621 /* 622 * initialize 623 */ 624 pcimemlists[0].addr = pci_lo_limit; 625 pcimemlists[0].size = pci_hi_limit - pci_lo_limit; 626 pcimemlists_used = 1; 627 628 /* 629 * Fill in PCI memlists. 630 */ 631 for (mmap = mem, i = 0; i < num; ++i, ++mmap) { 632 start = ((uint64_t)mmap->base_addr_high << 32) + 633 mmap->base_addr_low; 634 end = start + ((uint64_t)mmap->length_high << 32) + 635 mmap->length_low; 636 637 if (prom_debug) 638 dboot_printf("\ttype: %d %" PRIx64 "..%" 639 PRIx64 "\n", mmap->type, start, end); 640 641 /* 642 * page align start and end 643 */ 644 start = (start + page_offset) & ~page_offset; 645 end &= ~page_offset; 646 if (end <= start) 647 continue; 648 649 exclude_from_pci(start, end); 650 } 651 652 /* 653 * Finish off the pcimemlist 654 */ 655 if (prom_debug) { 656 for (i = 0; i < pcimemlists_used; ++i) { 657 dboot_printf("pcimemlist entry 0x%" PRIx64 "..0x%" 658 PRIx64 "\n", pcimemlists[i].addr, 659 pcimemlists[i].addr + pcimemlists[i].size); 660 } 661 } 662 pcimemlists[0].next = 0; 663 pcimemlists[0].prev = 0; 664 for (i = 1; i < pcimemlists_used; ++i) { 665 pcimemlists[i].prev = 666 (native_ptr_t)(uintptr_t)(pcimemlists + i - 1); 667 pcimemlists[i].next = 0; 668 pcimemlists[i - 1].next = 669 (native_ptr_t)(uintptr_t)(pcimemlists + i); 670 } 671 bi->bi_pcimem = (native_ptr_t)(uintptr_t)pcimemlists; 672 DBG(bi->bi_pcimem); 673 } 674 675 #if defined(__xpv) 676 /* 677 * Initialize memory allocator stuff from hypervisor-supplied start info. 678 * 679 * There is 512KB of scratch area after the boot stack page. 680 * We'll use that for everything except the kernel nucleus pages which are too 681 * big to fit there and are allocated last anyway. 682 */ 683 #define MAXMAPS 100 684 static mmap_t map_buffer[MAXMAPS]; 685 static void 686 init_mem_alloc(void) 687 { 688 int local; /* variables needed to find start region */ 689 paddr_t scratch_start; 690 xen_memory_map_t map; 691 692 DBG_MSG("Entered init_mem_alloc()\n"); 693 694 /* 695 * Free memory follows the stack. There's at least 512KB of scratch 696 * space, rounded up to at least 2Mb alignment. That should be enough 697 * for the page tables we'll need to build. The nucleus memory is 698 * allocated last and will be outside the addressible range. We'll 699 * switch to new page tables before we unpack the kernel 700 */ 701 scratch_start = RNDUP((paddr_t)(uintptr_t)&local, MMU_PAGESIZE); 702 DBG(scratch_start); 703 scratch_end = RNDUP((paddr_t)scratch_start + 512 * 1024, TWO_MEG); 704 DBG(scratch_end); 705 706 /* 707 * For paranoia, leave some space between hypervisor data and ours. 708 * Use 500 instead of 512. 709 */ 710 next_avail_addr = scratch_end - 500 * 1024; 711 DBG(next_avail_addr); 712 713 /* 714 * The domain builder gives us at most 1 module 715 */ 716 DBG(xen_info->mod_len); 717 if (xen_info->mod_len > 0) { 718 DBG(xen_info->mod_start); 719 modules[0].bm_addr = xen_info->mod_start; 720 modules[0].bm_size = xen_info->mod_len; 721 bi->bi_module_cnt = 1; 722 bi->bi_modules = (native_ptr_t)modules; 723 } else { 724 bi->bi_module_cnt = 0; 725 bi->bi_modules = NULL; 726 } 727 DBG(bi->bi_module_cnt); 728 DBG(bi->bi_modules); 729 730 DBG(xen_info->mfn_list); 731 DBG(xen_info->nr_pages); 732 max_mem = (paddr_t)xen_info->nr_pages << MMU_PAGESHIFT; 733 DBG(max_mem); 734 735 /* 736 * Using pseudo-physical addresses, so only 1 memlist element 737 */ 738 memlists[0].addr = 0; 739 DBG(memlists[0].addr); 740 memlists[0].size = max_mem; 741 DBG(memlists[0].size); 742 memlists_used = 1; 743 DBG(memlists_used); 744 745 /* 746 * finish building physinstall list 747 */ 748 sort_physinstall(); 749 750 /* 751 * build bios reserved memlists 752 */ 753 build_rsvdmemlists(); 754 755 if (DOMAIN_IS_INITDOMAIN(xen_info)) { 756 /* 757 * build PCI Memory list 758 */ 759 map.nr_entries = MAXMAPS; 760 /*LINTED: constant in conditional context*/ 761 set_xen_guest_handle(map.buffer, map_buffer); 762 if (HYPERVISOR_memory_op(XENMEM_machine_memory_map, &map) != 0) 763 dboot_panic("getting XENMEM_machine_memory_map failed"); 764 build_pcimemlists(map_buffer, map.nr_entries); 765 } 766 } 767 768 #else /* !__xpv */ 769 770 /* 771 * During memory allocation, find the highest address not used yet. 772 */ 773 static void 774 check_higher(paddr_t a) 775 { 776 if (a < next_avail_addr) 777 return; 778 next_avail_addr = RNDUP(a + 1, MMU_PAGESIZE); 779 DBG(next_avail_addr); 780 } 781 782 /* 783 * Walk through the module information finding the last used address. 784 * The first available address will become the top level page table. 785 * 786 * We then build the phys_install memlist from the multiboot information. 787 */ 788 static void 789 init_mem_alloc(void) 790 { 791 mb_memory_map_t *mmap; 792 mb_module_t *mod; 793 uint64_t start; 794 uint64_t end; 795 uint64_t page_offset = MMU_PAGEOFFSET; /* needs to be 64 bits */ 796 extern char _end[]; 797 int i; 798 799 DBG_MSG("Entered init_mem_alloc()\n"); 800 DBG((uintptr_t)mb_info); 801 802 if (mb_info->mods_count > MAX_MODULES) { 803 dboot_panic("Too many modules (%d) -- the maximum is %d.", 804 mb_info->mods_count, MAX_MODULES); 805 } 806 /* 807 * search the modules to find the last used address 808 * we'll build the module list while we're walking through here 809 */ 810 DBG_MSG("\nFinding Modules\n"); 811 check_higher((paddr_t)(uintptr_t)&_end); 812 for (mod = (mb_module_t *)(mb_info->mods_addr), i = 0; 813 i < mb_info->mods_count; 814 ++mod, ++i) { 815 if (prom_debug) { 816 dboot_printf("\tmodule #%d: %s at: 0x%lx, len 0x%lx\n", 817 i, (char *)(mod->mod_name), 818 (ulong_t)mod->mod_start, (ulong_t)mod->mod_end); 819 } 820 modules[i].bm_addr = mod->mod_start; 821 if (mod->mod_start > mod->mod_end) { 822 dboot_panic("module[%d]: Invalid module start address " 823 "(0x%llx)", i, (uint64_t)mod->mod_start); 824 } 825 modules[i].bm_size = mod->mod_end - mod->mod_start; 826 827 check_higher(mod->mod_end); 828 } 829 bi->bi_modules = (native_ptr_t)(uintptr_t)modules; 830 DBG(bi->bi_modules); 831 bi->bi_module_cnt = mb_info->mods_count; 832 DBG(bi->bi_module_cnt); 833 834 /* 835 * Walk through the memory map from multiboot and build our memlist 836 * structures. Note these will have native format pointers. 837 */ 838 DBG_MSG("\nFinding Memory Map\n"); 839 DBG(mb_info->flags); 840 max_mem = 0; 841 if (mb_info->flags & 0x40) { 842 int cnt = 0; 843 844 DBG(mb_info->mmap_addr); 845 DBG(mb_info->mmap_length); 846 check_higher(mb_info->mmap_addr + mb_info->mmap_length); 847 848 for (mmap = (mb_memory_map_t *)mb_info->mmap_addr; 849 (uint32_t)mmap < mb_info->mmap_addr + mb_info->mmap_length; 850 mmap = (mb_memory_map_t *)((uint32_t)mmap + mmap->size 851 + sizeof (mmap->size))) { 852 ++cnt; 853 start = ((uint64_t)mmap->base_addr_high << 32) + 854 mmap->base_addr_low; 855 end = start + ((uint64_t)mmap->length_high << 32) + 856 mmap->length_low; 857 858 if (prom_debug) 859 dboot_printf("\ttype: %d %" PRIx64 "..%" 860 PRIx64 "\n", mmap->type, start, end); 861 862 /* 863 * page align start and end 864 */ 865 start = (start + page_offset) & ~page_offset; 866 end &= ~page_offset; 867 if (end <= start) 868 continue; 869 870 /* 871 * only type 1 is usable RAM 872 */ 873 switch (mmap->type) { 874 case 1: 875 if (end > max_mem) 876 max_mem = end; 877 memlists[memlists_used].addr = start; 878 memlists[memlists_used].size = end - start; 879 ++memlists_used; 880 if (memlists_used > MAX_MEMLIST) 881 dboot_panic("too many memlists"); 882 break; 883 case 2: 884 rsvdmemlists[rsvdmemlists_used].addr = start; 885 rsvdmemlists[rsvdmemlists_used].size = 886 end - start; 887 ++rsvdmemlists_used; 888 if (rsvdmemlists_used > MAX_MEMLIST) 889 dboot_panic("too many rsvdmemlists"); 890 break; 891 default: 892 continue; 893 } 894 } 895 build_pcimemlists((mb_memory_map_t *)mb_info->mmap_addr, cnt); 896 } else if (mb_info->flags & 0x01) { 897 DBG(mb_info->mem_lower); 898 memlists[memlists_used].addr = 0; 899 memlists[memlists_used].size = mb_info->mem_lower * 1024; 900 ++memlists_used; 901 DBG(mb_info->mem_upper); 902 memlists[memlists_used].addr = 1024 * 1024; 903 memlists[memlists_used].size = mb_info->mem_upper * 1024; 904 ++memlists_used; 905 906 /* 907 * Old platform - assume I/O space at the end of memory. 908 */ 909 pcimemlists[0].addr = 910 (mb_info->mem_upper * 1024) + (1024 * 1024); 911 pcimemlists[0].size = pci_hi_limit - pcimemlists[0].addr; 912 pcimemlists[0].next = 0; 913 pcimemlists[0].prev = 0; 914 bi->bi_pcimem = (native_ptr_t)(uintptr_t)pcimemlists; 915 DBG(bi->bi_pcimem); 916 } else { 917 dboot_panic("No memory info from boot loader!!!"); 918 } 919 920 check_higher(bi->bi_cmdline); 921 922 /* 923 * finish processing the physinstall list 924 */ 925 sort_physinstall(); 926 927 /* 928 * build bios reserved mem lists 929 */ 930 build_rsvdmemlists(); 931 } 932 #endif /* !__xpv */ 933 934 /* 935 * Simple memory allocator, allocates aligned physical memory. 936 * Note that startup_kernel() only allocates memory, never frees. 937 * Memory usage just grows in an upward direction. 938 */ 939 static void * 940 do_mem_alloc(uint32_t size, uint32_t align) 941 { 942 uint_t i; 943 uint64_t best; 944 uint64_t start; 945 uint64_t end; 946 947 /* 948 * make sure size is a multiple of pagesize 949 */ 950 size = RNDUP(size, MMU_PAGESIZE); 951 next_avail_addr = RNDUP(next_avail_addr, align); 952 953 /* 954 * XXPV fixme joe 955 * 956 * a really large bootarchive that causes you to run out of memory 957 * may cause this to blow up 958 */ 959 /* LINTED E_UNEXPECTED_UINT_PROMOTION */ 960 best = (uint64_t)-size; 961 for (i = 0; i < memlists_used; ++i) { 962 start = memlists[i].addr; 963 #if defined(__xpv) 964 start += mfn_base; 965 #endif 966 end = start + memlists[i].size; 967 968 /* 969 * did we find the desired address? 970 */ 971 if (start <= next_avail_addr && next_avail_addr + size <= end) { 972 best = next_avail_addr; 973 goto done; 974 } 975 976 /* 977 * if not is this address the best so far? 978 */ 979 if (start > next_avail_addr && start < best && 980 RNDUP(start, align) + size <= end) 981 best = RNDUP(start, align); 982 } 983 984 /* 985 * We didn't find exactly the address we wanted, due to going off the 986 * end of a memory region. Return the best found memory address. 987 */ 988 done: 989 next_avail_addr = best + size; 990 #if defined(__xpv) 991 if (next_avail_addr > scratch_end) 992 dboot_panic("Out of mem next_avail: 0x%lx, scratch_end: " 993 "0x%lx", (ulong_t)next_avail_addr, 994 (ulong_t)scratch_end); 995 #endif 996 (void) memset((void *)(uintptr_t)best, 0, size); 997 return ((void *)(uintptr_t)best); 998 } 999 1000 void * 1001 mem_alloc(uint32_t size) 1002 { 1003 return (do_mem_alloc(size, MMU_PAGESIZE)); 1004 } 1005 1006 1007 /* 1008 * Build page tables to map all of memory used so far as well as the kernel. 1009 */ 1010 static void 1011 build_page_tables(void) 1012 { 1013 uint32_t psize; 1014 uint32_t level; 1015 uint32_t off; 1016 uint64_t start; 1017 #if !defined(__xpv) 1018 uint32_t i; 1019 uint64_t end; 1020 #endif /* __xpv */ 1021 1022 /* 1023 * If we're on metal, we need to create the top level pagetable. 1024 */ 1025 #if defined(__xpv) 1026 top_page_table = (paddr_t)(uintptr_t)xen_info->pt_base; 1027 #else /* __xpv */ 1028 top_page_table = (paddr_t)(uintptr_t)mem_alloc(MMU_PAGESIZE); 1029 #endif /* __xpv */ 1030 DBG((uintptr_t)top_page_table); 1031 1032 /* 1033 * Determine if we'll use large mappings for kernel, then map it. 1034 */ 1035 if (largepage_support) { 1036 psize = lpagesize; 1037 level = 1; 1038 } else { 1039 psize = MMU_PAGESIZE; 1040 level = 0; 1041 } 1042 1043 DBG_MSG("Mapping kernel\n"); 1044 DBG(ktext_phys); 1045 DBG(target_kernel_text); 1046 DBG(ksize); 1047 DBG(psize); 1048 for (off = 0; off < ksize; off += psize) 1049 map_pa_at_va(ktext_phys + off, target_kernel_text + off, level); 1050 1051 /* 1052 * The kernel will need a 1 page window to work with page tables 1053 */ 1054 bi->bi_pt_window = (uintptr_t)mem_alloc(MMU_PAGESIZE); 1055 DBG(bi->bi_pt_window); 1056 bi->bi_pte_to_pt_window = 1057 (uintptr_t)find_pte(bi->bi_pt_window, NULL, 0, 0); 1058 DBG(bi->bi_pte_to_pt_window); 1059 1060 #if defined(__xpv) 1061 if (!DOMAIN_IS_INITDOMAIN(xen_info)) { 1062 /* If this is a domU we're done. */ 1063 DBG_MSG("\nPage tables constructed\n"); 1064 return; 1065 } 1066 #endif /* __xpv */ 1067 1068 /* 1069 * We need 1:1 mappings for the lower 1M of memory to access 1070 * BIOS tables used by a couple of drivers during boot. 1071 * 1072 * The following code works because our simple memory allocator 1073 * only grows usage in an upwards direction. 1074 * 1075 * Note that by this point in boot some mappings for low memory 1076 * may already exist because we've already accessed device in low 1077 * memory. (Specifically the video frame buffer and keyboard 1078 * status ports.) If we're booting on raw hardware then GRUB 1079 * created these mappings for us. If we're booting under a 1080 * hypervisor then we went ahead and remapped these devices into 1081 * memory allocated within dboot itself. 1082 */ 1083 if (map_debug) 1084 dboot_printf("1:1 map pa=0..1Meg\n"); 1085 for (start = 0; start < 1024 * 1024; start += MMU_PAGESIZE) { 1086 #if defined(__xpv) 1087 map_ma_at_va(start, start, 0); 1088 #else /* __xpv */ 1089 map_pa_at_va(start, start, 0); 1090 #endif /* __xpv */ 1091 } 1092 1093 #if !defined(__xpv) 1094 for (i = 0; i < memlists_used; ++i) { 1095 start = memlists[i].addr; 1096 1097 end = start + memlists[i].size; 1098 1099 if (map_debug) 1100 dboot_printf("1:1 map pa=%" PRIx64 "..%" PRIx64 "\n", 1101 start, end); 1102 while (start < end && start < next_avail_addr) { 1103 map_pa_at_va(start, start, 0); 1104 start += MMU_PAGESIZE; 1105 } 1106 } 1107 #endif /* !__xpv */ 1108 1109 DBG_MSG("\nPage tables constructed\n"); 1110 } 1111 1112 #define NO_MULTIBOOT \ 1113 "multiboot is no longer used to boot the Solaris Operating System.\n\ 1114 The grub entry should be changed to:\n\ 1115 kernel$ /platform/i86pc/kernel/$ISADIR/unix\n\ 1116 module$ /platform/i86pc/$ISADIR/boot_archive\n\ 1117 See http://illumos.org/msg/SUNOS-8000-AK for details.\n" 1118 1119 /* 1120 * startup_kernel has a pretty simple job. It builds pagetables which reflect 1121 * 1:1 mappings for all memory in use. It then also adds mappings for 1122 * the kernel nucleus at virtual address of target_kernel_text using large page 1123 * mappings. The page table pages are also accessible at 1:1 mapped 1124 * virtual addresses. 1125 */ 1126 /*ARGSUSED*/ 1127 void 1128 startup_kernel(void) 1129 { 1130 char *cmdline; 1131 uintptr_t addr; 1132 #if defined(__xpv) 1133 physdev_set_iopl_t set_iopl; 1134 #endif /* __xpv */ 1135 1136 /* 1137 * At this point we are executing in a 32 bit real mode. 1138 */ 1139 #if defined(__xpv) 1140 cmdline = (char *)xen_info->cmd_line; 1141 #else /* __xpv */ 1142 cmdline = (char *)mb_info->cmdline; 1143 #endif /* __xpv */ 1144 1145 prom_debug = (strstr(cmdline, "prom_debug") != NULL); 1146 map_debug = (strstr(cmdline, "map_debug") != NULL); 1147 1148 #if defined(__xpv) 1149 /* 1150 * For dom0, before we initialize the console subsystem we'll 1151 * need to enable io operations, so set I/O priveldge level to 1. 1152 */ 1153 if (DOMAIN_IS_INITDOMAIN(xen_info)) { 1154 set_iopl.iopl = 1; 1155 (void) HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl); 1156 } 1157 #endif /* __xpv */ 1158 1159 bcons_init(cmdline); 1160 DBG_MSG("\n\nSolaris prekernel set: "); 1161 DBG_MSG(cmdline); 1162 DBG_MSG("\n"); 1163 1164 if (strstr(cmdline, "multiboot") != NULL) { 1165 dboot_panic(NO_MULTIBOOT); 1166 } 1167 1168 /* 1169 * boot info must be 16 byte aligned for 64 bit kernel ABI 1170 */ 1171 addr = (uintptr_t)boot_info; 1172 addr = (addr + 0xf) & ~0xf; 1173 bi = (struct xboot_info *)addr; 1174 DBG((uintptr_t)bi); 1175 bi->bi_cmdline = (native_ptr_t)(uintptr_t)cmdline; 1176 1177 /* 1178 * Need correct target_kernel_text value 1179 */ 1180 #if defined(_BOOT_TARGET_amd64) 1181 target_kernel_text = KERNEL_TEXT_amd64; 1182 #elif defined(__xpv) 1183 target_kernel_text = KERNEL_TEXT_i386_xpv; 1184 #else 1185 target_kernel_text = KERNEL_TEXT_i386; 1186 #endif 1187 DBG(target_kernel_text); 1188 1189 #if defined(__xpv) 1190 1191 /* 1192 * XXPV Derive this stuff from CPUID / what the hypervisor has enabled 1193 */ 1194 1195 #if defined(_BOOT_TARGET_amd64) 1196 /* 1197 * 64-bit hypervisor. 1198 */ 1199 amd64_support = 1; 1200 pae_support = 1; 1201 1202 #else /* _BOOT_TARGET_amd64 */ 1203 1204 /* 1205 * See if we are running on a PAE Hypervisor 1206 */ 1207 { 1208 xen_capabilities_info_t caps; 1209 1210 if (HYPERVISOR_xen_version(XENVER_capabilities, &caps) != 0) 1211 dboot_panic("HYPERVISOR_xen_version(caps) failed"); 1212 caps[sizeof (caps) - 1] = 0; 1213 if (prom_debug) 1214 dboot_printf("xen capabilities %s\n", caps); 1215 if (strstr(caps, "x86_32p") != NULL) 1216 pae_support = 1; 1217 } 1218 1219 #endif /* _BOOT_TARGET_amd64 */ 1220 { 1221 xen_platform_parameters_t p; 1222 1223 if (HYPERVISOR_xen_version(XENVER_platform_parameters, &p) != 0) 1224 dboot_panic("HYPERVISOR_xen_version(parms) failed"); 1225 DBG(p.virt_start); 1226 mfn_to_pfn_mapping = (pfn_t *)(xen_virt_start = p.virt_start); 1227 } 1228 1229 /* 1230 * The hypervisor loads stuff starting at 1Gig 1231 */ 1232 mfn_base = ONE_GIG; 1233 DBG(mfn_base); 1234 1235 /* 1236 * enable writable page table mode for the hypervisor 1237 */ 1238 if (HYPERVISOR_vm_assist(VMASST_CMD_enable, 1239 VMASST_TYPE_writable_pagetables) < 0) 1240 dboot_panic("HYPERVISOR_vm_assist(writable_pagetables) failed"); 1241 1242 /* 1243 * check for NX support 1244 */ 1245 if (pae_support) { 1246 uint32_t eax = 0x80000000; 1247 uint32_t edx = get_cpuid_edx(&eax); 1248 1249 if (eax >= 0x80000001) { 1250 eax = 0x80000001; 1251 edx = get_cpuid_edx(&eax); 1252 if (edx & CPUID_AMD_EDX_NX) 1253 NX_support = 1; 1254 } 1255 } 1256 1257 #if !defined(_BOOT_TARGET_amd64) 1258 1259 /* 1260 * The 32-bit hypervisor uses segmentation to protect itself from 1261 * guests. This means when a guest attempts to install a flat 4GB 1262 * code or data descriptor the 32-bit hypervisor will protect itself 1263 * by silently shrinking the segment such that if the guest attempts 1264 * any access where the hypervisor lives a #gp fault is generated. 1265 * The problem is that some applications expect a full 4GB flat 1266 * segment for their current thread pointer and will use negative 1267 * offset segment wrap around to access data. TLS support in linux 1268 * brand is one example of this. 1269 * 1270 * The 32-bit hypervisor can catch the #gp fault in these cases 1271 * and emulate the access without passing the #gp fault to the guest 1272 * but only if VMASST_TYPE_4gb_segments is explicitly turned on. 1273 * Seems like this should have been the default. 1274 * Either way, we want the hypervisor -- and not Solaris -- to deal 1275 * to deal with emulating these accesses. 1276 */ 1277 if (HYPERVISOR_vm_assist(VMASST_CMD_enable, 1278 VMASST_TYPE_4gb_segments) < 0) 1279 dboot_panic("HYPERVISOR_vm_assist(4gb_segments) failed"); 1280 #endif /* !_BOOT_TARGET_amd64 */ 1281 1282 #else /* __xpv */ 1283 1284 /* 1285 * use cpuid to enable MMU features 1286 */ 1287 if (have_cpuid()) { 1288 uint32_t eax, edx; 1289 1290 eax = 1; 1291 edx = get_cpuid_edx(&eax); 1292 if (edx & CPUID_INTC_EDX_PSE) 1293 largepage_support = 1; 1294 if (edx & CPUID_INTC_EDX_PGE) 1295 pge_support = 1; 1296 if (edx & CPUID_INTC_EDX_PAE) 1297 pae_support = 1; 1298 1299 eax = 0x80000000; 1300 edx = get_cpuid_edx(&eax); 1301 if (eax >= 0x80000001) { 1302 eax = 0x80000001; 1303 edx = get_cpuid_edx(&eax); 1304 if (edx & CPUID_AMD_EDX_LM) 1305 amd64_support = 1; 1306 if (edx & CPUID_AMD_EDX_NX) 1307 NX_support = 1; 1308 } 1309 } else { 1310 dboot_printf("cpuid not supported\n"); 1311 } 1312 #endif /* __xpv */ 1313 1314 1315 #if defined(_BOOT_TARGET_amd64) 1316 if (amd64_support == 0) 1317 dboot_panic("long mode not supported, rebooting"); 1318 else if (pae_support == 0) 1319 dboot_panic("long mode, but no PAE; rebooting"); 1320 #else 1321 /* 1322 * Allow the command line to over-ride use of PAE for 32 bit. 1323 */ 1324 if (strstr(cmdline, "disablePAE=true") != NULL) { 1325 pae_support = 0; 1326 NX_support = 0; 1327 amd64_support = 0; 1328 } 1329 #endif 1330 1331 /* 1332 * initialize the simple memory allocator 1333 */ 1334 init_mem_alloc(); 1335 1336 #if !defined(__xpv) && !defined(_BOOT_TARGET_amd64) 1337 /* 1338 * disable PAE on 32 bit h/w w/o NX and < 4Gig of memory 1339 */ 1340 if (max_mem < FOUR_GIG && NX_support == 0) 1341 pae_support = 0; 1342 #endif 1343 1344 /* 1345 * configure mmu information 1346 */ 1347 if (pae_support) { 1348 shift_amt = shift_amt_pae; 1349 ptes_per_table = 512; 1350 pte_size = 8; 1351 lpagesize = TWO_MEG; 1352 #if defined(_BOOT_TARGET_amd64) 1353 top_level = 3; 1354 #else 1355 top_level = 2; 1356 #endif 1357 } else { 1358 pae_support = 0; 1359 NX_support = 0; 1360 shift_amt = shift_amt_nopae; 1361 ptes_per_table = 1024; 1362 pte_size = 4; 1363 lpagesize = FOUR_MEG; 1364 top_level = 1; 1365 } 1366 1367 DBG(pge_support); 1368 DBG(NX_support); 1369 DBG(largepage_support); 1370 DBG(amd64_support); 1371 DBG(top_level); 1372 DBG(pte_size); 1373 DBG(ptes_per_table); 1374 DBG(lpagesize); 1375 1376 #if defined(__xpv) 1377 ktext_phys = ONE_GIG; /* from UNIX Mapfile */ 1378 #else 1379 ktext_phys = FOUR_MEG; /* from UNIX Mapfile */ 1380 #endif 1381 1382 #if !defined(__xpv) && defined(_BOOT_TARGET_amd64) 1383 /* 1384 * For grub, copy kernel bits from the ELF64 file to final place. 1385 */ 1386 DBG_MSG("\nAllocating nucleus pages.\n"); 1387 ktext_phys = (uintptr_t)do_mem_alloc(ksize, FOUR_MEG); 1388 if (ktext_phys == 0) 1389 dboot_panic("failed to allocate aligned kernel memory"); 1390 if (dboot_elfload64(mb_header.load_addr) != 0) 1391 dboot_panic("failed to parse kernel ELF image, rebooting"); 1392 #endif 1393 1394 DBG(ktext_phys); 1395 1396 /* 1397 * Allocate page tables. 1398 */ 1399 build_page_tables(); 1400 1401 /* 1402 * return to assembly code to switch to running kernel 1403 */ 1404 entry_addr_low = (uint32_t)target_kernel_text; 1405 DBG(entry_addr_low); 1406 bi->bi_use_largepage = largepage_support; 1407 bi->bi_use_pae = pae_support; 1408 bi->bi_use_pge = pge_support; 1409 bi->bi_use_nx = NX_support; 1410 1411 #if defined(__xpv) 1412 1413 bi->bi_next_paddr = next_avail_addr - mfn_base; 1414 DBG(bi->bi_next_paddr); 1415 bi->bi_next_vaddr = (native_ptr_t)next_avail_addr; 1416 DBG(bi->bi_next_vaddr); 1417 1418 /* 1419 * unmap unused pages in start area to make them available for DMA 1420 */ 1421 while (next_avail_addr < scratch_end) { 1422 (void) HYPERVISOR_update_va_mapping(next_avail_addr, 1423 0, UVMF_INVLPG | UVMF_LOCAL); 1424 next_avail_addr += MMU_PAGESIZE; 1425 } 1426 1427 bi->bi_xen_start_info = (uintptr_t)xen_info; 1428 DBG((uintptr_t)HYPERVISOR_shared_info); 1429 bi->bi_shared_info = (native_ptr_t)HYPERVISOR_shared_info; 1430 bi->bi_top_page_table = (uintptr_t)top_page_table - mfn_base; 1431 1432 #else /* __xpv */ 1433 1434 bi->bi_next_paddr = next_avail_addr; 1435 DBG(bi->bi_next_paddr); 1436 bi->bi_next_vaddr = (uintptr_t)next_avail_addr; 1437 DBG(bi->bi_next_vaddr); 1438 bi->bi_mb_info = (uintptr_t)mb_info; 1439 bi->bi_top_page_table = (uintptr_t)top_page_table; 1440 1441 #endif /* __xpv */ 1442 1443 bi->bi_kseg_size = FOUR_MEG; 1444 DBG(bi->bi_kseg_size); 1445 1446 #ifndef __xpv 1447 if (map_debug) 1448 dump_tables(); 1449 #endif 1450 1451 DBG_MSG("\n\n*** DBOOT DONE -- back to asm to jump to kernel\n\n"); 1452 }