Print this page
12195 acpidump failed under EFI
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/i86pc/dboot/dboot_startkern.c
+++ new/usr/src/uts/i86pc/dboot/dboot_startkern.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
↓ open down ↓ |
15 lines elided |
↑ open up ↑ |
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
24 24 * Use is subject to license terms.
25 25 *
26 - * Copyright 2013 Joyent, Inc. All rights reserved.
26 + * Copyright 2020 Joyent, Inc.
27 27 */
28 28
29 29
30 30 #include <sys/types.h>
31 31 #include <sys/machparam.h>
32 32 #include <sys/x86_archext.h>
33 33 #include <sys/systm.h>
34 34 #include <sys/mach_mmu.h>
35 35 #include <sys/multiboot.h>
36 36 #include <sys/multiboot2.h>
37 37 #include <sys/multiboot2_impl.h>
38 38 #include <sys/sysmacros.h>
39 39 #include <sys/framebuffer.h>
40 40 #include <sys/sha1.h>
41 41 #include <util/string.h>
42 42 #include <util/strtolctype.h>
43 43 #include <sys/efi.h>
44 44
45 45 /*
46 46 * Compile time debug knob. We do not have any early mechanism to control it
47 47 * as the boot is the earliest mechanism we have, and we do not want to have
48 48 * it being switched on by default.
49 49 */
50 50 int dboot_debug = 0;
51 51
52 52 #if defined(__xpv)
53 53
54 54 #include <sys/hypervisor.h>
55 55 uintptr_t xen_virt_start;
56 56 pfn_t *mfn_to_pfn_mapping;
57 57
58 58 #else /* !__xpv */
59 59
60 60 extern multiboot_header_t mb_header;
61 61 extern uint32_t mb2_load_addr;
62 62 extern int have_cpuid(void);
63 63
64 64 #endif /* !__xpv */
65 65
66 66 #include <sys/inttypes.h>
67 67 #include <sys/bootinfo.h>
68 68 #include <sys/mach_mmu.h>
69 69 #include <sys/boot_console.h>
70 70
71 71 #include "dboot_asm.h"
72 72 #include "dboot_printf.h"
73 73 #include "dboot_xboot.h"
74 74 #include "dboot_elfload.h"
75 75
76 76 #define SHA1_ASCII_LENGTH (SHA1_DIGEST_LENGTH * 2)
77 77
78 78 /*
79 79 * This file contains code that runs to transition us from either a multiboot
80 80 * compliant loader (32 bit non-paging) or a XPV domain loader to
81 81 * regular kernel execution. Its task is to setup the kernel memory image
82 82 * and page tables.
83 83 *
84 84 * The code executes as:
85 85 * - 32 bits under GRUB (for 32 or 64 bit Solaris)
86 86 * - a 32 bit program for the 32-bit PV hypervisor
87 87 * - a 64 bit program for the 64-bit PV hypervisor (at least for now)
88 88 *
89 89 * Under the PV hypervisor, we must create mappings for any memory beyond the
90 90 * initial start of day allocation (such as the kernel itself).
91 91 *
92 92 * When on the metal, the mapping between maddr_t and paddr_t is 1:1.
93 93 * Since we are running in real mode, all such memory is accessible.
94 94 */
95 95
96 96 /*
97 97 * Standard bits used in PTE (page level) and PTP (internal levels)
98 98 */
99 99 x86pte_t ptp_bits = PT_VALID | PT_REF | PT_WRITABLE | PT_USER;
100 100 x86pte_t pte_bits = PT_VALID | PT_REF | PT_WRITABLE | PT_MOD | PT_NOCONSIST;
101 101
102 102 /*
103 103 * These are the target addresses (physical) where the kernel text and data
104 104 * nucleus pages will be unpacked. On the hypervisor this is actually a
105 105 * virtual address.
106 106 */
107 107 paddr_t ktext_phys;
108 108 uint32_t ksize = 2 * FOUR_MEG; /* kernel nucleus is 8Meg */
109 109
110 110 static uint64_t target_kernel_text; /* value to use for KERNEL_TEXT */
111 111
112 112 /*
113 113 * The stack is setup in assembler before entering startup_kernel()
114 114 */
115 115 char stack_space[STACK_SIZE];
116 116
117 117 /*
118 118 * Used to track physical memory allocation
119 119 */
120 120 static paddr_t next_avail_addr = 0;
121 121
122 122 #if defined(__xpv)
123 123 /*
124 124 * Additional information needed for hypervisor memory allocation.
125 125 * Only memory up to scratch_end is mapped by page tables.
126 126 * mfn_base is the start of the hypervisor virtual image. It's ONE_GIG, so
127 127 * to derive a pfn from a pointer, you subtract mfn_base.
128 128 */
129 129
130 130 static paddr_t scratch_end = 0; /* we can't write all of mem here */
131 131 static paddr_t mfn_base; /* addr corresponding to mfn_list[0] */
132 132 start_info_t *xen_info;
133 133
134 134 #else /* __xpv */
135 135
136 136 /*
137 137 * If on the metal, then we have a multiboot loader.
138 138 */
139 139 uint32_t mb_magic; /* magic from boot loader */
140 140 uint32_t mb_addr; /* multiboot info package from loader */
141 141 int multiboot_version;
142 142 multiboot_info_t *mb_info;
143 143 multiboot2_info_header_t *mb2_info;
144 144 multiboot_tag_mmap_t *mb2_mmap_tagp;
145 145 int num_entries; /* mmap entry count */
146 146 boolean_t num_entries_set; /* is mmap entry count set */
147 147 uintptr_t load_addr;
148 148 static boot_framebuffer_t framebuffer __aligned(16);
149 149 static boot_framebuffer_t *fb;
150 150
151 151 /* can not be automatic variables because of alignment */
152 152 static efi_guid_t smbios3 = SMBIOS3_TABLE_GUID;
153 153 static efi_guid_t smbios = SMBIOS_TABLE_GUID;
154 154 static efi_guid_t acpi2 = EFI_ACPI_TABLE_GUID;
155 155 static efi_guid_t acpi1 = ACPI_10_TABLE_GUID;
156 156 #endif /* __xpv */
157 157
158 158 /*
159 159 * This contains information passed to the kernel
160 160 */
161 161 struct xboot_info boot_info __aligned(16);
162 162 struct xboot_info *bi;
163 163
164 164 /*
165 165 * Page table and memory stuff.
166 166 */
167 167 static paddr_t max_mem; /* maximum memory address */
168 168
169 169 /*
170 170 * Information about processor MMU
171 171 */
172 172 int amd64_support = 0;
173 173 int largepage_support = 0;
174 174 int pae_support = 0;
175 175 int pge_support = 0;
176 176 int NX_support = 0;
177 177 int PAT_support = 0;
178 178
179 179 /*
180 180 * Low 32 bits of kernel entry address passed back to assembler.
181 181 * When running a 64 bit kernel, the high 32 bits are 0xffffffff.
182 182 */
183 183 uint32_t entry_addr_low;
184 184
185 185 /*
186 186 * Memlists for the kernel. We shouldn't need a lot of these.
187 187 */
188 188 #define MAX_MEMLIST (50)
189 189 struct boot_memlist memlists[MAX_MEMLIST];
190 190 uint_t memlists_used = 0;
191 191 struct boot_memlist pcimemlists[MAX_MEMLIST];
192 192 uint_t pcimemlists_used = 0;
193 193 struct boot_memlist rsvdmemlists[MAX_MEMLIST];
194 194 uint_t rsvdmemlists_used = 0;
195 195
196 196 /*
197 197 * This should match what's in the bootloader. It's arbitrary, but GRUB
198 198 * in particular has limitations on how much space it can use before it
199 199 * stops working properly. This should be enough.
200 200 */
201 201 struct boot_modules modules[MAX_BOOT_MODULES];
202 202 uint_t modules_used = 0;
203 203
204 204 #ifdef __xpv
205 205 /*
206 206 * Xen strips the size field out of the mb_memory_map_t, see struct e820entry
207 207 * definition in Xen source.
208 208 */
209 209 typedef struct {
210 210 uint32_t base_addr_low;
211 211 uint32_t base_addr_high;
212 212 uint32_t length_low;
213 213 uint32_t length_high;
214 214 uint32_t type;
215 215 } mmap_t;
216 216
217 217 /*
218 218 * There is 512KB of scratch area after the boot stack page.
219 219 * We'll use that for everything except the kernel nucleus pages which are too
220 220 * big to fit there and are allocated last anyway.
221 221 */
222 222 #define MAXMAPS 100
223 223 static mmap_t map_buffer[MAXMAPS];
224 224 #else
225 225 typedef mb_memory_map_t mmap_t;
226 226 #endif
227 227
228 228 /*
229 229 * Debugging macros
230 230 */
231 231 uint_t prom_debug = 0;
232 232 uint_t map_debug = 0;
233 233
234 234 static char noname[2] = "-";
235 235
236 236 /*
237 237 * Either hypervisor-specific or grub-specific code builds the initial
238 238 * memlists. This code does the sort/merge/link for final use.
239 239 */
240 240 static void
241 241 sort_physinstall(void)
242 242 {
243 243 int i;
244 244 #if !defined(__xpv)
245 245 int j;
246 246 struct boot_memlist tmp;
247 247
248 248 /*
249 249 * Now sort the memlists, in case they weren't in order.
250 250 * Yeah, this is a bubble sort; small, simple and easy to get right.
251 251 */
252 252 DBG_MSG("Sorting phys-installed list\n");
253 253 for (j = memlists_used - 1; j > 0; --j) {
254 254 for (i = 0; i < j; ++i) {
255 255 if (memlists[i].addr < memlists[i + 1].addr)
256 256 continue;
257 257 tmp = memlists[i];
258 258 memlists[i] = memlists[i + 1];
259 259 memlists[i + 1] = tmp;
260 260 }
261 261 }
262 262
263 263 /*
264 264 * Merge any memlists that don't have holes between them.
265 265 */
266 266 for (i = 0; i <= memlists_used - 1; ++i) {
267 267 if (memlists[i].addr + memlists[i].size != memlists[i + 1].addr)
268 268 continue;
269 269
270 270 if (prom_debug)
271 271 dboot_printf(
272 272 "merging mem segs %" PRIx64 "...%" PRIx64
273 273 " w/ %" PRIx64 "...%" PRIx64 "\n",
274 274 memlists[i].addr,
275 275 memlists[i].addr + memlists[i].size,
276 276 memlists[i + 1].addr,
277 277 memlists[i + 1].addr + memlists[i + 1].size);
278 278
279 279 memlists[i].size += memlists[i + 1].size;
280 280 for (j = i + 1; j < memlists_used - 1; ++j)
281 281 memlists[j] = memlists[j + 1];
282 282 --memlists_used;
283 283 DBG(memlists_used);
284 284 --i; /* after merging we need to reexamine, so do this */
285 285 }
286 286 #endif /* __xpv */
287 287
288 288 if (prom_debug) {
289 289 dboot_printf("\nFinal memlists:\n");
290 290 for (i = 0; i < memlists_used; ++i) {
291 291 dboot_printf("\t%d: addr=%" PRIx64 " size=%"
292 292 PRIx64 "\n", i, memlists[i].addr, memlists[i].size);
293 293 }
294 294 }
295 295
296 296 /*
297 297 * link together the memlists with native size pointers
298 298 */
299 299 memlists[0].next = 0;
300 300 memlists[0].prev = 0;
301 301 for (i = 1; i < memlists_used; ++i) {
302 302 memlists[i].prev = (native_ptr_t)(uintptr_t)(memlists + i - 1);
303 303 memlists[i].next = 0;
304 304 memlists[i - 1].next = (native_ptr_t)(uintptr_t)(memlists + i);
305 305 }
306 306 bi->bi_phys_install = (native_ptr_t)(uintptr_t)memlists;
307 307 DBG(bi->bi_phys_install);
308 308 }
309 309
310 310 /*
311 311 * build bios reserved memlists
312 312 */
313 313 static void
314 314 build_rsvdmemlists(void)
315 315 {
316 316 int i;
317 317
318 318 rsvdmemlists[0].next = 0;
319 319 rsvdmemlists[0].prev = 0;
320 320 for (i = 1; i < rsvdmemlists_used; ++i) {
321 321 rsvdmemlists[i].prev =
322 322 (native_ptr_t)(uintptr_t)(rsvdmemlists + i - 1);
323 323 rsvdmemlists[i].next = 0;
324 324 rsvdmemlists[i - 1].next =
325 325 (native_ptr_t)(uintptr_t)(rsvdmemlists + i);
326 326 }
327 327 bi->bi_rsvdmem = (native_ptr_t)(uintptr_t)rsvdmemlists;
328 328 DBG(bi->bi_rsvdmem);
329 329 }
330 330
331 331 #if defined(__xpv)
332 332
333 333 /*
334 334 * halt on the hypervisor after a delay to drain console output
335 335 */
336 336 void
337 337 dboot_halt(void)
338 338 {
339 339 uint_t i = 10000;
340 340
341 341 while (--i)
342 342 (void) HYPERVISOR_yield();
343 343 (void) HYPERVISOR_shutdown(SHUTDOWN_poweroff);
344 344 }
345 345
346 346 /*
347 347 * From a machine address, find the corresponding pseudo-physical address.
348 348 * Pseudo-physical address are contiguous and run from mfn_base in each VM.
349 349 * Machine addresses are the real underlying hardware addresses.
350 350 * These are needed for page table entries. Note that this routine is
351 351 * poorly protected. A bad value of "ma" will cause a page fault.
352 352 */
353 353 paddr_t
354 354 ma_to_pa(maddr_t ma)
355 355 {
356 356 ulong_t pgoff = ma & MMU_PAGEOFFSET;
357 357 ulong_t pfn = mfn_to_pfn_mapping[mmu_btop(ma)];
358 358 paddr_t pa;
359 359
360 360 if (pfn >= xen_info->nr_pages)
361 361 return (-(paddr_t)1);
362 362 pa = mfn_base + mmu_ptob((paddr_t)pfn) + pgoff;
363 363 #ifdef DEBUG
364 364 if (ma != pa_to_ma(pa))
365 365 dboot_printf("ma_to_pa(%" PRIx64 ") got %" PRIx64 ", "
366 366 "pa_to_ma() says %" PRIx64 "\n", ma, pa, pa_to_ma(pa));
367 367 #endif
368 368 return (pa);
369 369 }
370 370
371 371 /*
372 372 * From a pseudo-physical address, find the corresponding machine address.
373 373 */
374 374 maddr_t
375 375 pa_to_ma(paddr_t pa)
376 376 {
377 377 pfn_t pfn;
378 378 ulong_t mfn;
379 379
380 380 pfn = mmu_btop(pa - mfn_base);
381 381 if (pa < mfn_base || pfn >= xen_info->nr_pages)
382 382 dboot_panic("pa_to_ma(): illegal address 0x%lx", (ulong_t)pa);
383 383 mfn = ((ulong_t *)xen_info->mfn_list)[pfn];
384 384 #ifdef DEBUG
385 385 if (mfn_to_pfn_mapping[mfn] != pfn)
386 386 dboot_printf("pa_to_ma(pfn=%lx) got %lx ma_to_pa() says %lx\n",
387 387 pfn, mfn, mfn_to_pfn_mapping[mfn]);
388 388 #endif
389 389 return (mfn_to_ma(mfn) | (pa & MMU_PAGEOFFSET));
390 390 }
391 391
392 392 #endif /* __xpv */
393 393
394 394 x86pte_t
395 395 get_pteval(paddr_t table, uint_t index)
396 396 {
397 397 if (pae_support)
398 398 return (((x86pte_t *)(uintptr_t)table)[index]);
399 399 return (((x86pte32_t *)(uintptr_t)table)[index]);
400 400 }
401 401
402 402 /*ARGSUSED*/
403 403 void
404 404 set_pteval(paddr_t table, uint_t index, uint_t level, x86pte_t pteval)
405 405 {
406 406 #ifdef __xpv
407 407 mmu_update_t t;
408 408 maddr_t mtable = pa_to_ma(table);
409 409 int retcnt;
410 410
411 411 t.ptr = (mtable + index * pte_size) | MMU_NORMAL_PT_UPDATE;
412 412 t.val = pteval;
413 413 if (HYPERVISOR_mmu_update(&t, 1, &retcnt, DOMID_SELF) || retcnt != 1)
414 414 dboot_panic("HYPERVISOR_mmu_update() failed");
415 415 #else /* __xpv */
416 416 uintptr_t tab_addr = (uintptr_t)table;
417 417
418 418 if (pae_support)
419 419 ((x86pte_t *)tab_addr)[index] = pteval;
420 420 else
421 421 ((x86pte32_t *)tab_addr)[index] = (x86pte32_t)pteval;
422 422 if (level == top_level && level == 2)
423 423 reload_cr3();
424 424 #endif /* __xpv */
425 425 }
426 426
427 427 paddr_t
428 428 make_ptable(x86pte_t *pteval, uint_t level)
429 429 {
430 430 paddr_t new_table = (paddr_t)(uintptr_t)mem_alloc(MMU_PAGESIZE);
431 431
432 432 if (level == top_level && level == 2)
433 433 *pteval = pa_to_ma((uintptr_t)new_table) | PT_VALID;
434 434 else
435 435 *pteval = pa_to_ma((uintptr_t)new_table) | ptp_bits;
436 436
437 437 #ifdef __xpv
438 438 /* Remove write permission to the new page table. */
439 439 if (HYPERVISOR_update_va_mapping(new_table,
440 440 *pteval & ~(x86pte_t)PT_WRITABLE, UVMF_INVLPG | UVMF_LOCAL))
441 441 dboot_panic("HYP_update_va_mapping error");
442 442 #endif
443 443
444 444 if (map_debug)
445 445 dboot_printf("new page table lvl=%d paddr=0x%lx ptp=0x%"
446 446 PRIx64 "\n", level, (ulong_t)new_table, *pteval);
447 447 return (new_table);
448 448 }
449 449
450 450 x86pte_t *
451 451 map_pte(paddr_t table, uint_t index)
452 452 {
453 453 return ((x86pte_t *)(uintptr_t)(table + index * pte_size));
454 454 }
455 455
456 456 /*
457 457 * dump out the contents of page tables...
458 458 */
459 459 static void
460 460 dump_tables(void)
461 461 {
462 462 uint_t save_index[4]; /* for recursion */
463 463 char *save_table[4]; /* for recursion */
464 464 uint_t l;
465 465 uint64_t va;
466 466 uint64_t pgsize;
467 467 int index;
468 468 int i;
469 469 x86pte_t pteval;
470 470 char *table;
471 471 static char *tablist = "\t\t\t";
472 472 char *tabs = tablist + 3 - top_level;
473 473 uint_t pa, pa1;
474 474 #if !defined(__xpv)
475 475 #define maddr_t paddr_t
476 476 #endif /* !__xpv */
477 477
478 478 dboot_printf("Finished pagetables:\n");
479 479 table = (char *)(uintptr_t)top_page_table;
480 480 l = top_level;
481 481 va = 0;
482 482 for (index = 0; index < ptes_per_table; ++index) {
483 483 pgsize = 1ull << shift_amt[l];
484 484 if (pae_support)
485 485 pteval = ((x86pte_t *)table)[index];
486 486 else
487 487 pteval = ((x86pte32_t *)table)[index];
488 488 if (pteval == 0)
489 489 goto next_entry;
490 490
491 491 dboot_printf("%s %p[0x%x] = %" PRIx64 ", va=%" PRIx64,
492 492 tabs + l, (void *)table, index, (uint64_t)pteval, va);
493 493 pa = ma_to_pa(pteval & MMU_PAGEMASK);
494 494 dboot_printf(" physaddr=%x\n", pa);
495 495
496 496 /*
497 497 * Don't try to walk hypervisor private pagetables
498 498 */
499 499 if ((l > 1 || (l == 1 && (pteval & PT_PAGESIZE) == 0))) {
500 500 save_table[l] = table;
501 501 save_index[l] = index;
502 502 --l;
503 503 index = -1;
504 504 table = (char *)(uintptr_t)
505 505 ma_to_pa(pteval & MMU_PAGEMASK);
506 506 goto recursion;
507 507 }
508 508
509 509 /*
510 510 * shorten dump for consecutive mappings
511 511 */
512 512 for (i = 1; index + i < ptes_per_table; ++i) {
513 513 if (pae_support)
514 514 pteval = ((x86pte_t *)table)[index + i];
515 515 else
516 516 pteval = ((x86pte32_t *)table)[index + i];
517 517 if (pteval == 0)
518 518 break;
519 519 pa1 = ma_to_pa(pteval & MMU_PAGEMASK);
520 520 if (pa1 != pa + i * pgsize)
521 521 break;
522 522 }
523 523 if (i > 2) {
524 524 dboot_printf("%s...\n", tabs + l);
525 525 va += pgsize * (i - 2);
526 526 index += i - 2;
527 527 }
528 528 next_entry:
529 529 va += pgsize;
530 530 if (l == 3 && index == 256) /* VA hole */
531 531 va = 0xffff800000000000ull;
532 532 recursion:
533 533 ;
534 534 }
535 535 if (l < top_level) {
536 536 ++l;
537 537 index = save_index[l];
538 538 table = save_table[l];
539 539 goto recursion;
540 540 }
541 541 }
542 542
543 543 /*
544 544 * Add a mapping for the machine page at the given virtual address.
545 545 */
546 546 static void
547 547 map_ma_at_va(maddr_t ma, native_ptr_t va, uint_t level)
548 548 {
549 549 x86pte_t *ptep;
550 550 x86pte_t pteval;
551 551
552 552 pteval = ma | pte_bits;
553 553 if (level > 0)
554 554 pteval |= PT_PAGESIZE;
555 555 if (va >= target_kernel_text && pge_support)
556 556 pteval |= PT_GLOBAL;
557 557
558 558 if (map_debug && ma != va)
559 559 dboot_printf("mapping ma=0x%" PRIx64 " va=0x%" PRIx64
560 560 " pte=0x%" PRIx64 " l=%d\n",
561 561 (uint64_t)ma, (uint64_t)va, pteval, level);
562 562
563 563 #if defined(__xpv)
564 564 /*
565 565 * see if we can avoid find_pte() on the hypervisor
566 566 */
567 567 if (HYPERVISOR_update_va_mapping(va, pteval,
568 568 UVMF_INVLPG | UVMF_LOCAL) == 0)
569 569 return;
570 570 #endif
571 571
572 572 /*
573 573 * Find the pte that will map this address. This creates any
574 574 * missing intermediate level page tables
575 575 */
576 576 ptep = find_pte(va, NULL, level, 0);
577 577
578 578 /*
579 579 * When paravirtualized, we must use hypervisor calls to modify the
580 580 * PTE, since paging is active. On real hardware we just write to
581 581 * the pagetables which aren't in use yet.
582 582 */
583 583 #if defined(__xpv)
584 584 ptep = ptep; /* shut lint up */
585 585 if (HYPERVISOR_update_va_mapping(va, pteval, UVMF_INVLPG | UVMF_LOCAL))
586 586 dboot_panic("mmu_update failed-map_pa_at_va va=0x%" PRIx64
587 587 " l=%d ma=0x%" PRIx64 ", pte=0x%" PRIx64 "",
588 588 (uint64_t)va, level, (uint64_t)ma, pteval);
589 589 #else
590 590 if (va < 1024 * 1024)
591 591 pteval |= PT_NOCACHE; /* for video RAM */
592 592 if (pae_support)
593 593 *ptep = pteval;
594 594 else
595 595 *((x86pte32_t *)ptep) = (x86pte32_t)pteval;
596 596 #endif
597 597 }
598 598
599 599 /*
600 600 * Add a mapping for the physical page at the given virtual address.
601 601 */
602 602 static void
603 603 map_pa_at_va(paddr_t pa, native_ptr_t va, uint_t level)
604 604 {
605 605 map_ma_at_va(pa_to_ma(pa), va, level);
606 606 }
607 607
608 608 /*
609 609 * This is called to remove start..end from the
610 610 * possible range of PCI addresses.
611 611 */
612 612 const uint64_t pci_lo_limit = 0x00100000ul;
613 613 const uint64_t pci_hi_limit = 0xfff00000ul;
614 614 static void
615 615 exclude_from_pci(uint64_t start, uint64_t end)
616 616 {
617 617 int i;
618 618 int j;
619 619 struct boot_memlist *ml;
620 620
621 621 for (i = 0; i < pcimemlists_used; ++i) {
622 622 ml = &pcimemlists[i];
623 623
624 624 /* delete the entire range? */
625 625 if (start <= ml->addr && ml->addr + ml->size <= end) {
626 626 --pcimemlists_used;
627 627 for (j = i; j < pcimemlists_used; ++j)
628 628 pcimemlists[j] = pcimemlists[j + 1];
629 629 --i; /* to revisit the new one at this index */
630 630 }
631 631
632 632 /* split a range? */
633 633 else if (ml->addr < start && end < ml->addr + ml->size) {
634 634
635 635 ++pcimemlists_used;
636 636 if (pcimemlists_used > MAX_MEMLIST)
637 637 dboot_panic("too many pcimemlists");
638 638
639 639 for (j = pcimemlists_used - 1; j > i; --j)
640 640 pcimemlists[j] = pcimemlists[j - 1];
641 641 ml->size = start - ml->addr;
642 642
643 643 ++ml;
644 644 ml->size = (ml->addr + ml->size) - end;
645 645 ml->addr = end;
646 646 ++i; /* skip on to next one */
647 647 }
648 648
649 649 /* cut memory off the start? */
650 650 else if (ml->addr < end && end < ml->addr + ml->size) {
651 651 ml->size -= end - ml->addr;
652 652 ml->addr = end;
653 653 }
654 654
655 655 /* cut memory off the end? */
656 656 else if (ml->addr <= start && start < ml->addr + ml->size) {
657 657 ml->size = start - ml->addr;
658 658 }
659 659 }
660 660 }
661 661
662 662 /*
663 663 * During memory allocation, find the highest address not used yet.
664 664 */
665 665 static void
666 666 check_higher(paddr_t a)
667 667 {
668 668 if (a < next_avail_addr)
669 669 return;
670 670 next_avail_addr = RNDUP(a + 1, MMU_PAGESIZE);
671 671 DBG(next_avail_addr);
672 672 }
673 673
674 674 static int
675 675 dboot_loader_mmap_entries(void)
676 676 {
677 677 #if !defined(__xpv)
678 678 if (num_entries_set == B_TRUE)
679 679 return (num_entries);
680 680
681 681 switch (multiboot_version) {
682 682 case 1:
683 683 DBG(mb_info->flags);
684 684 if (mb_info->flags & 0x40) {
685 685 mb_memory_map_t *mmap;
686 686 caddr32_t mmap_addr;
687 687
688 688 DBG(mb_info->mmap_addr);
689 689 DBG(mb_info->mmap_length);
690 690 check_higher(mb_info->mmap_addr + mb_info->mmap_length);
691 691
692 692 for (mmap_addr = mb_info->mmap_addr;
693 693 mmap_addr < mb_info->mmap_addr +
694 694 mb_info->mmap_length;
695 695 mmap_addr += mmap->size + sizeof (mmap->size)) {
696 696 mmap = (mb_memory_map_t *)(uintptr_t)mmap_addr;
697 697 ++num_entries;
698 698 }
699 699
700 700 num_entries_set = B_TRUE;
701 701 }
702 702 break;
703 703 case 2:
704 704 num_entries_set = B_TRUE;
705 705 num_entries = dboot_multiboot2_mmap_nentries(mb2_info,
706 706 mb2_mmap_tagp);
707 707 break;
708 708 default:
709 709 dboot_panic("Unknown multiboot version: %d\n",
710 710 multiboot_version);
711 711 break;
712 712 }
713 713 return (num_entries);
714 714 #else
715 715 return (MAXMAPS);
716 716 #endif
717 717 }
718 718
719 719 static uint32_t
720 720 dboot_loader_mmap_get_type(int index)
721 721 {
722 722 #if !defined(__xpv)
723 723 mb_memory_map_t *mp, *mpend;
724 724 caddr32_t mmap_addr;
725 725 int i;
726 726
727 727 switch (multiboot_version) {
728 728 case 1:
729 729 mp = (mb_memory_map_t *)(uintptr_t)mb_info->mmap_addr;
730 730 mpend = (mb_memory_map_t *)(uintptr_t)
731 731 (mb_info->mmap_addr + mb_info->mmap_length);
732 732
733 733 for (i = 0; mp < mpend && i != index; i++)
734 734 mp = (mb_memory_map_t *)((uintptr_t)mp + mp->size +
735 735 sizeof (mp->size));
736 736 if (mp >= mpend) {
737 737 dboot_panic("dboot_loader_mmap_get_type(): index "
738 738 "out of bounds: %d\n", index);
739 739 }
740 740 return (mp->type);
741 741
742 742 case 2:
743 743 return (dboot_multiboot2_mmap_get_type(mb2_info,
744 744 mb2_mmap_tagp, index));
745 745
746 746 default:
747 747 dboot_panic("Unknown multiboot version: %d\n",
748 748 multiboot_version);
749 749 break;
750 750 }
751 751 return (0);
752 752 #else
753 753 return (map_buffer[index].type);
754 754 #endif
755 755 }
756 756
757 757 static uint64_t
758 758 dboot_loader_mmap_get_base(int index)
759 759 {
760 760 #if !defined(__xpv)
761 761 mb_memory_map_t *mp, *mpend;
762 762 int i;
763 763
764 764 switch (multiboot_version) {
765 765 case 1:
766 766 mp = (mb_memory_map_t *)mb_info->mmap_addr;
767 767 mpend = (mb_memory_map_t *)
768 768 (mb_info->mmap_addr + mb_info->mmap_length);
769 769
770 770 for (i = 0; mp < mpend && i != index; i++)
771 771 mp = (mb_memory_map_t *)((uintptr_t)mp + mp->size +
772 772 sizeof (mp->size));
773 773 if (mp >= mpend) {
774 774 dboot_panic("dboot_loader_mmap_get_base(): index "
775 775 "out of bounds: %d\n", index);
776 776 }
777 777 return (((uint64_t)mp->base_addr_high << 32) +
778 778 (uint64_t)mp->base_addr_low);
779 779
780 780 case 2:
781 781 return (dboot_multiboot2_mmap_get_base(mb2_info,
782 782 mb2_mmap_tagp, index));
783 783
784 784 default:
785 785 dboot_panic("Unknown multiboot version: %d\n",
786 786 multiboot_version);
787 787 break;
788 788 }
789 789 return (0);
790 790 #else
791 791 return (((uint64_t)map_buffer[index].base_addr_high << 32) +
792 792 (uint64_t)map_buffer[index].base_addr_low);
793 793 #endif
794 794 }
795 795
796 796 static uint64_t
797 797 dboot_loader_mmap_get_length(int index)
798 798 {
799 799 #if !defined(__xpv)
800 800 mb_memory_map_t *mp, *mpend;
801 801 int i;
802 802
803 803 switch (multiboot_version) {
804 804 case 1:
805 805 mp = (mb_memory_map_t *)mb_info->mmap_addr;
806 806 mpend = (mb_memory_map_t *)
807 807 (mb_info->mmap_addr + mb_info->mmap_length);
808 808
809 809 for (i = 0; mp < mpend && i != index; i++)
810 810 mp = (mb_memory_map_t *)((uintptr_t)mp + mp->size +
811 811 sizeof (mp->size));
812 812 if (mp >= mpend) {
813 813 dboot_panic("dboot_loader_mmap_get_length(): index "
814 814 "out of bounds: %d\n", index);
815 815 }
816 816 return (((uint64_t)mp->length_high << 32) +
817 817 (uint64_t)mp->length_low);
818 818
819 819 case 2:
820 820 return (dboot_multiboot2_mmap_get_length(mb2_info,
821 821 mb2_mmap_tagp, index));
822 822
823 823 default:
824 824 dboot_panic("Unknown multiboot version: %d\n",
825 825 multiboot_version);
826 826 break;
827 827 }
828 828 return (0);
829 829 #else
830 830 return (((uint64_t)map_buffer[index].length_high << 32) +
831 831 (uint64_t)map_buffer[index].length_low);
832 832 #endif
833 833 }
834 834
835 835 static void
836 836 build_pcimemlists(void)
837 837 {
838 838 uint64_t page_offset = MMU_PAGEOFFSET; /* needs to be 64 bits */
839 839 uint64_t start;
840 840 uint64_t end;
841 841 int i, num;
842 842
843 843 /*
844 844 * initialize
845 845 */
846 846 pcimemlists[0].addr = pci_lo_limit;
847 847 pcimemlists[0].size = pci_hi_limit - pci_lo_limit;
848 848 pcimemlists_used = 1;
849 849
850 850 num = dboot_loader_mmap_entries();
851 851 /*
852 852 * Fill in PCI memlists.
853 853 */
854 854 for (i = 0; i < num; ++i) {
855 855 start = dboot_loader_mmap_get_base(i);
856 856 end = start + dboot_loader_mmap_get_length(i);
857 857
858 858 if (prom_debug)
859 859 dboot_printf("\ttype: %d %" PRIx64 "..%"
860 860 PRIx64 "\n", dboot_loader_mmap_get_type(i),
861 861 start, end);
862 862
863 863 /*
864 864 * page align start and end
865 865 */
866 866 start = (start + page_offset) & ~page_offset;
867 867 end &= ~page_offset;
868 868 if (end <= start)
869 869 continue;
870 870
871 871 exclude_from_pci(start, end);
872 872 }
873 873
874 874 /*
875 875 * Finish off the pcimemlist
876 876 */
877 877 if (prom_debug) {
878 878 for (i = 0; i < pcimemlists_used; ++i) {
879 879 dboot_printf("pcimemlist entry 0x%" PRIx64 "..0x%"
880 880 PRIx64 "\n", pcimemlists[i].addr,
881 881 pcimemlists[i].addr + pcimemlists[i].size);
882 882 }
883 883 }
884 884 pcimemlists[0].next = 0;
885 885 pcimemlists[0].prev = 0;
886 886 for (i = 1; i < pcimemlists_used; ++i) {
887 887 pcimemlists[i].prev =
888 888 (native_ptr_t)(uintptr_t)(pcimemlists + i - 1);
889 889 pcimemlists[i].next = 0;
890 890 pcimemlists[i - 1].next =
891 891 (native_ptr_t)(uintptr_t)(pcimemlists + i);
892 892 }
893 893 bi->bi_pcimem = (native_ptr_t)(uintptr_t)pcimemlists;
894 894 DBG(bi->bi_pcimem);
895 895 }
896 896
897 897 #if defined(__xpv)
898 898 /*
899 899 * Initialize memory allocator stuff from hypervisor-supplied start info.
900 900 */
901 901 static void
902 902 init_mem_alloc(void)
903 903 {
904 904 int local; /* variables needed to find start region */
905 905 paddr_t scratch_start;
906 906 xen_memory_map_t map;
907 907
908 908 DBG_MSG("Entered init_mem_alloc()\n");
909 909
910 910 /*
911 911 * Free memory follows the stack. There's at least 512KB of scratch
912 912 * space, rounded up to at least 2Mb alignment. That should be enough
913 913 * for the page tables we'll need to build. The nucleus memory is
914 914 * allocated last and will be outside the addressible range. We'll
915 915 * switch to new page tables before we unpack the kernel
916 916 */
917 917 scratch_start = RNDUP((paddr_t)(uintptr_t)&local, MMU_PAGESIZE);
918 918 DBG(scratch_start);
919 919 scratch_end = RNDUP((paddr_t)scratch_start + 512 * 1024, TWO_MEG);
920 920 DBG(scratch_end);
921 921
922 922 /*
923 923 * For paranoia, leave some space between hypervisor data and ours.
924 924 * Use 500 instead of 512.
925 925 */
926 926 next_avail_addr = scratch_end - 500 * 1024;
927 927 DBG(next_avail_addr);
928 928
929 929 /*
930 930 * The domain builder gives us at most 1 module
931 931 */
932 932 DBG(xen_info->mod_len);
933 933 if (xen_info->mod_len > 0) {
934 934 DBG(xen_info->mod_start);
935 935 modules[0].bm_addr =
936 936 (native_ptr_t)(uintptr_t)xen_info->mod_start;
937 937 modules[0].bm_size = xen_info->mod_len;
938 938 bi->bi_module_cnt = 1;
939 939 bi->bi_modules = (native_ptr_t)(uintptr_t)modules;
940 940 } else {
941 941 bi->bi_module_cnt = 0;
942 942 bi->bi_modules = (native_ptr_t)(uintptr_t)NULL;
943 943 }
944 944 DBG(bi->bi_module_cnt);
945 945 DBG(bi->bi_modules);
946 946
947 947 DBG(xen_info->mfn_list);
948 948 DBG(xen_info->nr_pages);
949 949 max_mem = (paddr_t)xen_info->nr_pages << MMU_PAGESHIFT;
950 950 DBG(max_mem);
951 951
952 952 /*
953 953 * Using pseudo-physical addresses, so only 1 memlist element
954 954 */
955 955 memlists[0].addr = 0;
956 956 DBG(memlists[0].addr);
957 957 memlists[0].size = max_mem;
958 958 DBG(memlists[0].size);
959 959 memlists_used = 1;
960 960 DBG(memlists_used);
961 961
962 962 /*
963 963 * finish building physinstall list
964 964 */
965 965 sort_physinstall();
966 966
967 967 /*
968 968 * build bios reserved memlists
969 969 */
970 970 build_rsvdmemlists();
971 971
972 972 if (DOMAIN_IS_INITDOMAIN(xen_info)) {
973 973 /*
974 974 * build PCI Memory list
975 975 */
976 976 map.nr_entries = MAXMAPS;
977 977 /*LINTED: constant in conditional context*/
978 978 set_xen_guest_handle(map.buffer, map_buffer);
979 979 if (HYPERVISOR_memory_op(XENMEM_machine_memory_map, &map) != 0)
980 980 dboot_panic("getting XENMEM_machine_memory_map failed");
981 981 build_pcimemlists();
982 982 }
983 983 }
984 984
985 985 #else /* !__xpv */
986 986
987 987 static void
988 988 dboot_multiboot1_xboot_consinfo(void)
989 989 {
990 990 fb->framebuffer = 0;
991 991 }
992 992
993 993 static void
994 994 dboot_multiboot2_xboot_consinfo(void)
995 995 {
996 996 multiboot_tag_framebuffer_t *fbtag;
997 997 fbtag = dboot_multiboot2_find_tag(mb2_info,
998 998 MULTIBOOT_TAG_TYPE_FRAMEBUFFER);
999 999 fb->framebuffer = (uint64_t)(uintptr_t)fbtag;
1000 1000 }
1001 1001
1002 1002 static int
1003 1003 dboot_multiboot_modcount(void)
1004 1004 {
1005 1005 switch (multiboot_version) {
1006 1006 case 1:
1007 1007 return (mb_info->mods_count);
1008 1008
1009 1009 case 2:
1010 1010 return (dboot_multiboot2_modcount(mb2_info));
1011 1011
1012 1012 default:
1013 1013 dboot_panic("Unknown multiboot version: %d\n",
1014 1014 multiboot_version);
1015 1015 break;
1016 1016 }
1017 1017 return (0);
1018 1018 }
1019 1019
1020 1020 static uint32_t
1021 1021 dboot_multiboot_modstart(int index)
1022 1022 {
1023 1023 switch (multiboot_version) {
1024 1024 case 1:
1025 1025 return (((mb_module_t *)mb_info->mods_addr)[index].mod_start);
1026 1026
1027 1027 case 2:
1028 1028 return (dboot_multiboot2_modstart(mb2_info, index));
1029 1029
1030 1030 default:
1031 1031 dboot_panic("Unknown multiboot version: %d\n",
1032 1032 multiboot_version);
1033 1033 break;
1034 1034 }
1035 1035 return (0);
1036 1036 }
1037 1037
1038 1038 static uint32_t
1039 1039 dboot_multiboot_modend(int index)
1040 1040 {
1041 1041 switch (multiboot_version) {
1042 1042 case 1:
1043 1043 return (((mb_module_t *)mb_info->mods_addr)[index].mod_end);
1044 1044
1045 1045 case 2:
1046 1046 return (dboot_multiboot2_modend(mb2_info, index));
1047 1047
1048 1048 default:
1049 1049 dboot_panic("Unknown multiboot version: %d\n",
1050 1050 multiboot_version);
1051 1051 break;
1052 1052 }
1053 1053 return (0);
1054 1054 }
1055 1055
1056 1056 static char *
1057 1057 dboot_multiboot_modcmdline(int index)
1058 1058 {
1059 1059 switch (multiboot_version) {
1060 1060 case 1:
1061 1061 return ((char *)((mb_module_t *)
1062 1062 mb_info->mods_addr)[index].mod_name);
1063 1063
1064 1064 case 2:
1065 1065 return (dboot_multiboot2_modcmdline(mb2_info, index));
1066 1066
1067 1067 default:
1068 1068 dboot_panic("Unknown multiboot version: %d\n",
1069 1069 multiboot_version);
1070 1070 break;
1071 1071 }
1072 1072 return (0);
1073 1073 }
1074 1074
1075 1075 /*
1076 1076 * Find the modules used by console setup.
1077 1077 * Since we need the console to print early boot messages, the console is set up
1078 1078 * before anything else and therefore we need to pick up the needed modules.
1079 1079 *
1080 1080 * Note, we just will search for and if found, will pass the modules
1081 1081 * to console setup, the proper module list processing will happen later.
1082 1082 * Currently used modules are boot environment and console font.
1083 1083 */
1084 1084 static void
1085 1085 dboot_find_console_modules(void)
1086 1086 {
1087 1087 int i, modcount;
1088 1088 uint32_t mod_start, mod_end;
1089 1089 char *cmdline;
1090 1090
1091 1091 modcount = dboot_multiboot_modcount();
1092 1092 bi->bi_module_cnt = 0;
1093 1093 for (i = 0; i < modcount; ++i) {
1094 1094 cmdline = dboot_multiboot_modcmdline(i);
1095 1095 if (cmdline == NULL)
1096 1096 continue;
1097 1097
1098 1098 if (strstr(cmdline, "type=console-font") != NULL)
1099 1099 modules[bi->bi_module_cnt].bm_type = BMT_FONT;
1100 1100 else if (strstr(cmdline, "type=environment") != NULL)
1101 1101 modules[bi->bi_module_cnt].bm_type = BMT_ENV;
1102 1102 else
1103 1103 continue;
1104 1104
1105 1105 mod_start = dboot_multiboot_modstart(i);
1106 1106 mod_end = dboot_multiboot_modend(i);
1107 1107 modules[bi->bi_module_cnt].bm_addr =
1108 1108 (native_ptr_t)(uintptr_t)mod_start;
1109 1109 modules[bi->bi_module_cnt].bm_size = mod_end - mod_start;
1110 1110 modules[bi->bi_module_cnt].bm_name =
1111 1111 (native_ptr_t)(uintptr_t)NULL;
1112 1112 modules[bi->bi_module_cnt].bm_hash =
1113 1113 (native_ptr_t)(uintptr_t)NULL;
1114 1114 bi->bi_module_cnt++;
1115 1115 }
1116 1116 if (bi->bi_module_cnt != 0)
1117 1117 bi->bi_modules = (native_ptr_t)(uintptr_t)modules;
1118 1118 }
1119 1119
1120 1120 static boolean_t
1121 1121 dboot_multiboot_basicmeminfo(uint32_t *lower, uint32_t *upper)
1122 1122 {
1123 1123 boolean_t rv = B_FALSE;
1124 1124
1125 1125 switch (multiboot_version) {
1126 1126 case 1:
1127 1127 if (mb_info->flags & 0x01) {
1128 1128 *lower = mb_info->mem_lower;
1129 1129 *upper = mb_info->mem_upper;
1130 1130 rv = B_TRUE;
1131 1131 }
1132 1132 break;
1133 1133
1134 1134 case 2:
1135 1135 return (dboot_multiboot2_basicmeminfo(mb2_info, lower, upper));
1136 1136
1137 1137 default:
1138 1138 dboot_panic("Unknown multiboot version: %d\n",
1139 1139 multiboot_version);
1140 1140 break;
1141 1141 }
1142 1142 return (rv);
1143 1143 }
1144 1144
/*
 * Convert one ASCII hexadecimal digit ('0'-'9', 'a'-'f', 'A'-'F') to its
 * numeric value (0x0-0xf).
 *
 * Every range is checked on both ends; any other character is reported
 * through dboot_panic() instead of being silently converted to a value
 * that does not fit in a nibble.
 */
static uint8_t
dboot_a2h(char v)
{
	if (v >= '0' && v <= '9')
		return (v - '0');
	if (v >= 'a' && v <= 'f')
		return (v - 'a' + 0xa);
	if (v >= 'A' && v <= 'F')
		return (v - 'A' + 0xa);

	dboot_panic("bad ASCII hex character %c\n", v);

	return (0);
}
1159 1159
1160 1160 static void
1161 1161 digest_a2h(const char *ascii, uint8_t *digest)
1162 1162 {
1163 1163 unsigned int i;
1164 1164
1165 1165 for (i = 0; i < SHA1_DIGEST_LENGTH; i++) {
1166 1166 digest[i] = dboot_a2h(ascii[i * 2]) << 4;
1167 1167 digest[i] |= dboot_a2h(ascii[i * 2 + 1]);
1168 1168 }
1169 1169 }
1170 1170
1171 1171 /*
1172 1172 * Generate a SHA-1 hash of the first len bytes of image, and compare it with
1173 1173 * the ASCII-format hash found in the 40-byte buffer at ascii. If they
1174 1174 * match, return 0, otherwise -1. This works only for images smaller than
1175 1175 * 4 GB, which should not be a problem.
1176 1176 */
1177 1177 static int
1178 1178 check_image_hash(uint_t midx)
1179 1179 {
1180 1180 const char *ascii;
1181 1181 const void *image;
1182 1182 size_t len;
1183 1183 SHA1_CTX ctx;
1184 1184 uint8_t digest[SHA1_DIGEST_LENGTH];
1185 1185 uint8_t baseline[SHA1_DIGEST_LENGTH];
1186 1186 unsigned int i;
1187 1187
1188 1188 ascii = (const char *)(uintptr_t)modules[midx].bm_hash;
1189 1189 image = (const void *)(uintptr_t)modules[midx].bm_addr;
1190 1190 len = (size_t)modules[midx].bm_size;
1191 1191
1192 1192 digest_a2h(ascii, baseline);
1193 1193
1194 1194 SHA1Init(&ctx);
1195 1195 SHA1Update(&ctx, image, len);
1196 1196 SHA1Final(digest, &ctx);
1197 1197
1198 1198 for (i = 0; i < SHA1_DIGEST_LENGTH; i++) {
1199 1199 if (digest[i] != baseline[i])
1200 1200 return (-1);
1201 1201 }
1202 1202
1203 1203 return (0);
1204 1204 }
1205 1205
1206 1206 static const char *
1207 1207 type_to_str(boot_module_type_t type)
1208 1208 {
1209 1209 switch (type) {
1210 1210 case BMT_ROOTFS:
1211 1211 return ("rootfs");
1212 1212 case BMT_FILE:
1213 1213 return ("file");
1214 1214 case BMT_HASH:
1215 1215 return ("hash");
1216 1216 case BMT_ENV:
1217 1217 return ("environment");
1218 1218 case BMT_FONT:
1219 1219 return ("console-font");
1220 1220 default:
1221 1221 return ("unknown");
1222 1222 }
1223 1223 }
1224 1224
1225 1225 static void
1226 1226 check_images(void)
1227 1227 {
1228 1228 uint_t i;
1229 1229 char displayhash[SHA1_ASCII_LENGTH + 1];
1230 1230
1231 1231 for (i = 0; i < modules_used; i++) {
1232 1232 if (prom_debug) {
1233 1233 dboot_printf("module #%d: name %s type %s "
1234 1234 "addr %lx size %lx\n",
1235 1235 i, (char *)(uintptr_t)modules[i].bm_name,
1236 1236 type_to_str(modules[i].bm_type),
1237 1237 (ulong_t)modules[i].bm_addr,
1238 1238 (ulong_t)modules[i].bm_size);
1239 1239 }
1240 1240
1241 1241 if (modules[i].bm_type == BMT_HASH ||
1242 1242 modules[i].bm_hash == (native_ptr_t)(uintptr_t)NULL) {
1243 1243 DBG_MSG("module has no hash; skipping check\n");
1244 1244 continue;
1245 1245 }
1246 1246 (void) memcpy(displayhash,
1247 1247 (void *)(uintptr_t)modules[i].bm_hash,
1248 1248 SHA1_ASCII_LENGTH);
1249 1249 displayhash[SHA1_ASCII_LENGTH] = '\0';
1250 1250 if (prom_debug) {
1251 1251 dboot_printf("checking expected hash [%s]: ",
1252 1252 displayhash);
1253 1253 }
1254 1254
1255 1255 if (check_image_hash(i) != 0)
1256 1256 dboot_panic("hash mismatch!\n");
1257 1257 else
1258 1258 DBG_MSG("OK\n");
1259 1259 }
1260 1260 }
1261 1261
1262 1262 /*
1263 1263 * Determine the module's starting address, size, name, and type, and fill the
1264 1264 * boot_modules structure. This structure is used by the bop code, except for
1265 1265 * hashes which are checked prior to transferring control to the kernel.
1266 1266 */
1267 1267 static void
1268 1268 process_module(int midx)
1269 1269 {
1270 1270 uint32_t mod_start = dboot_multiboot_modstart(midx);
1271 1271 uint32_t mod_end = dboot_multiboot_modend(midx);
1272 1272 char *cmdline = dboot_multiboot_modcmdline(midx);
1273 1273 char *p, *q;
1274 1274
1275 1275 check_higher(mod_end);
1276 1276 if (prom_debug) {
1277 1277 dboot_printf("\tmodule #%d: '%s' at 0x%lx, end 0x%lx\n",
1278 1278 midx, cmdline, (ulong_t)mod_start, (ulong_t)mod_end);
1279 1279 }
1280 1280
1281 1281 if (mod_start > mod_end) {
1282 1282 dboot_panic("module #%d: module start address 0x%lx greater "
1283 1283 "than end address 0x%lx", midx,
1284 1284 (ulong_t)mod_start, (ulong_t)mod_end);
1285 1285 }
1286 1286
1287 1287 /*
1288 1288 * A brief note on lengths and sizes: GRUB, for reasons unknown, passes
1289 1289 * the address of the last valid byte in a module plus 1 as mod_end.
1290 1290 * This is of course a bug; the multiboot specification simply states
1291 1291 * that mod_start and mod_end "contain the start and end addresses of
1292 1292 * the boot module itself" which is pretty obviously not what GRUB is
1293 1293 * doing. However, fixing it requires that not only this code be
1294 1294 * changed but also that other code consuming this value and values
1295 1295 * derived from it be fixed, and that the kernel and GRUB must either
1296 1296 * both have the bug or neither. While there are a lot of combinations
1297 1297 * that will work, there are also some that won't, so for simplicity
1298 1298 * we'll just cope with the bug. That means we won't actually hash the
1299 1299 * byte at mod_end, and we will expect that mod_end for the hash file
1300 1300 * itself is one greater than some multiple of 41 (40 bytes of ASCII
1301 1301 * hash plus a newline for each module). We set bm_size to the true
1302 1302 * correct number of bytes in each module, achieving exactly this.
1303 1303 */
1304 1304
1305 1305 modules[midx].bm_addr = (native_ptr_t)(uintptr_t)mod_start;
1306 1306 modules[midx].bm_size = mod_end - mod_start;
1307 1307 modules[midx].bm_name = (native_ptr_t)(uintptr_t)cmdline;
1308 1308 modules[midx].bm_hash = (native_ptr_t)(uintptr_t)NULL;
1309 1309 modules[midx].bm_type = BMT_FILE;
1310 1310
1311 1311 if (cmdline == NULL) {
1312 1312 modules[midx].bm_name = (native_ptr_t)(uintptr_t)noname;
1313 1313 return;
1314 1314 }
1315 1315
1316 1316 p = cmdline;
1317 1317 modules[midx].bm_name =
1318 1318 (native_ptr_t)(uintptr_t)strsep(&p, " \t\f\n\r");
1319 1319
1320 1320 while (p != NULL) {
1321 1321 q = strsep(&p, " \t\f\n\r");
1322 1322 if (strncmp(q, "name=", 5) == 0) {
1323 1323 if (q[5] != '\0' && !isspace(q[5])) {
1324 1324 modules[midx].bm_name =
1325 1325 (native_ptr_t)(uintptr_t)(q + 5);
1326 1326 }
1327 1327 continue;
1328 1328 }
1329 1329
1330 1330 if (strncmp(q, "type=", 5) == 0) {
1331 1331 if (q[5] == '\0' || isspace(q[5]))
1332 1332 continue;
1333 1333 q += 5;
1334 1334 if (strcmp(q, "rootfs") == 0) {
1335 1335 modules[midx].bm_type = BMT_ROOTFS;
1336 1336 } else if (strcmp(q, "hash") == 0) {
1337 1337 modules[midx].bm_type = BMT_HASH;
1338 1338 } else if (strcmp(q, "environment") == 0) {
1339 1339 modules[midx].bm_type = BMT_ENV;
1340 1340 } else if (strcmp(q, "console-font") == 0) {
1341 1341 modules[midx].bm_type = BMT_FONT;
1342 1342 } else if (strcmp(q, "file") != 0) {
1343 1343 dboot_printf("\tmodule #%d: unknown module "
1344 1344 "type '%s'; defaulting to 'file'\n",
1345 1345 midx, q);
1346 1346 }
1347 1347 continue;
1348 1348 }
1349 1349
1350 1350 if (strncmp(q, "hash=", 5) == 0) {
1351 1351 if (q[5] != '\0' && !isspace(q[5])) {
1352 1352 modules[midx].bm_hash =
1353 1353 (native_ptr_t)(uintptr_t)(q + 5);
1354 1354 }
1355 1355 continue;
1356 1356 }
1357 1357
1358 1358 dboot_printf("ignoring unknown option '%s'\n", q);
1359 1359 }
1360 1360 }
1361 1361
1362 1362 /*
1363 1363 * Backward compatibility: if there are exactly one or two modules, both
1364 1364 * of type 'file' and neither with an embedded hash value, we have been
1365 1365 * given the legacy style modules. In this case we need to treat the first
1366 1366 * module as a rootfs and the second as a hash referencing that module.
1367 1367 * Otherwise, even if the configuration is invalid, we assume that the
1368 1368 * operator knows what he's doing or at least isn't being bitten by this
1369 1369 * interface change.
1370 1370 */
1371 1371 static void
1372 1372 fixup_modules(void)
1373 1373 {
1374 1374 if (modules_used == 0 || modules_used > 2)
1375 1375 return;
1376 1376
1377 1377 if (modules[0].bm_type != BMT_FILE ||
1378 1378 modules_used > 1 && modules[1].bm_type != BMT_FILE) {
1379 1379 return;
1380 1380 }
1381 1381
1382 1382 if (modules[0].bm_hash != (native_ptr_t)(uintptr_t)NULL ||
1383 1383 modules_used > 1 &&
1384 1384 modules[1].bm_hash != (native_ptr_t)(uintptr_t)NULL) {
1385 1385 return;
1386 1386 }
1387 1387
1388 1388 modules[0].bm_type = BMT_ROOTFS;
1389 1389 if (modules_used > 1) {
1390 1390 modules[1].bm_type = BMT_HASH;
1391 1391 modules[1].bm_name = modules[0].bm_name;
1392 1392 }
1393 1393 }
1394 1394
1395 1395 /*
1396 1396 * For modules that do not have assigned hashes but have a separate hash module,
1397 1397 * find the assigned hash module and set the primary module's bm_hash to point
1398 1398 * to the hash data from that module. We will then ignore modules of type
1399 1399 * BMT_HASH from this point forward.
1400 1400 */
1401 1401 static void
1402 1402 assign_module_hashes(void)
1403 1403 {
1404 1404 uint_t i, j;
1405 1405
1406 1406 for (i = 0; i < modules_used; i++) {
1407 1407 if (modules[i].bm_type == BMT_HASH ||
1408 1408 modules[i].bm_hash != (native_ptr_t)(uintptr_t)NULL) {
1409 1409 continue;
1410 1410 }
1411 1411
1412 1412 for (j = 0; j < modules_used; j++) {
1413 1413 if (modules[j].bm_type != BMT_HASH ||
1414 1414 strcmp((char *)(uintptr_t)modules[j].bm_name,
1415 1415 (char *)(uintptr_t)modules[i].bm_name) != 0) {
1416 1416 continue;
1417 1417 }
1418 1418
1419 1419 if (modules[j].bm_size < SHA1_ASCII_LENGTH) {
1420 1420 dboot_printf("Short hash module of length "
1421 1421 "0x%lx bytes; ignoring\n",
1422 1422 (ulong_t)modules[j].bm_size);
1423 1423 } else {
1424 1424 modules[i].bm_hash = modules[j].bm_addr;
1425 1425 }
1426 1426 break;
1427 1427 }
1428 1428 }
1429 1429 }
1430 1430
1431 1431 /*
1432 1432 * Walk through the module information finding the last used address.
1433 1433 * The first available address will become the top level page table.
1434 1434 */
1435 1435 static void
1436 1436 dboot_process_modules(void)
1437 1437 {
1438 1438 int i, modcount;
1439 1439 extern char _end[];
1440 1440
1441 1441 DBG_MSG("\nFinding Modules\n");
1442 1442 modcount = dboot_multiboot_modcount();
1443 1443 if (modcount > MAX_BOOT_MODULES) {
1444 1444 dboot_panic("Too many modules (%d) -- the maximum is %d.",
1445 1445 modcount, MAX_BOOT_MODULES);
1446 1446 }
1447 1447 /*
1448 1448 * search the modules to find the last used address
1449 1449 * we'll build the module list while we're walking through here
1450 1450 */
1451 1451 check_higher((paddr_t)(uintptr_t)&_end);
1452 1452 for (i = 0; i < modcount; ++i) {
1453 1453 process_module(i);
1454 1454 modules_used++;
1455 1455 }
1456 1456 bi->bi_modules = (native_ptr_t)(uintptr_t)modules;
1457 1457 DBG(bi->bi_modules);
1458 1458 bi->bi_module_cnt = modcount;
1459 1459 DBG(bi->bi_module_cnt);
1460 1460
1461 1461 fixup_modules();
1462 1462 assign_module_hashes();
1463 1463 check_images();
1464 1464 }
1465 1465
1466 1466 /*
1467 1467 * We then build the phys_install memlist from the multiboot information.
1468 1468 */
1469 1469 static void
1470 1470 dboot_process_mmap(void)
1471 1471 {
1472 1472 uint64_t start;
1473 1473 uint64_t end;
1474 1474 uint64_t page_offset = MMU_PAGEOFFSET; /* needs to be 64 bits */
1475 1475 uint32_t lower, upper;
1476 1476 int i, mmap_entries;
1477 1477
1478 1478 /*
1479 1479 * Walk through the memory map from multiboot and build our memlist
1480 1480 * structures. Note these will have native format pointers.
1481 1481 */
1482 1482 DBG_MSG("\nFinding Memory Map\n");
1483 1483 num_entries = 0;
1484 1484 num_entries_set = B_FALSE;
1485 1485 max_mem = 0;
1486 1486 if ((mmap_entries = dboot_loader_mmap_entries()) > 0) {
1487 1487 for (i = 0; i < mmap_entries; i++) {
1488 1488 uint32_t type = dboot_loader_mmap_get_type(i);
1489 1489 start = dboot_loader_mmap_get_base(i);
1490 1490 end = start + dboot_loader_mmap_get_length(i);
1491 1491
1492 1492 if (prom_debug)
1493 1493 dboot_printf("\ttype: %d %" PRIx64 "..%"
1494 1494 PRIx64 "\n", type, start, end);
1495 1495
1496 1496 /*
1497 1497 * page align start and end
1498 1498 */
1499 1499 start = (start + page_offset) & ~page_offset;
1500 1500 end &= ~page_offset;
1501 1501 if (end <= start)
1502 1502 continue;
1503 1503
1504 1504 /*
1505 1505 * only type 1 is usable RAM
1506 1506 */
1507 1507 switch (type) {
1508 1508 case 1:
1509 1509 if (end > max_mem)
1510 1510 max_mem = end;
1511 1511 memlists[memlists_used].addr = start;
1512 1512 memlists[memlists_used].size = end - start;
1513 1513 ++memlists_used;
1514 1514 if (memlists_used > MAX_MEMLIST)
1515 1515 dboot_panic("too many memlists");
1516 1516 break;
1517 1517 case 2:
1518 1518 rsvdmemlists[rsvdmemlists_used].addr = start;
1519 1519 rsvdmemlists[rsvdmemlists_used].size =
1520 1520 end - start;
1521 1521 ++rsvdmemlists_used;
1522 1522 if (rsvdmemlists_used > MAX_MEMLIST)
1523 1523 dboot_panic("too many rsvdmemlists");
1524 1524 break;
1525 1525 default:
1526 1526 continue;
1527 1527 }
1528 1528 }
1529 1529 build_pcimemlists();
1530 1530 } else if (dboot_multiboot_basicmeminfo(&lower, &upper)) {
1531 1531 DBG(lower);
1532 1532 memlists[memlists_used].addr = 0;
1533 1533 memlists[memlists_used].size = lower * 1024;
1534 1534 ++memlists_used;
1535 1535 DBG(upper);
1536 1536 memlists[memlists_used].addr = 1024 * 1024;
1537 1537 memlists[memlists_used].size = upper * 1024;
1538 1538 ++memlists_used;
1539 1539
1540 1540 /*
1541 1541 * Old platform - assume I/O space at the end of memory.
1542 1542 */
1543 1543 pcimemlists[0].addr = (upper * 1024) + (1024 * 1024);
1544 1544 pcimemlists[0].size = pci_hi_limit - pcimemlists[0].addr;
1545 1545 pcimemlists[0].next = 0;
1546 1546 pcimemlists[0].prev = 0;
1547 1547 bi->bi_pcimem = (native_ptr_t)(uintptr_t)pcimemlists;
1548 1548 DBG(bi->bi_pcimem);
1549 1549 } else {
1550 1550 dboot_panic("No memory info from boot loader!!!");
1551 1551 }
1552 1552
1553 1553 /*
1554 1554 * finish processing the physinstall list
1555 1555 */
1556 1556 sort_physinstall();
1557 1557
1558 1558 /*
1559 1559 * build bios reserved mem lists
1560 1560 */
1561 1561 build_rsvdmemlists();
1562 1562 }
1563 1563
1564 1564 /*
1565 1565 * The highest address is used as the starting point for dboot's simple
1566 1566 * memory allocator.
1567 1567 *
1568 1568 * Finding the highest address in case of Multiboot 1 protocol is
1569 1569 * quite painful in the sense that some information provided by
1570 1570 * the multiboot info structure points to BIOS data, and some to RAM.
1571 1571 *
1572 1572 * The module list was processed and checked already by dboot_process_modules(),
1573 1573 * so we will check the command line string and the memory map.
1574 1574 *
1575 1575 * This list of to be checked items is based on our current knowledge of
1576 1576 * allocations made by grub1 and will need to be reviewed if there
1577 1577 * are updates about the information provided by Multiboot 1.
1578 1578 *
1579 1579 * In the case of the Multiboot 2, our life is much simpler, as the MB2
1580 1580 * information tag list is one contiguous chunk of memory.
1581 1581 */
1582 1582 static paddr_t
1583 1583 dboot_multiboot1_highest_addr(void)
1584 1584 {
1585 1585 paddr_t addr = (paddr_t)(uintptr_t)NULL;
1586 1586 char *cmdl = (char *)mb_info->cmdline;
1587 1587
1588 1588 if (mb_info->flags & MB_INFO_CMDLINE)
1589 1589 addr = ((paddr_t)((uintptr_t)cmdl + strlen(cmdl) + 1));
1590 1590
1591 1591 if (mb_info->flags & MB_INFO_MEM_MAP)
1592 1592 addr = MAX(addr,
1593 1593 ((paddr_t)(mb_info->mmap_addr + mb_info->mmap_length)));
1594 1594 return (addr);
1595 1595 }
1596 1596
1597 1597 static void
1598 1598 dboot_multiboot_highest_addr(void)
1599 1599 {
1600 1600 paddr_t addr;
1601 1601
1602 1602 switch (multiboot_version) {
1603 1603 case 1:
1604 1604 addr = dboot_multiboot1_highest_addr();
1605 1605 if (addr != (paddr_t)(uintptr_t)NULL)
1606 1606 check_higher(addr);
1607 1607 break;
1608 1608 case 2:
1609 1609 addr = dboot_multiboot2_highest_addr(mb2_info);
1610 1610 if (addr != (paddr_t)(uintptr_t)NULL)
1611 1611 check_higher(addr);
1612 1612 break;
1613 1613 default:
1614 1614 dboot_panic("Unknown multiboot version: %d\n",
1615 1615 multiboot_version);
1616 1616 break;
1617 1617 }
1618 1618 }
1619 1619
/*
 * Prepare dboot's memory allocator by walking the information provided by
 * the boot loader: first the modules, then the memory map, and finally the
 * remaining multiboot data, to find the highest address in use.
 */
static void
init_mem_alloc(void)
{
	DBG_MSG("Entered init_mem_alloc()\n");

	dboot_process_modules();
	dboot_process_mmap();
	dboot_multiboot_highest_addr();
}
1631 1631
1632 1632 static int
1633 1633 dboot_same_guids(efi_guid_t *g1, efi_guid_t *g2)
1634 1634 {
1635 1635 int i;
1636 1636
1637 1637 if (g1->time_low != g2->time_low)
1638 1638 return (0);
1639 1639 if (g1->time_mid != g2->time_mid)
1640 1640 return (0);
1641 1641 if (g1->time_hi_and_version != g2->time_hi_and_version)
1642 1642 return (0);
1643 1643 if (g1->clock_seq_hi_and_reserved != g2->clock_seq_hi_and_reserved)
1644 1644 return (0);
1645 1645 if (g1->clock_seq_low != g2->clock_seq_low)
1646 1646 return (0);
1647 1647
1648 1648 for (i = 0; i < 6; i++) {
1649 1649 if (g1->node_addr[i] != g2->node_addr[i])
1650 1650 return (0);
1651 1651 }
1652 1652 return (1);
1653 1653 }
1654 1654
1655 1655 static void
1656 1656 process_efi32(EFI_SYSTEM_TABLE32 *efi)
1657 1657 {
1658 1658 uint32_t entries;
1659 1659 EFI_CONFIGURATION_TABLE32 *config;
1660 1660 efi_guid_t VendorGuid;
1661 1661 int i;
1662 1662
1663 1663 entries = efi->NumberOfTableEntries;
1664 1664 config = (EFI_CONFIGURATION_TABLE32 *)(uintptr_t)
1665 1665 efi->ConfigurationTable;
1666 1666
1667 1667 for (i = 0; i < entries; i++) {
1668 1668 (void) memcpy(&VendorGuid, &config[i].VendorGuid,
1669 1669 sizeof (VendorGuid));
1670 1670 if (dboot_same_guids(&VendorGuid, &smbios3)) {
1671 1671 bi->bi_smbios = (native_ptr_t)(uintptr_t)
1672 1672 config[i].VendorTable;
1673 1673 }
1674 1674 if (bi->bi_smbios == 0 &&
1675 1675 dboot_same_guids(&VendorGuid, &smbios)) {
1676 1676 bi->bi_smbios = (native_ptr_t)(uintptr_t)
1677 1677 config[i].VendorTable;
1678 1678 }
1679 1679 if (dboot_same_guids(&VendorGuid, &acpi2)) {
1680 1680 bi->bi_acpi_rsdp = (native_ptr_t)(uintptr_t)
1681 1681 config[i].VendorTable;
1682 1682 }
1683 1683 if (bi->bi_acpi_rsdp == 0 &&
1684 1684 dboot_same_guids(&VendorGuid, &acpi1)) {
1685 1685 bi->bi_acpi_rsdp = (native_ptr_t)(uintptr_t)
1686 1686 config[i].VendorTable;
1687 1687 }
1688 1688 }
1689 1689 }
1690 1690
1691 1691 static void
1692 1692 process_efi64(EFI_SYSTEM_TABLE64 *efi)
1693 1693 {
1694 1694 uint64_t entries;
1695 1695 EFI_CONFIGURATION_TABLE64 *config;
1696 1696 efi_guid_t VendorGuid;
1697 1697 int i;
1698 1698
1699 1699 entries = efi->NumberOfTableEntries;
1700 1700 config = (EFI_CONFIGURATION_TABLE64 *)(uintptr_t)
1701 1701 efi->ConfigurationTable;
1702 1702
1703 1703 for (i = 0; i < entries; i++) {
1704 1704 (void) memcpy(&VendorGuid, &config[i].VendorGuid,
1705 1705 sizeof (VendorGuid));
1706 1706 if (dboot_same_guids(&VendorGuid, &smbios3)) {
1707 1707 bi->bi_smbios = (native_ptr_t)(uintptr_t)
1708 1708 config[i].VendorTable;
1709 1709 }
1710 1710 if (bi->bi_smbios == 0 &&
1711 1711 dboot_same_guids(&VendorGuid, &smbios)) {
1712 1712 bi->bi_smbios = (native_ptr_t)(uintptr_t)
1713 1713 config[i].VendorTable;
1714 1714 }
1715 1715 /* Prefer acpi v2+ over v1. */
1716 1716 if (dboot_same_guids(&VendorGuid, &acpi2)) {
1717 1717 bi->bi_acpi_rsdp = (native_ptr_t)(uintptr_t)
1718 1718 config[i].VendorTable;
1719 1719 }
1720 1720 if (bi->bi_acpi_rsdp == 0 &&
1721 1721 dboot_same_guids(&VendorGuid, &acpi1)) {
1722 1722 bi->bi_acpi_rsdp = (native_ptr_t)(uintptr_t)
1723 1723 config[i].VendorTable;
1724 1724 }
1725 1725 }
1726 1726 }
1727 1727
/*
 * Collect firmware table pointers from the Multiboot 2 information.
 *
 * Nothing is done unless multiboot_version is 2.  The EFI64 tag is looked
 * up first and the EFI32 tag only if that is absent; for whichever is
 * found, the UEFI architecture and system table pointer are stored in the
 * boot info and the matching process_efi64()/process_efi32() is called.
 * Afterwards the ACPI tags are looked up, the new-style tag taking
 * precedence over the old-style one.
 *
 * NOTE(review): this listing is a diff rendering; lines marked '-' are the
 * previous version (storing into bi_acpi_rsdp) and lines marked '+' the
 * new one (storing a pointer to the RSDP copy into bi_acpi_rsdp_copy).
 */
1728 1728 static void
1729 1729 dboot_multiboot_get_fwtables(void)
1730 1730 {
1731 1731 multiboot_tag_new_acpi_t *nacpitagp;
1732 1732 multiboot_tag_old_acpi_t *oacpitagp;
1733 1733 multiboot_tag_efi64_t *efi64tagp = NULL;
1734 1734 multiboot_tag_efi32_t *efi32tagp = NULL;
1735 1735
1736 1736 /* no fw tables from multiboot 1 */
1737 1737 if (multiboot_version != 2)
1738 1738 return;
1739 1739
1740 1740 efi64tagp = (multiboot_tag_efi64_t *)
1741 1741 dboot_multiboot2_find_tag(mb2_info, MULTIBOOT_TAG_TYPE_EFI64);
1742 1742 if (efi64tagp != NULL) {
1743 1743 bi->bi_uefi_arch = XBI_UEFI_ARCH_64;
1744 1744 bi->bi_uefi_systab = (native_ptr_t)(uintptr_t)
1745 1745 efi64tagp->mb_pointer;
1746 1746 process_efi64((EFI_SYSTEM_TABLE64 *)(uintptr_t)
1747 1747 efi64tagp->mb_pointer);
1748 1748 } else {
1749 1749 efi32tagp = (multiboot_tag_efi32_t *)
1750 1750 dboot_multiboot2_find_tag(mb2_info,
1751 1751 MULTIBOOT_TAG_TYPE_EFI32);
↓ open down ↓ |
1715 lines elided |
↑ open up ↑ |
1752 1752 if (efi32tagp != NULL) {
1753 1753 bi->bi_uefi_arch = XBI_UEFI_ARCH_32;
1754 1754 bi->bi_uefi_systab = (native_ptr_t)(uintptr_t)
1755 1755 efi32tagp->mb_pointer;
1756 1756 process_efi32((EFI_SYSTEM_TABLE32 *)(uintptr_t)
1757 1757 efi32tagp->mb_pointer);
1758 1758 }
1759 1759 }
1760 1760
1761 1761 /*
1762 - * The ACPI RSDP can be found by scanning the BIOS memory areas or
1763 - * from the EFI system table. The boot loader may pass in the address
1764 - * it found the ACPI tables at.
1762 + * The multiboot2 info contains a copy of the RSDP; stash a pointer to
1763 + * it (see find_rsdp() in fakebop).
1765 1764 */
1766 1765 nacpitagp = (multiboot_tag_new_acpi_t *)
1767 - dboot_multiboot2_find_tag(mb2_info,
1768 - MULTIBOOT_TAG_TYPE_ACPI_NEW);
1766 + dboot_multiboot2_find_tag(mb2_info, MULTIBOOT_TAG_TYPE_ACPI_NEW);
1769 1767 oacpitagp = (multiboot_tag_old_acpi_t *)
1770 - dboot_multiboot2_find_tag(mb2_info,
1771 - MULTIBOOT_TAG_TYPE_ACPI_OLD);
1768 + dboot_multiboot2_find_tag(mb2_info, MULTIBOOT_TAG_TYPE_ACPI_OLD);
1772 1769
1773 1770 if (nacpitagp != NULL) {
1774 - bi->bi_acpi_rsdp = (native_ptr_t)(uintptr_t)
1771 + bi->bi_acpi_rsdp_copy = (native_ptr_t)(uintptr_t)
1775 1772 &nacpitagp->mb_rsdp[0];
1776 1773 } else if (oacpitagp != NULL) {
1777 - bi->bi_acpi_rsdp = (native_ptr_t)(uintptr_t)
1774 + bi->bi_acpi_rsdp_copy = (native_ptr_t)(uintptr_t)
1778 1775 &oacpitagp->mb_rsdp[0];
1779 1776 }
1780 1777 }
1781 1778
/*
 * Print an EFI version number followed by a newline.  The major revision
 * is printed first; the minor revision is split into its tens and ones,
 * and the ones digit is only printed when it is not zero.
 */
static void
dboot_print_efi_version(uint32_t ver)
{
	int minor, tens, ones;

	dboot_printf("%d.", EFI_REV_MAJOR(ver));

	minor = EFI_REV_MINOR(ver);
	tens = minor / 10;
	ones = minor % 10;

	if (ones == 0) {
		dboot_printf("%d\n", tens);
	} else {
		dboot_printf("%d.%d\n", tens, ones);
	}
}
1797 1794
1798 1795 static void
1799 1796 print_efi32(EFI_SYSTEM_TABLE32 *efi)
1800 1797 {
1801 1798 uint16_t *data;
1802 1799 EFI_CONFIGURATION_TABLE32 *conf;
1803 1800 int i;
1804 1801
1805 1802 dboot_printf("EFI32 signature: %llx\n",
1806 1803 (unsigned long long)efi->Hdr.Signature);
1807 1804 dboot_printf("EFI system version: ");
1808 1805 dboot_print_efi_version(efi->Hdr.Revision);
1809 1806 dboot_printf("EFI system vendor: ");
1810 1807 data = (uint16_t *)(uintptr_t)efi->FirmwareVendor;
1811 1808 for (i = 0; data[i] != 0; i++)
1812 1809 dboot_printf("%c", (char)data[i]);
1813 1810 dboot_printf("\nEFI firmware revision: ");
1814 1811 dboot_print_efi_version(efi->FirmwareRevision);
1815 1812 dboot_printf("EFI system table number of entries: %d\n",
1816 1813 efi->NumberOfTableEntries);
1817 1814 conf = (EFI_CONFIGURATION_TABLE32 *)(uintptr_t)
1818 1815 efi->ConfigurationTable;
1819 1816 for (i = 0; i < (int)efi->NumberOfTableEntries; i++) {
1820 1817 dboot_printf("%d: 0x%x 0x%x 0x%x 0x%x 0x%x", i,
1821 1818 conf[i].VendorGuid.time_low,
1822 1819 conf[i].VendorGuid.time_mid,
1823 1820 conf[i].VendorGuid.time_hi_and_version,
1824 1821 conf[i].VendorGuid.clock_seq_hi_and_reserved,
1825 1822 conf[i].VendorGuid.clock_seq_low);
1826 1823 dboot_printf(" 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x\n",
1827 1824 conf[i].VendorGuid.node_addr[0],
1828 1825 conf[i].VendorGuid.node_addr[1],
1829 1826 conf[i].VendorGuid.node_addr[2],
1830 1827 conf[i].VendorGuid.node_addr[3],
1831 1828 conf[i].VendorGuid.node_addr[4],
1832 1829 conf[i].VendorGuid.node_addr[5]);
1833 1830 }
1834 1831 }
1835 1832
1836 1833 static void
1837 1834 print_efi64(EFI_SYSTEM_TABLE64 *efi)
1838 1835 {
1839 1836 uint16_t *data;
1840 1837 EFI_CONFIGURATION_TABLE64 *conf;
1841 1838 int i;
1842 1839
1843 1840 dboot_printf("EFI64 signature: %llx\n",
1844 1841 (unsigned long long)efi->Hdr.Signature);
1845 1842 dboot_printf("EFI system version: ");
1846 1843 dboot_print_efi_version(efi->Hdr.Revision);
1847 1844 dboot_printf("EFI system vendor: ");
1848 1845 data = (uint16_t *)(uintptr_t)efi->FirmwareVendor;
1849 1846 for (i = 0; data[i] != 0; i++)
1850 1847 dboot_printf("%c", (char)data[i]);
1851 1848 dboot_printf("\nEFI firmware revision: ");
1852 1849 dboot_print_efi_version(efi->FirmwareRevision);
1853 1850 dboot_printf("EFI system table number of entries: %" PRIu64 "\n",
1854 1851 efi->NumberOfTableEntries);
1855 1852 conf = (EFI_CONFIGURATION_TABLE64 *)(uintptr_t)
1856 1853 efi->ConfigurationTable;
1857 1854 for (i = 0; i < (int)efi->NumberOfTableEntries; i++) {
1858 1855 dboot_printf("%d: 0x%x 0x%x 0x%x 0x%x 0x%x", i,
1859 1856 conf[i].VendorGuid.time_low,
1860 1857 conf[i].VendorGuid.time_mid,
1861 1858 conf[i].VendorGuid.time_hi_and_version,
1862 1859 conf[i].VendorGuid.clock_seq_hi_and_reserved,
1863 1860 conf[i].VendorGuid.clock_seq_low);
1864 1861 dboot_printf(" 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x\n",
1865 1862 conf[i].VendorGuid.node_addr[0],
1866 1863 conf[i].VendorGuid.node_addr[1],
1867 1864 conf[i].VendorGuid.node_addr[2],
1868 1865 conf[i].VendorGuid.node_addr[3],
1869 1866 conf[i].VendorGuid.node_addr[4],
1870 1867 conf[i].VendorGuid.node_addr[5]);
1871 1868 }
1872 1869 }
1873 1870 #endif /* !__xpv */
1874 1871
1875 1872 /*
1876 1873 * Simple memory allocator, allocates aligned physical memory.
1877 1874 * Note that startup_kernel() only allocates memory, never frees.
1878 1875 * Memory usage just grows in an upward direction.
1879 1876 */
1880 1877 static void *
1881 1878 do_mem_alloc(uint32_t size, uint32_t align)
1882 1879 {
1883 1880 uint_t i;
1884 1881 uint64_t best;
1885 1882 uint64_t start;
1886 1883 uint64_t end;
1887 1884
1888 1885 /*
1889 1886 * make sure size is a multiple of pagesize
1890 1887 */
1891 1888 size = RNDUP(size, MMU_PAGESIZE);
1892 1889 next_avail_addr = RNDUP(next_avail_addr, align);
1893 1890
1894 1891 /*
1895 1892 * XXPV fixme joe
1896 1893 *
1897 1894 * a really large bootarchive that causes you to run out of memory
1898 1895 * may cause this to blow up
1899 1896 */
1900 1897 /* LINTED E_UNEXPECTED_UINT_PROMOTION */
1901 1898 best = (uint64_t)-size;
1902 1899 for (i = 0; i < memlists_used; ++i) {
1903 1900 start = memlists[i].addr;
1904 1901 #if defined(__xpv)
1905 1902 start += mfn_base;
1906 1903 #endif
1907 1904 end = start + memlists[i].size;
1908 1905
1909 1906 /*
1910 1907 * did we find the desired address?
1911 1908 */
1912 1909 if (start <= next_avail_addr && next_avail_addr + size <= end) {
1913 1910 best = next_avail_addr;
1914 1911 goto done;
1915 1912 }
1916 1913
1917 1914 /*
1918 1915 * if not is this address the best so far?
1919 1916 */
1920 1917 if (start > next_avail_addr && start < best &&
1921 1918 RNDUP(start, align) + size <= end)
1922 1919 best = RNDUP(start, align);
1923 1920 }
1924 1921
1925 1922 /*
1926 1923 * We didn't find exactly the address we wanted, due to going off the
1927 1924 * end of a memory region. Return the best found memory address.
1928 1925 */
1929 1926 done:
1930 1927 next_avail_addr = best + size;
1931 1928 #if defined(__xpv)
1932 1929 if (next_avail_addr > scratch_end)
1933 1930 dboot_panic("Out of mem next_avail: 0x%lx, scratch_end: "
1934 1931 "0x%lx", (ulong_t)next_avail_addr,
1935 1932 (ulong_t)scratch_end);
1936 1933 #endif
1937 1934 (void) memset((void *)(uintptr_t)best, 0, size);
1938 1935 return ((void *)(uintptr_t)best);
1939 1936 }
1940 1937
1941 1938 void *
1942 1939 mem_alloc(uint32_t size)
1943 1940 {
1944 1941 return (do_mem_alloc(size, MMU_PAGESIZE));
1945 1942 }
1946 1943
1947 1944
1948 1945 /*
1949 1946 * Build page tables to map all of memory used so far as well as the kernel.
1950 1947 */
1951 1948 static void
1952 1949 build_page_tables(void)
1953 1950 {
1954 1951 uint32_t psize;
1955 1952 uint32_t level;
1956 1953 uint32_t off;
1957 1954 uint64_t start;
1958 1955 #if !defined(__xpv)
1959 1956 uint32_t i;
1960 1957 uint64_t end;
1961 1958 #endif /* __xpv */
1962 1959
1963 1960 /*
1964 1961 * If we're on metal, we need to create the top level pagetable.
1965 1962 */
1966 1963 #if defined(__xpv)
1967 1964 top_page_table = (paddr_t)(uintptr_t)xen_info->pt_base;
1968 1965 #else /* __xpv */
1969 1966 top_page_table = (paddr_t)(uintptr_t)mem_alloc(MMU_PAGESIZE);
1970 1967 #endif /* __xpv */
1971 1968 DBG((uintptr_t)top_page_table);
1972 1969
1973 1970 /*
1974 1971 * Determine if we'll use large mappings for kernel, then map it.
1975 1972 */
1976 1973 if (largepage_support) {
1977 1974 psize = lpagesize;
1978 1975 level = 1;
1979 1976 } else {
1980 1977 psize = MMU_PAGESIZE;
1981 1978 level = 0;
1982 1979 }
1983 1980
1984 1981 DBG_MSG("Mapping kernel\n");
1985 1982 DBG(ktext_phys);
1986 1983 DBG(target_kernel_text);
1987 1984 DBG(ksize);
1988 1985 DBG(psize);
1989 1986 for (off = 0; off < ksize; off += psize)
1990 1987 map_pa_at_va(ktext_phys + off, target_kernel_text + off, level);
1991 1988
1992 1989 /*
1993 1990 * The kernel will need a 1 page window to work with page tables
1994 1991 */
1995 1992 bi->bi_pt_window = (native_ptr_t)(uintptr_t)mem_alloc(MMU_PAGESIZE);
1996 1993 DBG(bi->bi_pt_window);
1997 1994 bi->bi_pte_to_pt_window =
1998 1995 (native_ptr_t)(uintptr_t)find_pte(bi->bi_pt_window, NULL, 0, 0);
1999 1996 DBG(bi->bi_pte_to_pt_window);
2000 1997
2001 1998 #if defined(__xpv)
2002 1999 if (!DOMAIN_IS_INITDOMAIN(xen_info)) {
2003 2000 /* If this is a domU we're done. */
2004 2001 DBG_MSG("\nPage tables constructed\n");
2005 2002 return;
2006 2003 }
2007 2004 #endif /* __xpv */
2008 2005
2009 2006 /*
2010 2007 * We need 1:1 mappings for the lower 1M of memory to access
2011 2008 * BIOS tables used by a couple of drivers during boot.
2012 2009 *
2013 2010 * The following code works because our simple memory allocator
2014 2011 * only grows usage in an upwards direction.
2015 2012 *
2016 2013 * Note that by this point in boot some mappings for low memory
2017 2014 * may already exist because we've already accessed device in low
2018 2015 * memory. (Specifically the video frame buffer and keyboard
2019 2016 * status ports.) If we're booting on raw hardware then GRUB
2020 2017 * created these mappings for us. If we're booting under a
2021 2018 * hypervisor then we went ahead and remapped these devices into
2022 2019 * memory allocated within dboot itself.
2023 2020 */
2024 2021 if (map_debug)
2025 2022 dboot_printf("1:1 map pa=0..1Meg\n");
2026 2023 for (start = 0; start < 1024 * 1024; start += MMU_PAGESIZE) {
2027 2024 #if defined(__xpv)
2028 2025 map_ma_at_va(start, start, 0);
2029 2026 #else /* __xpv */
2030 2027 map_pa_at_va(start, start, 0);
2031 2028 #endif /* __xpv */
2032 2029 }
2033 2030
2034 2031 #if !defined(__xpv)
2035 2032
2036 2033 for (i = 0; i < memlists_used; ++i) {
2037 2034 start = memlists[i].addr;
2038 2035 end = start + memlists[i].size;
2039 2036
2040 2037 if (map_debug)
2041 2038 dboot_printf("1:1 map pa=%" PRIx64 "..%" PRIx64 "\n",
2042 2039 start, end);
2043 2040 while (start < end && start < next_avail_addr) {
2044 2041 map_pa_at_va(start, start, 0);
2045 2042 start += MMU_PAGESIZE;
2046 2043 }
2047 2044 if (start >= next_avail_addr)
2048 2045 break;
2049 2046 }
2050 2047
2051 2048 /*
2052 2049 * Map framebuffer memory as PT_NOCACHE as this is memory from a
2053 2050 * device and therefore must not be cached.
2054 2051 */
2055 2052 if (fb != NULL && fb->framebuffer != 0) {
2056 2053 multiboot_tag_framebuffer_t *fb_tagp;
2057 2054 fb_tagp = (multiboot_tag_framebuffer_t *)(uintptr_t)
2058 2055 fb->framebuffer;
2059 2056
2060 2057 start = fb_tagp->framebuffer_common.framebuffer_addr;
2061 2058 end = start + fb_tagp->framebuffer_common.framebuffer_height *
2062 2059 fb_tagp->framebuffer_common.framebuffer_pitch;
2063 2060
2064 2061 if (map_debug)
2065 2062 dboot_printf("FB 1:1 map pa=%" PRIx64 "..%" PRIx64 "\n",
2066 2063 start, end);
2067 2064 pte_bits |= PT_NOCACHE;
2068 2065 if (PAT_support != 0)
2069 2066 pte_bits |= PT_PAT_4K;
2070 2067
2071 2068 while (start < end) {
2072 2069 map_pa_at_va(start, start, 0);
2073 2070 start += MMU_PAGESIZE;
2074 2071 }
2075 2072 pte_bits &= ~PT_NOCACHE;
2076 2073 if (PAT_support != 0)
2077 2074 pte_bits &= ~PT_PAT_4K;
2078 2075 }
2079 2076 #endif /* !__xpv */
2080 2077
2081 2078 DBG_MSG("\nPage tables constructed\n");
2082 2079 }
2083 2080
/*
 * Panic message for a kernel command line that still mentions "multiboot".
 */
#define	NO_MULTIBOOT	\
	"multiboot is no longer used to boot the Solaris Operating System.\n" \
	"The grub entry should be changed to:\n" \
	"kernel$ /platform/i86pc/kernel/$ISADIR/unix\n" \
	"module$ /platform/i86pc/$ISADIR/boot_archive\n" \
	"See http://illumos.org/msg/SUNOS-8000-AK for details.\n"
2090 2087
2091 2088 static void
2092 2089 dboot_init_xboot_consinfo(void)
2093 2090 {
2094 2091 bi = &boot_info;
2095 2092
2096 2093 #if !defined(__xpv)
2097 2094 fb = &framebuffer;
2098 2095 bi->bi_framebuffer = (native_ptr_t)(uintptr_t)fb;
2099 2096
2100 2097 switch (multiboot_version) {
2101 2098 case 1:
2102 2099 dboot_multiboot1_xboot_consinfo();
2103 2100 break;
2104 2101 case 2:
2105 2102 dboot_multiboot2_xboot_consinfo();
2106 2103 break;
2107 2104 default:
2108 2105 dboot_panic("Unknown multiboot version: %d\n",
2109 2106 multiboot_version);
2110 2107 break;
2111 2108 }
2112 2109 dboot_find_console_modules();
2113 2110 #endif
2114 2111 }
2115 2112
2116 2113 /*
2117 2114 * Set up basic data from the boot loader.
2118 2115 * The load_addr is part of AOUT kludge setup in dboot_grub.s, to support
2119 2116 * 32-bit dboot code setup used to set up and start 64-bit kernel.
2120 2117 * AOUT kludge does allow 32-bit boot loader, such as grub1, to load and
2121 2118 * start 64-bit illumos kernel.
2122 2119 */
2123 2120 static void
2124 2121 dboot_loader_init(void)
2125 2122 {
2126 2123 #if !defined(__xpv)
2127 2124 mb_info = NULL;
2128 2125 mb2_info = NULL;
2129 2126
2130 2127 switch (mb_magic) {
2131 2128 case MB_BOOTLOADER_MAGIC:
2132 2129 multiboot_version = 1;
2133 2130 mb_info = (multiboot_info_t *)(uintptr_t)mb_addr;
2134 2131 #if defined(_BOOT_TARGET_amd64)
2135 2132 load_addr = mb_header.load_addr;
2136 2133 #endif
2137 2134 break;
2138 2135
2139 2136 case MULTIBOOT2_BOOTLOADER_MAGIC:
2140 2137 multiboot_version = 2;
2141 2138 mb2_info = (multiboot2_info_header_t *)(uintptr_t)mb_addr;
2142 2139 mb2_mmap_tagp = dboot_multiboot2_get_mmap_tagp(mb2_info);
2143 2140 #if defined(_BOOT_TARGET_amd64)
2144 2141 load_addr = mb2_load_addr;
2145 2142 #endif
2146 2143 break;
2147 2144
2148 2145 default:
2149 2146 dboot_panic("Unknown bootloader magic: 0x%x\n", mb_magic);
2150 2147 break;
2151 2148 }
2152 2149 #endif /* !defined(__xpv) */
2153 2150 }
2154 2151
2155 2152 /* Extract the kernel command line from [multi]boot information. */
2156 2153 static char *
2157 2154 dboot_loader_cmdline(void)
2158 2155 {
2159 2156 char *line = NULL;
2160 2157
2161 2158 #if defined(__xpv)
2162 2159 line = (char *)xen_info->cmd_line;
2163 2160 #else /* __xpv */
2164 2161
2165 2162 switch (multiboot_version) {
2166 2163 case 1:
2167 2164 if (mb_info->flags & MB_INFO_CMDLINE)
2168 2165 line = (char *)mb_info->cmdline;
2169 2166 break;
2170 2167
2171 2168 case 2:
2172 2169 line = dboot_multiboot2_cmdline(mb2_info);
2173 2170 break;
2174 2171
2175 2172 default:
2176 2173 dboot_panic("Unknown multiboot version: %d\n",
2177 2174 multiboot_version);
2178 2175 break;
2179 2176 }
2180 2177
2181 2178 #endif /* __xpv */
2182 2179
2183 2180 /*
2184 2181 * Make sure we have valid pointer so the string operations
2185 2182 * will not crash us.
2186 2183 */
2187 2184 if (line == NULL)
2188 2185 line = "";
2189 2186
2190 2187 return (line);
2191 2188 }
2192 2189
2193 2190 static char *
2194 2191 dboot_loader_name(void)
2195 2192 {
2196 2193 #if defined(__xpv)
2197 2194 return (NULL);
2198 2195 #else /* __xpv */
2199 2196 multiboot_tag_string_t *tag;
2200 2197
2201 2198 switch (multiboot_version) {
2202 2199 case 1:
2203 2200 return ((char *)(uintptr_t)mb_info->boot_loader_name);
2204 2201
2205 2202 case 2:
2206 2203 tag = dboot_multiboot2_find_tag(mb2_info,
2207 2204 MULTIBOOT_TAG_TYPE_BOOT_LOADER_NAME);
2208 2205 return (tag->mb_string);
2209 2206 default:
2210 2207 dboot_panic("Unknown multiboot version: %d\n",
2211 2208 multiboot_version);
2212 2209 break;
2213 2210 }
2214 2211
2215 2212 return (NULL);
2216 2213 #endif /* __xpv */
2217 2214 }
2218 2215
2219 2216 /*
2220 2217 * startup_kernel has a pretty simple job. It builds pagetables which reflect
2221 2218 * 1:1 mappings for all memory in use. It then also adds mappings for
2222 2219 * the kernel nucleus at virtual address of target_kernel_text using large page
2223 2220 * mappings. The page table pages are also accessible at 1:1 mapped
2224 2221 * virtual addresses.
2225 2222 */
2226 2223 /*ARGSUSED*/
2227 2224 void
2228 2225 startup_kernel(void)
2229 2226 {
2230 2227 char *cmdline;
2231 2228 char *bootloader;
2232 2229 #if defined(__xpv)
2233 2230 physdev_set_iopl_t set_iopl;
2234 2231 #endif /* __xpv */
2235 2232
2236 2233 if (dboot_debug == 1)
2237 2234 bcons_init(NULL); /* Set very early console to ttya. */
2238 2235 dboot_loader_init();
2239 2236 /*
2240 2237 * At this point we are executing in a 32 bit real mode.
2241 2238 */
2242 2239
2243 2240 bootloader = dboot_loader_name();
2244 2241 cmdline = dboot_loader_cmdline();
2245 2242
2246 2243 #if defined(__xpv)
2247 2244 /*
2248 2245 * For dom0, before we initialize the console subsystem we'll
2249 2246 * need to enable io operations, so set I/O priveldge level to 1.
2250 2247 */
2251 2248 if (DOMAIN_IS_INITDOMAIN(xen_info)) {
2252 2249 set_iopl.iopl = 1;
2253 2250 (void) HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
2254 2251 }
2255 2252 #endif /* __xpv */
2256 2253
2257 2254 dboot_init_xboot_consinfo();
2258 2255 bi->bi_cmdline = (native_ptr_t)(uintptr_t)cmdline;
2259 2256 bcons_init(bi); /* Now we can set the real console. */
2260 2257
2261 2258 prom_debug = (find_boot_prop("prom_debug") != NULL);
2262 2259 map_debug = (find_boot_prop("map_debug") != NULL);
2263 2260
2264 2261 #if !defined(__xpv)
2265 2262 dboot_multiboot_get_fwtables();
2266 2263 #endif
2267 2264 DBG_MSG("\n\nillumos prekernel set: ");
2268 2265 DBG_MSG(cmdline);
2269 2266 DBG_MSG("\n");
2270 2267
2271 2268 if (bootloader != NULL && prom_debug) {
2272 2269 dboot_printf("Kernel loaded by: %s\n", bootloader);
2273 2270 #if !defined(__xpv)
2274 2271 dboot_printf("Using multiboot %d boot protocol.\n",
2275 2272 multiboot_version);
2276 2273 #endif
2277 2274 }
2278 2275
2279 2276 if (strstr(cmdline, "multiboot") != NULL) {
↓ open down ↓ |
492 lines elided |
↑ open up ↑ |
2280 2277 dboot_panic(NO_MULTIBOOT);
2281 2278 }
2282 2279
2283 2280 DBG((uintptr_t)bi);
2284 2281 #if !defined(__xpv)
2285 2282 DBG((uintptr_t)mb_info);
2286 2283 DBG((uintptr_t)mb2_info);
2287 2284 if (mb2_info != NULL)
2288 2285 DBG(mb2_info->mbi_total_size);
2289 2286 DBG(bi->bi_acpi_rsdp);
2287 + DBG(bi->bi_acpi_rsdp_copy);
2290 2288 DBG(bi->bi_smbios);
2291 2289 DBG(bi->bi_uefi_arch);
2292 2290 DBG(bi->bi_uefi_systab);
2293 2291
2294 2292 if (bi->bi_uefi_systab && prom_debug) {
2295 2293 if (bi->bi_uefi_arch == XBI_UEFI_ARCH_64) {
2296 2294 print_efi64((EFI_SYSTEM_TABLE64 *)(uintptr_t)
2297 2295 bi->bi_uefi_systab);
2298 2296 } else {
2299 2297 print_efi32((EFI_SYSTEM_TABLE32 *)(uintptr_t)
2300 2298 bi->bi_uefi_systab);
2301 2299 }
2302 2300 }
2303 2301 #endif
2304 2302
2305 2303 /*
2306 2304 * Need correct target_kernel_text value
2307 2305 */
2308 2306 #if defined(_BOOT_TARGET_amd64)
2309 2307 target_kernel_text = KERNEL_TEXT_amd64;
2310 2308 #elif defined(__xpv)
2311 2309 target_kernel_text = KERNEL_TEXT_i386_xpv;
2312 2310 #else
2313 2311 target_kernel_text = KERNEL_TEXT_i386;
2314 2312 #endif
2315 2313 DBG(target_kernel_text);
2316 2314
2317 2315 #if defined(__xpv)
2318 2316
2319 2317 /*
2320 2318 * XXPV Derive this stuff from CPUID / what the hypervisor has enabled
2321 2319 */
2322 2320
2323 2321 #if defined(_BOOT_TARGET_amd64)
2324 2322 /*
2325 2323 * 64-bit hypervisor.
2326 2324 */
2327 2325 amd64_support = 1;
2328 2326 pae_support = 1;
2329 2327
2330 2328 #else /* _BOOT_TARGET_amd64 */
2331 2329
2332 2330 /*
2333 2331 * See if we are running on a PAE Hypervisor
2334 2332 */
2335 2333 {
2336 2334 xen_capabilities_info_t caps;
2337 2335
2338 2336 if (HYPERVISOR_xen_version(XENVER_capabilities, &caps) != 0)
2339 2337 dboot_panic("HYPERVISOR_xen_version(caps) failed");
2340 2338 caps[sizeof (caps) - 1] = 0;
2341 2339 if (prom_debug)
2342 2340 dboot_printf("xen capabilities %s\n", caps);
2343 2341 if (strstr(caps, "x86_32p") != NULL)
2344 2342 pae_support = 1;
2345 2343 }
2346 2344
2347 2345 #endif /* _BOOT_TARGET_amd64 */
2348 2346 {
2349 2347 xen_platform_parameters_t p;
2350 2348
2351 2349 if (HYPERVISOR_xen_version(XENVER_platform_parameters, &p) != 0)
2352 2350 dboot_panic("HYPERVISOR_xen_version(parms) failed");
2353 2351 DBG(p.virt_start);
2354 2352 mfn_to_pfn_mapping = (pfn_t *)(xen_virt_start = p.virt_start);
2355 2353 }
2356 2354
2357 2355 /*
2358 2356 * The hypervisor loads stuff starting at 1Gig
2359 2357 */
2360 2358 mfn_base = ONE_GIG;
2361 2359 DBG(mfn_base);
2362 2360
2363 2361 /*
2364 2362 * enable writable page table mode for the hypervisor
2365 2363 */
2366 2364 if (HYPERVISOR_vm_assist(VMASST_CMD_enable,
2367 2365 VMASST_TYPE_writable_pagetables) < 0)
2368 2366 dboot_panic("HYPERVISOR_vm_assist(writable_pagetables) failed");
2369 2367
2370 2368 /*
2371 2369 * check for NX support
2372 2370 */
2373 2371 if (pae_support) {
2374 2372 uint32_t eax = 0x80000000;
2375 2373 uint32_t edx = get_cpuid_edx(&eax);
2376 2374
2377 2375 if (eax >= 0x80000001) {
2378 2376 eax = 0x80000001;
2379 2377 edx = get_cpuid_edx(&eax);
2380 2378 if (edx & CPUID_AMD_EDX_NX)
2381 2379 NX_support = 1;
2382 2380 }
2383 2381 }
2384 2382
2385 2383 /*
2386 2384 * check for PAT support
2387 2385 */
2388 2386 {
2389 2387 uint32_t eax = 1;
2390 2388 uint32_t edx = get_cpuid_edx(&eax);
2391 2389
2392 2390 if (edx & CPUID_INTC_EDX_PAT)
2393 2391 PAT_support = 1;
2394 2392 }
2395 2393 #if !defined(_BOOT_TARGET_amd64)
2396 2394
2397 2395 /*
2398 2396 * The 32-bit hypervisor uses segmentation to protect itself from
2399 2397 * guests. This means when a guest attempts to install a flat 4GB
2400 2398 * code or data descriptor the 32-bit hypervisor will protect itself
2401 2399 * by silently shrinking the segment such that if the guest attempts
2402 2400 * any access where the hypervisor lives a #gp fault is generated.
2403 2401 * The problem is that some applications expect a full 4GB flat
2404 2402 * segment for their current thread pointer and will use negative
2405 2403 * offset segment wrap around to access data. TLS support in linux
2406 2404 * brand is one example of this.
2407 2405 *
2408 2406 * The 32-bit hypervisor can catch the #gp fault in these cases
2409 2407 * and emulate the access without passing the #gp fault to the guest
2410 2408 * but only if VMASST_TYPE_4gb_segments is explicitly turned on.
2411 2409 * Seems like this should have been the default.
2412 2410 * Either way, we want the hypervisor -- and not Solaris -- to deal
2413 2411 * to deal with emulating these accesses.
2414 2412 */
2415 2413 if (HYPERVISOR_vm_assist(VMASST_CMD_enable,
2416 2414 VMASST_TYPE_4gb_segments) < 0)
2417 2415 dboot_panic("HYPERVISOR_vm_assist(4gb_segments) failed");
2418 2416 #endif /* !_BOOT_TARGET_amd64 */
2419 2417
2420 2418 #else /* __xpv */
2421 2419
2422 2420 /*
2423 2421 * use cpuid to enable MMU features
2424 2422 */
2425 2423 if (have_cpuid()) {
2426 2424 uint32_t eax, edx;
2427 2425
2428 2426 eax = 1;
2429 2427 edx = get_cpuid_edx(&eax);
2430 2428 if (edx & CPUID_INTC_EDX_PSE)
2431 2429 largepage_support = 1;
2432 2430 if (edx & CPUID_INTC_EDX_PGE)
2433 2431 pge_support = 1;
2434 2432 if (edx & CPUID_INTC_EDX_PAE)
2435 2433 pae_support = 1;
2436 2434 if (edx & CPUID_INTC_EDX_PAT)
2437 2435 PAT_support = 1;
2438 2436
2439 2437 eax = 0x80000000;
2440 2438 edx = get_cpuid_edx(&eax);
2441 2439 if (eax >= 0x80000001) {
2442 2440 eax = 0x80000001;
2443 2441 edx = get_cpuid_edx(&eax);
2444 2442 if (edx & CPUID_AMD_EDX_LM)
2445 2443 amd64_support = 1;
2446 2444 if (edx & CPUID_AMD_EDX_NX)
2447 2445 NX_support = 1;
2448 2446 }
2449 2447 } else {
2450 2448 dboot_printf("cpuid not supported\n");
2451 2449 }
2452 2450 #endif /* __xpv */
2453 2451
2454 2452
2455 2453 #if defined(_BOOT_TARGET_amd64)
2456 2454 if (amd64_support == 0)
2457 2455 dboot_panic("long mode not supported, rebooting");
2458 2456 else if (pae_support == 0)
2459 2457 dboot_panic("long mode, but no PAE; rebooting");
2460 2458 #else
2461 2459 /*
2462 2460 * Allow the command line to over-ride use of PAE for 32 bit.
2463 2461 */
2464 2462 if (strstr(cmdline, "disablePAE=true") != NULL) {
2465 2463 pae_support = 0;
2466 2464 NX_support = 0;
2467 2465 amd64_support = 0;
2468 2466 }
2469 2467 #endif
2470 2468
2471 2469 /*
2472 2470 * initialize the simple memory allocator
2473 2471 */
2474 2472 init_mem_alloc();
2475 2473
2476 2474 #if !defined(__xpv) && !defined(_BOOT_TARGET_amd64)
2477 2475 /*
2478 2476 * disable PAE on 32 bit h/w w/o NX and < 4Gig of memory
2479 2477 */
2480 2478 if (max_mem < FOUR_GIG && NX_support == 0)
2481 2479 pae_support = 0;
2482 2480 #endif
2483 2481
2484 2482 /*
2485 2483 * configure mmu information
2486 2484 */
2487 2485 if (pae_support) {
2488 2486 shift_amt = shift_amt_pae;
2489 2487 ptes_per_table = 512;
2490 2488 pte_size = 8;
2491 2489 lpagesize = TWO_MEG;
2492 2490 #if defined(_BOOT_TARGET_amd64)
2493 2491 top_level = 3;
2494 2492 #else
2495 2493 top_level = 2;
2496 2494 #endif
2497 2495 } else {
2498 2496 pae_support = 0;
2499 2497 NX_support = 0;
2500 2498 shift_amt = shift_amt_nopae;
2501 2499 ptes_per_table = 1024;
2502 2500 pte_size = 4;
2503 2501 lpagesize = FOUR_MEG;
2504 2502 top_level = 1;
2505 2503 }
2506 2504
2507 2505 DBG(PAT_support);
2508 2506 DBG(pge_support);
2509 2507 DBG(NX_support);
2510 2508 DBG(largepage_support);
2511 2509 DBG(amd64_support);
2512 2510 DBG(top_level);
2513 2511 DBG(pte_size);
2514 2512 DBG(ptes_per_table);
2515 2513 DBG(lpagesize);
2516 2514
2517 2515 #if defined(__xpv)
2518 2516 ktext_phys = ONE_GIG; /* from UNIX Mapfile */
2519 2517 #else
2520 2518 ktext_phys = FOUR_MEG; /* from UNIX Mapfile */
2521 2519 #endif
2522 2520
2523 2521 #if !defined(__xpv) && defined(_BOOT_TARGET_amd64)
2524 2522 /*
2525 2523 * For grub, copy kernel bits from the ELF64 file to final place.
2526 2524 */
2527 2525 DBG_MSG("\nAllocating nucleus pages.\n");
2528 2526 ktext_phys = (uintptr_t)do_mem_alloc(ksize, FOUR_MEG);
2529 2527
2530 2528 if (ktext_phys == 0)
2531 2529 dboot_panic("failed to allocate aligned kernel memory");
2532 2530 DBG(load_addr);
2533 2531 if (dboot_elfload64(load_addr) != 0)
2534 2532 dboot_panic("failed to parse kernel ELF image, rebooting");
2535 2533 #endif
2536 2534
2537 2535 DBG(ktext_phys);
2538 2536
2539 2537 /*
2540 2538 * Allocate page tables.
2541 2539 */
2542 2540 build_page_tables();
2543 2541
2544 2542 /*
2545 2543 * return to assembly code to switch to running kernel
2546 2544 */
2547 2545 entry_addr_low = (uint32_t)target_kernel_text;
2548 2546 DBG(entry_addr_low);
2549 2547 bi->bi_use_largepage = largepage_support;
2550 2548 bi->bi_use_pae = pae_support;
2551 2549 bi->bi_use_pge = pge_support;
2552 2550 bi->bi_use_nx = NX_support;
2553 2551
2554 2552 #if defined(__xpv)
2555 2553
2556 2554 bi->bi_next_paddr = next_avail_addr - mfn_base;
2557 2555 DBG(bi->bi_next_paddr);
2558 2556 bi->bi_next_vaddr = (native_ptr_t)(uintptr_t)next_avail_addr;
2559 2557 DBG(bi->bi_next_vaddr);
2560 2558
2561 2559 /*
2562 2560 * unmap unused pages in start area to make them available for DMA
2563 2561 */
2564 2562 while (next_avail_addr < scratch_end) {
2565 2563 (void) HYPERVISOR_update_va_mapping(next_avail_addr,
2566 2564 0, UVMF_INVLPG | UVMF_LOCAL);
2567 2565 next_avail_addr += MMU_PAGESIZE;
2568 2566 }
2569 2567
2570 2568 bi->bi_xen_start_info = (native_ptr_t)(uintptr_t)xen_info;
2571 2569 DBG((uintptr_t)HYPERVISOR_shared_info);
2572 2570 bi->bi_shared_info = (native_ptr_t)HYPERVISOR_shared_info;
2573 2571 bi->bi_top_page_table = (uintptr_t)top_page_table - mfn_base;
2574 2572
2575 2573 #else /* __xpv */
2576 2574
2577 2575 bi->bi_next_paddr = next_avail_addr;
2578 2576 DBG(bi->bi_next_paddr);
2579 2577 bi->bi_next_vaddr = (native_ptr_t)(uintptr_t)next_avail_addr;
2580 2578 DBG(bi->bi_next_vaddr);
2581 2579 bi->bi_mb_version = multiboot_version;
2582 2580
2583 2581 switch (multiboot_version) {
2584 2582 case 1:
2585 2583 bi->bi_mb_info = (native_ptr_t)(uintptr_t)mb_info;
2586 2584 break;
2587 2585 case 2:
2588 2586 bi->bi_mb_info = (native_ptr_t)(uintptr_t)mb2_info;
2589 2587 break;
2590 2588 default:
2591 2589 dboot_panic("Unknown multiboot version: %d\n",
2592 2590 multiboot_version);
2593 2591 break;
2594 2592 }
2595 2593 bi->bi_top_page_table = (uintptr_t)top_page_table;
2596 2594
2597 2595 #endif /* __xpv */
2598 2596
2599 2597 bi->bi_kseg_size = FOUR_MEG;
2600 2598 DBG(bi->bi_kseg_size);
2601 2599
2602 2600 #ifndef __xpv
2603 2601 if (map_debug)
2604 2602 dump_tables();
2605 2603 #endif
2606 2604
2607 2605 DBG_MSG("\n\n*** DBOOT DONE -- back to asm to jump to kernel\n\n");
2608 2606
2609 2607 #ifndef __xpv
2610 2608 /* Update boot info with FB data */
2611 2609 fb->cursor.origin.x = fb_info.cursor.origin.x;
2612 2610 fb->cursor.origin.y = fb_info.cursor.origin.y;
2613 2611 fb->cursor.pos.x = fb_info.cursor.pos.x;
2614 2612 fb->cursor.pos.y = fb_info.cursor.pos.y;
2615 2613 fb->cursor.visible = fb_info.cursor.visible;
2616 2614 #endif
2617 2615 }
↓ open down ↓ |
318 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX