1 /*
2 * This file and its contents are supplied under the terms of the
3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 * You may only use this file in accordance with the terms of version
5 * 1.0 of the CDDL.
6 *
7 * A full copy of the text of the CDDL should have accompanied this
8 * source. A copy of the CDDL is also available via the Internet at
9 * http://www.illumos.org/license/CDDL.
10 */
11
12 /*
13 * Copyright 2017 Toomas Soome <tsoome@me.com>
14 * Copyright 2019, Joyent, Inc.
15 */
16
/*
 * This module adds support for loading and booting the illumos multiboot2
 * kernel. This code is only built to support the illumos kernel; it does
 * not support Xen.
 */
22
23 #include <sys/cdefs.h>
24 #include <sys/stddef.h>
25
26 #include <sys/param.h>
27 #include <sys/exec.h>
28 #include <sys/linker.h>
29 #include <sys/module.h>
30 #include <sys/stdint.h>
31 #include <sys/multiboot2.h>
32 #include <stand.h>
33 #include <stdbool.h>
34 #include <machine/elf.h>
35 #include "libzfs.h"
36
37 #include "bootstrap.h"
38 #include <sys/consplat.h>
39
40 #include <machine/metadata.h>
41 #include <machine/pc/bios.h>
42
43 #define SUPPORT_DHCP
44 #include <bootp.h>
45
46 #if !defined(EFI)
47 #include "../i386/btx/lib/btxv86.h"
48 #include "libi386.h"
49 #include "vbe.h"
50
51 #else
52 #include <efi.h>
53 #include <efilib.h>
54 #include "loader_efi.h"
55
56 static void (*trampoline)(uint32_t, struct relocator *, uint64_t);
57 #endif
58
59 #include "platform/acfreebsd.h"
60 #include "acconfig.h"
61 #define ACPI_SYSTEM_XFACE
62 #include "actypes.h"
63 #include "actbl.h"
64
65 extern ACPI_TABLE_RSDP *rsdp;
66
/*
 * MB data heap pointer: the next free address in the multiboot information
 * area. mb_malloc() hands out this address and advances it.
 */
static vm_offset_t last_addr;

static int multiboot2_loadfile(char *, uint64_t, struct preloaded_file **);
static int multiboot2_exec(struct preloaded_file *);

/* File format entry for multiboot2 kernels: load and exec handlers. */
struct file_format multiboot2 = { multiboot2_loadfile, multiboot2_exec };
/*
 * Set when the kernel header carries MULTIBOOT_HEADER_TAG_EFI_BS; when it
 * is not set, multiboot2_exec() calls ExitBootServices().
 */
static bool keep_bs = false;
/* Set when the kernel header carries MULTIBOOT_HEADER_TAG_FRAMEBUFFER. */
static bool have_framebuffer = false;
/* Kernel load address, recorded by multiboot2_loadfile(). */
static vm_offset_t load_addr;
/* Kernel entry address, recorded by multiboot2_loadfile(). */
static vm_offset_t entry_addr;
78
79 /*
80 * Validate tags in info request. This function is provided just to
81 * recognize the current tag list and only serves as a limited
82 * safe guard against possibly corrupt information.
83 */
84 static bool
85 is_info_request_valid(multiboot_header_tag_information_request_t *rtag)
86 {
87 int i;
88
89 /*
90 * If the tag is optional and we do not support it, we do not
91 * have to do anything special, so we skip optional tags.
92 */
93 if (rtag->mbh_flags & MULTIBOOT_HEADER_TAG_OPTIONAL)
94 return (true);
95
96 for (i = 0; i < (rtag->mbh_size - sizeof (*rtag)) /
97 sizeof (rtag->mbh_requests[0]); i++)
98 switch (rtag->mbh_requests[i]) {
99 case MULTIBOOT_TAG_TYPE_END:
100 case MULTIBOOT_TAG_TYPE_CMDLINE:
101 case MULTIBOOT_TAG_TYPE_BOOT_LOADER_NAME:
102 case MULTIBOOT_TAG_TYPE_MODULE:
103 case MULTIBOOT_TAG_TYPE_BASIC_MEMINFO:
104 case MULTIBOOT_TAG_TYPE_BOOTDEV:
105 case MULTIBOOT_TAG_TYPE_MMAP:
106 case MULTIBOOT_TAG_TYPE_FRAMEBUFFER:
107 case MULTIBOOT_TAG_TYPE_VBE:
108 case MULTIBOOT_TAG_TYPE_ELF_SECTIONS:
109 case MULTIBOOT_TAG_TYPE_APM:
110 case MULTIBOOT_TAG_TYPE_EFI32:
111 case MULTIBOOT_TAG_TYPE_EFI64:
112 case MULTIBOOT_TAG_TYPE_ACPI_OLD:
113 case MULTIBOOT_TAG_TYPE_ACPI_NEW:
114 case MULTIBOOT_TAG_TYPE_NETWORK:
115 case MULTIBOOT_TAG_TYPE_EFI_MMAP:
116 case MULTIBOOT_TAG_TYPE_EFI_BS:
117 case MULTIBOOT_TAG_TYPE_EFI32_IH:
118 case MULTIBOOT_TAG_TYPE_EFI64_IH:
119 case MULTIBOOT_TAG_TYPE_LOAD_BASE_ADDR:
120 break;
121 default:
122 printf("unsupported information tag: 0x%x\n",
123 rtag->mbh_requests[i]);
124 return (false);
125 }
126 return (true);
127 }
128
129 static int
130 multiboot2_loadfile(char *filename, uint64_t dest,
131 struct preloaded_file **result)
132 {
133 int fd, error;
134 uint32_t i;
135 struct stat st;
136 caddr_t header_search;
137 multiboot2_header_t *header;
138 multiboot_header_tag_t *tag;
139 multiboot_header_tag_address_t *addr_tag = NULL;
140 multiboot_header_tag_entry_address_t *entry_tag = NULL;
141 struct preloaded_file *fp;
142
143 /* This allows to check other file formats from file_formats array. */
144 error = EFTYPE;
145 if (filename == NULL)
146 return (error);
147
148 /* is kernel already loaded? */
149 fp = file_findfile(NULL, NULL);
150 if (fp != NULL)
151 return (error);
152
153 if ((fd = open(filename, O_RDONLY)) == -1)
154 return (errno);
155
156 /*
157 * Read MULTIBOOT_SEARCH size in order to search for the
158 * multiboot magic header.
159 */
160 header_search = malloc(MULTIBOOT_SEARCH);
161 if (header_search == NULL) {
162 close(fd);
163 return (ENOMEM);
164 }
165
166 if (read(fd, header_search, MULTIBOOT_SEARCH) != MULTIBOOT_SEARCH)
167 goto out;
168
169 header = NULL;
170 for (i = 0; i <= (MULTIBOOT_SEARCH - sizeof (multiboot2_header_t));
171 i += MULTIBOOT_HEADER_ALIGN) {
172 header = (multiboot2_header_t *)(header_search + i);
173
174 /* Do we have match on magic? */
175 if (header->mb2_magic != MULTIBOOT2_HEADER_MAGIC) {
176 header = NULL;
177 continue;
178 }
179 /*
180 * Validate checksum, the sum of magic + architecture +
181 * header_length + checksum must equal 0.
182 */
183 if (header->mb2_magic + header->mb2_architecture +
184 header->mb2_header_length + header->mb2_checksum != 0) {
185 header = NULL;
186 continue;
187 }
188 /*
189 * Finally, the entire header must fit within MULTIBOOT_SEARCH.
190 */
191 if (i + header->mb2_header_length > MULTIBOOT_SEARCH) {
192 header = NULL;
193 continue;
194 }
195 break;
196 }
197
198 if (header == NULL)
199 goto out;
200
201 have_framebuffer = false;
202 for (tag = header->mb2_tags; tag->mbh_type != MULTIBOOT_TAG_TYPE_END;
203 tag = (multiboot_header_tag_t *)((uintptr_t)tag +
204 roundup2(tag->mbh_size, MULTIBOOT_TAG_ALIGN))) {
205 switch (tag->mbh_type) {
206 case MULTIBOOT_HEADER_TAG_INFORMATION_REQUEST:
207 if (is_info_request_valid((void*)tag) == false)
208 goto out;
209 break;
210 case MULTIBOOT_HEADER_TAG_ADDRESS:
211 addr_tag = (multiboot_header_tag_address_t *)tag;
212 break;
213 case MULTIBOOT_HEADER_TAG_ENTRY_ADDRESS:
214 entry_tag =
215 (multiboot_header_tag_entry_address_t *)tag;
216 break;
217 case MULTIBOOT_HEADER_TAG_CONSOLE_FLAGS:
218 break;
219 case MULTIBOOT_HEADER_TAG_FRAMEBUFFER:
220 have_framebuffer = true;
221 break;
222 case MULTIBOOT_HEADER_TAG_MODULE_ALIGN:
223 /* we always align modules */
224 break;
225 case MULTIBOOT_HEADER_TAG_EFI_BS:
226 keep_bs = true;
227 break;
228 default:
229 if (!(tag->mbh_flags & MULTIBOOT_HEADER_TAG_OPTIONAL)) {
230 printf("unsupported tag: 0x%x\n",
231 tag->mbh_type);
232 goto out;
233 }
234 }
235 }
236
237 /*
238 * We must have addr_tag and entry_tag to load a 64-bit kernel.
239 * If these tags are missing, we either have a 32-bit kernel, or
240 * this is not our kernel at all.
241 */
242 if (addr_tag != NULL && entry_tag != NULL) {
243 fp = file_alloc();
244 if (fp == NULL) {
245 error = ENOMEM;
246 goto out;
247 }
248 if (lseek(fd, 0, SEEK_SET) == -1) {
249 printf("lseek failed\n");
250 error = EIO;
251 file_discard(fp);
252 goto out;
253 }
254 if (fstat(fd, &st) < 0) {
255 printf("fstat failed\n");
256 error = EIO;
257 file_discard(fp);
258 goto out;
259 }
260
261 load_addr = addr_tag->mbh_load_addr;
262 entry_addr = entry_tag->mbh_entry_addr;
263 fp->f_addr = archsw.arch_loadaddr(LOAD_KERN, filename,
264 addr_tag->mbh_load_addr);
265 if (fp->f_addr == 0) {
266 error = ENOMEM;
267 file_discard(fp);
268 goto out;
269 }
270 fp->f_size = archsw.arch_readin(fd, fp->f_addr, st.st_size);
271
272 if (fp->f_size != st.st_size) {
273 printf("error reading: %s", strerror(errno));
274 file_discard(fp);
275 error = EIO;
276 goto out;
277 }
278
279 fp->f_name = strdup(filename);
280 fp->f_type = strdup("aout multiboot2 kernel");
281 if (fp->f_name == NULL || fp->f_type == NULL) {
282 error = ENOMEM;
283 file_discard(fp);
284 goto out;
285 }
286
287 fp->f_metadata = NULL;
288 error = 0;
289 } else {
290 #if defined(EFI)
291 /* 32-bit kernel is not yet supported for EFI */
292 printf("32-bit kernel is not supported by UEFI loader\n");
293 error = ENOTSUP;
294 goto out;
295 #endif
296 /* elf32_loadfile_raw will fill the attributes in fp. */
297 error = elf32_loadfile_raw(filename, dest, &fp, 2);
298 if (error != 0) {
299 printf("elf32_loadfile_raw failed: %d unable to "
300 "load multiboot2 kernel\n", error);
301 goto out;
302 }
303 entry_addr = fp->f_addr;
304 /*
305 * We want the load_addr to have some legal value,
306 * so we set it same as the entry_addr.
307 * The distinction is important with UEFI, but not
308 * with BIOS version, because BIOS version does not use
309 * staging area.
310 */
311 load_addr = fp->f_addr;
312 }
313
314 setenv("kernelname", fp->f_name, 1);
315 #if defined(EFI)
316 efi_addsmapdata(fp);
317 #else
318 bios_addsmapdata(fp);
319 #endif
320 *result = fp;
321 out:
322 free(header_search);
323 close(fd);
324 return (error);
325 }
326
/*
 * Search the command line for named property.
 *
 * Properties are looked up in the "name=value[,name=value...]" lists that
 * follow a -B option. A value may start with a single or double quote, in
 * which case everything up to the matching quote belongs to the value; the
 * quotes are part of the returned data. The property name must match name
 * exactly, a property whose name is only a prefix of name is not a match.
 *
 * On return, *value points into cmd (it is not NUL terminated) and *len is
 * the length of the value. Both are reset to NULL/0 when nothing is found.
 *
 * Return codes:
 *	0	The name is found, we return the data in value and len.
 *	ENOENT	The name is not found.
 *	EINVAL	The provided command line is badly formed.
 */
static int
find_property_value(const char *cmd, const char *name, const char **value,
    size_t *len)
{
	const char *namep, *valuep;
	size_t name_len, value_len, want_len;
	int quoted;

	*value = NULL;
	*len = 0;

	if (cmd == NULL || name == NULL)
		return (ENOENT);

	want_len = strlen(name);

	while (*cmd != '\0') {
		if (cmd[0] != '-' || cmd[1] != 'B') {
			cmd++;
			continue;
		}
		cmd += 2;	/* Skip -B */
		while (cmd[0] == ' ' || cmd[0] == '\t')
			cmd++;	/* Skip whitespaces. */
		while (*cmd != '\0' && cmd[0] != ' ' && cmd[0] != '\t') {
			namep = cmd;
			valuep = strchr(cmd, '=');
			if (valuep == NULL)
				break;
			name_len = valuep - namep;
			valuep++;
			quoted = 0;

			/*
			 * Measure the value. The terminating NUL is tested
			 * for every character, including the one following
			 * an opening quote, so we never step past the end
			 * of the string.
			 */
			for (value_len = 0; valuep[value_len] != '\0';
			    value_len++) {
				char c = valuep[value_len];

				/* Is this value quoted? */
				if (value_len == 0 &&
				    (c == '\'' || c == '"')) {
					quoted = c;
					continue;
				}

				/*
				 * In the quote accept any character,
				 * but look for ending quote.
				 */
				if (quoted != 0) {
					if (c == quoted)
						quoted = 0;
					continue;
				}

				/* A comma or white space ends the value. */
				if (c == ',' || c == ' ' || c == '\t')
					break;
			}
			if (quoted != 0) {
				printf("Missing closing '%c' in \"%s\"\n",
				    quoted, valuep);
				return (EINVAL);
			}
			/*
			 * Both lengths must agree, otherwise "con=..." would
			 * be taken for "console".
			 */
			if (value_len != 0 && name_len == want_len &&
			    strncmp(namep, name, name_len) == 0) {
				*value = valuep;
				*len = value_len;
				return (0);
			}
			cmd = valuep + value_len;
			while (*cmd == ',')
				cmd++;
		}
	}
	return (ENOENT);
}
412
/*
 * If command line has " -B ", insert property after "-B ", otherwise
 * append to command line.
 *
 * head is the current command line and prop is the "name=value" text to
 * add. The result is a newly allocated string which the caller must free;
 * head is not modified. NULL is returned when the new string can not be
 * built.
 */
static char *
insert_cmdline(const char *head, const char *prop)
{
	const char *prop_opt = " -B ";
	char *cmdline = NULL;
	char *tail;
	int rv;

	tail = strstr(head, prop_opt);
	if (tail == NULL) {
		/* No property list yet, start one at the end. */
		rv = asprintf(&cmdline, "%s%s%s", head, prop_opt, prop);
	} else {
		ptrdiff_t diff;

		/* Split right after "-B " and put the property in front. */
		tail += strlen(prop_opt);
		diff = tail - head;
		if (diff >= INT_MAX)
			return (NULL);
		rv = asprintf(&cmdline, "%.*s%s,%s", (int)diff, head, prop,
		    tail);
	}

	/*
	 * Do not trust the pointer after a failed asprintf(); not every
	 * implementation sets it to NULL.
	 */
	if (rv < 0)
		return (NULL);

	return (cmdline);
}
441
/*
 * Since we have no way to pass the environment to the mb1 kernel other than
 * through arguments, we need to take care of console setup.
 *
 * If the console is in mirror mode, set the kernel console from $os_console.
 * If it's unset, use first item from $console.
 * If $console is "ttyX", also pass $ttyX-mode, since it may have been set by
 * the user.
 *
 * In case of memory allocation errors, just return the original command line
 * so we have a chance of booting. The same applies when neither $os_console
 * nor $console is set.
 *
 * On success, cl will be freed and a new, allocated command line string is
 * returned.
 *
 * For the mb2 kernel, we only set command line console if os_console is set.
 * We can not overwrite console in the environment, as it can disrupt the
 * loader console messages, and we do not want to deal with the os_console
 * in the kernel.
 */
static char *
update_cmdline(char *cl, bool mb2)
{
	char *os_console = getenv("os_console");
	char *ttymode = NULL;
	char mode[10];
	char *tmp;
	const char *prop;
	size_t plen;
	int rv;

	if (mb2 == true && os_console == NULL)
		return (cl);

	if (os_console == NULL) {
		const char *console = getenv("console");

		/* Nothing to derive the kernel console from. */
		if (console == NULL)
			return (cl);

		/*
		 * strsep() returns the start of the duplicated string, so
		 * os_console can be released with free() below.
		 */
		tmp = strdup(console);
		os_console = strsep(&tmp, ", ");
	} else {
		os_console = strdup(os_console);
	}

	if (os_console == NULL)
		return (cl);

	if (mb2 == false && strncmp(os_console, "tty", 3) == 0) {
		snprintf(mode, sizeof (mode), "%s-mode", os_console);
		/*
		 * The ttyX-mode variable is set by our serial console
		 * driver for ttya-ttyd. However, since the os_console
		 * values are not verified, it is possible we get bogus
		 * name and no mode variable. If so, we do not set console
		 * property and let the kernel use defaults.
		 */
		if ((ttymode = getenv(mode)) == NULL) {
			free(os_console);
			return (cl);
		}
	}

	rv = find_property_value(cl, "console", &prop, &plen);
	if (rv != 0 && rv != ENOENT) {
		free(os_console);
		return (cl);
	}

	/* If console is set and this is MB2 boot, we are done. */
	if (rv == 0 && mb2 == true) {
		free(os_console);
		return (cl);
	}

	/* If console is set, do we need to set tty mode? */
	if (rv == 0) {
		const char *ttyp = NULL;
		size_t ttylen;

		free(os_console);
		os_console = NULL;
		*mode = '\0';
		if (strncmp(prop, "tty", 3) == 0 && plen == 4) {
			/* "ttyX-mode" and the terminator fill mode exactly. */
			snprintf(mode, sizeof (mode), "%.*s-mode",
			    (int)plen, prop);
			find_property_value(cl, mode, &ttyp, &ttylen);
		}

		if (*mode != '\0' && ttyp == NULL)
			ttymode = getenv(mode);
		else
			return (cl);
	}

	/* Build updated command line. */
	if (os_console != NULL) {
		char *propstr = NULL;

		if (asprintf(&propstr, "console=%s", os_console) < 0)
			propstr = NULL;
		free(os_console);
		if (propstr == NULL)
			return (cl);

		tmp = insert_cmdline(cl, propstr);
		free(propstr);
		if (tmp == NULL)
			return (cl);

		free(cl);
		cl = tmp;
	}
	if (ttymode != NULL) {
		char *propstr = NULL;

		if (asprintf(&propstr, "%s=\"%s\"", mode, ttymode) < 0)
			propstr = NULL;
		if (propstr == NULL)
			return (cl);

		tmp = insert_cmdline(cl, propstr);
		free(propstr);
		if (tmp == NULL)
			return (cl);
		free(cl);
		cl = tmp;
	}

	return (cl);
}
567
568 /*
569 * Build the kernel command line. Shared function between MB1 and MB2.
570 *
571 * In both cases, if fstype is set and is not zfs, we do not set up
572 * zfs-bootfs property. But we set kernel file name and options.
573 *
574 * For the MB1, we only can pass properties on command line, so
575 * we will set console, ttyX-mode (for serial console) and zfs-bootfs.
576 *
577 * For the MB2, we can pass properties in environment, but if os_console
578 * is set in environment, we need to add console property on the kernel
579 * command line.
580 *
581 * The console properties are managed in update_cmdline().
582 */
583 int
584 mb_kernel_cmdline(struct preloaded_file *fp, struct devdesc *rootdev,
585 char **line)
586 {
587 const char *fs = getenv("fstype");
588 char *cmdline;
589 size_t len;
590 bool zfs_root = false;
591 bool mb2;
592 int rv;
593
594 /*
595 * 64-bit kernel has aout header, 32-bit kernel is elf, and the
596 * type strings are different. Lets just search for "multiboot2".
597 */
598 if (strstr(fp->f_type, "multiboot2") == NULL)
599 mb2 = false;
600 else
601 mb2 = true;
602
603 if (rootdev->d_dev->dv_type == DEVT_ZFS)
604 zfs_root = true;
605
606 /* If we have fstype set in env, reset zfs_root if needed. */
607 if (fs != NULL && strcmp(fs, "zfs") != 0)
608 zfs_root = false;
609
610 /*
611 * If we have fstype set on the command line,
612 * reset zfs_root if needed.
613 */
614 rv = find_property_value(fp->f_args, "fstype", &fs, &len);
615 if (rv != 0 && rv != ENOENT)
616 return (rv);
617
618 if (fs != NULL && strncmp(fs, "zfs", len) != 0)
619 zfs_root = false;
620
621 /* zfs_bootfs() will set the environment, it must be called. */
622 if (zfs_root == true)
623 fs = zfs_bootfs(rootdev);
624
625 if (fp->f_args == NULL)
626 cmdline = strdup(fp->f_name);
627 else
628 asprintf(&cmdline, "%s %s", fp->f_name, fp->f_args);
629
630 if (cmdline == NULL)
631 return (ENOMEM);
632
633 /* Append zfs-bootfs for MB1 command line. */
634 if (mb2 == false && zfs_root == true) {
635 char *tmp;
636
637 tmp = insert_cmdline(cmdline, fs);
638 free(cmdline);
639 if (tmp == NULL)
640 return (ENOMEM);
641 cmdline = tmp;
642 }
643
644 *line = update_cmdline(cmdline, mb2);
645 return (0);
646 }
647
648 /*
649 * Returns allocated virtual address from MB info area.
650 */
651 static vm_offset_t
652 mb_malloc(size_t n)
653 {
654 vm_offset_t ptr = last_addr;
655 last_addr = roundup(last_addr + n, MULTIBOOT_TAG_ALIGN);
656 return (ptr);
657 }
658
659 /*
660 * Calculate size for module tag list.
661 */
662 static size_t
663 module_size(struct preloaded_file *fp)
664 {
665 size_t len, size;
666 struct preloaded_file *mfp;
667
668 size = 0;
669 for (mfp = fp->f_next; mfp != NULL; mfp = mfp->f_next) {
670 len = strlen(mfp->f_name) + 1;
671 len += strlen(mfp->f_type) + 5 + 1; /* 5 is for "type=" */
672 if (mfp->f_args != NULL)
673 len += strlen(mfp->f_args) + 1;
674 size += sizeof (multiboot_tag_module_t) + len;
675 size = roundup(size, MULTIBOOT_TAG_ALIGN);
676 }
677 return (size);
678 }
679
680 #if defined(EFI)
681 /*
682 * Calculate size for UEFI memory map tag.
683 */
684 static int
685 efimemmap_size(void)
686 {
687 UINTN size, cur_size, desc_size;
688 EFI_MEMORY_DESCRIPTOR *mmap;
689 EFI_STATUS ret;
690
691 size = EFI_PAGE_SIZE; /* Start with 4k. */
692 while (1) {
693 cur_size = size;
694 mmap = malloc(cur_size);
695 if (mmap == NULL)
696 return (0);
697 ret = BS->GetMemoryMap(&cur_size, mmap, NULL, &desc_size, NULL);
698 free(mmap);
699 if (ret == EFI_SUCCESS)
700 break;
701 if (ret == EFI_BUFFER_TOO_SMALL) {
702 if (size < cur_size)
703 size = cur_size;
704 size += (EFI_PAGE_SIZE);
705 } else
706 return (0);
707 }
708
709 /* EFI MMAP will grow when we allocate MBI, set some buffer. */
710 size += (3 << EFI_PAGE_SHIFT);
711 size = roundup(size, desc_size);
712 return (sizeof (multiboot_tag_efi_mmap_t) + size);
713 }
714 #endif
715
716 /*
717 * Calculate size for bios smap tag.
718 */
719 static size_t
720 biossmap_size(struct preloaded_file *fp)
721 {
722 int num;
723 struct file_metadata *md;
724
725 md = file_findmetadata(fp, MODINFOMD_SMAP);
726 if (md == NULL)
727 return (0);
728
729 num = md->md_size / sizeof (struct bios_smap); /* number of entries */
730 return (sizeof (multiboot_tag_mmap_t) +
731 num * sizeof (multiboot_mmap_entry_t));
732 }
733
734 static size_t
735 mbi_size(struct preloaded_file *fp, char *cmdline)
736 {
737 size_t size;
738 #if !defined(EFI)
739 extern multiboot_tag_framebuffer_t gfx_fb;
740 #endif
741
742 size = sizeof (uint32_t) * 2; /* first 2 fields from MBI header */
743 size += sizeof (multiboot_tag_string_t) + strlen(cmdline) + 1;
744 size = roundup2(size, MULTIBOOT_TAG_ALIGN);
745 size += sizeof (multiboot_tag_string_t) + strlen(bootprog_info) + 1;
746 size = roundup2(size, MULTIBOOT_TAG_ALIGN);
747 #if !defined(EFI)
748 size += sizeof (multiboot_tag_basic_meminfo_t);
749 size = roundup2(size, MULTIBOOT_TAG_ALIGN);
750 #endif
751 size += module_size(fp);
752 size = roundup2(size, MULTIBOOT_TAG_ALIGN);
753 #if defined(EFI)
754 size += sizeof (multiboot_tag_efi64_t);
755 size = roundup2(size, MULTIBOOT_TAG_ALIGN);
756 size += efimemmap_size();
757 size = roundup2(size, MULTIBOOT_TAG_ALIGN);
758
759 if (have_framebuffer == true) {
760 size += sizeof (multiboot_tag_framebuffer_t);
761 size = roundup2(size, MULTIBOOT_TAG_ALIGN);
762 }
763 #endif
764
765 size += biossmap_size(fp);
766 size = roundup2(size, MULTIBOOT_TAG_ALIGN);
767
768 #if !defined(EFI)
769 if (gfx_fb.framebuffer_common.framebuffer_type ==
770 MULTIBOOT_FRAMEBUFFER_TYPE_INDEXED) {
771 size += sizeof (struct multiboot_tag_framebuffer_common);
772 size += gfx_fb.u.fb1.framebuffer_palette_num_colors *
773 sizeof (multiboot_color_t);
774 } else {
775 size += sizeof (multiboot_tag_framebuffer_t);
776 }
777 size = roundup2(size, MULTIBOOT_TAG_ALIGN);
778
779 size += sizeof (multiboot_tag_vbe_t);
780 size = roundup2(size, MULTIBOOT_TAG_ALIGN);
781 #endif
782
783 if (bootp_response != NULL) {
784 size += sizeof (multiboot_tag_network_t) + bootp_response_size;
785 size = roundup2(size, MULTIBOOT_TAG_ALIGN);
786 }
787
788 if (rsdp != NULL) {
789 if (rsdp->Revision == 0) {
790 size += sizeof (multiboot_tag_old_acpi_t) +
791 sizeof (ACPI_RSDP_COMMON);
792 } else {
793 size += sizeof (multiboot_tag_new_acpi_t) +
794 rsdp->Length;
795 }
796 size = roundup2(size, MULTIBOOT_TAG_ALIGN);
797 }
798 size += sizeof (multiboot_tag_t);
799
800 return (size);
801 }
802
803 #if defined(EFI)
/*
 * Check if two memory ranges overlap.
 *
 * The ranges are [start1, start1 + size1) and [start2, start2 + size2).
 * Ranges which only touch, one ending exactly where the other one starts,
 * share no byte and do not overlap. A message is printed and true is
 * returned when the ranges overlap; false is returned otherwise.
 */
static bool
overlaps(uintptr_t start1, size_t size1, uintptr_t start2, size_t size2)
{
	if (start1 < start2 + size2 &&
	    start2 < start1 + size1) {
		printf("overlaps: %zx-%zx, %zx-%zx\n",
		    (size_t)start1, (size_t)(start1 + size1),
		    (size_t)start2, (size_t)(start2 + size2));
		return (true);
	}

	return (false);
}
816 #endif
817
818 static int
819 multiboot2_exec(struct preloaded_file *fp)
820 {
821 multiboot2_info_header_t *mbi = NULL;
822 struct preloaded_file *mfp;
823 char *cmdline = NULL;
824 struct devdesc *rootdev;
825 struct file_metadata *md;
826 int i, error, num;
827 int rootfs = 0;
828 size_t size;
829 struct bios_smap *smap;
830 #if defined(EFI)
831 multiboot_tag_module_t *module, *mp;
832 struct relocator *relocator = NULL;
833 EFI_MEMORY_DESCRIPTOR *map;
834 UINTN map_size, desc_size;
835 struct chunk_head *head;
836 struct chunk *chunk;
837 vm_offset_t tmp;
838
839 efi_getdev((void **)(&rootdev), NULL, NULL);
840
841 /*
842 * We need 5 pages for relocation. We'll allocate from the heap: while
843 * it's possible that our heap got placed low down enough to be in the
844 * way of where we're going to relocate our kernel, it's hopefully not
845 * likely.
846 */
847 if ((relocator = malloc(EFI_PAGE_SIZE * 5)) == NULL) {
848 printf("relocator malloc failed!\n");
849 error = ENOMEM;
850 goto error;
851 }
852
853 if (overlaps((uintptr_t)relocator, EFI_PAGE_SIZE * 5,
854 load_addr, fp->f_size)) {
855 printf("relocator pages overlap the kernel!\n");
856 error = EINVAL;
857 goto error;
858 }
859
860 #else
861 i386_getdev((void **)(&rootdev), NULL, NULL);
862
863 if (have_framebuffer == false) {
864 /* make sure we have text mode */
865 bios_set_text_mode(VGA_TEXT_MODE);
866 }
867 #endif
868
869 error = EINVAL;
870 if (rootdev == NULL) {
871 printf("can't determine root device\n");
872 goto error;
873 }
874
875 /*
876 * Set the image command line.
877 */
878 if (fp->f_args == NULL) {
879 cmdline = getenv("boot-args");
880 if (cmdline != NULL) {
881 fp->f_args = strdup(cmdline);
882 if (fp->f_args == NULL) {
883 error = ENOMEM;
884 goto error;
885 }
886 }
887 }
888
889 error = mb_kernel_cmdline(fp, rootdev, &cmdline);
890 if (error != 0)
891 goto error;
892
893 /* mb_kernel_cmdline() updates the environment. */
894 build_environment_module();
895
896 if (have_framebuffer == true) {
897 /* Pass the loaded console font for kernel. */
898 build_font_module();
899 }
900
901 size = mbi_size(fp, cmdline); /* Get the size for MBI. */
902
903 /* Set up the base for mb_malloc. */
904 i = 0;
905 for (mfp = fp; mfp->f_next != NULL; mfp = mfp->f_next)
906 i++;
907
908 #if defined(EFI)
909 /* We need space for kernel + MBI + # modules */
910 num = (EFI_PAGE_SIZE - offsetof(struct relocator, rel_chunklist)) /
911 sizeof (struct chunk);
912 if (i + 2 >= num) {
913 printf("Too many modules, do not have space for relocator.\n");
914 error = ENOMEM;
915 goto error;
916 }
917
918 last_addr = efi_loadaddr(LOAD_MEM, &size, mfp->f_addr + mfp->f_size);
919 mbi = (multiboot2_info_header_t *)last_addr;
920 if (mbi == NULL) {
921 error = ENOMEM;
922 goto error;
923 }
924 last_addr = (vm_offset_t)mbi->mbi_tags;
925 #else
926 /* Start info block from the new page. */
927 last_addr = i386_loadaddr(LOAD_MEM, &size, mfp->f_addr + mfp->f_size);
928
929 /* Do we have space for multiboot info? */
930 if (last_addr + size >= memtop_copyin) {
931 error = ENOMEM;
932 goto error;
933 }
934
935 mbi = (multiboot2_info_header_t *)PTOV(last_addr);
936 last_addr = (vm_offset_t)mbi->mbi_tags;
937 #endif /* EFI */
938
939 {
940 multiboot_tag_string_t *tag;
941 i = sizeof (multiboot_tag_string_t) + strlen(cmdline) + 1;
942 tag = (multiboot_tag_string_t *)mb_malloc(i);
943
944 tag->mb_type = MULTIBOOT_TAG_TYPE_CMDLINE;
945 tag->mb_size = i;
946 memcpy(tag->mb_string, cmdline, strlen(cmdline) + 1);
947 free(cmdline);
948 cmdline = NULL;
949 }
950
951 {
952 multiboot_tag_string_t *tag;
953 i = sizeof (multiboot_tag_string_t) + strlen(bootprog_info) + 1;
954 tag = (multiboot_tag_string_t *)mb_malloc(i);
955
956 tag->mb_type = MULTIBOOT_TAG_TYPE_BOOT_LOADER_NAME;
957 tag->mb_size = i;
958 memcpy(tag->mb_string, bootprog_info,
959 strlen(bootprog_info) + 1);
960 }
961
962 #if !defined(EFI)
963 /* Only set in case of BIOS. */
964 {
965 multiboot_tag_basic_meminfo_t *tag;
966 tag = (multiboot_tag_basic_meminfo_t *)
967 mb_malloc(sizeof (*tag));
968
969 tag->mb_type = MULTIBOOT_TAG_TYPE_BASIC_MEMINFO;
970 tag->mb_size = sizeof (*tag);
971 tag->mb_mem_lower = bios_basemem / 1024;
972 tag->mb_mem_upper = bios_extmem / 1024;
973 }
974 #endif
975
976 num = 0;
977 for (mfp = fp->f_next; mfp != NULL; mfp = mfp->f_next) {
978 num++;
979 if (mfp->f_type != NULL && strcmp(mfp->f_type, "rootfs") == 0)
980 rootfs++;
981 }
982
983 if (num == 0 || rootfs == 0) {
984 /* We need at least one module - rootfs. */
985 printf("No rootfs module provided, aborting\n");
986 error = EINVAL;
987 goto error;
988 }
989
990 /*
991 * Set the stage for physical memory layout:
992 * - We have kernel at load_addr.
993 * - Modules are aligned to page boundary.
994 * - MBI is aligned to page boundary.
995 * - Set the tmp to point to physical address of the first module.
996 * - tmp != mfp->f_addr only in case of EFI.
997 */
998 #if defined(EFI)
999 tmp = roundup2(load_addr + fp->f_size + 1, MULTIBOOT_MOD_ALIGN);
1000 module = (multiboot_tag_module_t *)last_addr;
1001 #endif
1002
1003 for (mfp = fp->f_next; mfp != NULL; mfp = mfp->f_next) {
1004 multiboot_tag_module_t *tag;
1005
1006 num = strlen(mfp->f_name) + 1;
1007 num += strlen(mfp->f_type) + 5 + 1;
1008 if (mfp->f_args != NULL) {
1009 num += strlen(mfp->f_args) + 1;
1010 }
1011 cmdline = malloc(num);
1012 if (cmdline == NULL) {
1013 error = ENOMEM;
1014 goto error;
1015 }
1016
1017 if (mfp->f_args != NULL)
1018 snprintf(cmdline, num, "%s type=%s %s",
1019 mfp->f_name, mfp->f_type, mfp->f_args);
1020 else
1021 snprintf(cmdline, num, "%s type=%s",
1022 mfp->f_name, mfp->f_type);
1023
1024 tag = (multiboot_tag_module_t *)mb_malloc(sizeof (*tag) + num);
1025
1026 tag->mb_type = MULTIBOOT_TAG_TYPE_MODULE;
1027 tag->mb_size = sizeof (*tag) + num;
1028 #if defined(EFI)
1029 /*
1030 * We can assign module addresses only after BS have been
1031 * switched off.
1032 */
1033 tag->mb_mod_start = 0;
1034 tag->mb_mod_end = mfp->f_size;
1035 #else
1036 tag->mb_mod_start = mfp->f_addr;
1037 tag->mb_mod_end = mfp->f_addr + mfp->f_size;
1038 #endif
1039 memcpy(tag->mb_cmdline, cmdline, num);
1040 free(cmdline);
1041 cmdline = NULL;
1042 }
1043
1044 md = file_findmetadata(fp, MODINFOMD_SMAP);
1045 if (md == NULL) {
1046 printf("no memory smap\n");
1047 error = EINVAL;
1048 goto error;
1049 }
1050
1051 smap = (struct bios_smap *)md->md_data;
1052 num = md->md_size / sizeof (struct bios_smap); /* number of entries */
1053
1054 {
1055 multiboot_tag_mmap_t *tag;
1056 multiboot_mmap_entry_t *mmap_entry;
1057
1058 tag = (multiboot_tag_mmap_t *)
1059 mb_malloc(sizeof (*tag) +
1060 num * sizeof (multiboot_mmap_entry_t));
1061
1062 tag->mb_type = MULTIBOOT_TAG_TYPE_MMAP;
1063 tag->mb_size = sizeof (*tag) +
1064 num * sizeof (multiboot_mmap_entry_t);
1065 tag->mb_entry_size = sizeof (multiboot_mmap_entry_t);
1066 tag->mb_entry_version = 0;
1067 mmap_entry = (multiboot_mmap_entry_t *)tag->mb_entries;
1068
1069 for (i = 0; i < num; i++) {
1070 mmap_entry[i].mmap_addr = smap[i].base;
1071 mmap_entry[i].mmap_len = smap[i].length;
1072 mmap_entry[i].mmap_type = smap[i].type;
1073 mmap_entry[i].mmap_reserved = 0;
1074 }
1075 }
1076
1077 if (bootp_response != NULL) {
1078 multiboot_tag_network_t *tag;
1079 tag = (multiboot_tag_network_t *)
1080 mb_malloc(sizeof (*tag) + bootp_response_size);
1081
1082 tag->mb_type = MULTIBOOT_TAG_TYPE_NETWORK;
1083 tag->mb_size = sizeof (*tag) + bootp_response_size;
1084 memcpy(tag->mb_dhcpack, bootp_response, bootp_response_size);
1085 }
1086
1087 #if !defined(EFI)
1088 multiboot_tag_vbe_t *tag;
1089 extern multiboot_tag_vbe_t vbestate;
1090
1091 if (VBE_VALID_MODE(vbestate.vbe_mode)) {
1092 tag = (multiboot_tag_vbe_t *)mb_malloc(sizeof (*tag));
1093 memcpy(tag, &vbestate, sizeof (*tag));
1094 tag->mb_type = MULTIBOOT_TAG_TYPE_VBE;
1095 tag->mb_size = sizeof (*tag);
1096 }
1097 #endif
1098
1099 if (rsdp != NULL) {
1100 multiboot_tag_new_acpi_t *ntag;
1101 multiboot_tag_old_acpi_t *otag;
1102 uint32_t tsize;
1103
1104 if (rsdp->Revision == 0) {
1105 tsize = sizeof (*otag) + sizeof (ACPI_RSDP_COMMON);
1106 otag = (multiboot_tag_old_acpi_t *)mb_malloc(tsize);
1107 otag->mb_type = MULTIBOOT_TAG_TYPE_ACPI_OLD;
1108 otag->mb_size = tsize;
1109 memcpy(otag->mb_rsdp, rsdp, sizeof (ACPI_RSDP_COMMON));
1110 } else {
1111 tsize = sizeof (*ntag) + rsdp->Length;
1112 ntag = (multiboot_tag_new_acpi_t *)mb_malloc(tsize);
1113 ntag->mb_type = MULTIBOOT_TAG_TYPE_ACPI_NEW;
1114 ntag->mb_size = tsize;
1115 memcpy(ntag->mb_rsdp, rsdp, rsdp->Length);
1116 }
1117 }
1118
1119 #if defined(EFI)
1120 #ifdef __LP64__
1121 {
1122 multiboot_tag_efi64_t *tag;
1123 tag = (multiboot_tag_efi64_t *)
1124 mb_malloc(sizeof (*tag));
1125
1126 tag->mb_type = MULTIBOOT_TAG_TYPE_EFI64;
1127 tag->mb_size = sizeof (*tag);
1128 tag->mb_pointer = (uint64_t)(uintptr_t)ST;
1129 }
1130 #else
1131 {
1132 multiboot_tag_efi32_t *tag;
1133 tag = (multiboot_tag_efi32_t *)
1134 mb_malloc(sizeof (*tag));
1135
1136 tag->mb_type = MULTIBOOT_TAG_TYPE_EFI32;
1137 tag->mb_size = sizeof (*tag);
1138 tag->mb_pointer = (uint32_t)ST;
1139 }
1140 #endif /* __LP64__ */
1141 #endif /* EFI */
1142
1143 if (have_framebuffer == true) {
1144 multiboot_tag_framebuffer_t *tag;
1145 extern multiboot_tag_framebuffer_t gfx_fb;
1146 #if defined(EFI)
1147
1148 tag = (multiboot_tag_framebuffer_t *)mb_malloc(sizeof (*tag));
1149 memcpy(tag, &gfx_fb, sizeof (*tag));
1150 tag->framebuffer_common.mb_type =
1151 MULTIBOOT_TAG_TYPE_FRAMEBUFFER;
1152 tag->framebuffer_common.mb_size = sizeof (*tag);
1153 #else
1154 extern multiboot_color_t *cmap;
1155 uint32_t size;
1156
1157 if (gfx_fb.framebuffer_common.framebuffer_type ==
1158 MULTIBOOT_FRAMEBUFFER_TYPE_INDEXED) {
1159 uint16_t nc;
1160 nc = gfx_fb.u.fb1.framebuffer_palette_num_colors;
1161 size = sizeof (struct multiboot_tag_framebuffer_common)
1162 + sizeof (nc)
1163 + nc * sizeof (multiboot_color_t);
1164 } else {
1165 size = sizeof (gfx_fb);
1166 }
1167
1168 tag = (multiboot_tag_framebuffer_t *)mb_malloc(size);
1169 memcpy(tag, &gfx_fb, sizeof (*tag));
1170
1171 tag->framebuffer_common.mb_type =
1172 MULTIBOOT_TAG_TYPE_FRAMEBUFFER;
1173 tag->framebuffer_common.mb_size = size;
1174
1175 if (gfx_fb.framebuffer_common.framebuffer_type ==
1176 MULTIBOOT_FRAMEBUFFER_TYPE_INDEXED) {
1177 memcpy(tag->u.fb1.framebuffer_palette, cmap,
1178 sizeof (multiboot_color_t) *
1179 gfx_fb.u.fb1.framebuffer_palette_num_colors);
1180 }
1181 #endif /* EFI */
1182 }
1183
1184 #if defined(EFI)
1185 /* Leave EFI memmap last as we will also switch off the BS. */
1186 {
1187 multiboot_tag_efi_mmap_t *tag;
1188 UINTN key;
1189 EFI_STATUS status;
1190
1191 tag = (multiboot_tag_efi_mmap_t *)
1192 mb_malloc(sizeof (*tag));
1193
1194 map_size = 0;
1195 status = BS->GetMemoryMap(&map_size,
1196 (EFI_MEMORY_DESCRIPTOR *)tag->mb_efi_mmap, &key,
1197 &desc_size, &tag->mb_descr_vers);
1198 if (status != EFI_BUFFER_TOO_SMALL) {
1199 error = EINVAL;
1200 goto error;
1201 }
1202 status = BS->GetMemoryMap(&map_size,
1203 (EFI_MEMORY_DESCRIPTOR *)tag->mb_efi_mmap, &key,
1204 &desc_size, &tag->mb_descr_vers);
1205 if (EFI_ERROR(status)) {
1206 error = EINVAL;
1207 goto error;
1208 }
1209 tag->mb_type = MULTIBOOT_TAG_TYPE_EFI_MMAP;
1210 tag->mb_size = sizeof (*tag) + map_size;
1211 tag->mb_descr_size = (uint32_t)desc_size;
1212
1213 map = (EFI_MEMORY_DESCRIPTOR *)tag->mb_efi_mmap;
1214
1215 if (keep_bs == 0) {
1216 status = BS->ExitBootServices(IH, key);
1217 if (EFI_ERROR(status)) {
1218 printf("Call to ExitBootServices failed\n");
1219 error = EINVAL;
1220 goto error;
1221 }
1222 }
1223
1224 last_addr += map_size;
1225 last_addr = roundup2(last_addr, MULTIBOOT_TAG_ALIGN);
1226 }
1227 #endif /* EFI */
1228
1229 /*
1230 * MB tag list end marker.
1231 */
1232 {
1233 multiboot_tag_t *tag = (multiboot_tag_t *)
1234 mb_malloc(sizeof (*tag));
1235 tag->mb_type = MULTIBOOT_TAG_TYPE_END;
1236 tag->mb_size = sizeof (*tag);
1237 }
1238
1239 mbi->mbi_total_size = last_addr - (vm_offset_t)mbi;
1240 mbi->mbi_reserved = 0;
1241
1242 #if defined(EFI)
1243 /*
1244 * At this point we have load_addr pointing to kernel load
1245 * address, module list in MBI having physical addresses,
1246 * module list in fp having logical addresses and tmp pointing to
1247 * physical address for MBI.
1248 * Now we must move all pieces to place and start the kernel.
1249 */
1250 head = &relocator->rel_chunk_head;
1251 STAILQ_INIT(head);
1252
1253 i = 0;
1254 chunk = &relocator->rel_chunklist[i++];
1255 chunk->chunk_vaddr = fp->f_addr;
1256 chunk->chunk_paddr = load_addr;
1257 chunk->chunk_size = fp->f_size;
1258
1259 STAILQ_INSERT_TAIL(head, chunk, chunk_next);
1260
1261 mp = module;
1262 for (mfp = fp->f_next; mfp != NULL; mfp = mfp->f_next) {
1263 chunk = &relocator->rel_chunklist[i++];
1264 chunk->chunk_vaddr = mfp->f_addr;
1265
1266 /*
1267 * fix the mb_mod_start and mb_mod_end.
1268 */
1269 mp->mb_mod_start = efi_physaddr(module, tmp, map,
1270 map_size / desc_size, desc_size, mp->mb_mod_end);
1271 if (mp->mb_mod_start == 0)
1272 panic("Could not find memory for module");
1273
1274 mp->mb_mod_end += mp->mb_mod_start;
1275 chunk->chunk_paddr = mp->mb_mod_start;
1276 chunk->chunk_size = mfp->f_size;
1277 STAILQ_INSERT_TAIL(head, chunk, chunk_next);
1278
1279 mp = (multiboot_tag_module_t *)
1280 roundup2((uintptr_t)mp + mp->mb_size,
1281 MULTIBOOT_TAG_ALIGN);
1282 }
1283 chunk = &relocator->rel_chunklist[i++];
1284 chunk->chunk_vaddr = (EFI_VIRTUAL_ADDRESS)(uintptr_t)mbi;
1285 chunk->chunk_paddr = efi_physaddr(module, tmp, map,
1286 map_size / desc_size, desc_size, mbi->mbi_total_size);
1287 chunk->chunk_size = mbi->mbi_total_size;
1288 STAILQ_INSERT_TAIL(head, chunk, chunk_next);
1289
1290 trampoline = (void *)(uintptr_t)relocator + EFI_PAGE_SIZE;
1291 memmove(trampoline, multiboot_tramp, EFI_PAGE_SIZE);
1292
1293 relocator->rel_copy = (uintptr_t)trampoline + EFI_PAGE_SIZE;
1294 memmove((void *)relocator->rel_copy, efi_copy_finish, EFI_PAGE_SIZE);
1295
1296 relocator->rel_memmove = (uintptr_t)relocator->rel_copy + EFI_PAGE_SIZE;
1297 memmove((void *)relocator->rel_memmove, memmove, EFI_PAGE_SIZE);
1298 relocator->rel_stack = relocator->rel_memmove + EFI_PAGE_SIZE - 8;
1299
1300 trampoline(MULTIBOOT2_BOOTLOADER_MAGIC, relocator, entry_addr);
1301 #else
1302 dev_cleanup();
1303 __exec((void *)VTOP(multiboot_tramp), MULTIBOOT2_BOOTLOADER_MAGIC,
1304 (void *)entry_addr, (void *)VTOP(mbi));
1305 #endif /* EFI */
1306 panic("exec returned");
1307
1308 error:
1309 free(cmdline);
1310
1311 #if defined(EFI)
1312 free(relocator);
1313
1314 if (mbi != NULL)
1315 efi_free_loadaddr((vm_offset_t)mbi, EFI_SIZE_TO_PAGES(size));
1316 #endif
1317
1318 return (error);
1319 }