/*
 * Copyright 2014, General Dynamics C4 Systems
 *
 * SPDX-License-Identifier: GPL-2.0-only
 */

#include <config.h>
#include <util.h>
#include <hardware.h>
#include <machine/io.h>
#include <arch/machine.h>
#include <arch/kernel/apic.h>
#include <arch/kernel/cmdline.h>
#include <arch/kernel/boot.h>
#include <arch/kernel/boot_sys.h>
#include <arch/kernel/smp_sys.h>
#include <arch/kernel/vspace.h>
#include <arch/kernel/elf.h>
#include <smp/lock.h>
#include <linker.h>
#include <plat/machine/acpi.h>
#include <plat/machine/devices.h>
#include <plat/machine/pic.h>
#include <plat/machine/ioapic.h>
#include <sel4/arch/bootinfo_types.h>

/* addresses defined in linker script */
/* need a fake array to get the pointer from the linker script */

/* start/end of CPU boot code */
extern char boot_cpu_start[1];
extern char boot_cpu_end[1];

/* start/end of boot stack */
extern char boot_stack_bottom[1];
extern char boot_stack_top[1];

/* locations in kernel image */
extern char ki_skim_start[1];
extern char ki_skim_end[1];

#ifdef CONFIG_PRINTING
/* kernel entry point */
extern char _start[1];
#endif

/* constants */

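/* Physical addresses below HIGHMEM_PADDR (the first MiB of "low memory") are
 * never added to the freemem list; they are left alone, e.g. for the real-mode
 * AP boot code that is copied below 1MiB. */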
#define HIGHMEM_PADDR 0x100000

BOOT_BSS
boot_state_t boot_state;

/* global variables (not covered by abstract specification) */

BOOT_BSS
cmdline_opt_t cmdline_opt;

/* functions not modeled in abstract specification */

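/* Find the first address, at or above min_paddr, at which one of the available
 * physical memory regions can hold image_size bytes; returns 0 if none can. */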
BOOT_CODE static paddr_t find_load_paddr(paddr_t min_paddr, word_t image_size)
{
    int i;

    for (i = 0; i < boot_state.mem_p_regs.count; i++) {
        paddr_t start = MAX(min_paddr, boot_state.mem_p_regs.list[i].start);
        paddr_t end = boot_state.mem_p_regs.list[i].end;
        word_t region_size = end - start;

        if (region_size >= image_size) {
            return start;
        }
    }

    return 0;
}

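/* ELF-load the boot module at boot_module_start into physical memory at or
 * above load_paddr. Returns the first physical address after the loaded image,
 * or 0 on failure. */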
BOOT_CODE static paddr_t load_boot_module(word_t boot_module_start, paddr_t load_paddr)
{
    v_region_t v_reg;
    word_t entry;
    Elf_Header_t *elf_file = (Elf_Header_t *)boot_module_start;

    if (!elf_checkFile(elf_file)) {
        printf("Boot module does not contain a valid ELF image\n");
        return 0;
    }

    v_reg = elf_getMemoryBounds(elf_file);
    entry = elf_file->e_entry;

    if (v_reg.end == 0) {
        printf("ELF image in boot module does not contain any segments\n");
        return 0;
    }
    v_reg.end = ROUND_UP(v_reg.end, PAGE_BITS);

    printf("size=0x%lx v_entry=%p v_start=%p v_end=%p ",
           v_reg.end - v_reg.start,
           (void *)entry,
           (void *)v_reg.start,
           (void *)v_reg.end
          );

    if (!IS_ALIGNED(v_reg.start, PAGE_BITS)) {
        printf("Userland image virtual start address must be 4KB-aligned\n");
        return 0;
    }
    if (v_reg.end + 2 * BIT(PAGE_BITS) > USER_TOP) {
        /* for IPC buffer frame and bootinfo frame, need 2*4K of additional userland virtual memory */
        printf("Userland image virtual end address too high\n");
        return 0;
    }
    if ((entry < v_reg.start) || (entry >= v_reg.end)) {
        printf("Userland image entry point does not lie within userland image\n");
        return 0;
    }

    load_paddr = find_load_paddr(load_paddr, v_reg.end - v_reg.start);
    assert(load_paddr);

    /* fill ui_info struct */
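    /* pv_offset is defined such that paddr = vaddr + pv_offset */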
    boot_state.ui_info.pv_offset = load_paddr - v_reg.start;
    boot_state.ui_info.p_reg.start = load_paddr;
    load_paddr += v_reg.end - v_reg.start;
    boot_state.ui_info.p_reg.end = load_paddr;
    boot_state.ui_info.v_entry = entry;

    printf("p_start=0x%lx p_end=0x%lx\n",
           boot_state.ui_info.p_reg.start,
           boot_state.ui_info.p_reg.end
          );

    /* initialise all initial userland memory and load potentially sparse ELF image */
    memzero(
        (void *)boot_state.ui_info.p_reg.start,
        boot_state.ui_info.p_reg.end - boot_state.ui_info.p_reg.start
    );
    elf_load(elf_file, boot_state.ui_info.pv_offset);

    return load_paddr;
}

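/* Set up one node: map the kernel window (and, if configured, the SKIM
 * window), initialise the CPU, then initialise the remaining kernel state. */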
static BOOT_CODE bool_t try_boot_sys_node(cpu_id_t cpu_id)
{
    p_region_t boot_mem_reuse_p_reg;

    if (!map_kernel_window(
            boot_state.num_ioapic,
            boot_state.ioapic_paddr,
            boot_state.num_drhu,
            boot_state.drhu_list
        )) {
        return false;
    }
    setCurrentVSpaceRoot(kpptr_to_paddr(X86_KERNEL_VSPACE_ROOT), 0);
    /* Sync up the compiler's view of the world here to force the PD to actually
     * be set *right now* instead of delayed */
    asm volatile("" ::: "memory");

#ifdef CONFIG_KERNEL_SKIM_WINDOW
    if (!map_skim_window((vptr_t)ki_skim_start, (vptr_t)ki_skim_end)) {
        return false;
    }
#endif

    /* reuse boot code/data memory */
    boot_mem_reuse_p_reg.start = KERNEL_ELF_PADDR_BASE;
    boot_mem_reuse_p_reg.end = kpptr_to_paddr(ki_boot_end);

    /* initialise the CPU */
    if (!init_cpu(config_set(CONFIG_IRQ_IOAPIC) ? 1 : 0)) {
        return false;
    }

    /* initialise NDKS and kernel heap */
    if (!init_sys_state(
            cpu_id,
            &boot_state.mem_p_regs,
            boot_state.ui_info,
            boot_mem_reuse_p_reg,
            /* parameters below not modeled in abstract specification */
            boot_state.num_drhu,
            boot_state.drhu_list,
            &boot_state.rmrr_list,
            &boot_state.acpi_rsdp,
            &boot_state.vbe_info,
            &boot_state.mb_mmap_info,
            &boot_state.fb_info
        )) {
        return false;
    }

    return true;
}

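/* Record a physical memory region as available, clamping it to the part that
 * lies inside the kernel window (below PADDR_TOP). */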
static BOOT_CODE bool_t add_mem_p_regs(p_region_t reg)
{
    if (reg.end > PADDR_TOP && reg.start > PADDR_TOP) {
        /* Return true here as it's not an error for there to exist memory outside the kernel window,
         * we're just going to ignore it and leave it to be given out as device memory */
        return true;
    }
    if (boot_state.mem_p_regs.count == MAX_NUM_FREEMEM_REG) {
        printf("Dropping memory region 0x%lx-0x%lx, try increasing MAX_NUM_FREEMEM_REG\n", reg.start, reg.end);
        return false;
    }
    if (reg.end > PADDR_TOP) {
        assert(reg.start <= PADDR_TOP);
        /* Clamp a region to the top of the kernel window if it extends beyond */
        reg.end = PADDR_TOP;
    }
    printf("Adding physical memory region 0x%lx-0x%lx\n", reg.start, reg.end);
    boot_state.mem_p_regs.list[boot_state.mem_p_regs.count] = reg;
    boot_state.mem_p_regs.count++;
    return true;
}

/*
 * This code relies on GRUB providing correct information about the actual
 * physical memory regions.
 */
static BOOT_CODE bool_t parse_mem_map(uint32_t mmap_length, uint32_t mmap_addr)
{
    multiboot_mmap_t *mmap = (multiboot_mmap_t *)((word_t)mmap_addr);
    printf("Parsing GRUB physical memory map\n");

    while ((word_t)mmap < (word_t)(mmap_addr + mmap_length)) {
        uint64_t mem_start = mmap->base_addr;
        uint64_t mem_length = mmap->length;
        uint32_t type = mmap->type;
        if (mem_start != (uint64_t)(word_t)mem_start) {
            printf("\tPhysical memory region not addressable\n");
        } else {
            printf("\tPhysical Memory Region from %lx size %lx type %d\n", (long)mem_start, (long)mem_length, type);
            if (type == MULTIBOOT_MMAP_USEABLE_TYPE && mem_start >= HIGHMEM_PADDR) {
                if (!add_mem_p_regs((p_region_t) {
                    mem_start, mem_start + mem_length
                })) {
                    return false;
                }
            }
        }
        mmap++;
    }
    return true;
}

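/* Check that the kernel was not built for a newer microarchitecture than the
 * CPU it is running on: each microarchitecture config maps to a generation
 * number, and the CPU's reported model must be of that generation or later.
 * Unknown models are only accepted by a CONFIG_ARCH_X86_GENERIC build. */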
static BOOT_CODE bool_t is_compiled_for_microarchitecture(void)
{
    word_t microarch_generation = 0;
    x86_cpu_identity_t *model_info = x86_cpuid_get_model_info();

    if (config_set(CONFIG_ARCH_X86_SKYLAKE)) {
        microarch_generation = 7;
    } else if (config_set(CONFIG_ARCH_X86_BROADWELL)) {
        microarch_generation = 6;
    } else if (config_set(CONFIG_ARCH_X86_HASWELL)) {
        microarch_generation = 5;
    } else if (config_set(CONFIG_ARCH_X86_IVY)) {
        microarch_generation = 4;
    } else if (config_set(CONFIG_ARCH_X86_SANDY)) {
        microarch_generation = 3;
    } else if (config_set(CONFIG_ARCH_X86_WESTMERE)) {
        microarch_generation = 2;
    } else if (config_set(CONFIG_ARCH_X86_NEHALEM)) {
        microarch_generation = 1;
    }

    switch (model_info->model) {
    case SKYLAKE_1_MODEL_ID:
    case SKYLAKE_2_MODEL_ID:
        if (microarch_generation > 7) {
            return false;
        }
        break;

    case BROADWELL_1_MODEL_ID:
    case BROADWELL_2_MODEL_ID:
    case BROADWELL_3_MODEL_ID:
    case BROADWELL_4_MODEL_ID:
    case BROADWELL_5_MODEL_ID:
        if (microarch_generation > 6) {
            return false;
        }
        break;

    case HASWELL_1_MODEL_ID:
    case HASWELL_2_MODEL_ID:
    case HASWELL_3_MODEL_ID:
    case HASWELL_4_MODEL_ID:
        if (microarch_generation > 5) {
            return false;
        }
        break;

    case IVY_BRIDGE_1_MODEL_ID:
    case IVY_BRIDGE_2_MODEL_ID:
    case IVY_BRIDGE_3_MODEL_ID:
        if (microarch_generation > 4) {
            return false;
        }
        break;

    case SANDY_BRIDGE_1_MODEL_ID:
    case SANDY_BRIDGE_2_MODEL_ID:
        if (microarch_generation > 3) {
            return false;
        }
        break;

    case WESTMERE_1_MODEL_ID:
    case WESTMERE_2_MODEL_ID:
    case WESTMERE_3_MODEL_ID:
        if (microarch_generation > 2) {
            return false;
        }
        break;

    case NEHALEM_1_MODEL_ID:
    case NEHALEM_2_MODEL_ID:
    case NEHALEM_3_MODEL_ID:
        if (microarch_generation > 1) {
            return false;
        }
        break;

    default:
        if (!config_set(CONFIG_ARCH_X86_GENERIC)) {
            return false;
        }
    }

    return true;
}

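/* Boot the system on the boot processor: sanity-check the CPU, load and
 * relocate the userland image, then bring up the first node and any APs. */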
static BOOT_CODE bool_t try_boot_sys(void)
{
    paddr_t mods_end_paddr = boot_state.mods_end_paddr;
    p_region_t ui_p_regs;
    paddr_t load_paddr;

    boot_state.ki_p_reg.start = KERNEL_ELF_PADDR_BASE;
    boot_state.ki_p_reg.end = kpptr_to_paddr(ki_end);

    if (!x86_cpuid_initialize()) {
        printf("Warning: Your x86 CPU has an unsupported vendor, '%s'.\n"
               "\tYour setup may not be able to competently run seL4 as "
               "intended.\n"
               "\tCurrently supported x86 vendors are AMD and Intel.\n",
               x86_cpuid_get_identity()->vendor_string);
    }

    if (!is_compiled_for_microarchitecture()) {
        printf("Warning: Your kernel was not compiled for the current microarchitecture.\n");
    }

    cpuid_007h_edx_t edx;
    edx.words[0] = x86_cpuid_edx(0x7, 0);
    /* see if we can definitively say whether or not we need the skim window by
     * checking whether the CPU is vulnerable to rogue data cache loads (rdcl) */
    if (cpuid_007h_edx_get_ia32_arch_cap_msr(edx)) {
        ia32_arch_capabilities_msr_t cap_msr;
        cap_msr.words[0] = x86_rdmsr(IA32_ARCH_CAPABILITIES_MSR);
        if (ia32_arch_capabilities_msr_get_rdcl_no(cap_msr) && config_set(CONFIG_KERNEL_SKIM_WINDOW)) {
            printf("CPU reports not vulnerable to Rogue Data Cache Load (aka Meltdown https://meltdownattack.com) "
                   "yet SKIM window is enabled. Performance is needlessly being impacted, consider disabling.\n");
        } else if (!ia32_arch_capabilities_msr_get_rdcl_no(cap_msr) && !config_set(CONFIG_KERNEL_SKIM_WINDOW)) {
            printf("CPU reports vulnerable to Rogue Data Cache Load (aka Meltdown https://meltdownattack.com) "
                   "yet SKIM window is *not* enabled. Please re-build with SKIM window enabled.");
            return false;
        }
    } else {
        /* hardware doesn't tell us directly so guess based on CPU vendor */
        if (config_set(CONFIG_KERNEL_SKIM_WINDOW) && x86_cpuid_get_identity()->vendor == X86_VENDOR_AMD) {
            printf("SKIM window for mitigating Meltdown (https://www.meltdownattack.com) "
                   "not necessary for AMD and performance is being needlessly affected, "
                   "consider disabling\n");
        }
        if (!config_set(CONFIG_KERNEL_SKIM_WINDOW) && x86_cpuid_get_identity()->vendor == X86_VENDOR_INTEL) {
            printf("***WARNING*** SKIM window not enabled, this machine is probably vulnerable "
                   "to Meltdown (https://www.meltdownattack.com), consider enabling\n");
        }
    }

#ifdef ENABLE_SMP_SUPPORT
    /* copy boot code for APs to lower memory to run in real mode */
    if (!copy_boot_code_aps(boot_state.mem_lower)) {
        return false;
    }
    /* Initialize any kernel TLS */
    mode_init_tls(0);
#endif /* ENABLE_SMP_SUPPORT */

    printf("Kernel loaded to: start=0x%lx end=0x%lx size=0x%lx entry=0x%lx\n",
           boot_state.ki_p_reg.start,
           boot_state.ki_p_reg.end,
           boot_state.ki_p_reg.end - boot_state.ki_p_reg.start,
           (paddr_t)_start
          );

    /* remapping legacy IRQs to their correct vectors */
    pic_remap_irqs(IRQ_INT_OFFSET);
    if (config_set(CONFIG_IRQ_IOAPIC)) {
        /* Disable the PIC so that it does not generate any interrupts. We need to
         * do this *before* we initialize the apic */
        pic_disable();
    }

    /* validate the ACPI table */
    if (!acpi_validate_rsdp(&boot_state.acpi_rsdp)) {
        return false;
    }

    /* check if the kernel configuration matches platform requirements */
    if (!acpi_fadt_scan(&boot_state.acpi_rsdp)) {
        return false;
    }

    if (!config_set(CONFIG_IOMMU) || cmdline_opt.disable_iommu) {
        boot_state.num_drhu = 0;
    } else {
        /* query available IOMMUs from ACPI */
        acpi_dmar_scan(
            &boot_state.acpi_rsdp,
            boot_state.drhu_list,
            &boot_state.num_drhu,
            MAX_NUM_DRHU,
            &boot_state.rmrr_list
        );
    }

    /* query available CPUs from ACPI */
    boot_state.num_cpus = acpi_madt_scan(&boot_state.acpi_rsdp, boot_state.cpus, &boot_state.num_ioapic,
                                         boot_state.ioapic_paddr);
    if (boot_state.num_cpus == 0) {
        printf("No CPUs detected\n");
        return false;
    }

    if (config_set(CONFIG_IRQ_IOAPIC)) {
        if (boot_state.num_ioapic == 0) {
            printf("No IOAPICs detected\n");
            return false;
        }
    } else {
        if (boot_state.num_ioapic > 0) {
            printf("Detected %d IOAPICs, but configured to use PIC instead\n", boot_state.num_ioapic);
        }
    }

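    /* the boot modules sit above the kernel image (asserted below); their
     * page-rounded end is the first candidate load address for userland */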
    mods_end_paddr = ROUND_UP(mods_end_paddr, PAGE_BITS);
    assert(mods_end_paddr > boot_state.ki_p_reg.end);

    printf("ELF-loading userland images from boot modules:\n");
    load_paddr = mods_end_paddr;

    load_paddr = load_boot_module(boot_state.boot_module_start, load_paddr);
    if (!load_paddr) {
        return false;
    }

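    /* The image was ELF-loaded above the boot modules, so loading could not
     * overwrite the module it was being read from; now move it down so it
     * starts directly after the kernel image. */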
    /* calculate final location of userland images */
    ui_p_regs.start = boot_state.ki_p_reg.end;
    ui_p_regs.end = ui_p_regs.start + load_paddr - mods_end_paddr;

    printf(
        "Moving loaded userland images to final location: from=0x%lx to=0x%lx size=0x%lx\n",
        mods_end_paddr,
        ui_p_regs.start,
        ui_p_regs.end - ui_p_regs.start
    );
    memcpy((void *)ui_p_regs.start, (void *)mods_end_paddr, ui_p_regs.end - ui_p_regs.start);

    /* adjust p_reg and pv_offset to final load address */
    boot_state.ui_info.p_reg.start -= mods_end_paddr - ui_p_regs.start;
    boot_state.ui_info.p_reg.end   -= mods_end_paddr - ui_p_regs.start;
    boot_state.ui_info.pv_offset   -= mods_end_paddr - ui_p_regs.start;

    /* ==== following code corresponds to abstract specification after "select" ==== */

    if (!platAddDevices()) {
        return false;
    }

    /* Total number of cores we intend to boot */
    ksNumCPUs = boot_state.num_cpus;

    printf("Starting node #0 with APIC ID %lu\n", boot_state.cpus[0]);
    if (!try_boot_sys_node(boot_state.cpus[0])) {
        return false;
    }

    if (config_set(CONFIG_IRQ_IOAPIC)) {
        ioapic_init(1, boot_state.cpus, boot_state.num_ioapic);
    }

    /* initialize BKL before booting up APs */
    SMP_COND_STATEMENT(clh_lock_init());
    SMP_COND_STATEMENT(start_boot_aps());

    /* grab BKL before leaving the kernel */
    NODE_LOCK_SYS;

    printf("Booting all finished, dropped to user space\n");

    return true;
}

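/* Boot via a multiboot 1 info structure: parse the command line, boot modules,
 * memory map and VBE information, then locate the ACPI RSDP. */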
static BOOT_CODE bool_t try_boot_sys_mbi1(
    multiboot_info_t *mbi
)
{
    word_t i;
    multiboot_module_t *modules = (multiboot_module_t *)(word_t)mbi->part1.mod_list;

    cmdline_parse((const char *)(word_t)mbi->part1.cmdline, &cmdline_opt);

    if ((mbi->part1.flags & MULTIBOOT_INFO_MEM_FLAG) == 0) {
        printf("Boot loader did not provide information about physical memory size\n");
        return false;
    }

    if (!(mbi->part1.flags & MULTIBOOT_INFO_MODS_FLAG)) {
        printf("Boot loader did not provide information about boot modules\n");
        return false;
    }

    printf("Detected %d boot module(s):\n", mbi->part1.mod_count);

    if (mbi->part1.mod_count < 1) {
        printf("Expect at least one boot module (containing a userland image)\n");
        return false;
    }

    for (i = 0; i < mbi->part1.mod_count; i++) {
        printf(
            "  module #%ld: start=0x%x end=0x%x size=0x%x name='%s'\n",
            i,
            modules[i].start,
            modules[i].end,
            modules[i].end - modules[i].start,
            (char *)(long)modules[i].name
        );
        if ((sword_t)(modules[i].end - modules[i].start) <= 0) {
            printf("Invalid boot module size! Possible cause: boot module file not found by QEMU\n");
            return false;
        }
        if (boot_state.mods_end_paddr < modules[i].end) {
            boot_state.mods_end_paddr = modules[i].end;
        }
    }

    /* initialize the memory. We track two kinds of memory regions. Physical memory
     * that we will use for the kernel, and physical memory regions that we must
     * not give to the user. Memory regions that must not be given to the user
     * include all the physical memory in the kernel window, but also includes any
     * important or kernel devices. */
    boot_state.mem_p_regs.count = 0;
    if (mbi->part1.flags & MULTIBOOT_INFO_MMAP_FLAG) {
        if (!parse_mem_map(mbi->part2.mmap_length, mbi->part2.mmap_addr)) {
            return false;
        }
        uint32_t multiboot_mmap_length = mbi->part2.mmap_length;
        if (multiboot_mmap_length > (SEL4_MULTIBOOT_MAX_MMAP_ENTRIES * sizeof(seL4_X86_mb_mmap_t))) {
            printf("Warning: Multiboot has reported more memory map entries, %zd, "
                   "than the max amount that will be passed in the bootinfo, %d. "
                   "These extra regions will still be turned into untyped caps.",
                   multiboot_mmap_length / sizeof(seL4_X86_mb_mmap_t), SEL4_MULTIBOOT_MAX_MMAP_ENTRIES);
            multiboot_mmap_length = SEL4_MULTIBOOT_MAX_MMAP_ENTRIES * sizeof(seL4_X86_mb_mmap_t);
        }
        memcpy(&boot_state.mb_mmap_info.mmap, (void *)(word_t)mbi->part2.mmap_addr, multiboot_mmap_length);
        boot_state.mb_mmap_info.mmap_length = multiboot_mmap_length;
    } else {
        /* calculate memory the old way */
        p_region_t avail;
        avail.start = HIGHMEM_PADDR;
        avail.end = ROUND_DOWN(avail.start + (mbi->part1.mem_upper << 10), PAGE_BITS);
        if (!add_mem_p_regs(avail)) {
            return false;
        }
    }

    /* copy VESA information from multiboot header */
    if ((mbi->part1.flags & MULTIBOOT_INFO_GRAPHICS_FLAG) == 0) {
        boot_state.vbe_info.vbeMode = -1;
        printf("Multiboot gave us no video information\n");
    } else {
        boot_state.vbe_info.vbeInfoBlock = *(seL4_VBEInfoBlock_t *)(seL4_Word)mbi->part2.vbe_control_info;
        boot_state.vbe_info.vbeModeInfoBlock = *(seL4_VBEModeInfoBlock_t *)(seL4_Word)mbi->part2.vbe_mode_info;
        boot_state.vbe_info.vbeMode = mbi->part2.vbe_mode;
        printf("Got VBE info in multiboot. Current video mode is %d\n", mbi->part2.vbe_mode);
        boot_state.vbe_info.vbeInterfaceSeg = mbi->part2.vbe_interface_seg;
        boot_state.vbe_info.vbeInterfaceOff = mbi->part2.vbe_interface_off;
        boot_state.vbe_info.vbeInterfaceLen = mbi->part2.vbe_interface_len;
    }

    boot_state.mem_lower = mbi->part1.mem_lower;
    boot_state.boot_module_start = modules->start;

    /* Initialize ACPI */
    if (!acpi_init(&boot_state.acpi_rsdp)) {
        return false;
    }

    return true;
}

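/* Boot via a multiboot 2 info structure by walking its tag list: command line,
 * ACPI RSDP, boot modules, memory map and framebuffer information. */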
static BOOT_CODE bool_t try_boot_sys_mbi2(
    multiboot2_header_t *mbi2
)
{
    int mod_count                  = 0;
    multiboot2_tag_t const *tag   = (multiboot2_tag_t *)(mbi2 + 1);
    multiboot2_tag_t const *tag_e = (multiboot2_tag_t *)((word_t)mbi2 + mbi2->total_size);

    /* initialize the memory. We track two kinds of memory regions. Physical memory
     * that we will use for the kernel, and physical memory regions that we must
     * not give to the user. Memory regions that must not be given to the user
     * include all the physical memory in the kernel window, but also includes any
     * important or kernel devices. */
    boot_state.mem_p_regs.count = 0;
    boot_state.mb_mmap_info.mmap_length = 0;
    boot_state.vbe_info.vbeMode = -1;

    while (tag < tag_e && tag->type != MULTIBOOT2_TAG_END) {
        word_t const behind_tag = (word_t)tag + sizeof(*tag);

        if (tag->type == MULTIBOOT2_TAG_CMDLINE) {
            char const *const cmdline = (char const * const)(behind_tag);
            cmdline_parse(cmdline, &cmdline_opt);
        } else if (tag->type == MULTIBOOT2_TAG_ACPI_1) {
            if (ACPI_V1_SIZE == tag->size - sizeof(*tag)) {
                memcpy(&boot_state.acpi_rsdp, (void *)behind_tag, tag->size - sizeof(*tag));
            }
        } else if (tag->type == MULTIBOOT2_TAG_ACPI_2) {
            if (sizeof(boot_state.acpi_rsdp) == tag->size - sizeof(*tag)) {
                memcpy(&boot_state.acpi_rsdp, (void *)behind_tag, sizeof(boot_state.acpi_rsdp));
            }
        } else if (tag->type == MULTIBOOT2_TAG_MODULE) {
            multiboot2_module_t const *module = (multiboot2_module_t const *)behind_tag;
            printf(
                "  module #%d: start=0x%x end=0x%x size=0x%x name='%s'\n",
                mod_count,
                module->start,
                module->end,
                module->end - module->start,
                module->string
            );

            if (mod_count == 0) {
                boot_state.boot_module_start = module->start;
            }

            mod_count++;
            if ((sword_t)(module->end - module->start) <= 0) {
                printf("Invalid boot module size! Possible cause: boot module file not found\n");
                return false;
            }
            if (boot_state.mods_end_paddr < module->end) {
                boot_state.mods_end_paddr = module->end;
            }
        } else if (tag->type == MULTIBOOT2_TAG_MEMORY) {
            multiboot2_memory_t const *s = (multiboot2_memory_t *)(behind_tag + 8);
            multiboot2_memory_t const *e = (multiboot2_memory_t *)((word_t)tag + tag->size);

            for (multiboot2_memory_t const *m = s; m < e; m++) {
                if (!m->addr) {
                    boot_state.mem_lower = m->size;
                }

                printf("\tPhysical Memory Region from %llx size %llx type %u\n", m->addr, m->size, m->type);
                if (m->addr != (uint64_t)(word_t)m->addr) {
                    printf("\t\tPhysical memory region not addressable\n");
                }

                if (m->type == MULTIBOOT_MMAP_USEABLE_TYPE && m->addr >= HIGHMEM_PADDR) {
                    if (!add_mem_p_regs((p_region_t) {
                        m->addr, m->addr + m->size
                    })) {
                        return false;
                    }
                }
            }
        } else if (tag->type == MULTIBOOT2_TAG_FB) {
            multiboot2_fb_t const *fb = (multiboot2_fb_t const *)behind_tag;
            printf("Got framebuffer info in multiboot2. Current video mode is at physical address=%llx pitch=%u resolution=%ux%u@%u type=%u\n",
                   fb->addr, fb->pitch, fb->width, fb->height, fb->bpp, fb->type);
            boot_state.fb_info = *fb;
        }

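        /* advance to the next tag; tags are 8-byte aligned (ROUND_UP's second
         * argument is log2 of the alignment, so 3 means 2^3 = 8 bytes) */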
        tag = (multiboot2_tag_t const *)((word_t)tag + ROUND_UP(tag->size, 3));
    }

    printf("Detected %d boot module(s):\n", mod_count);

    if (mod_count < 1) {
        printf("Expect at least one boot module (containing a userland image)\n");
        return false;
    }

    return true;
}

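/* C entry point on the boot processor, called from the assembly entry code
 * with the multiboot magic number and a pointer to the multiboot info. */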
BOOT_CODE VISIBLE void boot_sys(
    unsigned long multiboot_magic,
    void *mbi)
{
    bool_t result = false;

    if (multiboot_magic == MULTIBOOT_MAGIC) {
        result = try_boot_sys_mbi1(mbi);
    } else if (multiboot_magic == MULTIBOOT2_MAGIC) {
        result = try_boot_sys_mbi2(mbi);
    } else {
        printf("Boot loader is not multiboot 1 or 2 compliant %lx\n", multiboot_magic);
    }

    if (result) {
        result = try_boot_sys();
    }

    if (!result) {
        fail("boot_sys failed for some reason :(\n");
    }

    ARCH_NODE_STATE(x86KScurInterrupt) = int_invalid;
    ARCH_NODE_STATE(x86KSPendingInterrupt) = int_invalid;

#ifdef CONFIG_KERNEL_MCS
    NODE_STATE(ksCurTime) = getCurrentTime();
    NODE_STATE(ksConsumed) = 0;
#endif

    schedule();
    activateThread();
}