/*
 * Copyright (C) 2021-2022 Intel Corporation.
 *
 * SPDX-License-Identifier: BSD-3-Clause
 */

#include <asm/guest/vm.h>
#include <asm/e820.h>
#include <asm/zeropage.h>
#include <asm/guest/ept.h>
#include <asm/mmu.h>
#include <boot.h>
#include <vboot.h>
#include <vacpi.h>
#include <efi_mmap.h>
#include <errno.h>
#include <logmsg.h>

#define DBG_LEVEL_VM_BZIMAGE	6U

/* Define a memory block to store LaaG VM load params in guest address space.
 * The params include:
 *	Init GDT entries : 1KB (must be 8-byte aligned)
 *	Linux Zeropage : 4KB
 *	Boot cmdline : 2KB
 *	EFI memory map : MAX_EFI_MMAP_ENTRIES * sizeof(struct efi_memory_desc)
 *	Reserved region for trampoline code : 8KB
 * Each param should be kept 8-byte aligned and the total region should fit below MEM_1M.
 * Please note that in a Linux VM, the last 8KB of space below MEM_1M is for trampoline code.
 * The block should be able to accommodate it so as to avoid trampoline corruption. So the
 * params size is:
 * (MEM_1K + MEM_4K + MEM_2K + 40B * MAX_EFI_MMAP_ENTRIES + MEM_8K)
 */
#define BZIMG_LOAD_PARAMS_SIZE			(MEM_1K * 15U + MAX_EFI_MMAP_ENTRIES * sizeof(struct efi_memory_desc))
#define BZIMG_INITGDT_GPA(load_params_gpa)	((load_params_gpa) + 0UL)
#define BZIMG_ZEROPAGE_GPA(load_params_gpa)	((load_params_gpa) + MEM_1K)
#define BZIMG_CMDLINE_GPA(load_params_gpa)	((load_params_gpa) + MEM_1K + MEM_4K)
#define BZIMG_EFIMMAP_GPA(load_params_gpa)	((load_params_gpa) + MEM_1K + MEM_4K + MEM_2K)
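/* The macros above imply the following layout within the load params block
 * (offsets from load_params_gpa):
 *	+0x0000 : init GDT entries (1KB)
 *	+0x0400 : Linux zeropage (4KB)
 *	+0x1400 : boot cmdline (2KB)
 *	+0x1C00 : EFI memory map (MAX_EFI_MMAP_ENTRIES * 40B)
 * with the remaining 8KB tail reserved for the trampoline code.
 */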

/* TODO:
 * The value is referenced from the Linux boot protocol for old kernels,
 * but this should be configurable for different OSes. */
#define DEFAULT_RAMDISK_GPA_MAX		0x37ffffffUL

#define PRE_VM_MAX_RAM_ADDR_BELOW_4GB		(VIRT_ACPI_DATA_ADDR - 1UL)

static void *get_initrd_load_addr(struct acrn_vm *vm, uint64_t kernel_start)
{
	uint64_t ramdisk_load_gpa = INVALID_GPA;
	uint64_t ramdisk_gpa_max = DEFAULT_RAMDISK_GPA_MAX;
	struct zero_page *zeropage = (struct zero_page *)vm->sw.kernel_info.kernel_src_addr;
	uint32_t kernel_init_size, kernel_align, initrd_addr_max;
	uint64_t kernel_end;

	/* Per the Linux boot protocol, the kernel needs a contiguous chunk of
	 * memory (i.e. the init_size field in the zeropage) starting from its extract
	 * address in order to boot, and the initrd_addr_max field specifies the maximum
	 * address of the ramdisk. Per the kernel source (head_64.S), the decompressed
	 * kernel starts 2M-aligned relative to the compressed kernel load address.
	 */
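	/* stac()/clac() set/clear EFLAGS.AC so the hypervisor can temporarily
	 * access SMAP-protected pages while reading the guest zeropage header.
	 */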
	stac();
	kernel_init_size = zeropage->hdr.init_size;
	kernel_align = zeropage->hdr.kernel_alignment;
	initrd_addr_max = zeropage->hdr.initrd_addr_max;
	clac();
	kernel_end = roundup(kernel_start, kernel_align) + kernel_init_size;

	if (initrd_addr_max != 0U) {
		ramdisk_gpa_max = initrd_addr_max;
	}

	if (is_service_vm(vm)) {
		uint64_t mods_start, mods_end;

		get_boot_mods_range(&mods_start, &mods_end);
		mods_start = service_vm_hpa2gpa(mods_start);
		mods_end = service_vm_hpa2gpa(mods_end);

		if (vm->sw.ramdisk_info.src_addr != NULL) {
			ramdisk_load_gpa = service_vm_hpa2gpa((uint64_t)vm->sw.ramdisk_info.src_addr);
		}

		/* For the Service VM, the ramdisk has already been loaded by the bootloader,
		 * so in most cases there is no need to do a gpa copy again. But in the case
		 * that the bootloader loaded the ramdisk at an address higher than its limit,
		 * we should do a gpa copy.
		 */
		if ((ramdisk_load_gpa + vm->sw.ramdisk_info.size) > ramdisk_gpa_max) {
			/* In this case, mods_end must be higher than ramdisk_gpa_max,
			 * so try to locate the ramdisk between MEM_1M and mods_start/kernel_start,
			 * or try the range between kernel_end and mods_start.
			 */
			ramdisk_load_gpa = find_space_from_ve820(vm, vm->sw.ramdisk_info.size,
					MEM_1M, min(min(mods_start, kernel_start), ramdisk_gpa_max));
			if ((ramdisk_load_gpa == INVALID_GPA) && (kernel_end < min(mods_start, ramdisk_gpa_max))) {
				ramdisk_load_gpa = find_space_from_ve820(vm, vm->sw.ramdisk_info.size,
						kernel_end, min(mods_start, ramdisk_gpa_max));
			}
		}
	} else {
		/* For a pre-launched VM, the ramdisk load address is found by searching
		 * the ve820 table.
		 */
		ramdisk_gpa_max = min(PRE_VM_MAX_RAM_ADDR_BELOW_4GB, ramdisk_gpa_max);

		if (kernel_end < ramdisk_gpa_max) {
			ramdisk_load_gpa = find_space_from_ve820(vm, vm->sw.ramdisk_info.size,
					kernel_end, ramdisk_gpa_max);
		}
		if (ramdisk_load_gpa == INVALID_GPA) {
			ramdisk_load_gpa = find_space_from_ve820(vm, vm->sw.ramdisk_info.size,
					MEM_1M, min(kernel_start, ramdisk_gpa_max));
		}
	}

	if (ramdisk_load_gpa == INVALID_GPA) {
		pr_err("no space in guest memory to load VM %d ramdisk", vm->vm_id);
		vm->sw.ramdisk_info.size = 0U;
	}
	dev_dbg(DBG_LEVEL_VM_BZIMAGE, "VM%d ramdisk load_addr: 0x%lx", vm->vm_id, ramdisk_load_gpa);

	return (ramdisk_load_gpa == INVALID_GPA) ? NULL : (void *)ramdisk_load_gpa;
}

/**
 * @pre vm != NULL
 */
static void *get_bzimage_kernel_load_addr(struct acrn_vm *vm)
{
	void *load_addr = NULL;
	struct vm_sw_info *sw_info = &vm->sw;
	struct zero_page *zeropage;

	/* According to the explanation of pref_address in
	 * Documentation/x86/boot.txt, a relocating bootloader should attempt
	 * to load the kernel at pref_address if possible. A non-relocatable
	 * kernel will unconditionally move itself to run at this address.
	 */
	zeropage = (struct zero_page *)sw_info->kernel_info.kernel_src_addr;

	stac();
	if ((is_service_vm(vm)) && (zeropage->hdr.relocatable_kernel != 0U)) {
		uint64_t mods_start, mods_end;
		uint64_t kernel_load_gpa = INVALID_GPA;
		uint32_t kernel_align = zeropage->hdr.kernel_alignment;
		uint32_t kernel_init_size = zeropage->hdr.init_size;
		/* Because the kernel load address needs to be aligned up to kernel_align,
		 * whereas find_space_from_ve820() can only return a page aligned address,
		 * we enlarge the needed size to (kernel_init_size + kernel_align).
		 */
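		/* e.g. with a 2M kernel_align, any region of (init_size + 2M) bytes is
		 * guaranteed to contain a 2M-aligned sub-region of init_size bytes.
		 */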
		uint32_t kernel_size = kernel_init_size + kernel_align;

		get_boot_mods_range(&mods_start, &mods_end);
		mods_start = service_vm_hpa2gpa(mods_start);
		mods_end = service_vm_hpa2gpa(mods_end);

		/* TODO: support loading the kernel when modules are beyond the 4GB space. */
		if (mods_end < MEM_4G) {
			kernel_load_gpa = find_space_from_ve820(vm, kernel_size, MEM_1M, mods_start);

			if (kernel_load_gpa == INVALID_GPA) {
				kernel_load_gpa = find_space_from_ve820(vm, kernel_size, mods_end, MEM_4G);
			}
		}

		if (kernel_load_gpa != INVALID_GPA) {
			load_addr = (void *)roundup((uint64_t)kernel_load_gpa, kernel_align);
		}
	} else {
		load_addr = (void *)zeropage->hdr.pref_addr;
		if (is_service_vm(vm)) {
			/* A non-relocatable Service VM kernel might overlap with boot modules. */
			pr_err("Non-relocatable kernel found, risk to boot!");
		}
	}
	clac();

	if (load_addr == NULL) {
		pr_err("Could not get kernel load addr of VM %d.", vm->vm_id);
	}

	dev_dbg(DBG_LEVEL_VM_BZIMAGE, "VM%d kernel load_addr: 0x%lx", vm->vm_id, load_addr);
	return load_addr;
}

#ifdef CONFIG_MULTIBOOT2
/**
 * @pre vm != NULL && efi_mmap_desc != NULL
 */
static uint16_t create_service_vm_efi_mmap_desc(struct acrn_vm *vm, struct efi_memory_desc *efi_mmap_desc)
{
	uint16_t i, desc_idx = 0U;
	const struct efi_memory_desc *hv_efi_mmap_desc = get_efi_mmap_entry();

	for (i = 0U; i < (uint16_t)get_efi_mmap_entries_count(); i++) {
		/* The native efi mmap desc types below should be kept as-is for the Service VM */
		if ((hv_efi_mmap_desc[i].type == EFI_RESERVED_MEMORYTYPE)
				|| (hv_efi_mmap_desc[i].type == EFI_UNUSABLE_MEMORY)
				|| (hv_efi_mmap_desc[i].type == EFI_ACPI_RECLAIM_MEMORY)
				|| (hv_efi_mmap_desc[i].type == EFI_ACPI_MEMORY_NVS)
				|| (hv_efi_mmap_desc[i].type == EFI_BOOT_SERVICES_CODE)
				|| (hv_efi_mmap_desc[i].type == EFI_BOOT_SERVICES_DATA)
				|| (hv_efi_mmap_desc[i].type == EFI_RUNTIME_SERVICES_CODE)
				|| (hv_efi_mmap_desc[i].type == EFI_RUNTIME_SERVICES_DATA)
				|| (hv_efi_mmap_desc[i].type == EFI_MEMORYMAPPED_IO)
				|| (hv_efi_mmap_desc[i].type == EFI_MEMORYMAPPED_IOPORTSPACE)
				|| (hv_efi_mmap_desc[i].type == EFI_PALCODE)
				|| (hv_efi_mmap_desc[i].type == EFI_PERSISTENT_MEMORY)) {

			efi_mmap_desc[desc_idx] = hv_efi_mmap_desc[i];
			desc_idx++;
		}
	}

	for (i = 0U; i < (uint16_t)vm->e820_entry_num; i++) {
		/* A memory region with the e820 type of RAM can act as EFI_CONVENTIONAL_MEMORY
		 * for the Service VM. The regions occupied by the HV and pre-launched VMs have
		 * already been filtered out, so this is safe for the Service VM.
		 * As the Service VM starts to run after the EFI call ExitBootServices(), the
		 * EFI_LOADER_CODE and EFI_LOADER_DATA types, which have been mapped to
		 * E820_TYPE_RAM, are not needed.
		 */
		if (vm->e820_entries[i].type == E820_TYPE_RAM) {
			efi_mmap_desc[desc_idx].type = EFI_CONVENTIONAL_MEMORY;
			efi_mmap_desc[desc_idx].phys_addr = vm->e820_entries[i].baseaddr;
			efi_mmap_desc[desc_idx].virt_addr = vm->e820_entries[i].baseaddr;
			efi_mmap_desc[desc_idx].num_pages = vm->e820_entries[i].length / PAGE_SIZE;
			efi_mmap_desc[desc_idx].attribute = EFI_MEMORY_WB;
			desc_idx++;
		}
	}

	for (i = 0U; i < desc_idx; i++) {
		pr_dbg("Service VM efi mmap desc[%d]: addr: 0x%lx, len: 0x%lx, type: %d", i,
			efi_mmap_desc[i].phys_addr, efi_mmap_desc[i].num_pages * PAGE_SIZE, efi_mmap_desc[i].type);
	}

	return desc_idx;
}
#endif

/**
 * @pre zp != NULL && vm != NULL
 */
static uint32_t create_zeropage_e820(struct zero_page *zp, const struct acrn_vm *vm)
{
	uint32_t entry_num = vm->e820_entry_num;
	struct e820_entry *zp_e820 = zp->entries;
	const struct e820_entry *vm_e820 = vm->e820_entries;

	if ((zp_e820 == NULL) || (vm_e820 == NULL) || (entry_num == 0U) || (entry_num > E820_MAX_ENTRIES)) {
		pr_err("e820 create error");
		entry_num = 0U;
	} else {
		(void)memcpy_s((void *)zp_e820, entry_num * sizeof(struct e820_entry),
			(void *)vm_e820, entry_num * sizeof(struct e820_entry));
	}
	return entry_num;
}

/**
 * @pre vm != NULL
 * @pre (vm->min_mem_addr <= kernel_load_addr) && (kernel_load_addr < vm->max_mem_addr)
 */
static uint64_t create_zero_page(struct acrn_vm *vm, uint64_t load_params_gpa)
{
	struct zero_page *zeropage, *hva;
	struct sw_kernel_info *sw_kernel = &(vm->sw.kernel_info);
	struct sw_module_info *bootargs_info = &(vm->sw.bootargs_info);
	struct sw_module_info *ramdisk_info = &(vm->sw.ramdisk_info);
	uint64_t gpa, addr;

	gpa = BZIMG_ZEROPAGE_GPA(load_params_gpa);
	hva = (struct zero_page *)gpa2hva(vm, gpa);
	zeropage = hva;

	stac();
	/* clear the zeropage */
	(void)memset(zeropage, 0U, MEM_4K);

#ifdef CONFIG_MULTIBOOT2
	if (is_service_vm(vm)) {
		struct acrn_boot_info *abi = get_acrn_boot_info();

		if (boot_from_uefi(abi)) {
			struct efi_info *service_vm_efi_info = &zeropage->boot_efi_info;
			uint64_t efi_mmap_gpa = BZIMG_EFIMMAP_GPA(load_params_gpa);
			struct efi_memory_desc *efi_mmap_desc = (struct efi_memory_desc *)gpa2hva(vm, efi_mmap_gpa);
			uint16_t efi_mmap_desc_nr = create_service_vm_efi_mmap_desc(vm, efi_mmap_desc);

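			/* "EL64" in little-endian ASCII: 0x45 'E', 0x4c 'L', 0x36 '6', 0x34 '4' */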
			service_vm_efi_info->loader_signature = 0x34364c45; /* "EL64" */
			service_vm_efi_info->memdesc_version = abi->uefi_info.memdesc_version;
			service_vm_efi_info->memdesc_size = sizeof(struct efi_memory_desc);
			service_vm_efi_info->memmap_size = efi_mmap_desc_nr * sizeof(struct efi_memory_desc);
			service_vm_efi_info->memmap = (uint32_t)efi_mmap_gpa;
			service_vm_efi_info->memmap_hi = (uint32_t)(efi_mmap_gpa >> 32U);
			service_vm_efi_info->systab = abi->uefi_info.systab;
			service_vm_efi_info->systab_hi = abi->uefi_info.systab_hi;
		}
	}
#endif
	/* copy part of the header into the zero page */
	hva = (struct zero_page *)sw_kernel->kernel_src_addr;
	(void)memcpy_s(&(zeropage->hdr), sizeof(zeropage->hdr),
				&(hva->hdr), sizeof(hva->hdr));

	/* See if the kernel has a RAM disk */
	if (ramdisk_info->src_addr != NULL) {
		/* Copy the ramdisk load_addr and size into the zeropage header structure */
		addr = (uint64_t)ramdisk_info->load_addr;
		zeropage->hdr.ramdisk_addr = (uint32_t)addr;
		zeropage->hdr.ramdisk_size = (uint32_t)ramdisk_info->size;
	}

	/* Copy the bootargs load_addr into the zeropage header structure */
	addr = (uint64_t)bootargs_info->load_addr;
	zeropage->hdr.bootargs_addr = (uint32_t)addr;

	/* set constant arguments in zero page */
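	/* Per the Linux boot protocol, 0xff is the "undefined" bootloader ID and
	 * bit 5 of loadflags is QUIET_FLAG (suppress early kernel messages).
	 */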
	zeropage->hdr.loader_type = 0xffU;
	zeropage->hdr.load_flags |= (1U << 5U);	/* quiet */

	/* Create/add e820 table entries in zeropage */
	zeropage->e820_nentries = (uint8_t)create_zeropage_e820(zeropage, vm);
	clac();

	/* Return the physical base address of the zeropage */
	return gpa;
}

/**
 * @pre vm != NULL
 */
static void load_bzimage(struct acrn_vm *vm, struct acrn_vcpu *vcpu,
						uint64_t load_params_gpa, uint64_t kernel_load_gpa)
{
	uint32_t i;
	uint32_t prot_code_offset, prot_code_size, kernel_entry_offset;
	uint8_t setup_sectors;
	const struct acrn_vm_config *vm_config = get_vm_config(vm->vm_id);
	struct sw_kernel_info *sw_kernel = &(vm->sw.kernel_info);
	struct sw_module_info *bootargs_info = &(vm->sw.bootargs_info);
	struct sw_module_info *ramdisk_info = &(vm->sw.ramdisk_info);
	struct zero_page *zeropage = (struct zero_page *)sw_kernel->kernel_src_addr;

	/* The bzImage file consists of three parts:
	 * boot_params (i.e. the zero page) + real mode setup code + compressed protected mode code.
	 * The compressed protected mode code starts at offset (setup_sectors + 1U) * 512U of the bzImage.
	 * Only the protected mode code needs to be loaded.
	 */
	stac();
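	/* Per the Linux boot protocol, a setup_sects value of 0 means the legacy
	 * default of 4 setup sectors (each sector is 512 bytes).
	 */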
	setup_sectors = (zeropage->hdr.setup_sects == 0U) ? 4U : zeropage->hdr.setup_sects;
	clac();
	prot_code_offset = (uint32_t)(setup_sectors + 1U) * 512U;
	prot_code_size = (sw_kernel->kernel_size > prot_code_offset) ?
				(sw_kernel->kernel_size - prot_code_offset) : 0U;

	/* Copy the protected mode part of the kernel code to its run-time location */
	(void)copy_to_gpa(vm, (sw_kernel->kernel_src_addr + prot_code_offset), kernel_load_gpa, prot_code_size);

	if (vm->sw.ramdisk_info.size > 0U) {
		/* Use the customer-specified ramdisk load addr if it is configured in the VM
		 * configuration, otherwise use the address allocated by the HV.
		 */
		if (vm_config->os_config.kernel_ramdisk_addr != 0UL) {
			vm->sw.ramdisk_info.load_addr = (void *)vm_config->os_config.kernel_ramdisk_addr;
		} else {
			vm->sw.ramdisk_info.load_addr = (void *)get_initrd_load_addr(vm, kernel_load_gpa);
			if (vm->sw.ramdisk_info.load_addr == NULL) {
				pr_err("failed to load initrd for VM%d !", vm->vm_id);
			}
		}

		/* No need to load the ramdisk if src_addr and load_addr point to the same place. */
		if (gpa2hva(vm, (uint64_t)ramdisk_info->load_addr) != ramdisk_info->src_addr) {
			load_sw_module(vm, ramdisk_info);
		}
	}

	bootargs_info->load_addr = (void *)BZIMG_CMDLINE_GPA(load_params_gpa);

	load_sw_module(vm, bootargs_info);

	/* The 32-bit kernel entry is where the protected mode code is loaded */
	kernel_entry_offset = 0U;
	if (vcpu->arch.cpu_mode == CPU_MODE_64BIT) {
		/* The 64-bit entry point is 512 bytes after the start */
		kernel_entry_offset += 512U;
	}

	sw_kernel->kernel_entry_addr = (void *)(kernel_load_gpa + kernel_entry_offset);

	/* Documentation states: ebx=0, edi=0, ebp=0, esi=ptr to
	 * zeropage
	 */
	for (i = 0U; i < NUM_GPRS; i++) {
		vcpu_set_gpreg(vcpu, i, 0UL);
	}

	/* Create the zeropage and put its physical base address in RSI */
	vcpu_set_gpreg(vcpu, CPU_REG_RSI, create_zero_page(vm, load_params_gpa));
	pr_info("%s, RSI pointing to zero page for VM %d at GPA %X",
			__func__, vm->vm_id, vcpu_get_gpreg(vcpu, CPU_REG_RSI));
}

int32_t bzimage_loader(struct acrn_vm *vm)
{
	int32_t ret = -ENOMEM;
	/* get the primary vcpu */
	struct acrn_vcpu *vcpu = vcpu_from_vid(vm, BSP_CPU_ID);
	uint64_t load_params_gpa = find_space_from_ve820(vm, BZIMG_LOAD_PARAMS_SIZE, MEM_4K, MEM_1M);

	if (load_params_gpa != INVALID_GPA) {
		uint64_t kernel_load_gpa = (uint64_t)get_bzimage_kernel_load_addr(vm);

		if (kernel_load_gpa != 0UL) {
			/* We boot the bzImage from protected mode directly */
			init_vcpu_protect_mode_regs(vcpu, BZIMG_INITGDT_GPA(load_params_gpa));

			load_bzimage(vm, vcpu, load_params_gpa, kernel_load_gpa);

			ret = 0;
		}
	}

	return ret;
}