/*-
 * Copyright (c) 2011 NetApp, Inc.
 * Copyright (c) 2017-2024 Intel Corporation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include <types.h>
#include <asm/lib/atomic.h>
#include <asm/cpufeatures.h>
#include <asm/pgtable.h>
#include <asm/cpu_caps.h>
#include <asm/mmu.h>
#include <asm/vmx.h>
#include <reloc.h>
#include <asm/guest/vm.h>
#include <asm/boot/ld_sym.h>
#include <logmsg.h>
#include <misc_cfg.h>

static void *ppt_mmu_pml4_addr;
/**
 * @brief The sanitized page
 *
 * The sanitized page is used to mitigate L1TF: page-table entries that do not
 * map a real page are pointed at this all-zero page so that speculative
 * accesses cannot leak stale data.
 */
static uint8_t sanitized_page[PAGE_SIZE] __aligned(PAGE_SIZE);

/* The PPT VA and PA are identity-mapped */
#define PPT_PML4_PAGE_NUM	PML4_PAGE_NUM(MAX_PHY_ADDRESS_SPACE)
#define PPT_PDPT_PAGE_NUM	PDPT_PAGE_NUM(MAX_PHY_ADDRESS_SPACE)
#define PPT_PT_PAGE_NUM	0UL	/* 4K granularity page mapping is not supported */

/* Refer to how the EPT page number is calculated for the rationale behind this formula. */
uint64_t get_ppt_page_num(void)
{
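	/*
	 * PD pages budgeted to cover all e820 RAM plus the low 4G of address
	 * space, with extra headroom that presumably accounts for per-device
	 * MMIO mappings (up to 6 BARs per PCI device).
	 */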
	uint64_t ppt_pd_page_num = PD_PAGE_NUM(get_e820_ram_size() + MEM_4G) + CONFIG_MAX_PCI_DEV_NUM * 6U;

	/* must be a multiple of 64 */
	return roundup((PPT_PML4_PAGE_NUM + PPT_PDPT_PAGE_NUM + ppt_pd_page_num + PPT_PT_PAGE_NUM), 64U);
}

/* ppt: primary page pool */
static struct page_pool ppt_page_pool;

/* @pre: the PPT and EPT have the same page granularity */
static inline bool ppt_large_page_support(enum _page_table_level level, __unused uint64_t prot)
{
	bool support;

	if (level == IA32E_PD) {
		support = true;
	} else if (level == IA32E_PDPT) {
		support = pcpu_has_cap(X86_FEATURE_PAGE1GB);
	} else {
		support = false;
	}

	return support;
}
static inline void ppt_clflush_pagewalk(__unused const void *entry)
{
}

static inline void ppt_nop_tweak_exe_right(__unused uint64_t *entry) {}
static inline void ppt_nop_recover_exe_right(__unused uint64_t *entry) {}

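/*
 * The host page table needs neither a CLFLUSH after page-walk updates nor the
 * EPT-style execute-right adjustments, so those callbacks are left as no-ops.
 */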
static const struct pgtable ppt_pgtable = {
	.default_access_right = (PAGE_PRESENT | PAGE_RW | PAGE_USER),
	.pgentry_present_mask = PAGE_PRESENT,
	.pool = &ppt_page_pool,
	.large_page_support = ppt_large_page_support,
	.clflush_pagewalk = ppt_clflush_pagewalk,
	.tweak_exe_right = ppt_nop_tweak_exe_right,
	.recover_exe_right = ppt_nop_recover_exe_right,
};

/*
 * @pre: the combined type and VPID are correct
 */
static inline void local_invvpid(uint64_t type, uint16_t vpid, uint64_t gva)
{
	const struct invvpid_operand operand = { vpid, 0U, 0U, gva };

	if (asm_invvpid(operand, type) != 0) {
		pr_dbg("%s, failed. type = %lu, vpid = %u", __func__, type, vpid);
	}
}

/*
 * @pre: the combined type and EPTP are correct
 */
static inline void local_invept(uint64_t type, struct invept_desc desc)
{
	if (asm_invept(type, desc) != 0) {
		pr_dbg("%s, failed. type = %lu, eptp = 0x%lx", __func__, type, desc.eptp);
	}
}

void flush_vpid_single(uint16_t vpid)
{
	if (vpid != 0U) {
		local_invvpid(VMX_VPID_TYPE_SINGLE_CONTEXT, vpid, 0UL);
	}
}

void flush_vpid_global(void)
{
	local_invvpid(VMX_VPID_TYPE_ALL_CONTEXT, 0U, 0UL);
}

void invept(const void *eptp)
{
	struct invept_desc desc = {0};

	if (pcpu_has_vmx_ept_vpid_cap(VMX_EPT_INVEPT_SINGLE_CONTEXT)) {
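		/*
		 * Build the EPTP value: bits 2:0 select the EPT paging-structure
		 * memory type (6 = write-back) and bits 5:3 hold the EPT
		 * page-walk length minus one (3 = 4-level EPT).
		 */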
		desc.eptp = hva2hpa(eptp) | (3UL << 3U) | 6UL;
		local_invept(INVEPT_TYPE_SINGLE_CONTEXT, desc);
	} else if (pcpu_has_vmx_ept_vpid_cap(VMX_EPT_INVEPT_GLOBAL_CONTEXT)) {
		local_invept(INVEPT_TYPE_ALL_CONTEXTS, desc);
	} else {
		/* Neither type of INVEPT is supported. Skip. */
	}
}

void enable_paging(void)
{
	uint64_t tmp64 = 0UL;

	/* Initialize IA32_PAT according to ISDM 11.12.4 Programming the PAT */
	msr_write(MSR_IA32_PAT, PAT_POWER_ON_VALUE);

	/*
	 * Enable the IA32_EFER.NXE bit to prevent
	 * instruction fetching from pages with the XD bit set.
	 */
	tmp64 = msr_read(MSR_IA32_EFER);

	/*
	 * The SCE bit is not used by the host. However, we set this bit so that
	 * the value of IA32_EFER is highly likely to be identical between the host
	 * and the guest, and we don't need to switch this MSR on VMX transitions.
	 */
	tmp64 |= MSR_IA32_EFER_NXE_BIT | MSR_IA32_EFER_SCE_BIT;
	msr_write(MSR_IA32_EFER, tmp64);

	/* Enable Write Protect, inhibiting writing to read-only pages */
	CPU_CR_READ(cr0, &tmp64);
	CPU_CR_WRITE(cr0, tmp64 | CR0_WP);

	/* HPA->HVA is a 1:1 mapping at this point, so simply treat ppt_mmu_pml4_addr as an HPA. */
	CPU_CR_WRITE(cr3, ppt_mmu_pml4_addr);
}

void enable_smep(void)
{
	uint64_t val64 = 0UL;

	/* Enable CR4.SMEP */
	CPU_CR_READ(cr4, &val64);
	CPU_CR_WRITE(cr4, val64 | CR4_SMEP);
}

void enable_smap(void)
{
	uint64_t val64 = 0UL;

	/* Enable CR4.SMAP */
	CPU_CR_READ(cr4, &val64);
	CPU_CR_WRITE(cr4, val64 | CR4_SMAP);
}

/*
 * Clear the USER bit in the page-table entries so that the memory pages are owned by the hypervisor.
 */
void set_paging_supervisor(uint64_t base, uint64_t size)
{
	uint64_t base_aligned;
	uint64_t size_aligned;
	uint64_t region_end = base + size;

	/* Round base down to a 2MB boundary. */
	base_aligned = round_pde_down(base);
	size_aligned = region_end - base_aligned;

	pgtable_modify_or_del_map((uint64_t *)ppt_mmu_pml4_addr, base_aligned,
		round_pde_up(size_aligned), 0UL, PAGE_USER, &ppt_pgtable, MR_MODIFY);
}

void set_paging_nx(uint64_t base, uint64_t size)
{
	uint64_t region_end = base + size;
	uint64_t base_aligned = round_pde_down(base);
	uint64_t size_aligned = round_pde_up(region_end - base_aligned);

	pgtable_modify_or_del_map((uint64_t *)ppt_mmu_pml4_addr,
		base_aligned, size_aligned, PAGE_NX, 0UL, &ppt_pgtable, MR_MODIFY);
}

void set_paging_x(uint64_t base, uint64_t size)
{
	uint64_t region_end = base + size;
	uint64_t base_aligned = round_pde_down(base);
	uint64_t size_aligned = round_pde_up(region_end - base_aligned);

	pgtable_modify_or_del_map((uint64_t *)ppt_mmu_pml4_addr,
		base_aligned, size_aligned, 0UL, PAGE_NX, &ppt_pgtable, MR_MODIFY);
}

void allocate_ppt_pages(void)
{
	uint64_t page_base;
	uint64_t bitmap_size = get_ppt_page_num() / 8U;

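	/*
	 * Reserve physically contiguous memory below 4G for the page metadata
	 * array and for the allocation bitmap (one bit per page, so the page
	 * count divided by 8 gives the bitmap size in bytes).
	 */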
	page_base = e820_alloc_memory(sizeof(struct page) * get_ppt_page_num(), MEM_4G);
	ppt_page_pool.bitmap = (uint64_t *)e820_alloc_memory(bitmap_size, MEM_4G);

	ppt_page_pool.start_page = (struct page *)(void *)page_base;
	ppt_page_pool.bitmap_size = bitmap_size / sizeof(uint64_t);
	ppt_page_pool.dummy_page = NULL;

	memset(ppt_page_pool.bitmap, 0, bitmap_size);
}

void init_paging(void)
{
	uint64_t hv_hva;
	uint32_t i;
	uint64_t low32_max_ram = 0UL;
	uint64_t high64_min_ram = ~0UL;
	uint64_t high64_max_ram = MEM_4G;

	struct acrn_boot_info *abi = get_acrn_boot_info();
	const struct abi_mmap *entry;
	uint32_t entries_count = abi->mmap_entries;
	const struct abi_mmap *p_mmap = abi->mmap_entry;

	pr_dbg("HV MMU Initialization");

	init_sanitized_page((uint64_t *)sanitized_page, hva2hpa_early(sanitized_page));

	/* Allocate memory for Hypervisor PML4 table */
	ppt_mmu_pml4_addr = pgtable_create_root(&ppt_pgtable);

	/* Find the RAM ranges (MMAP_TYPE_RAM) that will be mapped with the WB attribute */
	for (i = 0U; i < entries_count; i++) {
		entry = p_mmap + i;
		if (entry->type == MMAP_TYPE_RAM) {
			uint64_t end = entry->baseaddr + entry->length;
			if (end < MEM_4G) {
				low32_max_ram = max(end, low32_max_ram);
			} else {
				high64_min_ram = min(entry->baseaddr, high64_min_ram);
				high64_max_ram = max(end, high64_max_ram);
			}
		}
	}

	low32_max_ram = round_pde_up(low32_max_ram);
	high64_max_ram = round_pde_down(high64_max_ram);

	/* Map the [0, low32_max_ram) and [high64_min_ram, high64_max_ram) RAM regions with the WB attribute */
	pgtable_add_map((uint64_t *)ppt_mmu_pml4_addr, 0UL, 0UL,
			low32_max_ram, PAGE_ATTR_USER | PAGE_CACHE_WB, &ppt_pgtable);

	if (high64_max_ram > high64_min_ram) {
		pgtable_add_map((uint64_t *)ppt_mmu_pml4_addr, high64_min_ram, high64_min_ram,
				high64_max_ram - high64_min_ram, PAGE_ATTR_USER | PAGE_CACHE_WB, &ppt_pgtable);
	}
	/* Map the [low32_max_ram, 4G) and [MMIO64_START, MMIO64_END) MMIO regions with the UC attribute */
	pgtable_add_map((uint64_t *)ppt_mmu_pml4_addr, low32_max_ram, low32_max_ram,
		MEM_4G - low32_max_ram, PAGE_ATTR_USER | PAGE_CACHE_UC, &ppt_pgtable);
	if ((MMIO64_START != ~0UL) && (MMIO64_END != 0UL)) {
		pgtable_add_map((uint64_t *)ppt_mmu_pml4_addr, MMIO64_START, MMIO64_START,
			(MMIO64_END - MMIO64_START), PAGE_ATTR_USER | PAGE_CACHE_UC, &ppt_pgtable);
	}

	/*
	 * Set the paging-structure entries' U/S flag to supervisor mode for hypervisor-owned memory
	 * (excluding the memory reserved for trusty).
	 *
	 * Before the new PML4 takes effect in enable_paging(), HPA->HVA is still a 1:1 mapping, so
	 * simply treat the return value of get_hv_image_base() as an HPA.
	 */
	hv_hva = get_hv_image_base();
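	/*
	 * The base is rounded down to a 2MB boundary; when it was not already
	 * aligned, pad the size by one 2MB page so the whole image remains
	 * covered by the modified mapping.
	 */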
	pgtable_modify_or_del_map((uint64_t *)ppt_mmu_pml4_addr, hv_hva & PDE_MASK,
			get_hv_image_size() + (((hv_hva & (PDE_SIZE - 1UL)) != 0UL) ? PDE_SIZE : 0UL),
			PAGE_CACHE_WB, PAGE_CACHE_MASK | PAGE_USER, &ppt_pgtable, MR_MODIFY);

	/*
	 * Remove the NX bit for pages that contain the hypervisor code section, as the XD bit is
	 * set by default for all pages, including pages for guests.
	 */
	pgtable_modify_or_del_map((uint64_t *)ppt_mmu_pml4_addr, round_pde_down(hv_hva),
			round_pde_up((uint64_t)&ld_text_end) - round_pde_down(hv_hva), 0UL,
			PAGE_NX, &ppt_pgtable, MR_MODIFY);
#if ((SERVICE_VM_NUM == 1) && (MAX_TRUSTY_VM_NUM > 0))
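	/*
	 * The memory reserved for trusty VMs was excluded above; restore its
	 * U/S flag so it is not treated as hypervisor-owned.
	 */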
	pgtable_modify_or_del_map((uint64_t *)ppt_mmu_pml4_addr, (uint64_t)get_sworld_memory_base(),
			TRUSTY_RAM_SIZE * MAX_TRUSTY_VM_NUM, PAGE_USER, 0UL, &ppt_pgtable, MR_MODIFY);
#endif

	/* Enable paging */
	enable_paging();
}

void flush_tlb(uint64_t addr)
{
	invlpg(addr);
}

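/* Invalidate the TLB entries for every 4-KByte page in [addr, addr + size). */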
void flush_tlb_range(uint64_t addr, uint64_t size)
{
	uint64_t linear_addr;

	for (linear_addr = addr; linear_addr < (addr + size); linear_addr += PAGE_SIZE) {
		invlpg(linear_addr);
	}
}

void flush_invalidate_all_cache(void)
{
	wbinvd();
}

void flush_cacheline(const volatile void *p)
{
	clflush(p);
}

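/*
 * Flush every cache line in [p, p + size) with CLFLUSHOPT. CLFLUSHOPT is only
 * weakly ordered, so ordering against later stores requires a fence on the
 * caller's side.
 */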
void flush_cache_range(const volatile void *p, uint64_t size)
{
	uint64_t i;

	for (i = 0UL; i < size; i += CACHE_LINE_SIZE) {
		clflushopt(p + i);
	}
}