/*
 * Copyright (C) 2018-2024 Intel Corporation.
 *
 * SPDX-License-Identifier: BSD-3-Clause
 */

#include <types.h>
#include <util.h>
#include <acrn_hv_defs.h>
#include <asm/page.h>
#include <asm/mmu.h>
#include <logmsg.h>

/**
 * @addtogroup hwmgmt_page
 *
 * @{
 */

/**
 * @file
 * @brief Implementation of page table management.
 *
 * This file implements the external APIs to establish, modify, delete, or look up mapping information. It also
 * defines some helper functions to implement the features that are commonly used in this file.
 *
 */

#define DBG_LEVEL_MMU	6U

/**
 * @brief Host physical address of the sanitized page.
 *
 * The sanitized page is used to mitigate L1TF. This variable is used to store the host physical address of the
 * sanitized page.
 */
static uint64_t sanitized_page_hpa;

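/*
 * Point a single paging-structure entry at the sanitized page, so that a non-present or
 * reclaimed entry never references memory that could be reused (part of the L1TF mitigation).
 */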
static void sanitize_pte_entry(uint64_t *ptep, const struct pgtable *table)
{
	set_pgentry(ptep, sanitized_page_hpa, table);
}

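/*
 * Point every entry of a page table page at the sanitized page.
 */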
static void sanitize_pte(uint64_t *pt_page, const struct pgtable *table)
{
	uint64_t i;
	for (i = 0UL; i < PTRS_PER_PTE; i++) {
		sanitize_pte_entry(pt_page + i, table);
	}
}

/**
 * @brief Initializes a sanitized page.
 *
 * This function is responsible for initializing a sanitized page. It sets the page table entries in this sanitized page
 * to point to the host physical address of the sanitized page itself.
 *
 * The static variable 'sanitized_page_hpa' will be set and the `sanitized_page` will be initialized.
 *
 * @param[out] sanitized_page The page to be sanitized.
 * @param[in] hpa The host physical address that the page table entries in the sanitized page will point to.
 *
 * @return None
 *
 * @pre sanitized_page != NULL
 * @pre ((uint64_t)sanitized_page & (PAGE_SIZE - 1)) == 0x0U
 * @pre hpa != 0U
 * @pre (hpa & (PAGE_SIZE - 1)) == 0x0U
 *
 * @post N/A
 */
void init_sanitized_page(uint64_t *sanitized_page, uint64_t hpa)
{
	uint64_t i;

	sanitized_page_hpa = hpa;
	/* set each ptep in the sanitized_page to point to the sanitized page itself */
	for (i = 0UL; i < PTRS_PER_PTE; i++) {
		*(sanitized_page + i) = sanitized_page_hpa;
	}
}

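/*
 * For MR_DEL requests, free the page table page once none of its entries is present
 * any more, and re-point the referencing entry (pde) at the sanitized page.
 */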
static void try_to_free_pgtable_page(const struct pgtable *table,
			uint64_t *pde, uint64_t *pt_page, uint32_t type)
{
	if (type == MR_DEL) {
		uint64_t index;

		for (index = 0UL; index < PTRS_PER_PTE; index++) {
			uint64_t *pte = pt_page + index;
			if (pgentry_present(table, (*pte))) {
				break;
			}
		}

		if (index == PTRS_PER_PTE) {
			free_page(table->pool, (void *)pt_page);
			sanitize_pte_entry(pde, table);
		}
	}
}

/*
 * Split a large page into the next level of page table.
 *
 * @pre level can only be IA32E_PDPT or IA32E_PD
 */
static void split_large_page(uint64_t *pte, enum _page_table_level level,
		__unused uint64_t vaddr, const struct pgtable *table)
{
	uint64_t *pbase;
	uint64_t ref_paddr, paddr, paddrinc;
	uint64_t i, ref_prot;

	switch (level) {
	case IA32E_PDPT:
		ref_paddr = (*pte) & PDPTE_PFN_MASK;
		paddrinc = PDE_SIZE;
		ref_prot = (*pte) & ~PDPTE_PFN_MASK;
		break;
	default:	/* IA32E_PD */
		ref_paddr = (*pte) & PDE_PFN_MASK;
		paddrinc = PTE_SIZE;
		ref_prot = (*pte) & ~PDE_PFN_MASK;
		ref_prot &= ~PAGE_PSE;
		table->recover_exe_right(&ref_prot);
		break;
	}

	pbase = (uint64_t *)alloc_page(table->pool);
	dev_dbg(DBG_LEVEL_MMU, "%s, paddr: 0x%lx, pbase: 0x%lx\n", __func__, ref_paddr, pbase);

	paddr = ref_paddr;
	for (i = 0UL; i < PTRS_PER_PTE; i++) {
		set_pgentry(pbase + i, paddr | ref_prot, table);
		paddr += paddrinc;
	}

	ref_prot = table->default_access_right;
	set_pgentry(pte, hva2hpa((void *)pbase) | ref_prot, table);

	/* TODO: flush the TLB */
}

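/*
 * Apply prot_set/prot_clr to a single paging-structure entry for MR_MODIFY, or point the
 * entry at the sanitized page for MR_DEL.
 */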
static inline void local_modify_or_del_pte(uint64_t *pte,
		uint64_t prot_set, uint64_t prot_clr, uint32_t type, const struct pgtable *table)
{
	if (type == MR_MODIFY) {
		uint64_t new_pte = *pte;
		new_pte &= ~prot_clr;
		new_pte |= prot_set;
		set_pgentry(pte, new_pte, table);
	} else {
		sanitize_pte_entry(pte, table);
	}
}

/*
 * The pgentry constructed here may be a PML4E, PDPTE, or PDE.
 */
static inline void construct_pgentry(uint64_t *pde, void *pd_page, uint64_t prot, const struct pgtable *table)
{
	sanitize_pte((uint64_t *)pd_page, table);

	set_pgentry(pde, hva2hpa(pd_page) | prot, table);
}

/*
 * At the PT level:
 * type MR_MODIFY: modify the memory type or page access rights of [vaddr_start, vaddr_end).
 * type MR_DEL: delete the mappings of [vaddr_start, vaddr_end).
 */
static void modify_or_del_pte(uint64_t *pde, uint64_t vaddr_start, uint64_t vaddr_end,
		uint64_t prot_set, uint64_t prot_clr, const struct pgtable *table, uint32_t type)
{
	uint64_t *pt_page = pde_page_vaddr(*pde);
	uint64_t vaddr = vaddr_start;
	uint64_t index = pte_index(vaddr);

	dev_dbg(DBG_LEVEL_MMU, "%s, vaddr: [0x%lx - 0x%lx]\n", __func__, vaddr, vaddr_end);
	for (; index < PTRS_PER_PTE; index++) {
		uint64_t *pte = pt_page + index;

		if (!pgentry_present(table, (*pte))) {
			/* Suppress the warning message for low memory (< 1 MByte), as the Service VM
			 * will update MTRR attributes for this region by default, whether it
			 * is present or not.
			 */
			if ((type == MR_MODIFY) && (vaddr >= MEM_1M)) {
				pr_warn("%s, vaddr: 0x%lx pte is not present.\n", __func__, vaddr);
			}
		} else {
			local_modify_or_del_pte(pte, prot_set, prot_clr, type, table);
		}

		vaddr += PTE_SIZE;
		if (vaddr >= vaddr_end) {
			break;
		}
	}

	try_to_free_pgtable_page(table, pde, pt_page, type);
}

/*
 * At the PD level:
 * type MR_MODIFY: modify the memory type or page access rights of [vaddr_start, vaddr_end).
 * type MR_DEL: delete the mappings of [vaddr_start, vaddr_end).
 */
static void modify_or_del_pde(uint64_t *pdpte, uint64_t vaddr_start, uint64_t vaddr_end,
		uint64_t prot_set, uint64_t prot_clr, const struct pgtable *table, uint32_t type)
{
	uint64_t *pd_page = pdpte_page_vaddr(*pdpte);
	uint64_t vaddr = vaddr_start;
	uint64_t index = pde_index(vaddr);

	dev_dbg(DBG_LEVEL_MMU, "%s, vaddr: [0x%lx - 0x%lx]\n", __func__, vaddr, vaddr_end);
	for (; index < PTRS_PER_PDE; index++) {
		uint64_t *pde = pd_page + index;
		uint64_t vaddr_next = (vaddr & PDE_MASK) + PDE_SIZE;

		if (!pgentry_present(table, (*pde))) {
			if (type == MR_MODIFY) {
				pr_warn("%s, addr: 0x%lx pde is not present.\n", __func__, vaddr);
			}
		} else {
			if (pde_large(*pde) != 0UL) {
				if ((vaddr_next > vaddr_end) || (!mem_aligned_check(vaddr, PDE_SIZE))) {
					split_large_page(pde, IA32E_PD, vaddr, table);
				} else {
					local_modify_or_del_pte(pde, prot_set, prot_clr, type, table);
					if (vaddr_next < vaddr_end) {
						vaddr = vaddr_next;
						continue;
					}
					break;	/* done */
				}
			}
			modify_or_del_pte(pde, vaddr, vaddr_end, prot_set, prot_clr, table, type);
		}
		if (vaddr_next >= vaddr_end) {
			break;	/* done */
		}
		vaddr = vaddr_next;
	}

	try_to_free_pgtable_page(table, pdpte, pd_page, type);
}

/*
 * At the PDPT level:
 * type MR_MODIFY: modify the memory type or page access rights of [vaddr_start, vaddr_end).
 * type MR_DEL: delete the mappings of [vaddr_start, vaddr_end).
 */
static void modify_or_del_pdpte(const uint64_t *pml4e, uint64_t vaddr_start, uint64_t vaddr_end,
		uint64_t prot_set, uint64_t prot_clr, const struct pgtable *table, uint32_t type)
{
	uint64_t *pdpt_page = pml4e_page_vaddr(*pml4e);
	uint64_t vaddr = vaddr_start;
	uint64_t index = pdpte_index(vaddr);

	dev_dbg(DBG_LEVEL_MMU, "%s, vaddr: [0x%lx - 0x%lx]\n", __func__, vaddr, vaddr_end);
	for (; index < PTRS_PER_PDPTE; index++) {
		uint64_t *pdpte = pdpt_page + index;
		uint64_t vaddr_next = (vaddr & PDPTE_MASK) + PDPTE_SIZE;

		if (!pgentry_present(table, (*pdpte))) {
			if (type == MR_MODIFY) {
				pr_warn("%s, vaddr: 0x%lx pdpte is not present.\n", __func__, vaddr);
			}
		} else {
			if (pdpte_large(*pdpte) != 0UL) {
				if ((vaddr_next > vaddr_end) ||
						(!mem_aligned_check(vaddr, PDPTE_SIZE))) {
					split_large_page(pdpte, IA32E_PDPT, vaddr, table);
				} else {
					local_modify_or_del_pte(pdpte, prot_set, prot_clr, type, table);
					if (vaddr_next < vaddr_end) {
						vaddr = vaddr_next;
						continue;
					}
					break;	/* done */
				}
			}
			modify_or_del_pde(pdpte, vaddr, vaddr_end, prot_set, prot_clr, table, type);
		}
		if (vaddr_next >= vaddr_end) {
			break;	/* done */
		}
		vaddr = vaddr_next;
	}
}

/**
 * @brief Modify or delete the mappings associated with the specified address range.
 *
 * This function modifies the properties of an existing mapping or deletes it entirely from the page table. The input
 * address range is specified by [vaddr_base, vaddr_base + size). It is used when changing the access permissions of a
 * memory region or when freeing a previously mapped region. This operation is critical for dynamic memory management,
 * allowing the system to adapt to changes in memory usage patterns or to reclaim resources.
 *
 * For error case behaviors:
 * - If the 'type' is MR_MODIFY and any page referenced by the PML4E in the specified address range is not present, the
 * function asserts that the operation is invalid.
 * For normal case behaviors (when the error case conditions are not satisfied):
 * - If any page referenced by the PDPTE/PDE/PTE in the specified address range is not present, there is no change to
 * the corresponding mapping and the operation continues.
 * - If any PDPTE/PDE in the specified address range maps a large page and the large page address exceeds the specified
 * address range, the function splits the large page into the next level of page table to allow for the modification or
 * deletion of the mappings; the execute right will be recovered by the callback function table->recover_exe_right()
 * when a 2MB page is split into 4KB pages.
 * - If the 'type' is MR_MODIFY, the function modifies the properties of the existing mapping to match the specified
 * properties.
 * - If the 'type' is MR_DEL, the function will set the corresponding page table entries to point to the sanitized page.
 *
 * @param[inout] pml4_page A pointer to the specified PML4 table.
 * @param[in] vaddr_base The specified input address determining the start of the input address range whose mapping
 *                       information is to be updated.
 *                       For hypervisor's MMU, it is the host virtual address.
 *                       For each VM's EPT, it is the guest physical address.
 * @param[in] size The size of the specified input address range whose mapping information is to be updated.
 * @param[in] prot_set Bit positions representing the specified properties which need to be set.
 *                     Bits specified by prot_clr are cleared before each bit specified by prot_set is set to 1.
 * @param[in] prot_clr Bit positions representing the specified properties which need to be cleared.
 *                     Bits specified by prot_clr are cleared before each bit specified by prot_set is set to 1.
 * @param[in] table A pointer to the struct pgtable containing the information of the specified memory operations.
 * @param[in] type The type of operation to perform (MR_MODIFY or MR_DEL).
 *
 * @return None
 *
 * @pre pml4_page != NULL
 * @pre table != NULL
 * @pre (type == MR_MODIFY) || (type == MR_DEL)
 * @pre For x86 hypervisor, the following conditions shall be met if "type == MR_MODIFY".
 *      - (prot_set & ~(PAGE_RW | PAGE_USER | PAGE_PWT | PAGE_PCD | PAGE_ACCESSED | PAGE_DIRTY | PAGE_PSE | PAGE_GLOBAL
 *      | PAGE_PAT_LARGE | PAGE_NX) == 0)
 *      - (prot_clr & ~(PAGE_RW | PAGE_USER | PAGE_PWT | PAGE_PCD | PAGE_ACCESSED | PAGE_DIRTY | PAGE_PSE | PAGE_GLOBAL
 *      | PAGE_PAT_LARGE | PAGE_NX) == 0)
 * @pre For the VM EPT mappings, the following conditions shall be met if "type == MR_MODIFY".
 *      - (prot_set & ~(EPT_RD | EPT_WR | EPT_EXE | EPT_MT_MASK) == 0)
 *      - (prot_set & EPT_MT_MASK) == EPT_UNCACHED || (prot_set & EPT_MT_MASK) == EPT_WC ||
 *        (prot_set & EPT_MT_MASK) == EPT_WT || (prot_set & EPT_MT_MASK) == EPT_WP || (prot_set & EPT_MT_MASK) == EPT_WB
 *      - (prot_clr & ~(EPT_RD | EPT_WR | EPT_EXE | EPT_MT_MASK) == 0)
 *      - (prot_clr & EPT_MT_MASK) == EPT_UNCACHED || (prot_clr & EPT_MT_MASK) == EPT_WC ||
 *        (prot_clr & EPT_MT_MASK) == EPT_WT || (prot_clr & EPT_MT_MASK) == EPT_WP || (prot_clr & EPT_MT_MASK) == EPT_WB
 *
 * @post N/A
 *
 * @remark N/A
 */
void pgtable_modify_or_del_map(uint64_t *pml4_page, uint64_t vaddr_base, uint64_t size,
		uint64_t prot_set, uint64_t prot_clr, const struct pgtable *table, uint32_t type)
{
	uint64_t vaddr = round_page_up(vaddr_base);
	uint64_t vaddr_next, vaddr_end;
	uint64_t *pml4e;

	vaddr_end = vaddr + round_page_down(size);
	dev_dbg(DBG_LEVEL_MMU, "%s, vaddr: 0x%lx, size: 0x%lx\n",
		__func__, vaddr, size);

	while (vaddr < vaddr_end) {
		vaddr_next = (vaddr & PML4E_MASK) + PML4E_SIZE;
		pml4e = pml4e_offset(pml4_page, vaddr);
		if ((!pgentry_present(table, (*pml4e))) && (type == MR_MODIFY)) {
			ASSERT(false, "invalid op, pml4e not present");
		} else {
			modify_or_del_pdpte(pml4e, vaddr, vaddr_end, prot_set, prot_clr, table, type);
			vaddr = vaddr_next;
		}
	}
}
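
/*
 * Illustrative usage (a sketch only, not code from this file; 'vm_eptp' and 'vm_ept_pgtable'
 * are hypothetical names for a VM's EPT PML4 page and a pointer to its struct pgtable):
 *
 *   Revoke write access for a guest physical range:
 *     pgtable_modify_or_del_map(vm_eptp, gpa, size, 0UL, EPT_WR, vm_ept_pgtable, MR_MODIFY);
 *
 *   Delete the mappings of the same range (the entries fall back to the sanitized page):
 *     pgtable_modify_or_del_map(vm_eptp, gpa, size, 0UL, 0UL, vm_ept_pgtable, MR_DEL);
 */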

/*
 * At the PT level,
 * map [vaddr_start, vaddr_end) to [paddr_base, ...) with the given memory type and access rights.
 */
static void add_pte(const uint64_t *pde, uint64_t paddr_start, uint64_t vaddr_start, uint64_t vaddr_end,
		uint64_t prot, const struct pgtable *table)
{
	uint64_t *pt_page = pde_page_vaddr(*pde);
	uint64_t vaddr = vaddr_start;
	uint64_t paddr = paddr_start;
	uint64_t index = pte_index(vaddr);

	dev_dbg(DBG_LEVEL_MMU, "%s, paddr: 0x%lx, vaddr: [0x%lx - 0x%lx]\n",
		__func__, paddr, vaddr_start, vaddr_end);
	for (; index < PTRS_PER_PTE; index++) {
		uint64_t *pte = pt_page + index;

		if (pgentry_present(table, (*pte))) {
			pr_fatal("%s, pte 0x%lx is already present!\n", __func__, vaddr);
		} else {
			set_pgentry(pte, paddr | prot, table);
		}
		paddr += PTE_SIZE;
		vaddr += PTE_SIZE;

		if (vaddr >= vaddr_end) {
			break;	/* done */
		}
	}
}

/*
 * At the PD level,
 * map [vaddr_start, vaddr_end) to [paddr_base, ...) with the given memory type and access rights.
 */
static void add_pde(const uint64_t *pdpte, uint64_t paddr_start, uint64_t vaddr_start, uint64_t vaddr_end,
		uint64_t prot, const struct pgtable *table)
{
	uint64_t *pd_page = pdpte_page_vaddr(*pdpte);
	uint64_t vaddr = vaddr_start;
	uint64_t paddr = paddr_start;
	uint64_t index = pde_index(vaddr);
	uint64_t local_prot = prot;

	dev_dbg(DBG_LEVEL_MMU, "%s, paddr: 0x%lx, vaddr: [0x%lx - 0x%lx]\n",
		__func__, paddr, vaddr, vaddr_end);
	for (; index < PTRS_PER_PDE; index++) {
		uint64_t *pde = pd_page + index;
		uint64_t vaddr_next = (vaddr & PDE_MASK) + PDE_SIZE;

		if (pde_large(*pde) != 0UL) {
			pr_fatal("%s, pde 0x%lx is already present!\n", __func__, vaddr);
		} else {
			if (!pgentry_present(table, (*pde))) {
				if (table->large_page_support(IA32E_PD, prot) &&
					mem_aligned_check(paddr, PDE_SIZE) &&
					mem_aligned_check(vaddr, PDE_SIZE) &&
					(vaddr_next <= vaddr_end)) {
					table->tweak_exe_right(&local_prot);
					set_pgentry(pde, paddr | (local_prot | PAGE_PSE), table);
					if (vaddr_next < vaddr_end) {
						paddr += (vaddr_next - vaddr);
						vaddr = vaddr_next;
						continue;
					}
					break;	/* done */
				} else {
					void *pt_page = alloc_page(table->pool);
					construct_pgentry(pde, pt_page, table->default_access_right, table);
				}
			}
			add_pte(pde, paddr, vaddr, vaddr_end, prot, table);
		}
		if (vaddr_next >= vaddr_end) {
			break;	/* done */
		}
		paddr += (vaddr_next - vaddr);
		vaddr = vaddr_next;
	}
}

/*
 * At the PDPT level,
 * map [vaddr_start, vaddr_end) to [paddr_base, ...) with the given memory type and access rights.
 */
static void add_pdpte(const uint64_t *pml4e, uint64_t paddr_start, uint64_t vaddr_start, uint64_t vaddr_end,
		uint64_t prot, const struct pgtable *table)
{
	uint64_t *pdpt_page = pml4e_page_vaddr(*pml4e);
	uint64_t vaddr = vaddr_start;
	uint64_t paddr = paddr_start;
	uint64_t index = pdpte_index(vaddr);
	uint64_t local_prot = prot;

	dev_dbg(DBG_LEVEL_MMU, "%s, paddr: 0x%lx, vaddr: [0x%lx - 0x%lx]\n", __func__, paddr, vaddr, vaddr_end);
	for (; index < PTRS_PER_PDPTE; index++) {
		uint64_t *pdpte = pdpt_page + index;
		uint64_t vaddr_next = (vaddr & PDPTE_MASK) + PDPTE_SIZE;

		if (pdpte_large(*pdpte) != 0UL) {
			pr_fatal("%s, pdpte 0x%lx is already present!\n", __func__, vaddr);
		} else {
			if (!pgentry_present(table, (*pdpte))) {
				if (table->large_page_support(IA32E_PDPT, prot) &&
					mem_aligned_check(paddr, PDPTE_SIZE) &&
					mem_aligned_check(vaddr, PDPTE_SIZE) &&
					(vaddr_next <= vaddr_end)) {
					table->tweak_exe_right(&local_prot);
					set_pgentry(pdpte, paddr | (local_prot | PAGE_PSE), table);
					if (vaddr_next < vaddr_end) {
						paddr += (vaddr_next - vaddr);
						vaddr = vaddr_next;
						continue;
					}
					break;	/* done */
				} else {
					void *pd_page = alloc_page(table->pool);
					construct_pgentry(pdpte, pd_page, table->default_access_right, table);
				}
			}
			add_pde(pdpte, paddr, vaddr, vaddr_end, prot, table);
		}
		if (vaddr_next >= vaddr_end) {
			break;	/* done */
		}
		paddr += (vaddr_next - vaddr);
		vaddr = vaddr_next;
	}
}

/**
 * @brief Add new page table mappings.
 *
 * This function maps a virtual address range specified by [vaddr_base, vaddr_base + size) to a physical address range
 * starting from 'paddr_base'.
 *
 * - If any subrange within [vaddr_base, vaddr_base + size) is already mapped, there is no change to the corresponding
 * mapping and the operation continues.
 * - When a new 1GB or 2MB mapping is established, the callback function table->tweak_exe_right() is invoked to tweak
 * the execution bit.
 * - When a new page table referenced by a new PDPTE/PDE is created, all entries in the page table are initialized to
 * point to the sanitized page by default.
 * - Finally, the new mappings are established and initialized according to the specified address range and properties.
 *
 * @param[inout] pml4_page A pointer to the specified PML4 table hierarchy.
 * @param[in] paddr_base The specified physical address determining the start of the physical memory region.
 *                       It is the host physical address.
 * @param[in] vaddr_base The specified input address determining the start of the input address space.
 *                       For hypervisor's MMU, it is the host virtual address.
 *                       For each VM's EPT, it is the guest physical address.
 * @param[in] size The size of the specified input address space.
 * @param[in] prot Bit positions representing the specified properties which need to be set.
 * @param[in] table A pointer to the struct pgtable containing the information of the specified memory operations.
 *
 * @return None
 *
 * @pre pml4_page != NULL
 * @pre Any subrange within [vaddr_base, vaddr_base + size) shall already be unmapped.
 * @pre For x86 hypervisor mapping, the following condition shall be met.
 *      - prot & ~(PAGE_PRESENT | PAGE_RW | PAGE_USER | PAGE_PWT | PAGE_PCD | PAGE_ACCESSED | PAGE_DIRTY | PAGE_PSE |
 *      PAGE_GLOBAL | PAGE_PAT_LARGE | PAGE_NX) == 0
 * @pre For VM EPT mapping, the following conditions shall be met.
 *      - prot & ~(EPT_RD | EPT_WR | EPT_EXE | EPT_MT_MASK | EPT_IGNORE_PAT) == 0
 *      - (prot & EPT_MT_MASK) == EPT_UNCACHED || (prot & EPT_MT_MASK) == EPT_WC || (prot & EPT_MT_MASK) == EPT_WT ||
 *        (prot & EPT_MT_MASK) == EPT_WP || (prot & EPT_MT_MASK) == EPT_WB
 * @pre table != NULL
 *
 * @post N/A
 *
 * @remark N/A
 */
void pgtable_add_map(uint64_t *pml4_page, uint64_t paddr_base, uint64_t vaddr_base,
		uint64_t size, uint64_t prot, const struct pgtable *table)
{
	uint64_t vaddr, vaddr_next, vaddr_end;
	uint64_t paddr;
	uint64_t *pml4e;

	dev_dbg(DBG_LEVEL_MMU, "%s, paddr 0x%lx, vaddr 0x%lx, size 0x%lx\n", __func__, paddr_base, vaddr_base, size);

	/* align addresses to page size */
	vaddr = round_page_up(vaddr_base);
	paddr = round_page_up(paddr_base);
	vaddr_end = vaddr + round_page_down(size);

	while (vaddr < vaddr_end) {
		vaddr_next = (vaddr & PML4E_MASK) + PML4E_SIZE;
		pml4e = pml4e_offset(pml4_page, vaddr);
		if (!pgentry_present(table, (*pml4e))) {
			void *pdpt_page = alloc_page(table->pool);
			construct_pgentry(pml4e, pdpt_page, table->default_access_right, table);
		}
		add_pdpte(pml4e, paddr, vaddr, vaddr_end, prot, table);

		paddr += (vaddr_next - vaddr);
		vaddr = vaddr_next;
	}
}
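
/*
 * Illustrative usage (a sketch only, not code from this file; 'hv_pml4_page' and 'hv_pgtable'
 * are hypothetical names for the hypervisor's own PML4 page and a pointer to its struct pgtable):
 *
 *   Identity-map a host physical range into the hypervisor MMU as read/write, non-executable:
 *     pgtable_add_map(hv_pml4_page, base_hpa, base_hpa, size,
 *                     PAGE_PRESENT | PAGE_RW | PAGE_NX, hv_pgtable);
 */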

/**
 * @brief Create a new root page table.
 *
 * This function initializes and returns a new root page table. It is typically used during the setup of a new execution
 * context, such as initializing a hypervisor PML4 table or creating a virtual machine. The root page table is essential
 * for defining the virtual memory layout for the context.
 *
 * It creates a new root page table and every entry in the page table is initialized to point to the sanitized page.
 * Finally, the function returns the root page table pointer.
 *
 * @param[in] table A pointer to the struct pgtable containing the information of the specified memory operations.
 *
 * @return A pointer to the newly created root page table.
 *
 * @pre table != NULL
 *
 * @post N/A
 */
void *pgtable_create_root(const struct pgtable *table)
{
	uint64_t *page = (uint64_t *)alloc_page(table->pool);
	sanitize_pte(page, table);
	return page;
}
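
/*
 * Illustrative usage (a sketch only, not code from this file): the returned host virtual
 * address typically needs to be converted to a host physical address before being loaded
 * into CR3 or an EPTP, for example:
 *
 *   uint64_t *pml4_page = (uint64_t *)pgtable_create_root(table);
 *   uint64_t root_hpa = hva2hpa((void *)pml4_page);
 */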

/**
 * @brief Create a root page table for Secure World.
 *
 * This function initializes a new root page table for Secure World. It is intended to be used during the initialization
 * phase of Trusty, setting up isolated memory regions for secure execution. Secure World can access Normal World's
 * memory, but Normal World cannot access Secure World's memory. The PML4 table/PDPT for Secure World are separate from
 * Normal World's. PD/PT are shared between Secure World's EPT and Normal World's EPT. So this function copies the PDPTEs
 * from the Normal World to the Secure World.
 *
 * - It creates a new root page table and every entry is initialized to point to the sanitized page by default.
 * - The access rights specified by prot_clr are cleared for the Secure World PDPTEs.
 * - Finally, the function returns the new root page table pointer.
 *
 * @param[in] table A pointer to the struct pgtable containing the information of the specified memory operations.
 * @param[in] nworld_pml4_page A pointer to the PML4 table hierarchy in Normal World.
 * @param[in] prot_table_present Mask indicating that the referenced page is present.
 * @param[in] prot_clr Bit positions representing the specified properties which need to be cleared.
 *
 * @return A pointer to the newly created root page table for Secure World.
 *
 * @pre table != NULL
 * @pre nworld_pml4_page != NULL
 *
 * @post N/A
 */
void *pgtable_create_trusty_root(const struct pgtable *table,
	void *nworld_pml4_page, uint64_t prot_table_present, uint64_t prot_clr)
{
	uint16_t i;
	uint64_t pdpte, *dest_pdpte_p, *src_pdpte_p;
	uint64_t nworld_pml4e, sworld_pml4e;
	void *sub_table_addr, *pml4_base;

	/* Copy PDPT entries from Normal world to Secure world
	 * Secure world can access Normal World's memory,
	 * but Normal World cannot access Secure World's memory.
	 * The PML4/PDPT for Secure world are separated from
	 * Normal World. PD/PT are shared in both Secure world's EPT
	 * and Normal World's EPT
	 */
	pml4_base = pgtable_create_root(table);

	/* The trusty memory is remapped to guest physical address
	 * of gpa_rebased to gpa_rebased + size
	 */
	sub_table_addr = alloc_page(table->pool);
	sworld_pml4e = hva2hpa(sub_table_addr) | prot_table_present;
	set_pgentry((uint64_t *)pml4_base, sworld_pml4e, table);

	nworld_pml4e = get_pgentry((uint64_t *)nworld_pml4_page);

	/*
	 * copy PDPTEs from normal world EPT to secure world EPT,
	 * and remove execute access attribute in these entries
	 */
	dest_pdpte_p = pml4e_page_vaddr(sworld_pml4e);
	src_pdpte_p = pml4e_page_vaddr(nworld_pml4e);
	for (i = 0U; i < (uint16_t)(PTRS_PER_PDPTE - 1UL); i++) {
		pdpte = get_pgentry(src_pdpte_p);
		if ((pdpte & prot_table_present) != 0UL) {
			pdpte &= ~prot_clr;
			set_pgentry(dest_pdpte_p, pdpte, table);
		}
		src_pdpte_p++;
		dest_pdpte_p++;
	}

	return pml4_base;
}
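
/*
 * Illustrative call (a sketch only, not code from this file; 'vm_ept_pgtable' and
 * 'nworld_pml4_hva' are hypothetical names for a pointer to a VM's EPT struct pgtable and
 * the host virtual address of its Normal World PML4 page, and the present mask shown is an
 * assumption). During Trusty setup, the Secure World copy of the PDPTEs might have the
 * execute right removed:
 *
 *   void *sworld_pml4 = pgtable_create_trusty_root(vm_ept_pgtable, nworld_pml4_hva,
 *                           EPT_RD | EPT_WR | EPT_EXE, EPT_EXE);
 */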

/**
 * @brief Look for the paging-structure entry that contains the mapping information for the specified input address.
 *
 * This function looks for the paging-structure entry that contains the mapping information for the specified input
 * address of the translation process. It is used to search the page table hierarchy for the entry corresponding to the
 * given virtual address. The function traverses the page table hierarchy from the PML4 down to the appropriate page
 * table level, returning the entry if found.
 *
 * - If the specified address is mapped in the page table hierarchy, it will return a pointer to the page table entry
 * that maps the specified address.
 * - If the specified address is not mapped in the page table hierarchy, it will return NULL.
 *
 * @param[in] pml4_page A pointer to the specified PML4 table hierarchy.
 * @param[in] addr The specified input address whose mapping information is to be searched.
 *                 For hypervisor's MMU, it is the host virtual address.
 *                 For each VM's EPT, it is the guest physical address.
 * @param[out] pg_size A pointer to the size of the page controlled by the returned paging-structure entry.
 * @param[in] table A pointer to the struct pgtable containing the information of the specified memory operations.
 *
 * @return A pointer to the paging-structure entry that maps the specified input address.
 *
 * @retval non-NULL There is a paging-structure entry that contains the mapping information for the specified input
 *                  address.
 * @retval NULL There is no paging-structure entry that contains the mapping information for the specified input
 *              address.
 *
 * @pre pml4_page != NULL
 * @pre pg_size != NULL
 * @pre table != NULL
 *
 * @post N/A
 *
 * @remark N/A
 */
const uint64_t *pgtable_lookup_entry(uint64_t *pml4_page, uint64_t addr, uint64_t *pg_size, const struct pgtable *table)
{
	const uint64_t *pret = NULL;
	bool present = true;
	uint64_t *pml4e, *pdpte, *pde, *pte;

	pml4e = pml4e_offset(pml4_page, addr);
	present = pgentry_present(table, (*pml4e));

	if (present) {
		pdpte = pdpte_offset(pml4e, addr);
		present = pgentry_present(table, (*pdpte));
		if (present) {
			if (pdpte_large(*pdpte) != 0UL) {
				*pg_size = PDPTE_SIZE;
				pret = pdpte;
			} else {
				pde = pde_offset(pdpte, addr);
				present = pgentry_present(table, (*pde));
				if (present) {
					if (pde_large(*pde) != 0UL) {
						*pg_size = PDE_SIZE;
						pret = pde;
					} else {
						pte = pte_offset(pde, addr);
						present = pgentry_present(table, (*pte));
						if (present) {
							*pg_size = PTE_SIZE;
							pret = pte;
						}
					}
				}
			}
		}
	}

	return pret;
}
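
/*
 * Illustrative usage (a sketch only, not code from this file): translate an input address by
 * combining the page frame from the returned entry with the offset inside the page. Note that
 * any property bits kept above the page-frame field would still need to be masked out.
 *
 *   uint64_t pg_size = 0UL;
 *   const uint64_t *entry = pgtable_lookup_entry(pml4_page, addr, &pg_size, table);
 *   if (entry != NULL) {
 *       uint64_t out_addr = ((*entry) & ~(pg_size - 1UL)) | (addr & (pg_size - 1UL));
 *   }
 */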

/**
 * @}
 */