/*
 * Copyright (C) 2018-2024 Intel Corporation.
 *
 * SPDX-License-Identifier: BSD-3-Clause
 */

#include <types.h>
#include <util.h>
#include <acrn_hv_defs.h>
#include <asm/page.h>
#include <asm/mmu.h>
#include <logmsg.h>

/**
 * @addtogroup hwmgmt_page
 *
 * @{
 */

/**
 * @file
 * @brief Implementation of page table management.
 *
 * This file implements the external APIs to establish, modify, delete, or look up mapping information. It also
 * defines some helper functions to implement the features that are commonly used in this file.
 *
 */

#define DBG_LEVEL_MMU 6U

/**
 * @brief Host physical address of the sanitized page.
 *
 * The sanitized page is used to mitigate L1TF (L1 Terminal Fault). This variable stores the host physical address of
 * the sanitized page.
 */
static uint64_t sanitized_page_hpa;

static void sanitize_pte_entry(uint64_t *ptep, const struct pgtable *table)
{
	set_pgentry(ptep, sanitized_page_hpa, table);
}

static void sanitize_pte(uint64_t *pt_page, const struct pgtable *table)
{
	uint64_t i;
	for (i = 0UL; i < PTRS_PER_PTE; i++) {
		sanitize_pte_entry(pt_page + i, table);
	}
}

/**
 * @brief Initializes a sanitized page.
 *
 * This function is responsible for initializing a sanitized page. It sets the page table entries in this sanitized page
 * to point to the host physical address of the sanitized page itself.
 *
 * The static variable 'sanitized_page_hpa' will be set and the `sanitized_page` will be initialized.
 *
 * @param[out] sanitized_page The page to be sanitized.
 * @param[in] hpa The host physical address that the page table entries in the sanitized page will point to.
 *
 * @return None
 *
 * @pre sanitized_page != NULL
 * @pre ((uint64_t)sanitized_page & (PAGE_SIZE - 1)) == 0x0U
 * @pre hpa != 0U
 * @pre (hpa & (PAGE_SIZE - 1)) == 0x0U
 *
 * @post N/A
 */
void init_sanitized_page(uint64_t *sanitized_page, uint64_t hpa)
{
	uint64_t i;

	sanitized_page_hpa = hpa;
	/* set each ptep in sanitized_page to point to the sanitized page itself */
	for (i = 0UL; i < PTRS_PER_PTE; i++) {
		*(sanitized_page + i) = sanitized_page_hpa;
	}
}

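/*
 * Illustrative usage only (the buffer name below is hypothetical): a caller
 * is expected to hand in a page-aligned, page-sized buffer together with its
 * host physical address during early paging setup, for example:
 *
 *	static uint64_t demo_sanitized_page[PTRS_PER_PTE] __aligned(PAGE_SIZE);
 *	...
 *	init_sanitized_page(demo_sanitized_page, hva2hpa((void *)demo_sanitized_page));
 */

/*
 * Free a page-table page after an MR_DEL operation once none of its entries
 * is present any more (i.e. every entry points to the sanitized page), and
 * redirect the referencing upper-level entry back to the sanitized page.
 */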
static void try_to_free_pgtable_page(const struct pgtable *table,
		uint64_t *pde, uint64_t *pt_page, uint32_t type)
{
	if (type == MR_DEL) {
		uint64_t index;

		for (index = 0UL; index < PTRS_PER_PTE; index++) {
			uint64_t *pte = pt_page + index;
			if (pgentry_present(table, (*pte))) {
				break;
			}
		}

		if (index == PTRS_PER_PTE) {
			free_page(table->pool, (void *)pt_page);
			sanitize_pte_entry(pde, table);
		}
	}
}

/*
 * Split a large page into a next-level page table.
 *
 * @pre level can only be IA32E_PDPT or IA32E_PD
 */
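
/*
 * Worked example (illustration only): splitting a 2MB PDE that maps host
 * physical address 0x40000000 allocates one page-table page and fills its
 * 512 PTEs with 0x40000000, 0x40001000, ..., 0x401ff000 (paddrinc ==
 * PTE_SIZE), carrying over the original protection bits with PAGE_PSE
 * cleared; the PDE is then rewritten to reference the new page table.
 */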
static void split_large_page(uint64_t *pte, enum _page_table_level level,
		__unused uint64_t vaddr, const struct pgtable *table)
{
	uint64_t *pbase;
	uint64_t ref_paddr, paddr, paddrinc;
	uint64_t i, ref_prot;

	switch (level) {
	case IA32E_PDPT:
		ref_paddr = (*pte) & PDPTE_PFN_MASK;
		paddrinc = PDE_SIZE;
		ref_prot = (*pte) & ~PDPTE_PFN_MASK;
		break;
	default: /* IA32E_PD */
		ref_paddr = (*pte) & PDE_PFN_MASK;
		paddrinc = PTE_SIZE;
		ref_prot = (*pte) & ~PDE_PFN_MASK;
		ref_prot &= ~PAGE_PSE;
		table->recover_exe_right(&ref_prot);
		break;
	}

	pbase = (uint64_t *)alloc_page(table->pool);
	dev_dbg(DBG_LEVEL_MMU, "%s, paddr: 0x%lx, pbase: 0x%lx\n", __func__, ref_paddr, pbase);

	paddr = ref_paddr;
	for (i = 0UL; i < PTRS_PER_PTE; i++) {
		set_pgentry(pbase + i, paddr | ref_prot, table);
		paddr += paddrinc;
	}

	ref_prot = table->default_access_right;
	set_pgentry(pte, hva2hpa((void *)pbase) | ref_prot, table);

	/* TODO: flush the TLB */
}

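/*
 * Update a single paging-structure entry in place: for MR_MODIFY, clear the
 * bits given by prot_clr and then set the bits given by prot_set; for MR_DEL,
 * redirect the entry to the sanitized page.
 */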
static inline void local_modify_or_del_pte(uint64_t *pte,
		uint64_t prot_set, uint64_t prot_clr, uint32_t type, const struct pgtable *table)
{
	if (type == MR_MODIFY) {
		uint64_t new_pte = *pte;
		new_pte &= ~prot_clr;
		new_pte |= prot_set;
		set_pgentry(pte, new_pte, table);
	} else {
		sanitize_pte_entry(pte, table);
	}
}

/*
 * pgentry may mean a pml4e, pdpte or pde.
 */
static inline void construct_pgentry(uint64_t *pde, void *pd_page, uint64_t prot, const struct pgtable *table)
{
	sanitize_pte((uint64_t *)pd_page, table);

	set_pgentry(pde, hva2hpa(pd_page) | prot, table);
}

/*
 * In PT level,
 * type: MR_MODIFY
 *      modify [vaddr_start, vaddr_end) memory type or page access right.
 * type: MR_DEL
 *      delete [vaddr_start, vaddr_end) MT PT mapping
 */
static void modify_or_del_pte(uint64_t *pde, uint64_t vaddr_start, uint64_t vaddr_end,
		uint64_t prot_set, uint64_t prot_clr, const struct pgtable *table, uint32_t type)
{
	uint64_t *pt_page = pde_page_vaddr(*pde);
	uint64_t vaddr = vaddr_start;
	uint64_t index = pte_index(vaddr);

	dev_dbg(DBG_LEVEL_MMU, "%s, vaddr: [0x%lx - 0x%lx]\n", __func__, vaddr, vaddr_end);
	for (; index < PTRS_PER_PTE; index++) {
		uint64_t *pte = pt_page + index;

		if (!pgentry_present(table, (*pte))) {
			/* Suppress the warning message for low memory (< 1MB), as the Service VM
			 * will update MTRR attributes for this region by default, whether it
			 * is present or not.
			 */
			if ((type == MR_MODIFY) && (vaddr >= MEM_1M)) {
				pr_warn("%s, vaddr: 0x%lx pte is not present.\n", __func__, vaddr);
			}
		} else {
			local_modify_or_del_pte(pte, prot_set, prot_clr, type, table);
		}

		vaddr += PTE_SIZE;
		if (vaddr >= vaddr_end) {
			break;
		}
	}

	try_to_free_pgtable_page(table, pde, pt_page, type);
}

/*
 * In PD level,
 * type: MR_MODIFY
 *      modify [vaddr_start, vaddr_end) memory type or page access right.
 * type: MR_DEL
 *      delete [vaddr_start, vaddr_end) MT PT mapping
 */
static void modify_or_del_pde(uint64_t *pdpte, uint64_t vaddr_start, uint64_t vaddr_end,
		uint64_t prot_set, uint64_t prot_clr, const struct pgtable *table, uint32_t type)
{
	uint64_t *pd_page = pdpte_page_vaddr(*pdpte);
	uint64_t vaddr = vaddr_start;
	uint64_t index = pde_index(vaddr);

	dev_dbg(DBG_LEVEL_MMU, "%s, vaddr: [0x%lx - 0x%lx]\n", __func__, vaddr, vaddr_end);
	for (; index < PTRS_PER_PDE; index++) {
		uint64_t *pde = pd_page + index;
		uint64_t vaddr_next = (vaddr & PDE_MASK) + PDE_SIZE;

		if (!pgentry_present(table, (*pde))) {
			if (type == MR_MODIFY) {
				pr_warn("%s, addr: 0x%lx pde is not present.\n", __func__, vaddr);
			}
		} else {
			if (pde_large(*pde) != 0UL) {
				if ((vaddr_next > vaddr_end) || (!mem_aligned_check(vaddr, PDE_SIZE))) {
					split_large_page(pde, IA32E_PD, vaddr, table);
				} else {
					local_modify_or_del_pte(pde, prot_set, prot_clr, type, table);
					if (vaddr_next < vaddr_end) {
						vaddr = vaddr_next;
						continue;
					}
					break; /* done */
				}
			}
			modify_or_del_pte(pde, vaddr, vaddr_end, prot_set, prot_clr, table, type);
		}
		if (vaddr_next >= vaddr_end) {
			break; /* done */
		}
		vaddr = vaddr_next;
	}

	try_to_free_pgtable_page(table, pdpte, pd_page, type);
}

/*
 * In PDPT level,
 * type: MR_MODIFY
 *      modify [vaddr_start, vaddr_end) memory type or page access right.
 * type: MR_DEL
 *      delete [vaddr_start, vaddr_end) MT PT mapping
 */
static void modify_or_del_pdpte(const uint64_t *pml4e, uint64_t vaddr_start, uint64_t vaddr_end,
		uint64_t prot_set, uint64_t prot_clr, const struct pgtable *table, uint32_t type)
{
	uint64_t *pdpt_page = pml4e_page_vaddr(*pml4e);
	uint64_t vaddr = vaddr_start;
	uint64_t index = pdpte_index(vaddr);

	dev_dbg(DBG_LEVEL_MMU, "%s, vaddr: [0x%lx - 0x%lx]\n", __func__, vaddr, vaddr_end);
	for (; index < PTRS_PER_PDPTE; index++) {
		uint64_t *pdpte = pdpt_page + index;
		uint64_t vaddr_next = (vaddr & PDPTE_MASK) + PDPTE_SIZE;

		if (!pgentry_present(table, (*pdpte))) {
			if (type == MR_MODIFY) {
				pr_warn("%s, vaddr: 0x%lx pdpte is not present.\n", __func__, vaddr);
			}
		} else {
			if (pdpte_large(*pdpte) != 0UL) {
				if ((vaddr_next > vaddr_end) ||
						(!mem_aligned_check(vaddr, PDPTE_SIZE))) {
					split_large_page(pdpte, IA32E_PDPT, vaddr, table);
				} else {
					local_modify_or_del_pte(pdpte, prot_set, prot_clr, type, table);
					if (vaddr_next < vaddr_end) {
						vaddr = vaddr_next;
						continue;
					}
					break; /* done */
				}
			}
			modify_or_del_pde(pdpte, vaddr, vaddr_end, prot_set, prot_clr, table, type);
		}
		if (vaddr_next >= vaddr_end) {
			break; /* done */
		}
		vaddr = vaddr_next;
	}
}

/**
 * @brief Modify or delete the mappings associated with the specified address range.
 *
 * This function modifies the properties of an existing mapping or deletes it entirely from the page table. The input
 * address range is specified by [vaddr_base, vaddr_base + size). It is used when changing the access permissions of a
 * memory region or when freeing a previously mapped region. This operation is critical for dynamic memory management,
 * allowing the system to adapt to changes in memory usage patterns or to reclaim resources.
 *
 * For error case behaviors:
 * - If the 'type' is MR_MODIFY and any page referenced by the PML4E in the specified address range is not present, the
 *   function asserts that the operation is invalid.
 * For normal case behaviors (when the error case conditions are not satisfied):
 * - If any page referenced by the PDPTE/PDE/PTE in the specified address range is not present, there is no change to
 *   the corresponding mapping and it continues the operation.
 * - If any PDPTE/PDE in the specified address range maps a large page and the large page address exceeds the specified
 *   address range, the function splits the large page into the next-level page table to allow for the modification or
 *   deletion of the mappings, and the execute right will be recovered by the callback function
 *   table->recover_exe_right() when a 2MB page is split into 4KB pages.
 * - If the 'type' is MR_MODIFY, the function modifies the properties of the existing mapping to match the specified
 *   properties.
 * - If the 'type' is MR_DEL, the function will set the corresponding page table entries to point to the sanitized
 *   page.
 *
 * @param[inout] pml4_page A pointer to the specified PML4 table.
 * @param[in] vaddr_base The specified input address determining the start of the input address range whose mapping
 *                       information is to be updated.
 *                       For hypervisor's MMU, it is the host virtual address.
 *                       For each VM's EPT, it is the guest physical address.
 * @param[in] size The size of the specified input address range whose mapping information is to be updated.
 * @param[in] prot_set Bit positions representing the specified properties which need to be set.
 *                     Bits specified by prot_clr are cleared before each bit specified by prot_set is set to 1.
 * @param[in] prot_clr Bit positions representing the specified properties which need to be cleared.
 *                     Bits specified by prot_clr are cleared before each bit specified by prot_set is set to 1.
 * @param[in] table A pointer to the struct pgtable containing the information of the specified memory operations.
 * @param[in] type The type of operation to perform (MR_MODIFY or MR_DEL).
 *
 * @return None
 *
 * @pre pml4_page != NULL
 * @pre table != NULL
 * @pre (type == MR_MODIFY) || (type == MR_DEL)
 * @pre For x86 hypervisor, the following conditions shall be met if "type == MR_MODIFY".
 *      - (prot_set & ~(PAGE_RW | PAGE_USER | PAGE_PWT | PAGE_PCD | PAGE_ACCESSED | PAGE_DIRTY | PAGE_PSE | PAGE_GLOBAL
 *        | PAGE_PAT_LARGE | PAGE_NX) == 0)
 *      - (prot_clr & ~(PAGE_RW | PAGE_USER | PAGE_PWT | PAGE_PCD | PAGE_ACCESSED | PAGE_DIRTY | PAGE_PSE | PAGE_GLOBAL
 *        | PAGE_PAT_LARGE | PAGE_NX) == 0)
 * @pre For the VM EPT mappings, the following conditions shall be met if "type == MR_MODIFY".
 *      - (prot_set & ~(EPT_RD | EPT_WR | EPT_EXE | EPT_MT_MASK) == 0)
 *      - (prot_set & EPT_MT_MASK) == EPT_UNCACHED || (prot_set & EPT_MT_MASK) == EPT_WC ||
 *        (prot_set & EPT_MT_MASK) == EPT_WT || (prot_set & EPT_MT_MASK) == EPT_WP || (prot_set & EPT_MT_MASK) == EPT_WB
 *      - (prot_clr & ~(EPT_RD | EPT_WR | EPT_EXE | EPT_MT_MASK) == 0)
 *      - (prot_clr & EPT_MT_MASK) == EPT_UNCACHED || (prot_clr & EPT_MT_MASK) == EPT_WC ||
 *        (prot_clr & EPT_MT_MASK) == EPT_WT || (prot_clr & EPT_MT_MASK) == EPT_WP || (prot_clr & EPT_MT_MASK) == EPT_WB
 *
 * @post N/A
 *
 * @remark N/A
 */
void pgtable_modify_or_del_map(uint64_t *pml4_page, uint64_t vaddr_base, uint64_t size,
		uint64_t prot_set, uint64_t prot_clr, const struct pgtable *table, uint32_t type)
{
	uint64_t vaddr = round_page_up(vaddr_base);
	uint64_t vaddr_next, vaddr_end;
	uint64_t *pml4e;

	vaddr_end = vaddr + round_page_down(size);
	dev_dbg(DBG_LEVEL_MMU, "%s, vaddr: 0x%lx, size: 0x%lx\n",
		__func__, vaddr, size);

	while (vaddr < vaddr_end) {
		vaddr_next = (vaddr & PML4E_MASK) + PML4E_SIZE;
		pml4e = pml4e_offset(pml4_page, vaddr);
		if ((!pgentry_present(table, (*pml4e))) && (type == MR_MODIFY)) {
			ASSERT(false, "invalid op, pml4e not present");
		} else {
			modify_or_del_pdpte(pml4e, vaddr, vaddr_end, prot_set, prot_clr, table, type);
			vaddr = vaddr_next;
		}
	}
}
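
/*
 * Illustrative usage only (pointer and address names below are hypothetical):
 * revoke write access on one page of an EPT hierarchy, then delete the
 * mapping of another page so that its entry points back to the sanitized
 * page:
 *
 *	pgtable_modify_or_del_map(eptp, gpa1, PAGE_SIZE, 0UL, EPT_WR, ept_pgtable, MR_MODIFY);
 *	pgtable_modify_or_del_map(eptp, gpa2, PAGE_SIZE, 0UL, 0UL, ept_pgtable, MR_DEL);
 */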

/*
 * In PT level,
 * add [vaddr_start, vaddr_end) to [paddr_base, ...) MT PT mapping
 */
static void add_pte(const uint64_t *pde, uint64_t paddr_start, uint64_t vaddr_start, uint64_t vaddr_end,
		uint64_t prot, const struct pgtable *table)
{
	uint64_t *pt_page = pde_page_vaddr(*pde);
	uint64_t vaddr = vaddr_start;
	uint64_t paddr = paddr_start;
	uint64_t index = pte_index(vaddr);

	dev_dbg(DBG_LEVEL_MMU, "%s, paddr: 0x%lx, vaddr: [0x%lx - 0x%lx]\n",
		__func__, paddr, vaddr_start, vaddr_end);
	for (; index < PTRS_PER_PTE; index++) {
		uint64_t *pte = pt_page + index;

		if (pgentry_present(table, (*pte))) {
			pr_fatal("%s, pte 0x%lx is already present!\n", __func__, vaddr);
		} else {
			set_pgentry(pte, paddr | prot, table);
		}
		paddr += PTE_SIZE;
		vaddr += PTE_SIZE;

		if (vaddr >= vaddr_end) {
			break; /* done */
		}
	}
}

/*
 * In PD level,
 * add [vaddr_start, vaddr_end) to [paddr_base, ...) MT PT mapping
 */
static void add_pde(const uint64_t *pdpte, uint64_t paddr_start, uint64_t vaddr_start, uint64_t vaddr_end,
		uint64_t prot, const struct pgtable *table)
{
	uint64_t *pd_page = pdpte_page_vaddr(*pdpte);
	uint64_t vaddr = vaddr_start;
	uint64_t paddr = paddr_start;
	uint64_t index = pde_index(vaddr);
	uint64_t local_prot = prot;

	dev_dbg(DBG_LEVEL_MMU, "%s, paddr: 0x%lx, vaddr: [0x%lx - 0x%lx]\n",
		__func__, paddr, vaddr, vaddr_end);
	for (; index < PTRS_PER_PDE; index++) {
		uint64_t *pde = pd_page + index;
		uint64_t vaddr_next = (vaddr & PDE_MASK) + PDE_SIZE;

		if (pde_large(*pde) != 0UL) {
			pr_fatal("%s, pde 0x%lx is already present!\n", __func__, vaddr);
		} else {
			if (!pgentry_present(table, (*pde))) {
				if (table->large_page_support(IA32E_PD, prot) &&
						mem_aligned_check(paddr, PDE_SIZE) &&
						mem_aligned_check(vaddr, PDE_SIZE) &&
						(vaddr_next <= vaddr_end)) {
					table->tweak_exe_right(&local_prot);
					set_pgentry(pde, paddr | (local_prot | PAGE_PSE), table);
					if (vaddr_next < vaddr_end) {
						paddr += (vaddr_next - vaddr);
						vaddr = vaddr_next;
						continue;
					}
					break; /* done */
				} else {
					void *pt_page = alloc_page(table->pool);
					construct_pgentry(pde, pt_page, table->default_access_right, table);
				}
			}
			add_pte(pde, paddr, vaddr, vaddr_end, prot, table);
		}
		if (vaddr_next >= vaddr_end) {
			break; /* done */
		}
		paddr += (vaddr_next - vaddr);
		vaddr = vaddr_next;
	}
}

/*
 * In PDPT level,
 * add [vaddr_start, vaddr_end) to [paddr_base, ...) MT PT mapping
 */
static void add_pdpte(const uint64_t *pml4e, uint64_t paddr_start, uint64_t vaddr_start, uint64_t vaddr_end,
		uint64_t prot, const struct pgtable *table)
{
	uint64_t *pdpt_page = pml4e_page_vaddr(*pml4e);
	uint64_t vaddr = vaddr_start;
	uint64_t paddr = paddr_start;
	uint64_t index = pdpte_index(vaddr);
	uint64_t local_prot = prot;

	dev_dbg(DBG_LEVEL_MMU, "%s, paddr: 0x%lx, vaddr: [0x%lx - 0x%lx]\n", __func__, paddr, vaddr, vaddr_end);
	for (; index < PTRS_PER_PDPTE; index++) {
		uint64_t *pdpte = pdpt_page + index;
		uint64_t vaddr_next = (vaddr & PDPTE_MASK) + PDPTE_SIZE;

		if (pdpte_large(*pdpte) != 0UL) {
			pr_fatal("%s, pdpte 0x%lx is already present!\n", __func__, vaddr);
		} else {
			if (!pgentry_present(table, (*pdpte))) {
				if (table->large_page_support(IA32E_PDPT, prot) &&
						mem_aligned_check(paddr, PDPTE_SIZE) &&
						mem_aligned_check(vaddr, PDPTE_SIZE) &&
						(vaddr_next <= vaddr_end)) {
					table->tweak_exe_right(&local_prot);
					set_pgentry(pdpte, paddr | (local_prot | PAGE_PSE), table);
					if (vaddr_next < vaddr_end) {
						paddr += (vaddr_next - vaddr);
						vaddr = vaddr_next;
						continue;
					}
					break; /* done */
				} else {
					void *pd_page = alloc_page(table->pool);
					construct_pgentry(pdpte, pd_page, table->default_access_right, table);
				}
			}
			add_pde(pdpte, paddr, vaddr, vaddr_end, prot, table);
		}
		if (vaddr_next >= vaddr_end) {
			break; /* done */
		}
		paddr += (vaddr_next - vaddr);
		vaddr = vaddr_next;
	}
}

/**
 * @brief Add new page table mappings.
 *
 * This function maps a virtual address range specified by [vaddr_base, vaddr_base + size) to a physical address range
 * starting from 'paddr_base'.
 *
 * - If any subrange within [vaddr_base, vaddr_base + size) is already mapped, there is no change to the corresponding
 *   mapping and it continues the operation.
 * - When a new 1GB or 2MB mapping is established, the callback function table->tweak_exe_right() is invoked to tweak
 *   the execution bit.
 * - When a new page table referenced by a new PDPTE/PDE is created, all entries in the page table are initialized to
 *   point to the sanitized page by default.
 * - Finally, the new mappings are established and initialized according to the specified address range and properties.
 *
 * @param[inout] pml4_page A pointer to the specified PML4 table hierarchy.
 * @param[in] paddr_base The specified physical address determining the start of the physical memory region.
 *                       It is the host physical address.
 * @param[in] vaddr_base The specified input address determining the start of the input address space.
 *                       For hypervisor's MMU, it is the host virtual address.
 *                       For each VM's EPT, it is the guest physical address.
 * @param[in] size The size of the specified input address space.
 * @param[in] prot Bit positions representing the specified properties which need to be set.
 * @param[in] table A pointer to the struct pgtable containing the information of the specified memory operations.
 *
 * @return None
 *
 * @pre pml4_page != NULL
 * @pre Any subrange within [vaddr_base, vaddr_base + size) shall already be unmapped.
 * @pre For x86 hypervisor mapping, the following condition shall be met.
 *      - prot & ~(PAGE_PRESENT | PAGE_RW | PAGE_USER | PAGE_PWT | PAGE_PCD | PAGE_ACCESSED | PAGE_DIRTY | PAGE_PSE |
 *        PAGE_GLOBAL | PAGE_PAT_LARGE | PAGE_NX) == 0
 * @pre For VM EPT mapping, the following conditions shall be met.
 *      - prot & ~(EPT_RD | EPT_WR | EPT_EXE | EPT_MT_MASK | EPT_IGNORE_PAT) == 0
 *      - (prot & EPT_MT_MASK) == EPT_UNCACHED || (prot & EPT_MT_MASK) == EPT_WC || (prot & EPT_MT_MASK) == EPT_WT ||
 *        (prot & EPT_MT_MASK) == EPT_WP || (prot & EPT_MT_MASK) == EPT_WB
 * @pre table != NULL
 *
 * @post N/A
 *
 * @remark N/A
 */
void pgtable_add_map(uint64_t *pml4_page, uint64_t paddr_base, uint64_t vaddr_base,
		uint64_t size, uint64_t prot, const struct pgtable *table)
{
	uint64_t vaddr, vaddr_next, vaddr_end;
	uint64_t paddr;
	uint64_t *pml4e;

	dev_dbg(DBG_LEVEL_MMU, "%s, paddr 0x%lx, vaddr 0x%lx, size 0x%lx\n", __func__, paddr_base, vaddr_base, size);

	/* align address to page size */
	vaddr = round_page_up(vaddr_base);
	paddr = round_page_up(paddr_base);
	vaddr_end = vaddr + round_page_down(size);

	while (vaddr < vaddr_end) {
		vaddr_next = (vaddr & PML4E_MASK) + PML4E_SIZE;
		pml4e = pml4e_offset(pml4_page, vaddr);
		if (!pgentry_present(table, (*pml4e))) {
			void *pdpt_page = alloc_page(table->pool);
			construct_pgentry(pml4e, pdpt_page, table->default_access_right, table);
		}
		add_pdpte(pml4e, paddr, vaddr, vaddr_end, prot, table);

		paddr += (vaddr_next - vaddr);
		vaddr = vaddr_next;
	}
}
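
/*
 * Illustrative usage only (pointer names below are hypothetical): identity-map
 * a 2MB-aligned host region into a hypervisor page-table hierarchy with
 * read/write, non-executable permission:
 *
 *	pgtable_add_map(hv_pml4, base_hpa, base_hpa, PDE_SIZE,
 *			PAGE_PRESENT | PAGE_RW | PAGE_NX, mmu_pgtable);
 */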

/**
 * @brief Create a new root page table.
 *
 * This function initializes and returns a new root page table. It is typically used during the setup of a new
 * execution context, such as initializing a hypervisor PML4 table or creating a virtual machine. The root page table
 * is essential for defining the virtual memory layout for the context.
 *
 * It creates a new root page table and every entry in the page table is initialized to point to the sanitized page.
 * Finally, the function returns the root page table pointer.
 *
 * @param[in] table A pointer to the struct pgtable containing the information of the specified memory operations.
 *
 * @return A pointer to the newly created root page table.
 *
 * @pre table != NULL
 *
 * @post N/A
 */
void *pgtable_create_root(const struct pgtable *table)
{
	uint64_t *page = (uint64_t *)alloc_page(table->pool);
	sanitize_pte(page, table);
	return page;
}
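
/*
 * Illustrative usage only (names below are hypothetical): create a fresh EPT
 * root whose entries all point to the sanitized page, then populate it:
 *
 *	uint64_t *pml4 = (uint64_t *)pgtable_create_root(ept_pgtable);
 *	pgtable_add_map(pml4, region_hpa, region_gpa, region_size,
 *			EPT_RD | EPT_WR | EPT_EXE | EPT_WB, ept_pgtable);
 */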

/**
 * @brief Create a root page table for Secure World.
 *
 * This function initializes a new root page table for Secure World. It is intended to be used during the
 * initialization phase of Trusty, setting up isolated memory regions for secure execution. Secure World can access
 * Normal World's memory, but Normal World cannot access Secure World's memory. The PML4T/PDPT for Secure World are
 * separated from Normal World. PD/PT are shared in both Secure World's EPT and Normal World's EPT. So this function
 * copies the PDPTEs from the Normal World to the Secure World.
 *
 * - It creates a new root page table and every entry is initialized to point to the sanitized page by default.
 * - The access right specified by prot_clr is cleared for Secure World PDPTEs.
 * - Finally, the function returns the new root page table pointer.
 *
 * @param[in] table A pointer to the struct pgtable containing the information of the specified memory operations.
 * @param[in] nworld_pml4_page A pointer to the PML4 table hierarchy in Normal World.
 * @param[in] prot_table_present Mask indicating that the referenced page is present.
 * @param[in] prot_clr Bit positions representing the specified properties which need to be cleared.
 *
 * @return A pointer to the newly created root page table for Secure World.
 *
 * @pre table != NULL
 * @pre nworld_pml4_page != NULL
 *
 * @post N/A
 */
void *pgtable_create_trusty_root(const struct pgtable *table,
		void *nworld_pml4_page, uint64_t prot_table_present, uint64_t prot_clr)
{
	uint16_t i;
	uint64_t pdpte, *dest_pdpte_p, *src_pdpte_p;
	uint64_t nworld_pml4e, sworld_pml4e;
	void *sub_table_addr, *pml4_base;

	/* Copy PDPT entries from Normal World to Secure World.
	 * Secure World can access Normal World's memory,
	 * but Normal World cannot access Secure World's memory.
	 * The PML4/PDPT for Secure World are separated from
	 * Normal World. PD/PT are shared in both Secure World's EPT
	 * and Normal World's EPT.
	 */
	pml4_base = pgtable_create_root(table);

	/* The trusty memory is remapped to guest physical address
	 * of gpa_rebased to gpa_rebased + size
	 */
	sub_table_addr = alloc_page(table->pool);
	sworld_pml4e = hva2hpa(sub_table_addr) | prot_table_present;
	set_pgentry((uint64_t *)pml4_base, sworld_pml4e, table);

	nworld_pml4e = get_pgentry((uint64_t *)nworld_pml4_page);

	/*
	 * Copy PDPTEs from the Normal World EPT to the Secure World EPT,
	 * and remove the execute access attribute in these entries.
	 */
	dest_pdpte_p = pml4e_page_vaddr(sworld_pml4e);
	src_pdpte_p = pml4e_page_vaddr(nworld_pml4e);
	for (i = 0U; i < (uint16_t)(PTRS_PER_PDPTE - 1UL); i++) {
		pdpte = get_pgentry(src_pdpte_p);
		if ((pdpte & prot_table_present) != 0UL) {
			pdpte &= ~prot_clr;
			set_pgentry(dest_pdpte_p, pdpte, table);
		}
		src_pdpte_p++;
		dest_pdpte_p++;
	}

	return pml4_base;
}
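
/*
 * Illustrative usage only (argument names and the exact permission masks are
 * assumptions, not taken from this file): build the Secure World EPT root
 * from an existing Normal World EPT, clearing execute access on the copied
 * PDPTEs:
 *
 *	void *sworld_pml4 = pgtable_create_trusty_root(ept_pgtable,
 *			nworld_pml4, EPT_RD | EPT_WR | EPT_EXE, EPT_EXE);
 */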

/**
 * @brief Look for the paging-structure entry that contains the mapping information for the specified input address.
 *
 * This function looks for the paging-structure entry that contains the mapping information for the specified input
 * address of the translation process. It is used to search the page table hierarchy for the entry corresponding to the
 * given virtual address. The function traverses the page table hierarchy from the PML4 down to the appropriate page
 * table level, returning the entry if found.
 *
 * - If the specified address is mapped in the page table hierarchy, it will return a pointer to the page table entry
 *   that maps the specified address.
 * - If the specified address is not mapped in the page table hierarchy, it will return NULL.
 *
 * @param[in] pml4_page A pointer to the specified PML4 table hierarchy.
 * @param[in] addr The specified input address whose mapping information is to be searched.
 *                 For hypervisor's MMU, it is the host virtual address.
 *                 For each VM's EPT, it is the guest physical address.
 * @param[out] pg_size A pointer to the size of the page controlled by the returned paging-structure entry.
 * @param[in] table A pointer to the struct pgtable containing the information of the specified memory operations.
 *
 * @return A pointer to the paging-structure entry that maps the specified input address.
 *
 * @retval non-NULL There is a paging-structure entry that contains the mapping information for the specified input
 *                  address.
 * @retval NULL There is no paging-structure entry that contains the mapping information for the specified input
 *              address.
 *
 * @pre pml4_page != NULL
 * @pre pg_size != NULL
 * @pre table != NULL
 *
 * @post N/A
 *
 * @remark N/A
 */
const uint64_t *pgtable_lookup_entry(uint64_t *pml4_page, uint64_t addr, uint64_t *pg_size, const struct pgtable *table)
{
	const uint64_t *pret = NULL;
	bool present = true;
	uint64_t *pml4e, *pdpte, *pde, *pte;

	pml4e = pml4e_offset(pml4_page, addr);
	present = pgentry_present(table, (*pml4e));

	if (present) {
		pdpte = pdpte_offset(pml4e, addr);
		present = pgentry_present(table, (*pdpte));
		if (present) {
			if (pdpte_large(*pdpte) != 0UL) {
				*pg_size = PDPTE_SIZE;
				pret = pdpte;
			} else {
				pde = pde_offset(pdpte, addr);
				present = pgentry_present(table, (*pde));
				if (present) {
					if (pde_large(*pde) != 0UL) {
						*pg_size = PDE_SIZE;
						pret = pde;
					} else {
						pte = pte_offset(pde, addr);
						present = pgentry_present(table, (*pte));
						if (present) {
							*pg_size = PTE_SIZE;
							pret = pte;
						}
					}
				}
			}
		}
	}

	return pret;
}
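
/*
 * Illustrative usage only (pointer and address names below are hypothetical):
 * translate a guest physical address by walking its EPT and combining the
 * entry's frame address with the offset inside the reported page size:
 *
 *	uint64_t pg_size = 0UL;
 *	const uint64_t *entry = pgtable_lookup_entry(eptp, gpa, &pg_size, ept_pgtable);
 *	if (entry != NULL) {
 *		uint64_t hpa = ((*entry) & ~(pg_size - 1UL)) | (gpa & (pg_size - 1UL));
 *		...
 *	}
 */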

/**
 * @}
 */