1 /*
2  * Copyright 2018 The Hafnium Authors.
3  *
4  * Use of this source code is governed by a BSD-style
5  * license that can be found in the LICENSE file or at
6  * https://opensource.org/licenses/BSD-3-Clause.
7  */
8 
9 #include "hf/mm.h"
10 
11 #include <stdatomic.h>
12 #include <stdint.h>
13 
14 #include "hf/arch/init.h"
15 
16 #include "hf/assert.h"
17 #include "hf/check.h"
18 #include "hf/dlog.h"
19 #include "hf/layout.h"
20 #include "hf/plat/console.h"
21 #include "hf/static_assert.h"
22 
/**
 * This file has functions for managing the stage 1 and stage 2 page tables used
 * by Hafnium. There is a stage 1 mapping used by Hafnium itself to access
 * memory, and then a stage 2 mapping per VM. The design assumes that all page
 * tables contain only 1-1 mappings, aligned on the block boundaries.
 */
29 
/*
 * For stage 2, the input is an intermediate physical address rather than a
 * virtual address so:
 */
34 static_assert(
35 	sizeof(ptable_addr_t) == sizeof(uintpaddr_t),
36 	"Currently, the same code manages the stage 1 and stage 2 page tables "
37 	"which only works if the virtual and intermediate physical addresses "
38 	"are the same size. It looks like that assumption might not be holding "
39 	"so we need to check that everything is going to be ok.");
40 
41 static struct mm_ptable ptable;
42 static struct spinlock ptable_lock;
43 
44 static bool mm_stage2_invalidate = false;
45 
46 /**
47  * After calling this function, modifications to stage-2 page tables will use
48  * break-before-make and invalidate the TLB for the affected range.
49  */
mm_vm_enable_invalidation(void)50 void mm_vm_enable_invalidation(void)
51 {
52 	mm_stage2_invalidate = true;
53 }
54 
55 /**
56  * Get the page table from the physical address.
57  */
mm_page_table_from_pa(paddr_t pa)58 static struct mm_page_table *mm_page_table_from_pa(paddr_t pa)
59 {
60 	return ptr_from_va(va_from_pa(pa));
61 }
62 
63 /**
64  * Rounds an address down to a page boundary.
65  */
mm_round_down_to_page(ptable_addr_t addr)66 static ptable_addr_t mm_round_down_to_page(ptable_addr_t addr)
67 {
68 	return addr & ~((ptable_addr_t)(PAGE_SIZE - 1));
69 }
70 
71 /**
72  * Rounds an address up to a page boundary.
73  */
mm_round_up_to_page(ptable_addr_t addr)74 static ptable_addr_t mm_round_up_to_page(ptable_addr_t addr)
75 {
76 	return mm_round_down_to_page(addr + PAGE_SIZE - 1);
77 }
78 
79 /**
80  * Calculates the size of the address space represented by a page table entry at
81  * the given level.
82  */
mm_entry_size(uint8_t level)83 static size_t mm_entry_size(uint8_t level)
84 {
85 	return UINT64_C(1) << (PAGE_BITS + level * PAGE_LEVEL_BITS);
86 }
87 
88 /**
89  * Gets the address of the start of the next block of the given size. The size
90  * must be a power of two.
91  */
mm_start_of_next_block(ptable_addr_t addr,size_t block_size)92 static ptable_addr_t mm_start_of_next_block(ptable_addr_t addr,
93 					    size_t block_size)
94 {
95 	return (addr + block_size) & ~(block_size - 1);
96 }
97 
98 /**
99  * Gets the physical address of the start of the next block of the given size.
100  * The size must be a power of two.
101  */
mm_pa_start_of_next_block(paddr_t pa,size_t block_size)102 static paddr_t mm_pa_start_of_next_block(paddr_t pa, size_t block_size)
103 {
104 	return pa_init((pa_addr(pa) + block_size) & ~(block_size - 1));
105 }
106 
107 /**
108  * For a given address, calculates the maximum (plus one) address that can be
109  * represented by the same table at the given level.
110  */
mm_level_end(ptable_addr_t addr,uint8_t level)111 static ptable_addr_t mm_level_end(ptable_addr_t addr, uint8_t level)
112 {
113 	size_t offset = PAGE_BITS + (level + 1) * PAGE_LEVEL_BITS;
114 
115 	return ((addr >> offset) + 1) << offset;
116 }
117 
118 /**
119  * For a given address, calculates the index at which its entry is stored in a
120  * table at the given level.
121  */
mm_index(ptable_addr_t addr,uint8_t level)122 static size_t mm_index(ptable_addr_t addr, uint8_t level)
123 {
124 	ptable_addr_t v = addr >> (PAGE_BITS + level * PAGE_LEVEL_BITS);
125 
126 	return v & ((UINT64_C(1) << PAGE_LEVEL_BITS) - 1);
127 }
128 
/**
 * Allocates `count` page tables from `ppool`.
 *
 * A single table comes from mpool_alloc(); more than one is requested from
 * mpool_alloc_contiguous() with `count` passed as both the second and third
 * argument (presumably entry count and alignment -- confirm against the mpool
 * API).
 *
 * Returns whatever the pool returned; callers in this file treat NULL as
 * allocation failure.
 */
static struct mm_page_table *mm_alloc_page_tables(size_t count,
						  struct mpool *ppool)
{
	if (count != 1) {
		return mpool_alloc_contiguous(ppool, count, count);
	}

	return mpool_alloc(ppool);
}
141 
142 /**
143  * Returns the maximum level in the page table given the flags.
144  */
mm_max_level(int flags)145 static uint8_t mm_max_level(int flags)
146 {
147 	return (flags & MM_FLAG_STAGE1) ? arch_mm_stage1_max_level()
148 					: arch_mm_stage2_max_level();
149 }
150 
151 /**
152  * Returns the number of root-level tables given the flags.
153  */
mm_root_table_count(int flags)154 static uint8_t mm_root_table_count(int flags)
155 {
156 	return (flags & MM_FLAG_STAGE1) ? arch_mm_stage1_root_table_count()
157 					: arch_mm_stage2_root_table_count();
158 }
159 
160 /**
161  * Invalidates the TLB for the given address range.
162  */
mm_invalidate_tlb(ptable_addr_t begin,ptable_addr_t end,int flags,uint16_t id,bool non_secure)163 static void mm_invalidate_tlb(ptable_addr_t begin, ptable_addr_t end, int flags,
164 			      uint16_t id, bool non_secure)
165 {
166 	if (flags & MM_FLAG_STAGE1) {
167 		arch_mm_invalidate_stage1_range(id, va_init(begin),
168 						va_init(end));
169 	} else {
170 		arch_mm_invalidate_stage2_range(id, ipa_init(begin),
171 						ipa_init(end), non_secure);
172 	}
173 }
174 
175 /**
176  * Frees all page-table-related memory associated with the given pte at the
177  * given level, including any subtables recursively.
178  */
179 // NOLINTNEXTLINE(misc-no-recursion)
mm_free_page_pte(pte_t pte,uint8_t level,struct mpool * ppool)180 static void mm_free_page_pte(pte_t pte, uint8_t level, struct mpool *ppool)
181 {
182 	struct mm_page_table *table;
183 	uint64_t i;
184 
185 	if (!arch_mm_pte_is_table(pte, level)) {
186 		return;
187 	}
188 
189 	/* Recursively free any subtables. */
190 	table = mm_page_table_from_pa(arch_mm_table_from_pte(pte, level));
191 	for (i = 0; i < MM_PTE_PER_PAGE; ++i) {
192 		mm_free_page_pte(table->entries[i], level - 1, ppool);
193 	}
194 
195 	/* Free the table itself. */
196 	mpool_free(ppool, table);
197 }
198 
199 /**
200  * Returns the first address which cannot be encoded in page tables given by
201  * `flags`. It is the exclusive end of the address space created by the tables.
202  */
mm_ptable_addr_space_end(int flags)203 ptable_addr_t mm_ptable_addr_space_end(int flags)
204 {
205 	return mm_root_table_count(flags) *
206 	       mm_entry_size(mm_max_level(flags) + 1);
207 }
208 
209 /**
210  * Initialises the given page table.
211  */
mm_ptable_init(struct mm_ptable * t,uint16_t id,int flags,struct mpool * ppool)212 bool mm_ptable_init(struct mm_ptable *t, uint16_t id, int flags,
213 		    struct mpool *ppool)
214 {
215 	uint8_t i;
216 	size_t j;
217 	struct mm_page_table *tables;
218 	uint8_t root_table_count = mm_root_table_count(flags);
219 
220 	tables = mm_alloc_page_tables(root_table_count, ppool);
221 	if (tables == NULL) {
222 		return false;
223 	}
224 
225 	for (i = 0; i < root_table_count; i++) {
226 		for (j = 0; j < MM_PTE_PER_PAGE; j++) {
227 			tables[i].entries[j] =
228 				arch_mm_absent_pte(mm_max_level(flags));
229 		}
230 	}
231 
232 	/*
233 	 * TODO: halloc could return a virtual or physical address if mm not
234 	 * enabled?
235 	 */
236 	t->root = pa_init((uintpaddr_t)tables);
237 	t->id = id;
238 	return true;
239 }
240 
241 /**
242  * Frees all memory associated with the give page table.
243  */
mm_ptable_fini(struct mm_ptable * t,int flags,struct mpool * ppool)244 static void mm_ptable_fini(struct mm_ptable *t, int flags, struct mpool *ppool)
245 {
246 	struct mm_page_table *tables = mm_page_table_from_pa(t->root);
247 	uint8_t level = mm_max_level(flags);
248 	uint8_t root_table_count = mm_root_table_count(flags);
249 	uint8_t i;
250 	uint64_t j;
251 
252 	for (i = 0; i < root_table_count; ++i) {
253 		for (j = 0; j < MM_PTE_PER_PAGE; ++j) {
254 			mm_free_page_pte(tables[i].entries[j], level, ppool);
255 		}
256 	}
257 
258 	mpool_add_chunk(ppool, tables,
259 			sizeof(struct mm_page_table) * root_table_count);
260 }
261 
262 /**
263  * Replaces a page table entry with the given value. If both old and new values
264  * are valid, it performs a break-before-make sequence where it first writes an
265  * invalid value to the PTE, flushes the TLB, then writes the actual new value.
266  * This is to prevent cases where CPUs have different 'valid' values in their
267  * TLBs, which may result in issues for example in cache coherency.
268  */
mm_replace_entry(ptable_addr_t begin,pte_t * pte,pte_t new_pte,uint8_t level,int flags,struct mpool * ppool,uint16_t id,bool non_secure)269 static void mm_replace_entry(ptable_addr_t begin, pte_t *pte, pte_t new_pte,
270 			     uint8_t level, int flags, struct mpool *ppool,
271 			     uint16_t id, bool non_secure)
272 {
273 	pte_t v = *pte;
274 
275 	/*
276 	 * We need to do the break-before-make sequence if both values are
277 	 * present and the TLB is being invalidated.
278 	 */
279 	if (((flags & MM_FLAG_STAGE1) || mm_stage2_invalidate) &&
280 	    arch_mm_pte_is_valid(v, level)) {
281 		*pte = arch_mm_absent_pte(level);
282 		mm_invalidate_tlb(begin, begin + mm_entry_size(level), flags,
283 				  id, non_secure);
284 	}
285 
286 	/* Assign the new pte. */
287 	*pte = new_pte;
288 
289 	/* Free pages that aren't in use anymore. */
290 	mm_free_page_pte(v, level, ppool);
291 }
292 
293 /**
294  * Populates the provided page table entry with a reference to another table if
295  * needed, that is, if it does not yet point to another table.
296  *
297  * Returns a pointer to the table the entry now points to.
298  */
mm_populate_table_pte(ptable_addr_t begin,pte_t * pte,uint8_t level,int flags,struct mpool * ppool,uint16_t id,bool non_secure)299 static struct mm_page_table *mm_populate_table_pte(ptable_addr_t begin,
300 						   pte_t *pte, uint8_t level,
301 						   int flags,
302 						   struct mpool *ppool,
303 						   uint16_t id, bool non_secure)
304 {
305 	struct mm_page_table *ntable;
306 	pte_t v = *pte;
307 	pte_t new_pte;
308 	size_t i;
309 	size_t inc;
310 	uint8_t level_below = level - 1;
311 
312 	/* Just return pointer to table if it's already populated. */
313 	if (arch_mm_pte_is_table(v, level)) {
314 		return mm_page_table_from_pa(arch_mm_table_from_pte(v, level));
315 	}
316 
317 	/* Allocate a new table. */
318 	ntable = mm_alloc_page_tables(1, ppool);
319 	if (ntable == NULL) {
320 		dlog_error("Failed to allocate memory for page table\n");
321 		return NULL;
322 	}
323 
324 	/* Determine template for new pte and its increment. */
325 	if (arch_mm_pte_is_block(v, level)) {
326 		inc = mm_entry_size(level_below);
327 		new_pte = arch_mm_block_pte(level_below,
328 					    arch_mm_block_from_pte(v, level),
329 					    arch_mm_pte_attrs(v, level));
330 	} else {
331 		inc = 0;
332 		new_pte = arch_mm_absent_pte(level_below);
333 	}
334 
335 	/* Initialise entries in the new table. */
336 	for (i = 0; i < MM_PTE_PER_PAGE; i++) {
337 		ntable->entries[i] = new_pte;
338 		new_pte += inc;
339 	}
340 
341 	/* Ensure initialisation is visible before updating the pte. */
342 	atomic_thread_fence(memory_order_release);
343 
344 	/* Replace the pte entry, doing a break-before-make if needed. */
345 	mm_replace_entry(begin, pte,
346 			 arch_mm_table_pte(level, pa_init((uintpaddr_t)ntable)),
347 			 level, flags, ppool, id, non_secure);
348 
349 	return ntable;
350 }
351 
352 /**
353  * Updates the page table at the given level to map the given address range to a
354  * physical range using the provided (architecture-specific) attributes. Or if
355  * MM_FLAG_UNMAP is set, unmap the given range instead.
356  *
357  * This function calls itself recursively if it needs to update additional
358  * levels, but the recursion is bound by the maximum number of levels in a page
359  * table.
360  */
361 // NOLINTNEXTLINE(misc-no-recursion)
mm_map_level(ptable_addr_t begin,ptable_addr_t end,paddr_t pa,uint64_t attrs,struct mm_page_table * table,uint8_t level,int flags,struct mpool * ppool,uint16_t id)362 static bool mm_map_level(ptable_addr_t begin, ptable_addr_t end, paddr_t pa,
363 			 uint64_t attrs, struct mm_page_table *table,
364 			 uint8_t level, int flags, struct mpool *ppool,
365 			 uint16_t id)
366 {
367 	pte_t *pte = &table->entries[mm_index(begin, level)];
368 	ptable_addr_t level_end = mm_level_end(begin, level);
369 	size_t entry_size = mm_entry_size(level);
370 	bool commit = flags & MM_FLAG_COMMIT;
371 	bool unmap = flags & MM_FLAG_UNMAP;
372 
373 	/* Cap end so that we don't go over the current level max. */
374 	if (end > level_end) {
375 		end = level_end;
376 	}
377 
378 	/* Fill each entry in the table. */
379 	while (begin < end) {
380 		if (unmap ? !arch_mm_pte_is_present(*pte, level)
381 			  : arch_mm_pte_is_block(*pte, level) &&
382 				    arch_mm_pte_attrs(*pte, level) == attrs) {
383 			/*
384 			 * If the entry is already mapped with the right
385 			 * attributes, or already absent in the case of
386 			 * unmapping, no need to do anything; carry on to the
387 			 * next entry.
388 			 */
389 		} else if ((end - begin) >= entry_size &&
390 			   (unmap || arch_mm_is_block_allowed(level)) &&
391 			   (begin & (entry_size - 1)) == 0) {
392 			/*
393 			 * If the entire entry is within the region we want to
394 			 * map, map/unmap the whole entry.
395 			 */
396 			if (commit) {
397 				pte_t new_pte =
398 					unmap ? arch_mm_absent_pte(level)
399 					      : arch_mm_block_pte(level, pa,
400 								  attrs);
401 				mm_replace_entry(begin, pte, new_pte, level,
402 						 flags, ppool, id,
403 						 (attrs & (1ULL << 57)) != 0);
404 			}
405 		} else {
406 			/*
407 			 * If the entry is already a subtable get it; otherwise
408 			 * replace it with an equivalent subtable and get that.
409 			 */
410 			struct mm_page_table *nt = mm_populate_table_pte(
411 				begin, pte, level, flags, ppool, id,
412 				(attrs & (1ULL << 57)) != 0);
413 			if (nt == NULL) {
414 				return false;
415 			}
416 
417 			/*
418 			 * Recurse to map/unmap the appropriate entries within
419 			 * the subtable.
420 			 */
421 			if (!mm_map_level(begin, end, pa, attrs, nt, level - 1,
422 					  flags, ppool, id)) {
423 				return false;
424 			}
425 		}
426 
427 		begin = mm_start_of_next_block(begin, entry_size);
428 		pa = mm_pa_start_of_next_block(pa, entry_size);
429 		pte++;
430 	}
431 
432 	return true;
433 }
434 
435 /**
436  * Updates the page table from the root to map the given address range to a
437  * physical range using the provided (architecture-specific) attributes. Or if
438  * MM_FLAG_UNMAP is set, unmap the given range instead.
439  */
mm_map_root(struct mm_ptable * t,ptable_addr_t begin,ptable_addr_t end,uint64_t attrs,uint8_t root_level,int flags,struct mpool * ppool)440 static bool mm_map_root(struct mm_ptable *t, ptable_addr_t begin,
441 			ptable_addr_t end, uint64_t attrs, uint8_t root_level,
442 			int flags, struct mpool *ppool)
443 {
444 	size_t root_table_size = mm_entry_size(root_level);
445 	struct mm_page_table *table =
446 		&mm_page_table_from_pa(t->root)[mm_index(begin, root_level)];
447 
448 	while (begin < end) {
449 		if (!mm_map_level(begin, end, pa_init(begin), attrs, table,
450 				  root_level - 1, flags, ppool, t->id)) {
451 			return false;
452 		}
453 		begin = mm_start_of_next_block(begin, root_table_size);
454 		table++;
455 	}
456 
457 	return true;
458 }
459 
460 /**
461  * Updates the given table such that the given physical address range is mapped
462  * or not mapped into the address space with the architecture-agnostic mode
463  * provided. Only commits the change if MM_FLAG_COMMIT is set.
464  */
mm_ptable_identity_map(struct mm_ptable * t,paddr_t pa_begin,paddr_t pa_end,uint64_t attrs,int flags,struct mpool * ppool)465 static bool mm_ptable_identity_map(struct mm_ptable *t, paddr_t pa_begin,
466 				   paddr_t pa_end, uint64_t attrs, int flags,
467 				   struct mpool *ppool)
468 {
469 	uint8_t root_level = mm_max_level(flags) + 1;
470 	ptable_addr_t ptable_end = mm_ptable_addr_space_end(flags);
471 	ptable_addr_t end = mm_round_up_to_page(pa_addr(pa_end));
472 	ptable_addr_t begin = pa_addr(arch_mm_clear_pa(pa_begin));
473 
474 	/*
475 	 * Assert condition to communicate the API constraint of mm_max_level(),
476 	 * that isn't encoded in the types, to the static analyzer.
477 	 */
478 	assert(root_level >= 2);
479 
480 	/* Cap end to stay within the bounds of the page table. */
481 	if (end > ptable_end) {
482 		end = ptable_end;
483 	}
484 
485 	if (!mm_map_root(t, begin, end, attrs, root_level, flags, ppool)) {
486 		return false;
487 	}
488 
489 	/*
490 	 * All TLB invalidations must be complete already if any entries were
491 	 * replaced by mm_replace_entry. Sync all page table writes so that code
492 	 * following this can use them.
493 	 */
494 	arch_mm_sync_table_writes();
495 
496 	return true;
497 }
498 
499 /*
500  * Prepares the given page table for the given address mapping such that it
501  * will be able to commit the change without failure. It does so by ensuring
502  * the smallest granularity needed is available. This remains valid provided
503  * subsequent operations do not decrease the granularity.
504  *
505  * In particular, multiple calls to this function will result in the
506  * corresponding calls to commit the changes to succeed.
507  */
mm_ptable_identity_prepare(struct mm_ptable * t,paddr_t pa_begin,paddr_t pa_end,uint64_t attrs,int flags,struct mpool * ppool)508 static bool mm_ptable_identity_prepare(struct mm_ptable *t, paddr_t pa_begin,
509 				       paddr_t pa_end, uint64_t attrs,
510 				       int flags, struct mpool *ppool)
511 {
512 	flags &= ~MM_FLAG_COMMIT;
513 	return mm_ptable_identity_map(t, pa_begin, pa_end, attrs, flags, ppool);
514 }
515 
516 /**
517  * Commits the given address mapping to the page table assuming the operation
518  * cannot fail. `mm_ptable_identity_prepare` must used correctly before this to
519  * ensure this condition.
520  *
521  * Without the table being properly prepared, the commit may only partially
522  * complete if it runs out of memory resulting in an inconsistent state that
523  * isn't handled.
524  *
525  * Since the non-failure assumtion is used in the reasoning about the atomicity
526  * of higher level memory operations, any detected violations result in a panic.
527  *
528  * TODO: remove ppool argument to be sure no changes are made.
529  */
mm_ptable_identity_commit(struct mm_ptable * t,paddr_t pa_begin,paddr_t pa_end,uint64_t attrs,int flags,struct mpool * ppool)530 static void mm_ptable_identity_commit(struct mm_ptable *t, paddr_t pa_begin,
531 				      paddr_t pa_end, uint64_t attrs, int flags,
532 				      struct mpool *ppool)
533 {
534 	CHECK(mm_ptable_identity_map(t, pa_begin, pa_end, attrs,
535 				     flags | MM_FLAG_COMMIT, ppool));
536 }
537 
538 /**
539  * Updates the given table such that the given physical address range is mapped
540  * or not mapped into the address space with the architecture-agnostic mode
541  * provided.
542  *
543  * The page table is updated using the separate prepare and commit stages so
544  * that, on failure, a partial update of the address space cannot happen. The
545  * table may be left with extra internal tables but the address space is
546  * unchanged.
547  */
mm_ptable_identity_update(struct mm_ptable * t,paddr_t pa_begin,paddr_t pa_end,uint64_t attrs,int flags,struct mpool * ppool)548 static bool mm_ptable_identity_update(struct mm_ptable *t, paddr_t pa_begin,
549 				      paddr_t pa_end, uint64_t attrs, int flags,
550 				      struct mpool *ppool)
551 {
552 	if (!mm_ptable_identity_prepare(t, pa_begin, pa_end, attrs, flags,
553 					ppool)) {
554 		return false;
555 	}
556 
557 	mm_ptable_identity_commit(t, pa_begin, pa_end, attrs, flags, ppool);
558 
559 	return true;
560 }
561 
562 /**
563  * Writes the given table to the debug log, calling itself recursively to
564  * write sub-tables.
565  */
566 // NOLINTNEXTLINE(misc-no-recursion)
mm_dump_table_recursive(struct mm_page_table * table,uint8_t level,int max_level)567 static void mm_dump_table_recursive(struct mm_page_table *table, uint8_t level,
568 				    int max_level)
569 {
570 	uint64_t i;
571 
572 	for (i = 0; i < MM_PTE_PER_PAGE; i++) {
573 		if (!arch_mm_pte_is_present(table->entries[i], level)) {
574 			continue;
575 		}
576 
577 		dlog("%*s%lx: %lx\n", 4 * (max_level - level), "", i,
578 		     table->entries[i]);
579 
580 		if (arch_mm_pte_is_table(table->entries[i], level)) {
581 			mm_dump_table_recursive(
582 				mm_page_table_from_pa(arch_mm_table_from_pte(
583 					table->entries[i], level)),
584 				level - 1, max_level);
585 		}
586 	}
587 }
588 
589 /**
590  * Writes the given table to the debug log.
591  */
mm_ptable_dump(struct mm_ptable * t,int flags)592 static void mm_ptable_dump(struct mm_ptable *t, int flags)
593 {
594 	struct mm_page_table *tables = mm_page_table_from_pa(t->root);
595 	uint8_t max_level = mm_max_level(flags);
596 	uint8_t root_table_count = mm_root_table_count(flags);
597 	uint8_t i;
598 
599 	for (i = 0; i < root_table_count; ++i) {
600 		mm_dump_table_recursive(&tables[i], max_level, max_level);
601 	}
602 }
603 
604 /**
605  * Given the table PTE entries all have identical attributes, returns the single
606  * entry with which it can be replaced.
607  */
mm_merge_table_pte(pte_t table_pte,uint8_t level)608 static pte_t mm_merge_table_pte(pte_t table_pte, uint8_t level)
609 {
610 	struct mm_page_table *table;
611 	uint64_t block_attrs;
612 	uint64_t table_attrs;
613 	uint64_t combined_attrs;
614 	paddr_t block_address;
615 
616 	table = mm_page_table_from_pa(arch_mm_table_from_pte(table_pte, level));
617 
618 	if (!arch_mm_pte_is_present(table->entries[0], level - 1)) {
619 		return arch_mm_absent_pte(level);
620 	}
621 
622 	/* Might not be possible to merge the table into a single block. */
623 	if (!arch_mm_is_block_allowed(level)) {
624 		return table_pte;
625 	}
626 
627 	/* Replace table with a single block, with equivalent attributes. */
628 	block_attrs = arch_mm_pte_attrs(table->entries[0], level - 1);
629 	table_attrs = arch_mm_pte_attrs(table_pte, level);
630 	combined_attrs =
631 		arch_mm_combine_table_entry_attrs(table_attrs, block_attrs);
632 	block_address = arch_mm_block_from_pte(table->entries[0], level - 1);
633 
634 	return arch_mm_block_pte(level, block_address, combined_attrs);
635 }
636 
637 /**
638  * Defragments the given PTE by recursively replacing any tables with blocks or
639  * absent entries where possible.
640  */
641 // NOLINTNEXTLINE(misc-no-recursion)
mm_ptable_defrag_entry(ptable_addr_t base_addr,pte_t * entry,uint8_t level,int flags,struct mpool * ppool,uint16_t id,bool non_secure)642 static void mm_ptable_defrag_entry(ptable_addr_t base_addr, pte_t *entry,
643 				   uint8_t level, int flags,
644 				   struct mpool *ppool, uint16_t id,
645 				   bool non_secure)
646 {
647 	struct mm_page_table *table;
648 	uint64_t i;
649 	bool mergeable;
650 	bool base_present;
651 	uint64_t base_attrs;
652 	pte_t new_entry;
653 
654 	if (!arch_mm_pte_is_table(*entry, level)) {
655 		return;
656 	}
657 
658 	table = mm_page_table_from_pa(arch_mm_table_from_pte(*entry, level));
659 
660 	/* Defrag the first entry in the table and use it as the base entry. */
661 	static_assert(MM_PTE_PER_PAGE >= 1, "There must be at least one PTE.");
662 
663 	mm_ptable_defrag_entry(base_addr, &(table->entries[0]), level - 1,
664 			       flags, ppool, id, non_secure);
665 
666 	base_present = arch_mm_pte_is_present(table->entries[0], level - 1);
667 	base_attrs = arch_mm_pte_attrs(table->entries[0], level - 1);
668 
669 	/*
670 	 * Defrag the remaining entries in the table and check whether they are
671 	 * compatible with the base entry meaning the table can be merged into a
672 	 * block entry. It assumes addresses are contiguous due to identity
673 	 * mapping.
674 	 */
675 	mergeable = true;
676 	for (i = 1; i < MM_PTE_PER_PAGE; ++i) {
677 		bool present;
678 		ptable_addr_t block_addr =
679 			base_addr + (i * mm_entry_size(level - 1));
680 
681 		mm_ptable_defrag_entry(block_addr, &(table->entries[i]),
682 				       level - 1, flags, ppool, id, non_secure);
683 
684 		present = arch_mm_pte_is_present(table->entries[i], level - 1);
685 
686 		if (present != base_present) {
687 			mergeable = false;
688 			continue;
689 		}
690 
691 		if (!present) {
692 			continue;
693 		}
694 
695 		if (!arch_mm_pte_is_block(table->entries[i], level - 1)) {
696 			mergeable = false;
697 			continue;
698 		}
699 
700 		if (arch_mm_pte_attrs(table->entries[i], level - 1) !=
701 		    base_attrs) {
702 			mergeable = false;
703 			continue;
704 		}
705 	}
706 
707 	if (!mergeable) {
708 		return;
709 	}
710 
711 	new_entry = mm_merge_table_pte(*entry, level);
712 	if (*entry != new_entry) {
713 		mm_replace_entry(base_addr, entry, (uintptr_t)new_entry, level,
714 				 flags, ppool, id, non_secure);
715 	}
716 }
717 
718 /**
719  * Defragments the given page table by converting page table references to
720  * blocks whenever possible.
721  */
mm_ptable_defrag(struct mm_ptable * t,int flags,struct mpool * ppool,bool non_secure)722 static void mm_ptable_defrag(struct mm_ptable *t, int flags,
723 			     struct mpool *ppool, bool non_secure)
724 {
725 	struct mm_page_table *tables = mm_page_table_from_pa(t->root);
726 	uint8_t level = mm_max_level(flags);
727 	uint8_t root_table_count = mm_root_table_count(flags);
728 	uint8_t i;
729 	uint64_t j;
730 	ptable_addr_t block_addr = 0;
731 
732 	/*
733 	 * Loop through each entry in the table. If it points to another table,
734 	 * check if that table can be replaced by a block or an absent entry.
735 	 */
736 	for (i = 0; i < root_table_count; ++i) {
737 		for (j = 0; j < MM_PTE_PER_PAGE; ++j) {
738 			mm_ptable_defrag_entry(block_addr,
739 					       &(tables[i].entries[j]), level,
740 					       flags, ppool, t->id, non_secure);
741 			block_addr = mm_start_of_next_block(
742 				block_addr, mm_entry_size(level));
743 		}
744 	}
745 
746 	arch_mm_sync_table_writes();
747 }
748 
749 /**
750  * Gets the attributes applied to the given range of stage-2 addresses at the
751  * given level.
752  *
753  * The `got_attrs` argument is initially passed as false until `attrs` contains
754  * attributes of the memory region at which point it is passed as true.
755  *
756  * The value returned in `attrs` is only valid if the function returns true.
757  *
758  * Returns true if the whole range has the same attributes and false otherwise.
759  */
760 // NOLINTNEXTLINE(misc-no-recursion)
mm_ptable_get_attrs_level(struct mm_page_table * table,ptable_addr_t begin,ptable_addr_t end,uint8_t level,bool got_attrs,uint64_t * attrs)761 static bool mm_ptable_get_attrs_level(struct mm_page_table *table,
762 				      ptable_addr_t begin, ptable_addr_t end,
763 				      uint8_t level, bool got_attrs,
764 				      uint64_t *attrs)
765 {
766 	pte_t *pte = &table->entries[mm_index(begin, level)];
767 	ptable_addr_t level_end = mm_level_end(begin, level);
768 	size_t entry_size = mm_entry_size(level);
769 
770 	/* Cap end so that we don't go over the current level max. */
771 	if (end > level_end) {
772 		end = level_end;
773 	}
774 
775 	/* Check that each entry is owned. */
776 	while (begin < end) {
777 		if (arch_mm_pte_is_table(*pte, level)) {
778 			if (!mm_ptable_get_attrs_level(
779 				    mm_page_table_from_pa(
780 					    arch_mm_table_from_pte(*pte,
781 								   level)),
782 				    begin, end, level - 1, got_attrs, attrs)) {
783 				return false;
784 			}
785 			got_attrs = true;
786 		} else {
787 			if (!got_attrs) {
788 				*attrs = arch_mm_pte_attrs(*pte, level);
789 				got_attrs = true;
790 			} else if (arch_mm_pte_attrs(*pte, level) != *attrs) {
791 				return false;
792 			}
793 		}
794 
795 		begin = mm_start_of_next_block(begin, entry_size);
796 		pte++;
797 	}
798 
799 	/* The entry is a valid block. */
800 	return got_attrs;
801 }
802 
803 /**
804  * Gets the attributes applied to the given range of addresses in the page
805  * tables.
806  *
807  * The value returned in `attrs` is only valid if the function returns true.
808  *
809  * Returns true if the whole range has the same attributes and false otherwise.
810  */
mm_get_attrs(struct mm_ptable * t,ptable_addr_t begin,ptable_addr_t end,uint64_t * attrs,int flags)811 static bool mm_get_attrs(struct mm_ptable *t, ptable_addr_t begin,
812 			 ptable_addr_t end, uint64_t *attrs, int flags)
813 {
814 	uint8_t max_level = mm_max_level(flags);
815 	uint8_t root_level = max_level + 1;
816 	size_t root_table_size = mm_entry_size(root_level);
817 	ptable_addr_t ptable_end =
818 		mm_root_table_count(flags) * mm_entry_size(root_level);
819 	struct mm_page_table *table;
820 	bool got_attrs = false;
821 
822 	begin = mm_round_down_to_page(begin);
823 	end = mm_round_up_to_page(end);
824 
825 	/* Fail if the addresses are out of range. */
826 	if (end > ptable_end) {
827 		return false;
828 	}
829 
830 	table = &mm_page_table_from_pa(t->root)[mm_index(begin, root_level)];
831 	while (begin < end) {
832 		if (!mm_ptable_get_attrs_level(table, begin, end, max_level,
833 					       got_attrs, attrs)) {
834 			return false;
835 		}
836 
837 		got_attrs = true;
838 		begin = mm_start_of_next_block(begin, root_table_size);
839 		table++;
840 	}
841 
842 	return got_attrs;
843 }
844 
/**
 * Initialises `t` as the page table of the VM `id`.
 *
 * The table is created with no flags set, i.e. without MM_FLAG_STAGE1, which
 * makes the helpers in this file use the stage-2 level and root table count.
 *
 * Returns false if the root tables cannot be allocated from `ppool`.
 */
bool mm_vm_init(struct mm_ptable *t, uint16_t id, struct mpool *ppool)
{
	const int stage2_flags = 0;

	return mm_ptable_init(t, id, stage2_flags, ppool);
}
849 
/**
 * Frees all page table memory of the VM page table `t` back to `ppool`. The
 * table is walked with no flags set, i.e. as a stage-2 table.
 */
void mm_vm_fini(struct mm_ptable *t, struct mpool *ppool)
{
	const int stage2_flags = 0;

	mm_ptable_fini(t, stage2_flags, ppool);
}
854 
855 /**
856  * Selects flags to pass to the page table manipulation operation based on the
857  * mapping mode.
858  */
mm_mode_to_flags(uint32_t mode)859 static int mm_mode_to_flags(uint32_t mode)
860 {
861 	if ((mode & MM_MODE_UNMAPPED_MASK) == MM_MODE_UNMAPPED_MASK) {
862 		return MM_FLAG_UNMAP;
863 	}
864 
865 	return 0;
866 }
867 
868 /**
869  * See `mm_ptable_identity_prepare`.
870  *
871  * This must be called before `mm_identity_commit` for the same mapping.
872  *
873  * Returns true on success, or false if the update would fail.
874  */
mm_identity_prepare(struct mm_ptable * t,paddr_t begin,paddr_t end,uint32_t mode,struct mpool * ppool)875 bool mm_identity_prepare(struct mm_ptable *t, paddr_t begin, paddr_t end,
876 			 uint32_t mode, struct mpool *ppool)
877 {
878 	int flags = MM_FLAG_STAGE1 | mm_mode_to_flags(mode);
879 
880 	return mm_ptable_identity_prepare(t, begin, end,
881 					  arch_mm_mode_to_stage1_attrs(mode),
882 					  flags, ppool);
883 }
884 
885 /**
886  * See `mm_ptable_identity_commit`.
887  *
888  * `mm_identity_prepare` must be called before this for the same mapping.
889  */
mm_identity_commit(struct mm_ptable * t,paddr_t begin,paddr_t end,uint32_t mode,struct mpool * ppool)890 void *mm_identity_commit(struct mm_ptable *t, paddr_t begin, paddr_t end,
891 			 uint32_t mode, struct mpool *ppool)
892 {
893 	int flags = MM_FLAG_STAGE1 | mm_mode_to_flags(mode);
894 
895 	mm_ptable_identity_commit(t, begin, end,
896 				  arch_mm_mode_to_stage1_attrs(mode), flags,
897 				  ppool);
898 	return ptr_from_va(va_from_pa(begin));
899 }
900 
901 /**
902  * See `mm_ptable_identity_prepare`.
903  *
904  * This must be called before `mm_vm_identity_commit` for the same mapping.
905  *
906  * Returns true on success, or false if the update would fail.
907  */
mm_vm_identity_prepare(struct mm_ptable * t,paddr_t begin,paddr_t end,uint32_t mode,struct mpool * ppool)908 bool mm_vm_identity_prepare(struct mm_ptable *t, paddr_t begin, paddr_t end,
909 			    uint32_t mode, struct mpool *ppool)
910 {
911 	int flags = mm_mode_to_flags(mode);
912 
913 	return mm_ptable_identity_prepare(t, begin, end,
914 					  arch_mm_mode_to_stage2_attrs(mode),
915 					  flags, ppool);
916 }
917 
918 /**
919  * See `mm_ptable_identity_commit`.
920  *
921  * `mm_vm_identity_prepare` must be called before this for the same mapping.
922  */
mm_vm_identity_commit(struct mm_ptable * t,paddr_t begin,paddr_t end,uint32_t mode,struct mpool * ppool,ipaddr_t * ipa)923 void mm_vm_identity_commit(struct mm_ptable *t, paddr_t begin, paddr_t end,
924 			   uint32_t mode, struct mpool *ppool, ipaddr_t *ipa)
925 {
926 	int flags = mm_mode_to_flags(mode);
927 
928 	mm_ptable_identity_commit(t, begin, end,
929 				  arch_mm_mode_to_stage2_attrs(mode), flags,
930 				  ppool);
931 
932 	if (ipa != NULL) {
933 		*ipa = ipa_from_pa(begin);
934 	}
935 }
936 
937 /**
938  * Updates a VM's page table such that the given physical address range is
939  * mapped in the address space at the corresponding address range in the
940  * architecture-agnostic mode provided.
941  *
942  * mm_vm_defrag should always be called after a series of page table updates,
943  * whether they succeed or fail. This is because on failure extra page table
944  * entries may have been allocated and then not used, while on success it may be
945  * possible to compact the page table by merging several entries into a block.
946  *
947  * Returns true on success, or false if the update failed and no changes were
948  * made.
949  */
mm_vm_identity_map(struct mm_ptable * t,paddr_t begin,paddr_t end,uint32_t mode,struct mpool * ppool,ipaddr_t * ipa)950 bool mm_vm_identity_map(struct mm_ptable *t, paddr_t begin, paddr_t end,
951 			uint32_t mode, struct mpool *ppool, ipaddr_t *ipa)
952 {
953 	int flags = mm_mode_to_flags(mode);
954 	bool success = mm_ptable_identity_update(
955 		t, begin, end, arch_mm_mode_to_stage2_attrs(mode), flags,
956 		ppool);
957 
958 	if (success && ipa != NULL) {
959 		*ipa = ipa_from_pa(begin);
960 	}
961 
962 	return success;
963 }
964 
965 /**
966  * Updates the VM's table such that the given physical address range has no
967  * connection to the VM.
968  */
mm_vm_unmap(struct mm_ptable * t,paddr_t begin,paddr_t end,struct mpool * ppool)969 bool mm_vm_unmap(struct mm_ptable *t, paddr_t begin, paddr_t end,
970 		 struct mpool *ppool)
971 {
972 	uint32_t mode = MM_MODE_UNMAPPED_MASK;
973 
974 	return mm_vm_identity_map(t, begin, end, mode, ppool, NULL);
975 }
976 
/**
 * Writes the given VM page table to the debug log via `mm_ptable_dump`.
 */
void mm_vm_dump(struct mm_ptable *t)
{
	/* No flags set, i.e. MM_FLAG_STAGE1 is not requested. */
	const int vm_flags = 0;

	mm_ptable_dump(t, vm_flags);
}
984 
985 /**
986  * Defragments a stage1 page table.
987  */
mm_stage1_defrag(struct mm_ptable * t,struct mpool * ppool)988 void mm_stage1_defrag(struct mm_ptable *t, struct mpool *ppool)
989 {
990 	mm_ptable_defrag(t, MM_FLAG_STAGE1, ppool, false);
991 }
992 
/**
 * Defragments a VM page table.
 *
 * Forwards to `mm_ptable_defrag` with no flags set; `non_secure` is passed
 * through unchanged.
 */
void mm_vm_defrag(struct mm_ptable *t, struct mpool *ppool, bool non_secure)
{
	const int vm_flags = 0;

	mm_ptable_defrag(t, vm_flags, ppool, non_secure);
}
1000 
1001 /**
1002  * Gets the mode of the given range of intermediate physical addresses if they
1003  * are mapped with the same mode.
1004  *
1005  * Returns true if the range is mapped with the same mode and false otherwise.
1006  */
mm_vm_get_mode(struct mm_ptable * t,ipaddr_t begin,ipaddr_t end,uint32_t * mode)1007 bool mm_vm_get_mode(struct mm_ptable *t, ipaddr_t begin, ipaddr_t end,
1008 		    uint32_t *mode)
1009 {
1010 	uint64_t attrs;
1011 	bool ret;
1012 
1013 	ret = mm_get_attrs(t, ipa_addr(begin), ipa_addr(end), &attrs, 0);
1014 	if (ret) {
1015 		*mode = arch_mm_stage2_attrs_to_mode(attrs);
1016 	}
1017 
1018 	return ret;
1019 }
1020 
1021 /**
1022  * Gets the mode of the given range of virtual addresses if they
1023  * are mapped with the same mode.
1024  *
1025  * Returns true if the range is mapped with the same mode and false otherwise.
1026  */
mm_get_mode(struct mm_ptable * t,vaddr_t begin,vaddr_t end,uint32_t * mode)1027 bool mm_get_mode(struct mm_ptable *t, vaddr_t begin, vaddr_t end,
1028 		 uint32_t *mode)
1029 {
1030 	uint64_t attrs;
1031 	bool ret;
1032 
1033 	ret = mm_get_attrs(t, va_addr(begin), va_addr(end), &attrs,
1034 			   MM_FLAG_STAGE1);
1035 	if (ret) {
1036 		*mode = arch_mm_stage1_attrs_to_mode(attrs);
1037 	}
1038 
1039 	return ret;
1040 }
1041 
mm_stage1_lock_unsafe(void)1042 static struct mm_stage1_locked mm_stage1_lock_unsafe(void)
1043 {
1044 	return (struct mm_stage1_locked){.ptable = &ptable};
1045 }
1046 
mm_lock_ptable_unsafe(struct mm_ptable * ptable)1047 struct mm_stage1_locked mm_lock_ptable_unsafe(struct mm_ptable *ptable)
1048 {
1049 	return (struct mm_stage1_locked){.ptable = ptable};
1050 }
1051 
mm_lock_stage1(void)1052 struct mm_stage1_locked mm_lock_stage1(void)
1053 {
1054 	sl_lock(&ptable_lock);
1055 	return mm_stage1_lock_unsafe();
1056 }
1057 
mm_unlock_stage1(struct mm_stage1_locked * lock)1058 void mm_unlock_stage1(struct mm_stage1_locked *lock)
1059 {
1060 	CHECK(lock->ptable == &ptable);
1061 	sl_unlock(&ptable_lock);
1062 	lock->ptable = NULL;
1063 }
1064 
1065 /**
1066  * Updates the hypervisor page table such that the given physical address range
1067  * is mapped into the address space at the corresponding address range in the
1068  * architecture-agnostic mode provided.
1069  */
mm_identity_map(struct mm_stage1_locked stage1_locked,paddr_t begin,paddr_t end,uint32_t mode,struct mpool * ppool)1070 void *mm_identity_map(struct mm_stage1_locked stage1_locked, paddr_t begin,
1071 		      paddr_t end, uint32_t mode, struct mpool *ppool)
1072 {
1073 	int flags = MM_FLAG_STAGE1 | mm_mode_to_flags(mode);
1074 
1075 	if (mm_ptable_identity_update(stage1_locked.ptable, begin, end,
1076 				      arch_mm_mode_to_stage1_attrs(mode), flags,
1077 				      ppool)) {
1078 		return ptr_from_va(va_from_pa(begin));
1079 	}
1080 
1081 	return NULL;
1082 }
1083 
1084 /**
1085  * Updates the hypervisor table such that the given physical address range is
1086  * not mapped in the address space.
1087  */
mm_unmap(struct mm_stage1_locked stage1_locked,paddr_t begin,paddr_t end,struct mpool * ppool)1088 bool mm_unmap(struct mm_stage1_locked stage1_locked, paddr_t begin, paddr_t end,
1089 	      struct mpool *ppool)
1090 {
1091 	uint32_t mode = MM_MODE_UNMAPPED_MASK;
1092 
1093 	return mm_identity_map(stage1_locked, begin, end, mode, ppool);
1094 }
1095 
1096 /**
1097  * Defragments the hypervisor page table.
1098  */
mm_defrag(struct mm_stage1_locked stage1_locked,struct mpool * ppool)1099 void mm_defrag(struct mm_stage1_locked stage1_locked, struct mpool *ppool)
1100 {
1101 	mm_ptable_defrag(stage1_locked.ptable, MM_FLAG_STAGE1, ppool, false);
1102 }
1103 
1104 /**
1105  * Initialises memory management for the hypervisor itself.
1106  */
mm_init(struct mpool * ppool)1107 bool mm_init(struct mpool *ppool)
1108 {
1109 	/* Locking is not enabled yet so fake it, */
1110 	struct mm_stage1_locked stage1_locked = mm_stage1_lock_unsafe();
1111 
1112 	dlog_info("text: %#lx - %#lx\n", pa_addr(layout_text_begin()),
1113 		  pa_addr(layout_text_end()));
1114 	dlog_info("rodata: %#lx - %#lx\n", pa_addr(layout_rodata_begin()),
1115 		  pa_addr(layout_rodata_end()));
1116 	dlog_info("data: %#lx - %#lx\n", pa_addr(layout_data_begin()),
1117 		  pa_addr(layout_data_end()));
1118 	dlog_info("stacks: %#lx - %#lx\n", pa_addr(layout_stacks_begin()),
1119 		  pa_addr(layout_stacks_end()));
1120 
1121 	/* ASID 0 is reserved for use by the hypervisor. */
1122 	if (!mm_ptable_init(&ptable, 0, MM_FLAG_STAGE1, ppool)) {
1123 		dlog_error("Unable to allocate memory for page table.\n");
1124 		return false;
1125 	}
1126 
1127 	/* Initialize arch_mm before calling below mapping routines */
1128 	if (!arch_mm_init(ptable.root)) {
1129 		return false;
1130 	}
1131 
1132 	/* Let console driver map pages for itself. */
1133 	plat_console_mm_init(stage1_locked, ppool);
1134 
1135 	/* Map each section. */
1136 	CHECK(mm_identity_map(stage1_locked, layout_text_begin(),
1137 			      layout_text_end(), MM_MODE_X, ppool) != NULL);
1138 
1139 	CHECK(mm_identity_map(stage1_locked, layout_rodata_begin(),
1140 			      layout_rodata_end(), MM_MODE_R, ppool) != NULL);
1141 
1142 	CHECK(mm_identity_map(stage1_locked, layout_data_begin(),
1143 			      layout_data_end(), MM_MODE_R | MM_MODE_W,
1144 			      ppool) != NULL);
1145 
1146 	/* Arch-specific stack mapping. */
1147 	CHECK(arch_stack_mm_init(stage1_locked, ppool));
1148 
1149 	return true;
1150 }
1151