/*
 * Copyright 2018 The Hafnium Authors.
 *
 * Use of this source code is governed by a BSD-style
 * license that can be found in the LICENSE file or at
 * https://opensource.org/licenses/BSD-3-Clause.
 */

#include "hf/mm.h"

#include "hf/arch/barriers.h"
#include "hf/arch/cpu.h"
#include "hf/arch/mmu.h"

#include "hf/check.h"
#include "hf/dlog.h"

#include "msr.h"
#include "sysregs.h"

/* Keep macro alignment */
/* clang-format off */

#define NON_SHAREABLE   UINT64_C(0)
#define OUTER_SHAREABLE UINT64_C(2)
#define INNER_SHAREABLE UINT64_C(3)

#define PTE_VALID        (UINT64_C(1) << 0)
#define PTE_LEVEL0_BLOCK (UINT64_C(1) << 1)
#define PTE_TABLE        (UINT64_C(1) << 1)

#define STAGE1_XN          (UINT64_C(1) << 54)
#define STAGE1_UXN         (UINT64_C(1) << 54)
#define STAGE1_PXN         (UINT64_C(1) << 53)
#define STAGE1_CONTIGUOUS  (UINT64_C(1) << 52)
#define STAGE1_DBM         (UINT64_C(1) << 51)
#define STAGE1_GP          (UINT64_C(1) << 50)
#define STAGE1_NG          (UINT64_C(1) << 11)
#define STAGE1_AF          (UINT64_C(1) << 10)
#define STAGE1_SH(x)       ((x) << 8)
#define STAGE1_AP2         (UINT64_C(1) << 7)
#define STAGE1_AP1         (UINT64_C(1) << 6)
#define STAGE1_AP(x)       ((x) << 6)
#define STAGE1_NS          (UINT64_C(1) << 5)
#define STAGE1_ATTRINDX(x) ((x) << 2)

#define STAGE1_READONLY  UINT64_C(2)
#define STAGE1_READWRITE UINT64_C(0)
#define STAGE1_AP_USER_RW UINT64_C(1)

#define STAGE1_DEVICEINDX UINT64_C(0)
#define STAGE1_NORMALINDX UINT64_C(1)
#define STAGE1_STACKINDX UINT64_C(2)

#define STAGE2_XN(x)      ((x) << 53)
#define STAGE2_CONTIGUOUS (UINT64_C(1) << 52)
#define STAGE2_DBM        (UINT64_C(1) << 51)
#define STAGE2_AF         (UINT64_C(1) << 10)
#define STAGE2_SH(x)      ((x) << 8)
#define STAGE2_S2AP(x)    ((x) << 6)

#define STAGE2_EXECUTE_ALL  UINT64_C(0)
#define STAGE2_EXECUTE_EL0  UINT64_C(1)
#define STAGE2_EXECUTE_NONE UINT64_C(2)
#define STAGE2_EXECUTE_EL1  UINT64_C(3)
#define STAGE2_EXECUTE_MASK UINT64_C(3)

/* Table attributes only apply to stage 1 translations. */
#define TABLE_NSTABLE  (UINT64_C(1) << 63)
#define TABLE_APTABLE1 (UINT64_C(1) << 62)
#define TABLE_APTABLE0 (UINT64_C(1) << 61)
#define TABLE_XNTABLE  (UINT64_C(1) << 60)
#define TABLE_PXNTABLE (UINT64_C(1) << 59)

/* The following are stage-1 software defined attributes. */
#define STAGE1_SW_OWNED     (UINT64_C(1) << 55)
#define STAGE1_SW_EXCLUSIVE (UINT64_C(1) << 56)

/* The following are stage-2 software defined attributes. */
#define STAGE2_SW_OWNED     (UINT64_C(1) << 55)
#define STAGE2_SW_EXCLUSIVE (UINT64_C(1) << 56)

/* The following are stage-2 memory attributes for normal memory. */
#define STAGE2_DEVICE_MEMORY UINT64_C(0)
#define STAGE2_NONCACHEABLE  UINT64_C(1)
#define STAGE2_WRITETHROUGH  UINT64_C(2)
#define STAGE2_WRITEBACK     UINT64_C(3)

/* The following are stage-2 memory attributes for device memory. */
#define STAGE2_MEMATTR_DEVICE_nGnRnE UINT64_C(0)
#define STAGE2_MEMATTR_DEVICE_nGnRE  UINT64_C(1)
#define STAGE2_MEMATTR_DEVICE_nGRE   UINT64_C(2)
#define STAGE2_MEMATTR_DEVICE_GRE    UINT64_C(3)

/* The following construct and destruct stage-2 memory attributes. */
#define STAGE2_MEMATTR(outer, inner) ((((outer) << 2) | (inner)) << 2)
#define STAGE2_MEMATTR_TYPE_MASK (UINT64_C(3) << 4)
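/*
 * For example, STAGE2_MEMATTR(STAGE2_WRITEBACK, STAGE2_WRITEBACK) encodes
 * MemAttr[5:2] = 0b1111 (normal memory, write-back inner and outer), while
 * STAGE2_MEMATTR(STAGE2_DEVICE_MEMORY, STAGE2_MEMATTR_DEVICE_nGnRnE) encodes
 * MemAttr[5:2] = 0b0000 (Device-nGnRnE).
 */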

#define STAGE2_ACCESS_READ  UINT64_C(1)
#define STAGE2_ACCESS_WRITE UINT64_C(2)

#define CACHE_WORD_SIZE 4

/**
 * Threshold number of pages to invalidate in the TLB, beyond which all TLB
 * entries are invalidated instead.
 * The constant is the number of pointers per page table, as also used by
 * Linux.
 */
#define MAX_TLBI_OPS  MM_PTE_PER_PAGE

/* clang-format on */

#define tlbi(op)                               \
	do {                                   \
		__asm__ volatile("tlbi " #op); \
	} while (0)
#define tlbi_reg(op, reg)                                              \
	do {                                                           \
		__asm__ __volatile__("tlbi " #op ", %0" : : "r"(reg)); \
	} while (0)

/** Mask for the address bits of the pte. */
#define PTE_ADDR_MASK \
	(((UINT64_C(1) << 48) - 1) & ~((UINT64_C(1) << PAGE_BITS) - 1))

/** Mask for the attribute bits of the pte. */
#define PTE_ATTR_MASK (~(PTE_ADDR_MASK | (UINT64_C(1) << 1)))

/**
 * Configuration information for memory management. Order is important as this
 * is read from assembly.
 *
 * It must only be written to from `arch_mm_init()` to avoid cache and
 * synchronization problems.
 */
struct arch_mm_config {
	uintreg_t ttbr0_el2;
	uintreg_t mair_el2;
	uintreg_t tcr_el2;
	uintreg_t sctlr_el2;
	uintreg_t hcr_el2;
	uintreg_t vtcr_el2;
	uintreg_t vstcr_el2;
} arch_mm_config;

static uint8_t mm_s1_max_level;
static uint8_t mm_s2_max_level;
static uint8_t mm_s2_root_table_count;

/**
 * Returns the encoding of a page table entry that isn't present.
 */
pte_t arch_mm_absent_pte(uint8_t level)
{
	(void)level;
	return 0;
}

/**
 * Converts a physical address to a table PTE.
 *
 * The spec says that 'Table descriptors for stage 2 translations do not
 * include any attribute field', so we don't take any attributes as arguments.
 */
pte_t arch_mm_table_pte(uint8_t level, paddr_t pa)
{
	/* This is the same for all levels on aarch64. */
	(void)level;
	return pa_addr(pa) | PTE_TABLE | PTE_VALID;
}

/**
 * Converts a physical address to a block PTE.
 *
 * The level must allow block entries.
 */
pte_t arch_mm_block_pte(uint8_t level, paddr_t pa, uint64_t attrs)
{
	pte_t pte = pa_addr(pa) | attrs;

	if (level == 0) {
		/* A level 0 'block' is actually a page entry. */
		pte |= PTE_LEVEL0_BLOCK;
	}
	return pte;
}

/**
 * Specifies whether block mappings are acceptable at the given level.
 *
 * Level 0 must allow block entries.
 */
bool arch_mm_is_block_allowed(uint8_t level)
{
	return level <= 2;
}

/**
 * Determines if the given pte is present, i.e., if it is valid or it is invalid
 * but still holds state about the memory so needs to be present in the table.
 */
bool arch_mm_pte_is_present(pte_t pte, uint8_t level)
{
	return arch_mm_pte_is_valid(pte, level) || (pte & STAGE2_SW_OWNED) != 0;
}

/**
 * Determines if the given pte is valid, i.e., if it points to another table,
 * to a page, or a block of pages that can be accessed.
 */
bool arch_mm_pte_is_valid(pte_t pte, uint8_t level)
{
	(void)level;
	return (pte & PTE_VALID) != 0;
}

/**
 * Determines if the given pte references a block of pages.
 */
bool arch_mm_pte_is_block(pte_t pte, uint8_t level)
{
	/* We count pages at level 0 as blocks. */
	return arch_mm_is_block_allowed(level) &&
	       (level == 0 ? (pte & PTE_LEVEL0_BLOCK) != 0
			   : arch_mm_pte_is_present(pte, level) &&
				     !arch_mm_pte_is_table(pte, level));
}

/**
 * Determines if the given pte references another table.
 */
bool arch_mm_pte_is_table(pte_t pte, uint8_t level)
{
	return level != 0 && arch_mm_pte_is_valid(pte, level) &&
	       (pte & PTE_TABLE) != 0;
}

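/**
 * Extracts the address bits of the given page table entry.
 */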
static uint64_t pte_addr(pte_t pte)
{
	return pte & PTE_ADDR_MASK;
}

/**
 * Clears the given physical address, i.e., clears the bits of the address that
 * are not used in the pte.
 */
paddr_t arch_mm_clear_pa(paddr_t pa)
{
	return pa_init(pte_addr(pa_addr(pa)));
}

/**
 * Extracts the physical address of the block referred to by the given page
 * table entry.
 */
paddr_t arch_mm_block_from_pte(pte_t pte, uint8_t level)
{
	(void)level;
	return pa_init(pte_addr(pte));
}

/**
 * Extracts the physical address of the page table referred to by the given
 * page table entry.
 */
paddr_t arch_mm_table_from_pte(pte_t pte, uint8_t level)
{
	(void)level;
	return pa_init(pte_addr(pte));
}

/**
 * Extracts the architecture-specific attributes applied to the given page
 * table entry.
 */
uint64_t arch_mm_pte_attrs(pte_t pte, uint8_t level)
{
	(void)level;
	return pte & PTE_ATTR_MASK;
}

/**
 * Executes any barriers or synchronization required by the architecture
 * after page table writes.
 */
void arch_mm_sync_table_writes(void)
{
	/*
	 * Ensure visibility of table updates to translation table walks.
	 */
	dsb(ish);
}

/**
 * Invalidates stage-1 TLB entries referring to the given virtual address range.
 */
void arch_mm_invalidate_stage1_range(uint16_t asid, vaddr_t va_begin,
				     vaddr_t va_end)
{
	uintvaddr_t begin = va_addr(va_begin);
	uintvaddr_t end = va_addr(va_end);
	uintvaddr_t it;

	/* Sync with page table updates. */
	arch_mm_sync_table_writes();

	/*
	 * Revisions prior to Armv8.4 do not support invalidating a range of
	 * addresses, which means we have to loop over individual pages. If
	 * there are too many, it is quicker to invalidate all TLB entries.
	 */
	if ((end - begin) > (MAX_TLBI_OPS * PAGE_SIZE)) {
		if (VM_TOOLCHAIN == 1) {
			tlbi(vmalle1is);
		} else {
			tlbi(alle2is);
		}
	} else {
		begin >>= 12;
		end >>= 12;
		/* Invalidate stage-1 TLB, one page from the range at a time. */
		for (it = begin; it < end;
		     it += (UINT64_C(1) << (PAGE_BITS - 12))) {
			/*
			 * Mask the upper 8 bits of the asid passed in. Hafnium
			 * on aarch64 currently only uses 8-bit ASIDs.
			 * TCR_EL2.AS is set to 0 on implementations which
			 * support 16-bit ASIDs and is RES0 on implementations
			 * that don't support 16-bit ASIDs.
			 */
			asid &= 0xff;
			it |= (uint64_t)asid << 48;
			if (VM_TOOLCHAIN == 1) {
				tlbi_reg(vae1is, it);
			} else {
				tlbi_reg(vae2is, it);
			}
		}
	}

	/* Sync data accesses with TLB invalidation completion. */
	dsb(ish);

	/* Sync instruction fetches with TLB invalidation completion. */
	isb();
}

/**
 * Invalidates stage-2 TLB entries referring to the given intermediate physical
 * address range.
 */
void arch_mm_invalidate_stage2_range(uint16_t vmid, ipaddr_t va_begin,
				     ipaddr_t va_end)
{
	uintpaddr_t begin = ipa_addr(va_begin);
	uintpaddr_t end = ipa_addr(va_end);
	uintpaddr_t it;

	(void)vmid;

	/* TODO: This only applies to the current VMID. */

	/* Sync with page table updates. */
	arch_mm_sync_table_writes();

	/*
	 * Switch to guest mode when VHE is enabled. This ensures that the TLB
	 * invalidates apply to the current VMID as opposed to the EL2&0
	 * translation regime. Note that in the following code snippet, only
	 * tlbi vmalle1is is affected by the HCR_EL2.TGE bit. Bracketing all of
	 * the invalidation code inside guest mode ensures that any changes to
	 * the code below will also apply to the guest VM rather than the EL2&0
	 * translation regime.
	 */
	vhe_switch_to_host_or_guest(true);

	/*
	 * Revisions prior to Armv8.4 do not support invalidating a range of
	 * addresses, which means we have to loop over individual pages. If
	 * there are too many, it is quicker to invalidate all TLB entries.
	 */
	if ((end - begin) > (MAX_TLBI_OPS * PAGE_SIZE)) {
		/*
		 * Invalidate all stage-1 and stage-2 entries of the TLB for
		 * the current VMID.
		 */
		tlbi(vmalls12e1is);
	} else {
		begin >>= 12;
		end >>= 12;

		/*
		 * Invalidate stage-2 TLB, one page from the range at a time.
		 * Note that this has no effect if the CPU has a TLB with
		 * combined stage-1/stage-2 translation.
		 */
		for (it = begin; it < end;
		     it += (UINT64_C(1) << (PAGE_BITS - 12))) {
			tlbi_reg(ipas2e1is, it);
		}

		/*
		 * Ensure completion of stage-2 invalidation in case a page
		 * table walk on another CPU refilled the TLB with a complete
		 * stage-1 + stage-2 walk based on the old stage-2 mapping.
		 */
		dsb(ish);

		/*
		 * Invalidate all stage-1 TLB entries. If the CPU has a combined
		 * TLB for stage-1 and stage-2, this will invalidate stage-2 as
		 * well.
		 */
		tlbi(vmalle1is);
	}

	/* Sync data accesses with TLB invalidation completion. */
	dsb(ish);

	/* Sync instruction fetches with TLB invalidation completion. */
	isb();

	vhe_switch_to_host_or_guest(false);
}

/**
 * Returns the smallest cache line size of all the caches for this core.
 */
static uint16_t arch_mm_dcache_line_size(void)
{
	return CACHE_WORD_SIZE *
	       (UINT16_C(1) << ((read_msr(CTR_EL0) >> 16) & 0xf));
}

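/**
 * Cleans and invalidates the given address range in the data cache, to the
 * point of coherency.
 */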
void arch_mm_flush_dcache(void *base, size_t size)
{
	/* Clean and invalidate each data cache line in the range. */
	uint16_t line_size = arch_mm_dcache_line_size();
	uintptr_t line_begin = (uintptr_t)base & ~(line_size - 1);
	uintptr_t end = (uintptr_t)base + size;

	while (line_begin < end) {
		__asm__ volatile("dc civac, %0" : : "r"(line_begin));
		line_begin += line_size;
	}
	dsb(sy);
}

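/**
 * Converts the given memory mode into stage-1 page table attributes.
 */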
uint64_t arch_mm_mode_to_stage1_attrs(uint32_t mode)
{
	uint64_t attrs = 0;

	attrs |= STAGE1_AF | STAGE1_SH(INNER_SHAREABLE);

#if SECURE_WORLD == 1

	/**
	 * Define the non-secure bit.
	 * At NS-EL2 the Stage-1 MMU NS bit is RES0. At S-EL1/2, this bit
	 * defines the Stage-1 security attribute for the block or page.
	 */
	if (mode & MM_MODE_NS) {
		attrs |= STAGE1_NS;
	}

#endif
	/*
	 * STAGE1_XN can be XN or UXN depending on whether the EL2 translation
	 * regime uses one VA range or two VA ranges (VHE).
	 * PXN is RES0 when the translation regime does not support two VA
	 * ranges.
	 */
	if (mode & MM_MODE_X) {
		if (has_vhe_support()) {
			attrs |=
				(mode & MM_MODE_USER) ? STAGE1_PXN : STAGE1_UXN;
		}

#if BRANCH_PROTECTION
		/* Mark code pages as Guarded Pages if BTI is supported. */
		if (is_arch_feat_bti_supported()) {
			attrs |= STAGE1_GP;
		}
#endif
	} else {
		if (has_vhe_support()) {
			attrs |= (STAGE1_UXN | STAGE1_PXN);
		} else {
			attrs |= STAGE1_XN;
		}
	}

	/* Define the read/write bits. */
	if (mode & MM_MODE_W) {
		attrs |= STAGE1_AP(STAGE1_READWRITE);
	} else {
		attrs |= STAGE1_AP(STAGE1_READONLY);
	}

	if (has_vhe_support()) {
		attrs |= (mode & MM_MODE_USER) ? STAGE1_AP(STAGE1_AP_USER_RW)
					       : 0;
		if (mode & MM_MODE_NG) {
			attrs |= STAGE1_NG;
		}
	}

	/* Define the memory attribute bits. */
	if (mode & MM_MODE_D) {
		attrs |= STAGE1_ATTRINDX(STAGE1_DEVICEINDX);
	} else if (mode & MM_MODE_T) {
		attrs |= STAGE1_ATTRINDX(STAGE1_STACKINDX);
	} else {
		attrs |= STAGE1_ATTRINDX(STAGE1_NORMALINDX);
	}

	/* Define the ownership bit. */
	if (!(mode & MM_MODE_UNOWNED)) {
		attrs |= STAGE1_SW_OWNED;
	}

	/* Define the exclusivity bit. */
	if (!(mode & MM_MODE_SHARED)) {
		attrs |= STAGE1_SW_EXCLUSIVE;
	}

	/* Define the valid bit. */
	if (!(mode & MM_MODE_INVALID)) {
		attrs |= PTE_VALID;
	}

	return attrs;
}

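/**
 * Converts the given stage-1 page table attributes back into a memory mode.
 */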
uint32_t arch_mm_stage1_attrs_to_mode(uint64_t attrs)
{
	uint32_t mode = 0;

#if SECURE_WORLD == 1
	if (attrs & STAGE1_NS) {
		mode |= MM_MODE_NS;
	}
#endif

	if ((attrs & STAGE1_AP(STAGE1_READONLY)) ==
	    STAGE1_AP(STAGE1_READONLY)) {
		mode |= MM_MODE_R;
	} else {
		CHECK((attrs & STAGE1_AP(STAGE1_READWRITE)) ==
		      STAGE1_AP(STAGE1_READWRITE));
		mode |= MM_MODE_W | MM_MODE_R;
	}

	if (has_vhe_support() && (attrs & STAGE1_AP(STAGE1_AP_USER_RW))) {
		mode |= MM_MODE_USER;
	}

	if (!(attrs & STAGE1_XN) || !(attrs & STAGE1_PXN)) {
		mode |= MM_MODE_X;
	}

	if (has_vhe_support() && (attrs & STAGE1_NG)) {
		mode |= MM_MODE_NG;
	}

	if (!((attrs & STAGE1_ATTRINDX(STAGE1_NORMALINDX)) ==
	      STAGE1_ATTRINDX(STAGE1_NORMALINDX))) {
		mode |= MM_MODE_D;
	} else {
		CHECK((attrs & STAGE1_ATTRINDX(STAGE1_NORMALINDX)) ==
		      STAGE1_ATTRINDX(STAGE1_NORMALINDX));
	}

	if (!(attrs & STAGE1_SW_OWNED)) {
		mode |= MM_MODE_UNOWNED;
	}

	if (!(attrs & STAGE1_SW_EXCLUSIVE)) {
		mode |= MM_MODE_SHARED;
	}

	if (!(attrs & PTE_VALID)) {
		mode |= MM_MODE_INVALID;
	}

	return mode;
}

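/**
 * Converts the given memory mode into stage-2 page table attributes.
 */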
uint64_t arch_mm_mode_to_stage2_attrs(uint32_t mode)
{
	uint64_t attrs = 0;
	uint64_t access = 0;

	/*
	 * Default shareability is inner shareable in stage 2 tables. Per
	 * table D5-45 of ARM ARM DDI0487G, the inner shareable attribute will
	 * pass through a stage 1 attribute of outer shareable or inner
	 * shareable, but NOT non-shareable. A stage 1 non-shareable attribute
	 * combined with stage 2 inner shareable results in an inner shareable
	 * access. This is intentional, since a VCPU that marks a memory region
	 * as non-shareable in its stage 1 translation tables can be migrated
	 * to a different physical PE unless the VCPU is pinned to the PE.
	 * If stage 2 was marked as non-shareable below, the resulting accesses
	 * for a VCPU on a physical PE would be marked as non-shareable, and
	 * hence potentially not visible on another physical PE, which could
	 * cause coherency issues when the VCPU is migrated and expects its
	 * non-shareable accesses to be visible, but would read stale or invalid
	 * data. Note that for an access that results in the device memory type,
	 * shareability does not matter and is always treated as outer
	 * shareable.
	 */
	attrs |= STAGE2_AF | STAGE2_SH(INNER_SHAREABLE);

	/* Define the read/write bits. */
	if (mode & MM_MODE_R) {
		access |= STAGE2_ACCESS_READ;
	}

	if (mode & MM_MODE_W) {
		access |= STAGE2_ACCESS_WRITE;
	}

	attrs |= STAGE2_S2AP(access);

	/* Define the execute bits. */
	if (mode & MM_MODE_X) {
		attrs |= STAGE2_XN(STAGE2_EXECUTE_ALL);
	} else {
		attrs |= STAGE2_XN(STAGE2_EXECUTE_NONE);
	}

	/*
	 * Define the memory attribute bits, using the "neutral" values which
	 * give the stage-1 attributes full control of the attributes.
	 */
	if (mode & MM_MODE_D) {
		attrs |= STAGE2_MEMATTR(STAGE2_DEVICE_MEMORY,
					STAGE2_MEMATTR_DEVICE_GRE);
	} else {
		attrs |= STAGE2_MEMATTR(STAGE2_WRITEBACK, STAGE2_WRITEBACK);
	}

	/* Define the ownership bit. */
	if (!(mode & MM_MODE_UNOWNED)) {
		attrs |= STAGE2_SW_OWNED;
	}

	/* Define the exclusivity bit. */
	if (!(mode & MM_MODE_SHARED)) {
		attrs |= STAGE2_SW_EXCLUSIVE;
	}

	/* Define the valid bit. */
	if (!(mode & MM_MODE_INVALID)) {
		attrs |= PTE_VALID;
	}

	return attrs;
}

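/**
 * Converts the given stage-2 page table attributes back into a memory mode.
 */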
uint32_t arch_mm_stage2_attrs_to_mode(uint64_t attrs)
{
	uint32_t mode = 0;

	if (attrs & STAGE2_S2AP(STAGE2_ACCESS_READ)) {
		mode |= MM_MODE_R;
	}

	if (attrs & STAGE2_S2AP(STAGE2_ACCESS_WRITE)) {
		mode |= MM_MODE_W;
	}

	if ((attrs & STAGE2_XN(STAGE2_EXECUTE_MASK)) ==
	    STAGE2_XN(STAGE2_EXECUTE_ALL)) {
		mode |= MM_MODE_X;
	}

	if ((attrs & STAGE2_MEMATTR_TYPE_MASK) == STAGE2_DEVICE_MEMORY) {
		mode |= MM_MODE_D;
	}

	if (!(attrs & STAGE2_SW_OWNED)) {
		mode |= MM_MODE_UNOWNED;
	}

	if (!(attrs & STAGE2_SW_EXCLUSIVE)) {
		mode |= MM_MODE_SHARED;
	}

	if (!(attrs & PTE_VALID)) {
		mode |= MM_MODE_INVALID;
	}

	return mode;
}

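/**
 * Sets the maximum level allowed in stage-1 page tables, based on the number
 * of supported physical address bits.
 */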
void arch_mm_stage1_max_level_set(uint32_t pa_bits)
{
	/* The maximum supported PA range is 48 bits. */
	CHECK(pa_bits <= 48);

	if (pa_bits >= 40) {
		mm_s1_max_level = 3;
	} else {
		/* Setting to 2 covers physical memory up to 512 GB. */
		mm_s1_max_level = 2;
	}
}

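/**
 * Returns the maximum level allowed in stage-1 page tables.
 */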
uint8_t arch_mm_stage1_max_level(void)
{
	return mm_s1_max_level;
}

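/**
 * Returns the maximum level allowed in stage-2 page tables.
 */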
uint8_t arch_mm_stage2_max_level(void)
{
	return mm_s2_max_level;
}

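/**
 * Returns the number of page tables at the root of the stage-1 translation.
 */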
uint8_t arch_mm_stage1_root_table_count(void)
{
	/* Stage 1 doesn't concatenate tables. */
	return 1;
}

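/**
 * Returns the number of concatenated page tables at the root of the stage-2
 * translation.
 */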
uint8_t arch_mm_stage2_root_table_count(void)
{
	return mm_s2_root_table_count;
}

/**
 * Given the attrs from a table at some level and the attrs from all the blocks
 * in that table, returns equivalent attrs to use for a block which will replace
 * the entire table.
 */
uint64_t arch_mm_combine_table_entry_attrs(uint64_t table_attrs,
					   uint64_t block_attrs)
{
	/*
	 * Only stage 1 table descriptors have attributes, but the bits are res0
	 * for stage 2 table descriptors so this code is safe for both.
	 */
	if (table_attrs & TABLE_NSTABLE) {
		block_attrs |= STAGE1_NS;
	}
	if (table_attrs & TABLE_APTABLE1) {
		block_attrs |= STAGE1_AP2;
	}
	if (table_attrs & TABLE_APTABLE0) {
		/* When two VA ranges are supported, AP1 is valid. */
		if (has_vhe_support()) {
			block_attrs |= STAGE1_AP1;
		} else {
			block_attrs &= ~STAGE1_AP1;
		}
	}
	if (table_attrs & TABLE_XNTABLE) {
		block_attrs |= STAGE1_XN;
	}
	if (table_attrs & TABLE_PXNTABLE) {
		block_attrs |= STAGE1_PXN;
	}
	return block_attrs;
}

/**
 * This is called early in initialization without MMU or caches enabled.
 */
bool arch_mm_init(paddr_t table)
{
	uint64_t features = read_msr(id_aa64mmfr0_el1);
	uint64_t pe_features = read_msr(id_aa64pfr0_el1);
	unsigned int nsa_nsw;
	uint32_t pa_bits = arch_mm_get_pa_range();
	uint32_t extend_bits;
	uint32_t sl0;

	/* Check that 4KB granules are supported. */
	if (((features >> 28) & 0xf) == 0xf) {
		dlog_error("4KB granules are not supported\n");
		return false;
	}

	/* Check the physical address range. */
	if (!pa_bits) {
		dlog_error(
			"Unsupported value of id_aa64mmfr0_el1.PARange: %x\n",
			features & 0xf);
		return false;
	}

	/* Downgrade PA size from 52 to 48 bits (FEAT_LPA workaround). */
	if (pa_bits == 52) {
		dlog_verbose(
			"52-bit PA size not supported,"
			" falling back to 48-bit\n");
		pa_bits = 48;
	}

	dlog_info("Supported bits in physical address: %d\n", pa_bits);

	/*
	 * Determine sl0, starting level of the page table, based on the number
	 * of bits. The value is chosen to give the shallowest tree by making
	 * use of concatenated translation tables.
	 *
	 *  - 0 => start at level 1
	 *  - 1 => start at level 2
	 *  - 2 => start at level 3
	 */
	if (pa_bits >= 44) {
		sl0 = 2;
		mm_s2_max_level = 3;
	} else if (pa_bits >= 35) {
		sl0 = 1;
		mm_s2_max_level = 2;
	} else {
		sl0 = 0;
		mm_s2_max_level = 1;
	}

	arch_mm_stage1_max_level_set(pa_bits);

	/*
	 * Since the shallowest possible tree is used, the maximum number of
	 * concatenated tables must be used. This means if no more than 4 bits
	 * are used from the next level, they are instead used to index into the
	 * concatenated tables.
	 */
	extend_bits = ((pa_bits - PAGE_BITS) % PAGE_LEVEL_BITS);
	if (extend_bits > 4) {
		extend_bits = 0;
	}
	mm_s2_root_table_count = 1 << extend_bits;

	dlog_info(
		"Stage 2 has %d page table levels with %d pages at the root.\n",
		mm_s2_max_level + 1, mm_s2_root_table_count);

	dlog_info(
		"Stage 1 has %d page table levels with %d pages at the root.\n",
		mm_s1_max_level + 1, arch_mm_stage1_root_table_count());

	/*
	 * If the PE implements S-EL2 then VTCR_EL2.NSA/NSW bits are significant
	 * in secure state. In non-secure state, NSA/NSW behave as if set to
	 * 11b. If S-EL2 is not implemented NSA/NSW bits are RES0.
	 */
	if (((pe_features >> 36) & 0xF) == 1) {
		/*
		 * NSA/NSW=10b: in secure state,
		 * S2 translations for the NS IPA space access the NS PA space.
		 * S2 translation table walks for the NS IPA space are to the
		 * secure PA space.
		 */
		nsa_nsw = 2;
	} else {
		nsa_nsw = 0;
	}

	arch_mm_config = (struct arch_mm_config)
	{
		.ttbr0_el2 = pa_addr(table),

		.vtcr_el2 =
			(1U << 31) |		   /* RES1. */
			(nsa_nsw << 29) |	   /* NSA/NSW. */
			((features & 0xf) << 16) | /* PS, matching features. */
			(0 << 14) |		   /* TG0: 4 KB granule. */
			(3 << 12) |		   /* SH0: inner shareable. */
			(1 << 10) |  /* ORGN0: normal, cacheable ... */
			(1 << 8) |   /* IRGN0: normal, cacheable ... */
			(sl0 << 6) | /* SL0. */
			((64 - pa_bits) << 0) | /* T0SZ: dependent on PS. */
			0,

		/*
		 * 0    -> Device-nGnRnE memory
		 * 0xff -> Normal memory, Inner/Outer Write-Back Non-transient,
		 *         Write-Alloc, Read-Alloc.
		 * 0xf0 -> Tagged Normal, Inner/Outer Write-Back,
		 *         Read/Write-Alloc non-transient memory.
		 */
			.mair_el2 = (0 << (8 * STAGE1_DEVICEINDX)) |
#if ENABLE_MTE
				    (0xf0 << (8 * STAGE1_STACKINDX)) |
#endif
				    (0xff << (8 * STAGE1_NORMALINDX)),

		.sctlr_el2 = get_sctlr_el2_value(false),
		.vstcr_el2 = (1U << 31) |	    /* RES1. */
			     (0 << 30) |	    /* SA. */
			     (0 << 29) |	    /* SW. */
			     (0 << 14) |	    /* TG0: 4 KB granule. */
			     (sl0 << 6) |	    /* SL0. */
			     ((64 - pa_bits) << 0), /* T0SZ: dependent on PS. */
	};

	/*
	 * Configure tcr_el2 and hcr_el2. The configuration depends on whether
	 * VHE support is enabled by the build and is available in HW. If VHE is
	 * enabled and available, hcr_el2.e2h is set during boot, before the MMU
	 * is turned on. This is because setting e2h redefines registers, can be
	 * cached in the TLBs and enables the use of ttbr1_el2, among other
	 * things, which makes enabling it at run time much more complicated.
	 * The bit is set once during boot and is not expected to change for the
	 * boot cycle. When VHE is enabled, currently, only the lower virtual
	 * address range (ttbr0_el2) is used and the upper address range
	 * (ttbr1_el2) is disabled. This keeps Hafnium simple and consistent
	 * with its behavior when VHE is not enabled. When VHE is not enabled,
	 * hcr_el2 will default to 0 and will be set up during vCPU
	 * initialization.
	 */
	arch_mm_config.hcr_el2 = 0;
	if (has_vhe_support()) {
		arch_mm_config.hcr_el2 |= (HCR_EL2_E2H | HCR_EL2_TGE);
		arch_mm_config.tcr_el2 =
			(1UL << 38) | /* TBI1, top byte ignored. */
			(1UL << 37) | /* TBI0, top byte ignored. */
			(2UL << 32) | /* IPS, IPA size. */
			(2UL << 30) | /* TG1, granule size, 4KB. */
			(3UL << 28) | /* SH1, inner shareable. */
			(1UL
			 << 26) | /* ORGN1, normal mem, WB RA WA Cacheable. */
			(1UL
			 << 24) | /* IRGN1, normal mem, WB RA WA Cacheable. */
			(1UL << 23) | /* EPD1 - Disable TTBR1_EL2 translation */
			(0UL << 22) | /* TTBR0_EL2.ASID defines ASID */
			((64 - pa_bits)
			 << 16) | /* T1SZ, input address is 2^pa_bits bytes. */
			(0UL << 14) | /* TG0, granule size, 4KB. */
			(3UL << 12) | /* SH0, inner shareable. */
			(1UL
			 << 10) | /* ORGN0, normal mem, WB RA WA Cacheable. */
			(1UL
			 << 8) | /* IRGN0, normal mem, WB RA WA Cacheable. */
			((64 - pa_bits)
			 << 0) | /* T0SZ, input address is 2^pa_bits bytes. */
			0;
	} else {
		arch_mm_config.tcr_el2 =
			(1 << 20) |		   /* TBI, top byte ignored. */
			((features & 0xf) << 16) | /* PS. */
			(0 << 14) |		   /* TG0, granule size, 4KB. */
			(3 << 12) |		   /* SH0, inner shareable. */
			(1 << 10) | /* ORGN0, normal mem, WB RA WA Cacheable. */
			(1 << 8) |  /* IRGN0, normal mem, WB RA WA Cacheable. */
			((64 - pa_bits)
			 << 0) | /* T0SZ, input address is 2^pa_bits bytes. */
			0;
	}
	return true;
}

/**
 * Return the arch specific mm mode for send/recv pages of given VM ID.
 */
uint32_t arch_mm_extra_attributes_from_vm(ffa_vm_id_t id)
{
	return ((id & HF_VM_ID_WORLD_MASK) == HF_HYPERVISOR_VM_ID) ? MM_MODE_NS
								   : 0;
}

/**
 * Returns the maximum supported PA Range in bits.
 */
uint32_t arch_mm_get_pa_range(void)
{
	static const uint32_t pa_bits_table[16] = {32, 36, 40, 42, 44, 48, 52};
	uint64_t features = read_msr(id_aa64mmfr0_el1);
	return pa_bits_table[features & 0xf];
}

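/**
 * Returns the value computed for VTCR_EL2 during initialization.
 */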
uintptr_t arch_mm_get_vtcr_el2(void)
{
	return arch_mm_config.vtcr_el2;
}

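/**
 * Returns the value computed for VSTCR_EL2 during initialization.
 */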
uintptr_t arch_mm_get_vstcr_el2(void)
{
	return arch_mm_config.vstcr_el2;
}