/*
 * Copyright 2018 The Hafnium Authors.
 *
 * Use of this source code is governed by a BSD-style
 * license that can be found in the LICENSE file or at
 * https://opensource.org/licenses/BSD-3-Clause.
 */

#include "hf/mm.h"

#include "hf/arch/barriers.h"
#include "hf/arch/cpu.h"
#include "hf/arch/mmu.h"

#include "hf/check.h"
#include "hf/dlog.h"

#include "msr.h"
#include "sysregs.h"

/* Keep macro alignment */
/* clang-format off */

#define NON_SHAREABLE   UINT64_C(0)
#define OUTER_SHAREABLE UINT64_C(2)
#define INNER_SHAREABLE UINT64_C(3)

#define PTE_VALID        (UINT64_C(1) << 0)
#define PTE_LEVEL0_BLOCK (UINT64_C(1) << 1)
#define PTE_TABLE        (UINT64_C(1) << 1)

#define STAGE1_XN          (UINT64_C(1) << 54)
#define STAGE1_UXN         (UINT64_C(1) << 54)
#define STAGE1_PXN         (UINT64_C(1) << 53)
#define STAGE1_CONTIGUOUS  (UINT64_C(1) << 52)
#define STAGE1_DBM         (UINT64_C(1) << 51)
#define STAGE1_GP          (UINT64_C(1) << 50)
#define STAGE1_NG          (UINT64_C(1) << 11)
#define STAGE1_AF          (UINT64_C(1) << 10)
#define STAGE1_SH(x)       ((x) << 8)
#define STAGE1_AP2         (UINT64_C(1) << 7)
#define STAGE1_AP1         (UINT64_C(1) << 6)
#define STAGE1_AP(x)       ((x) << 6)
#define STAGE1_NS          (UINT64_C(1) << 5)
#define STAGE1_ATTRINDX(x) ((x) << 2)

#define STAGE1_READONLY   UINT64_C(2)
#define STAGE1_READWRITE  UINT64_C(0)
#define STAGE1_AP_USER_RW UINT64_C(1)

#define STAGE1_DEVICEINDX UINT64_C(0)
#define STAGE1_NORMALINDX UINT64_C(1)
#define STAGE1_STACKINDX  UINT64_C(2)
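/*
 * Note: these are indices into MAIR_EL2 attribute slots; the matching byte
 * values are programmed into .mair_el2 in arch_mm_init() below.
 */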

#define STAGE2_XN(x)      ((x) << 53)
#define STAGE2_CONTIGUOUS (UINT64_C(1) << 52)
#define STAGE2_DBM        (UINT64_C(1) << 51)
#define STAGE2_AF         (UINT64_C(1) << 10)
#define STAGE2_SH(x)      ((x) << 8)
#define STAGE2_S2AP(x)    ((x) << 6)

#define STAGE2_EXECUTE_ALL  UINT64_C(0)
#define STAGE2_EXECUTE_EL0  UINT64_C(1)
#define STAGE2_EXECUTE_NONE UINT64_C(2)
#define STAGE2_EXECUTE_EL1  UINT64_C(3)
#define STAGE2_EXECUTE_MASK UINT64_C(3)
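/*
 * The execute values above form the two-bit XN[1:0] field that STAGE2_XN()
 * places at descriptor bits [54:53]; the EL0-only/EL1-only encodings are only
 * meaningful on CPUs that implement FEAT_XNX.
 */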

/* Table attributes only apply to stage 1 translations. */
#define TABLE_NSTABLE  (UINT64_C(1) << 63)
#define TABLE_APTABLE1 (UINT64_C(1) << 62)
#define TABLE_APTABLE0 (UINT64_C(1) << 61)
#define TABLE_XNTABLE  (UINT64_C(1) << 60)
#define TABLE_PXNTABLE (UINT64_C(1) << 59)

/* The following are stage-1 software defined attributes. */
#define STAGE1_SW_OWNED     (UINT64_C(1) << 55)
#define STAGE1_SW_EXCLUSIVE (UINT64_C(1) << 56)

/* The following are stage-2 software defined attributes. */
#define STAGE2_SW_OWNED     (UINT64_C(1) << 55)
#define STAGE2_SW_EXCLUSIVE (UINT64_C(1) << 56)

/* The following are stage-2 memory attributes for normal memory. */
#define STAGE2_DEVICE_MEMORY UINT64_C(0)
#define STAGE2_NONCACHEABLE  UINT64_C(1)
#define STAGE2_WRITETHROUGH  UINT64_C(2)
#define STAGE2_WRITEBACK     UINT64_C(3)

/* The following are stage-2 memory attributes for device memory. */
#define STAGE2_MEMATTR_DEVICE_nGnRnE UINT64_C(0)
#define STAGE2_MEMATTR_DEVICE_nGnRE  UINT64_C(1)
#define STAGE2_MEMATTR_DEVICE_nGRE   UINT64_C(2)
#define STAGE2_MEMATTR_DEVICE_GRE    UINT64_C(3)

/* The following construct and destruct stage-2 memory attributes. */
#define STAGE2_MEMATTR(outer, inner) ((((outer) << 2) | (inner)) << 2)
#define STAGE2_MEMATTR_TYPE_MASK     (UINT64_C(3) << 4)
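/*
 * For example, STAGE2_MEMATTR(STAGE2_WRITEBACK, STAGE2_WRITEBACK) expands to
 * ((3 << 2) | 3) << 2 == 0x3c, i.e. MemAttr[3:0] == 0b1111 placed in
 * descriptor bits [5:2].
 */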

#define STAGE2_ACCESS_READ  UINT64_C(1)
#define STAGE2_ACCESS_WRITE UINT64_C(2)

#define CACHE_WORD_SIZE 4

/**
 * Threshold number of pages in TLB to invalidate after which we invalidate all
 * TLB entries on a given level.
 * The constant is the number of PTEs in a page table, the same threshold used
 * by Linux.
 */
#define MAX_TLBI_OPS MM_PTE_PER_PAGE
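/*
 * With a 4 KB granule (MM_PTE_PER_PAGE == 512) this amounts to a full
 * invalidate for any range larger than 512 * 4 KiB = 2 MiB.
 */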

/* clang-format on */

#define tlbi(op)                               \
	do {                                   \
		__asm__ volatile("tlbi " #op); \
	} while (0)
#define tlbi_reg(op, reg)                                              \
	do {                                                           \
		__asm__ __volatile__("tlbi " #op ", %0" : : "r"(reg)); \
	} while (0)
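/*
 * Example usage: tlbi(vmalle1is) emits "tlbi vmalle1is", while
 * tlbi_reg(vae2is, va) emits "tlbi vae2is, <Xn>" with va passed in a
 * general-purpose register.
 */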

/** Mask for the address bits of the pte. */
#define PTE_ADDR_MASK \
	(((UINT64_C(1) << 48) - 1) & ~((UINT64_C(1) << PAGE_BITS) - 1))
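/*
 * With 4 KB pages (PAGE_BITS == 12) this evaluates to 0x0000fffffffff000,
 * i.e. output address bits [47:12].
 */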

/** Mask for the attribute bits of the pte. */
#define PTE_ATTR_MASK (~(PTE_ADDR_MASK | (UINT64_C(1) << 1)))

/**
 * Configuration information for memory management. Order is important as this
 * is read from assembly.
 *
 * It must only be written to from `arch_mm_init()` to avoid cache and
 * synchronization problems.
 */
struct arch_mm_config {
	uintreg_t ttbr0_el2;
	uintreg_t mair_el2;
	uintreg_t tcr_el2;
	uintreg_t sctlr_el2;
	uintreg_t hcr_el2;
	uintreg_t vtcr_el2;
	uintreg_t vstcr_el2;
} arch_mm_config;

static uint8_t mm_s1_max_level;
static uint8_t mm_s2_max_level;
static uint8_t mm_s2_root_table_count;

/**
 * Returns the encoding of a page table entry that isn't present.
 */
pte_t arch_mm_absent_pte(uint8_t level)
{
	(void)level;
	return 0;
}

/**
 * Converts a physical address to a table PTE.
 *
 * The spec says that 'Table descriptors for stage 2 translations do not
 * include any attribute field', so we don't take any attributes as arguments.
 */
pte_t arch_mm_table_pte(uint8_t level, paddr_t pa)
{
	/* This is the same for all levels on aarch64. */
	(void)level;
	return pa_addr(pa) | PTE_TABLE | PTE_VALID;
}

/**
 * Converts a physical address to a block PTE.
 *
 * The level must allow block entries.
 */
pte_t arch_mm_block_pte(uint8_t level, paddr_t pa, uint64_t attrs)
{
	pte_t pte = pa_addr(pa) | attrs;

	if (level == 0) {
		/* A level 0 'block' is actually a page entry. */
		pte |= PTE_LEVEL0_BLOCK;
	}
	return pte;
}

/**
 * Specifies whether block mappings are acceptable at the given level.
 *
 * Level 0 must allow block entries.
 */
bool arch_mm_is_block_allowed(uint8_t level)
{
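	/*
	 * With a 4 KB granule this corresponds to 4 KiB pages at level 0,
	 * 2 MiB blocks at level 1 and 1 GiB blocks at level 2 (Hafnium
	 * numbers levels from the leaf upwards).
	 */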
	return level <= 2;
}

/**
 * Determines if the given pte is present, i.e., if it is valid or it is
 * invalid but still holds state about the memory so needs to be present in
 * the table.
 */
bool arch_mm_pte_is_present(pte_t pte, uint8_t level)
{
	return arch_mm_pte_is_valid(pte, level) || (pte & STAGE2_SW_OWNED) != 0;
}

/**
 * Determines if the given pte is valid, i.e., if it points to another table,
 * to a page, or a block of pages that can be accessed.
 */
bool arch_mm_pte_is_valid(pte_t pte, uint8_t level)
{
	(void)level;
	return (pte & PTE_VALID) != 0;
}

/**
 * Determines if the given pte references a block of pages.
 */
bool arch_mm_pte_is_block(pte_t pte, uint8_t level)
{
	/* We count pages at level 0 as blocks. */
	return arch_mm_is_block_allowed(level) &&
	       (level == 0 ? (pte & PTE_LEVEL0_BLOCK) != 0
			   : arch_mm_pte_is_present(pte, level) &&
				     !arch_mm_pte_is_table(pte, level));
}

/**
 * Determines if the given pte references another table.
 */
bool arch_mm_pte_is_table(pte_t pte, uint8_t level)
{
	return level != 0 && arch_mm_pte_is_valid(pte, level) &&
	       (pte & PTE_TABLE) != 0;
}

static uint64_t pte_addr(pte_t pte)
{
	return pte & PTE_ADDR_MASK;
}

/**
 * Clears the given physical address, i.e., clears the bits of the address that
 * are not used in the pte.
 */
paddr_t arch_mm_clear_pa(paddr_t pa)
{
	return pa_init(pte_addr(pa_addr(pa)));
}

/**
 * Extracts the physical address of the block referred to by the given page
 * table entry.
 */
paddr_t arch_mm_block_from_pte(pte_t pte, uint8_t level)
{
	(void)level;
	return pa_init(pte_addr(pte));
}

/**
 * Extracts the physical address of the page table referred to by the given
 * page table entry.
 */
paddr_t arch_mm_table_from_pte(pte_t pte, uint8_t level)
{
	(void)level;
	return pa_init(pte_addr(pte));
}

/**
 * Extracts the architecture-specific attributes applied to the given page
 * table entry.
 */
uint64_t arch_mm_pte_attrs(pte_t pte, uint8_t level)
{
	(void)level;
	return pte & PTE_ATTR_MASK;
}

/**
 * Execute any barriers or synchronization that is required
 * by a given architecture, after page table writes.
 */
void arch_mm_sync_table_writes(void)
{
	/*
	 * Ensure visibility of table updates to translation table walks.
	 */
	dsb(ish);
}

/**
 * Invalidates stage-1 TLB entries referring to the given virtual address
 * range.
 */
void arch_mm_invalidate_stage1_range(uint16_t asid, vaddr_t va_begin,
				     vaddr_t va_end)
{
	uintvaddr_t begin = va_addr(va_begin);
	uintvaddr_t end = va_addr(va_end);
	uintvaddr_t it;

	/* Sync with page table updates. */
	arch_mm_sync_table_writes();

	/*
	 * Revisions prior to Armv8.4 do not support invalidating a range of
	 * addresses, which means we have to loop over individual pages. If
	 * there are too many, it is quicker to invalidate all TLB entries.
	 */
	if ((end - begin) > (MAX_TLBI_OPS * PAGE_SIZE)) {
		if (VM_TOOLCHAIN == 1) {
			tlbi(vmalle1is);
		} else {
			tlbi(alle2is);
		}
	} else {
		begin >>= 12;
		end >>= 12;
		/* Invalidate stage-1 TLB, one page from the range at a time. */
		for (it = begin; it < end;
		     it += (UINT64_C(1) << (PAGE_BITS - 12))) {
			/*
			 * Mask off the upper 8 bits of the asid passed in.
			 * Hafnium on aarch64 currently only uses 8-bit ASIDs.
			 * TCR_EL2.AS is set to 0 on implementations which
			 * support 16-bit ASIDs and is res0 on implementations
			 * that don't support 16-bit ASIDs.
			 */
			asid &= 0xff;
			it |= (uint64_t)asid << 48;
			if (VM_TOOLCHAIN == 1) {
				tlbi_reg(vae1is, it);
			} else {
				tlbi_reg(vae2is, it);
			}
		}
	}

	/* Sync data accesses with TLB invalidation completion. */
	dsb(ish);

	/* Sync instruction fetches with TLB invalidation completion. */
	isb();
}

/**
 * Invalidates stage-2 TLB entries referring to the given intermediate physical
 * address range.
 */
void arch_mm_invalidate_stage2_range(uint16_t vmid, ipaddr_t va_begin,
				     ipaddr_t va_end)
{
	uintpaddr_t begin = ipa_addr(va_begin);
	uintpaddr_t end = ipa_addr(va_end);
	uintpaddr_t it;

	(void)vmid;

	/* TODO: This only applies to the current VMID. */

	/* Sync with page table updates. */
	arch_mm_sync_table_writes();

	/*
	 * Switch to guest mode when VHE is enabled. This ensures that the TLB
	 * invalidations apply to the current VMID rather than to the EL2&0
	 * translation regime. Note that, of the code below, only
	 * tlbi vmalle1is is affected by the HCR_EL2.TGE bit. Bracketing all of
	 * the invalidation code in guest mode ensures that any future changes
	 * to it also apply to the guest VM rather than to the EL2&0
	 * translation regime.
	 */
	vhe_switch_to_host_or_guest(true);

	/*
	 * Revisions prior to Armv8.4 do not support invalidating a range of
	 * addresses, which means we have to loop over individual pages. If
	 * there are too many, it is quicker to invalidate all TLB entries.
	 */
	if ((end - begin) > (MAX_TLBI_OPS * PAGE_SIZE)) {
		/*
		 * Invalidate all stage-1 and stage-2 entries of the TLB for
		 * the current VMID.
		 */
		tlbi(vmalls12e1is);
	} else {
		begin >>= 12;
		end >>= 12;

		/*
		 * Invalidate stage-2 TLB, one page from the range at a time.
		 * Note that this has no effect if the CPU has a TLB with
		 * combined stage-1/stage-2 translation.
		 */
		for (it = begin; it < end;
		     it += (UINT64_C(1) << (PAGE_BITS - 12))) {
			tlbi_reg(ipas2e1is, it);
		}

		/*
		 * Ensure completion of stage-2 invalidation in case a page
		 * table walk on another CPU refilled the TLB with a complete
		 * stage-1 + stage-2 walk based on the old stage-2 mapping.
		 */
		dsb(ish);

		/*
		 * Invalidate all stage-1 TLB entries. If the CPU has a combined
		 * TLB for stage-1 and stage-2, this will invalidate stage-2 as
		 * well.
		 */
		tlbi(vmalle1is);
	}

	/* Sync data accesses with TLB invalidation completion. */
	dsb(ish);

	/* Sync instruction fetches with TLB invalidation completion. */
	isb();

	vhe_switch_to_host_or_guest(false);
}

/**
 * Returns the smallest cache line size of all the caches for this core.
 */
static uint16_t arch_mm_dcache_line_size(void)
{
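	/*
	 * CTR_EL0.DminLine (bits [19:16]) holds log2 of the number of 4-byte
	 * words in the smallest data cache line; e.g. a value of 4 gives
	 * 4 * 16 = 64 bytes.
	 */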
	return CACHE_WORD_SIZE *
	       (UINT16_C(1) << ((read_msr(CTR_EL0) >> 16) & 0xf));
}

void arch_mm_flush_dcache(void *base, size_t size)
{
	/* Clean and invalidate each data cache line in the range. */
	uint16_t line_size = arch_mm_dcache_line_size();
	uintptr_t line_begin = (uintptr_t)base & ~(line_size - 1);
	uintptr_t end = (uintptr_t)base + size;

	while (line_begin < end) {
		__asm__ volatile("dc civac, %0" : : "r"(line_begin));
		line_begin += line_size;
	}
	dsb(sy);
}

uint64_t arch_mm_mode_to_stage1_attrs(uint32_t mode)
{
	uint64_t attrs = 0;

	attrs |= STAGE1_AF | STAGE1_SH(INNER_SHAREABLE);

#if SECURE_WORLD == 1

	/**
	 * Define the non-secure bit.
	 * At NS-EL2 the Stage-1 MMU NS bit is RES0. At S-EL1/2, this bit
	 * defines the Stage-1 security attribute for the block or page.
	 */
	if (mode & MM_MODE_NS) {
		attrs |= STAGE1_NS;
	}

#endif
	/*
	 * STAGE1_XN can be XN or UXN depending on whether the EL2 translation
	 * regime uses one VA range or two VA ranges (VHE). PXN is res0 when
	 * the translation regime does not support two VA ranges.
	 */
	if (mode & MM_MODE_X) {
		if (has_vhe_support()) {
			attrs |=
				(mode & MM_MODE_USER) ? STAGE1_PXN : STAGE1_UXN;
		}

#if BRANCH_PROTECTION
		/* Mark code pages as Guarded Pages if BTI is supported. */
		if (is_arch_feat_bti_supported()) {
			attrs |= STAGE1_GP;
		}
#endif
	} else {
		if (has_vhe_support()) {
			attrs |= (STAGE1_UXN | STAGE1_PXN);
		} else {
			attrs |= STAGE1_XN;
		}
	}

	/* Define the read/write bits. */
	if (mode & MM_MODE_W) {
		attrs |= STAGE1_AP(STAGE1_READWRITE);
	} else {
		attrs |= STAGE1_AP(STAGE1_READONLY);
	}

	if (has_vhe_support()) {
		attrs |= (mode & MM_MODE_USER) ? STAGE1_AP(STAGE1_AP_USER_RW)
					       : 0;
		if (mode & MM_MODE_NG) {
			attrs |= STAGE1_NG;
		}
	}

	/* Define the memory attribute bits. */
	if (mode & MM_MODE_D) {
		attrs |= STAGE1_ATTRINDX(STAGE1_DEVICEINDX);
	} else if (mode & MM_MODE_T) {
		attrs |= STAGE1_ATTRINDX(STAGE1_STACKINDX);
	} else {
		attrs |= STAGE1_ATTRINDX(STAGE1_NORMALINDX);
	}

	/* Define the ownership bit. */
	if (!(mode & MM_MODE_UNOWNED)) {
		attrs |= STAGE1_SW_OWNED;
	}

	/* Define the exclusivity bit. */
	if (!(mode & MM_MODE_SHARED)) {
		attrs |= STAGE1_SW_EXCLUSIVE;
	}

	/* Define the valid bit. */
	if (!(mode & MM_MODE_INVALID)) {
		attrs |= PTE_VALID;
	}

	return attrs;
}

uint32_t arch_mm_stage1_attrs_to_mode(uint64_t attrs)
{
	uint32_t mode = 0;

#if SECURE_WORLD == 1
	if (attrs & STAGE1_NS) {
		mode |= MM_MODE_NS;
	}
#endif

	if ((attrs & STAGE1_AP(STAGE1_READONLY)) ==
	    STAGE1_AP(STAGE1_READONLY)) {
		mode |= MM_MODE_R;
	} else {
		CHECK((attrs & STAGE1_AP(STAGE1_READWRITE)) ==
		      STAGE1_AP(STAGE1_READWRITE));
		mode |= MM_MODE_W | MM_MODE_R;
	}

	if (has_vhe_support() && (attrs & STAGE1_AP(STAGE1_AP_USER_RW))) {
		mode |= MM_MODE_USER;
	}

	if (!(attrs & STAGE1_XN) || !(attrs & STAGE1_PXN)) {
		mode |= MM_MODE_X;
	}

	if (has_vhe_support() && (attrs & STAGE1_NG)) {
		mode |= MM_MODE_NG;
	}

	if (!((attrs & STAGE1_ATTRINDX(STAGE1_NORMALINDX)) ==
	      STAGE1_ATTRINDX(STAGE1_NORMALINDX))) {
		mode |= MM_MODE_D;
	} else {
		CHECK((attrs & STAGE1_ATTRINDX(STAGE1_NORMALINDX)) ==
		      STAGE1_ATTRINDX(STAGE1_NORMALINDX));
	}

	if (!(attrs & STAGE1_SW_OWNED)) {
		mode |= MM_MODE_UNOWNED;
	}

	if (!(attrs & STAGE1_SW_EXCLUSIVE)) {
		mode |= MM_MODE_SHARED;
	}

	if (!(attrs & PTE_VALID)) {
		mode |= MM_MODE_INVALID;
	}

	return mode;
}

uint64_t arch_mm_mode_to_stage2_attrs(uint32_t mode)
{
	uint64_t attrs = 0;
	uint64_t access = 0;

	/*
	 * Default shareability is inner shareable in stage 2 tables. Per
	 * table D5-45 of the Arm ARM (DDI 0487G), a stage 2 inner shareable
	 * attribute passes through a stage 1 attribute of outer shareable or
	 * inner shareable, but NOT non-shareable: a stage 1 non-shareable
	 * attribute combined with stage 2 inner shareable results in an inner
	 * shareable access. This is intentional, since a VCPU that marks a
	 * memory region as non-shareable in its stage 1 translation tables
	 * can be migrated to a different physical PE unless it is pinned to
	 * the PE. If stage 2 were marked as non-shareable below, the resulting
	 * accesses for a VCPU on a physical PE would be non-shareable, and
	 * hence potentially not visible on another physical PE, which could
	 * cause coherency issues when the VCPU is migrated and expects its
	 * non-shareable accesses to be visible, but would read stale or
	 * invalid data. Note that for an access that results in device memory
	 * type, the shareability does not matter and is always treated as
	 * outer shareable.
	 */
	attrs |= STAGE2_AF | STAGE2_SH(INNER_SHAREABLE);

	/* Define the read/write bits. */
	if (mode & MM_MODE_R) {
		access |= STAGE2_ACCESS_READ;
	}

	if (mode & MM_MODE_W) {
		access |= STAGE2_ACCESS_WRITE;
	}

	attrs |= STAGE2_S2AP(access);

	/* Define the execute bits. */
	if (mode & MM_MODE_X) {
		attrs |= STAGE2_XN(STAGE2_EXECUTE_ALL);
	} else {
		attrs |= STAGE2_XN(STAGE2_EXECUTE_NONE);
	}

	/*
	 * Define the memory attribute bits, using the "neutral" values which
	 * give the stage-1 attributes full control of the attributes.
	 */
	if (mode & MM_MODE_D) {
		attrs |= STAGE2_MEMATTR(STAGE2_DEVICE_MEMORY,
					STAGE2_MEMATTR_DEVICE_GRE);
	} else {
		attrs |= STAGE2_MEMATTR(STAGE2_WRITEBACK, STAGE2_WRITEBACK);
	}

	/* Define the ownership bit. */
	if (!(mode & MM_MODE_UNOWNED)) {
		attrs |= STAGE2_SW_OWNED;
	}

	/* Define the exclusivity bit. */
	if (!(mode & MM_MODE_SHARED)) {
		attrs |= STAGE2_SW_EXCLUSIVE;
	}

	/* Define the valid bit. */
	if (!(mode & MM_MODE_INVALID)) {
		attrs |= PTE_VALID;
	}

	return attrs;
}

uint32_t arch_mm_stage2_attrs_to_mode(uint64_t attrs)
{
	uint32_t mode = 0;

	if (attrs & STAGE2_S2AP(STAGE2_ACCESS_READ)) {
		mode |= MM_MODE_R;
	}

	if (attrs & STAGE2_S2AP(STAGE2_ACCESS_WRITE)) {
		mode |= MM_MODE_W;
	}

	if ((attrs & STAGE2_XN(STAGE2_EXECUTE_MASK)) ==
	    STAGE2_XN(STAGE2_EXECUTE_ALL)) {
		mode |= MM_MODE_X;
	}

	if ((attrs & STAGE2_MEMATTR_TYPE_MASK) == STAGE2_DEVICE_MEMORY) {
		mode |= MM_MODE_D;
	}

	if (!(attrs & STAGE2_SW_OWNED)) {
		mode |= MM_MODE_UNOWNED;
	}

	if (!(attrs & STAGE2_SW_EXCLUSIVE)) {
		mode |= MM_MODE_SHARED;
	}

	if (!(attrs & PTE_VALID)) {
		mode |= MM_MODE_INVALID;
	}

	return mode;
}

void arch_mm_stage1_max_level_set(uint32_t pa_bits)
{
	/* Maximum supported PA range in bits is 48. */
	CHECK(pa_bits <= 48);

	if (pa_bits >= 40) {
		mm_s1_max_level = 3;
	} else {
		/* Setting to 2 covers physical memory up to 512 GB. */
		mm_s1_max_level = 2;
	}
}

uint8_t arch_mm_stage1_max_level(void)
{
	return mm_s1_max_level;
}

uint8_t arch_mm_stage2_max_level(void)
{
	return mm_s2_max_level;
}

uint8_t arch_mm_stage1_root_table_count(void)
{
	/* Stage 1 doesn't concatenate tables. */
	return 1;
}

uint8_t arch_mm_stage2_root_table_count(void)
{
	return mm_s2_root_table_count;
}

/**
 * Given the attrs from a table at some level and the attrs from all the blocks
 * in that table, returns equivalent attrs to use for a block which will
 * replace the entire table.
 */
uint64_t arch_mm_combine_table_entry_attrs(uint64_t table_attrs,
					   uint64_t block_attrs)
{
	/*
	 * Only stage 1 table descriptors have attributes, but the bits are
	 * res0 for stage 2 table descriptors so this code is safe for both.
	 */
	if (table_attrs & TABLE_NSTABLE) {
		block_attrs |= STAGE1_NS;
	}
	if (table_attrs & TABLE_APTABLE1) {
		block_attrs |= STAGE1_AP2;
	}
	if (table_attrs & TABLE_APTABLE0) {
		/* When two VA ranges are supported, AP1 is valid. */
		if (has_vhe_support()) {
			block_attrs |= STAGE1_AP1;
		} else {
			block_attrs &= ~STAGE1_AP1;
		}
	}
	if (table_attrs & TABLE_XNTABLE) {
		block_attrs |= STAGE1_XN;
	}
	if (table_attrs & TABLE_PXNTABLE) {
		block_attrs |= STAGE1_PXN;
	}
	return block_attrs;
}

/**
 * This is called early in initialization without MMU or caches enabled.
 */
bool arch_mm_init(paddr_t table)
{
	uint64_t features = read_msr(id_aa64mmfr0_el1);
	uint64_t pe_features = read_msr(id_aa64pfr0_el1);
	unsigned int nsa_nsw;
	uint32_t pa_bits = arch_mm_get_pa_range();
	uint32_t extend_bits;
	uint32_t sl0;

	/* Check that 4KB granules are supported. */
	if (((features >> 28) & 0xf) == 0xf) {
		dlog_error("4KB granules are not supported\n");
		return false;
	}

	/* Check the physical address range. */
	if (!pa_bits) {
		dlog_error(
			"Unsupported value of id_aa64mmfr0_el1.PARange: %x\n",
			features & 0xf);
		return false;
	}

	/* Downgrade PA size from 52 to 48 bits (FEAT_LPA workaround). */
	if (pa_bits == 52) {
		dlog_verbose(
			"52-bit PA size not supported,"
			" falling back to 48-bit\n");
		pa_bits = 48;
	}

	dlog_info("Supported bits in physical address: %d\n", pa_bits);

	/*
	 * Determine sl0, starting level of the page table, based on the number
	 * of bits. The value is chosen to give the shallowest tree by making
	 * use of concatenated translation tables.
	 *
	 * - 0 => start at level 1
	 * - 1 => start at level 2
	 * - 2 => start at level 3
	 */
	if (pa_bits >= 44) {
		sl0 = 2;
		mm_s2_max_level = 3;
	} else if (pa_bits >= 35) {
		sl0 = 1;
		mm_s2_max_level = 2;
	} else {
		sl0 = 0;
		mm_s2_max_level = 1;
	}

	arch_mm_stage1_max_level_set(pa_bits);

	/*
	 * Since the shallowest possible tree is used, the maximum number of
	 * concatenated tables must be used. This means if no more than 4 bits
	 * are used from the next level, they are instead used to index into
	 * the concatenated tables.
	 */
	extend_bits = ((pa_bits - PAGE_BITS) % PAGE_LEVEL_BITS);
	if (extend_bits > 4) {
		extend_bits = 0;
	}
	mm_s2_root_table_count = 1 << extend_bits;
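	/*
	 * Worked example (assuming a 4 KB granule, i.e. PAGE_BITS == 12 and
	 * PAGE_LEVEL_BITS == 9): for pa_bits == 40, extend_bits is
	 * (40 - 12) % 9 == 1, so two concatenated root tables are used at the
	 * starting level chosen above.
	 */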

	dlog_info(
		"Stage 2 has %d page table levels with %d pages at the root.\n",
		mm_s2_max_level + 1, mm_s2_root_table_count);

	dlog_info(
		"Stage 1 has %d page table levels with %d pages at the root.\n",
		mm_s1_max_level + 1, arch_mm_stage1_root_table_count());

	/*
	 * If the PE implements S-EL2 then VTCR_EL2.NSA/NSW bits are significant
	 * in secure state. In non-secure state, NSA/NSW behave as if set to
	 * 11b. If S-EL2 is not implemented NSA/NSW bits are RES0.
	 */
	if (((pe_features >> 36) & 0xF) == 1) {
		/*
		 * NSA/NSW=10b: in secure state,
		 * S2 translations for the NS IPA space access the NS PA space.
		 * S2 translation table walks for the NS IPA space are to the
		 * secure PA space.
		 */
		nsa_nsw = 2;
	} else {
		nsa_nsw = 0;
	}

	arch_mm_config = (struct arch_mm_config)
	{
		.ttbr0_el2 = pa_addr(table),

		.vtcr_el2 =
			(1U << 31) | /* RES1. */
			(nsa_nsw << 29) | /* NSA/NSW. */
			((features & 0xf) << 16) | /* PS, matching features. */
			(0 << 14) | /* TG0: 4 KB granule. */
			(3 << 12) | /* SH0: inner shareable. */
			(1 << 10) | /* ORGN0: normal, cacheable ... */
			(1 << 8) | /* IRGN0: normal, cacheable ... */
			(sl0 << 6) | /* SL0. */
			((64 - pa_bits) << 0) | /* T0SZ: dependent on PS. */
			0,

		/*
		 * 0    -> Device-nGnRnE memory
		 * 0xff -> Normal memory, Inner/Outer Write-Back Non-transient,
		 *         Write-Alloc, Read-Alloc.
		 * 0xf0 -> Tagged Normal, Inner/Outer Write-Back,
		 *         Read/Write-Alloc non-transient memory.
		 */
		.mair_el2 = (0 << (8 * STAGE1_DEVICEINDX)) |
#if ENABLE_MTE
			    (0xf0 << (8 * STAGE1_STACKINDX)) |
#endif
			    (0xff << (8 * STAGE1_NORMALINDX)),

		.sctlr_el2 = get_sctlr_el2_value(false),
		.vstcr_el2 = (1U << 31) | /* RES1. */
			     (0 << 30) | /* SA. */
			     (0 << 29) | /* SW. */
			     (0 << 14) | /* TG0: 4 KB granule. */
			     (sl0 << 6) | /* SL0. */
			     ((64 - pa_bits) << 0), /* T0SZ: dependent on PS. */
	};

	/*
	 * Configure tcr_el2 and hcr_el2. The configuration depends on whether
	 * VHE support is enabled by the build and is available in HW. If VHE
	 * is enabled and available, hcr_el2.e2h is set during boot, before the
	 * MMU is turned on. This is because setting e2h redefines registers,
	 * can be cached in the TLBs and enables the use of ttbr1_el2, among
	 * other things, which makes enabling it at run time much more
	 * complicated. The bit is set once during boot and is not expected to
	 * change for the boot cycle. When VHE is enabled, currently only the
	 * lower virtual address range (ttbr0_el2) is used and the upper
	 * address range (ttbr1_el2) is disabled. This keeps Hafnium simple and
	 * consistent with its behavior when VHE is not enabled. When VHE is
	 * not enabled, hcr_el2 will default to 0 and will be set up during
	 * vCPU initialization.
	 */
	arch_mm_config.hcr_el2 = 0;
	if (has_vhe_support()) {
		arch_mm_config.hcr_el2 |= (HCR_EL2_E2H | HCR_EL2_TGE);
		arch_mm_config.tcr_el2 =
			(1UL << 38) | /* TBI1, top byte ignored. */
			(1UL << 37) | /* TBI0, top byte ignored. */
			(2UL << 32) | /* IPS, IPA size. */
			(2UL << 30) | /* TG1, granule size, 4KB. */
			(3UL << 28) | /* SH1, inner shareable. */
			(1UL << 26) | /* ORGN1, normal mem, WB RA WA Cacheable. */
			(1UL << 24) | /* IRGN1, normal mem, WB RA WA Cacheable. */
			(1UL << 23) | /* EPD1 - Disable TTBR1_EL2 translation. */
			(0UL << 22) | /* TTBR0_EL2.ASID defines ASID. */
			((64 - pa_bits) << 16) | /* T1SZ, input address is 2^pa_bits bytes. */
			(0UL << 14) | /* TG0, granule size, 4KB. */
			(3UL << 12) | /* SH0, inner shareable. */
			(1UL << 10) | /* ORGN0, normal mem, WB RA WA Cacheable. */
			(1UL << 8) | /* IRGN0, normal mem, WB RA WA Cacheable. */
			((64 - pa_bits) << 0) | /* T0SZ, input address is 2^pa_bits bytes. */
			0;
	} else {
		arch_mm_config.tcr_el2 =
			(1 << 20) | /* TBI, top byte ignored. */
			((features & 0xf) << 16) | /* PS. */
			(0 << 14) | /* TG0, granule size, 4KB. */
			(3 << 12) | /* SH0, inner shareable. */
			(1 << 10) | /* ORGN0, normal mem, WB RA WA Cacheable. */
			(1 << 8) | /* IRGN0, normal mem, WB RA WA Cacheable. */
			((64 - pa_bits) << 0) | /* T0SZ, input address is 2^pa_bits bytes. */
			0;
	}
	return true;
}

/**
 * Return the arch specific mm mode for send/recv pages of given VM ID.
 */
uint32_t arch_mm_extra_attributes_from_vm(ffa_vm_id_t id)
{
	return ((id & HF_VM_ID_WORLD_MASK) == HF_HYPERVISOR_VM_ID) ? MM_MODE_NS
								    : 0;
}

/**
 * Returns the maximum supported PA Range in bits.
 */
uint32_t arch_mm_get_pa_range(void)
{
	static const uint32_t pa_bits_table[16] = {32, 36, 40, 42, 44, 48, 52};
	uint64_t features = read_msr(id_aa64mmfr0_el1);
	return pa_bits_table[features & 0xf];
}

uintptr_t arch_mm_get_vtcr_el2(void)
{
	return arch_mm_config.vtcr_el2;
}

uintptr_t arch_mm_get_vstcr_el2(void)
{
	return arch_mm_config.vstcr_el2;
}