/*
 * Copyright 2023 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */
#include <stdint.h>
#include <stdbool.h>
#include <zephyr/kernel.h>
#include <xtensa/config/core-isa.h>
#include <xtensa_mmu_priv.h>
#include <zephyr/cache.h>

#ifdef CONFIG_USERSPACE
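/* Sanity check: the privileged stack must be a nonzero, whole number
 * of MMU pages.
 */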
BUILD_ASSERT((CONFIG_PRIVILEGED_STACK_SIZE > 0) &&
             (CONFIG_PRIVILEGED_STACK_SIZE % CONFIG_MMU_PAGE_SIZE) == 0);
#endif

#define ASID_INVALID 0

extern uint32_t *xtensa_kernel_ptables;

void xtensa_mmu_compute_domain_regs(struct arch_mem_domain *domain)
{
        uint32_t vecbase = XTENSA_RSR("VECBASE");
        uint32_t *l1_page = domain->ptables;
        uint32_t user_asid = domain->asid;

        __ASSERT_NO_MSG((((uint32_t)l1_page) & 0xfff) == 0);
        __ASSERT_NO_MSG((user_asid == 0) || ((user_asid > 2) &&
                        (user_asid < XTENSA_MMU_SHARED_ASID)));

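        /* RASID holds one ASID byte per ring, ring 0 in the low byte:
         * [31:24] ring 3 (shared), [23:16] ring 2 (per-domain),
         * [15:8] ring 1, [7:0] ring 0.
         */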
        /* We don't use ring 1; the ring 0 ASID must be 1 */
        domain->reg_asid = (XTENSA_MMU_SHARED_ASID << 24) |
                           (user_asid << 16) | 0x000201;

        /* Derive PTEVADDR from ASID so each domain gets its own PTE area */
        domain->reg_ptevaddr = CONFIG_XTENSA_MMU_PTEVADDR + user_asid * 0x400000;
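        /* (The PTE area mapping a full 4 GB address space is 2^20 PTEs
         * of 4 bytes each, i.e. 4 MB, hence the 0x400000 stride between
         * per-ASID areas.)
         */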

        /* The ptables code doesn't add the mapping for the l1 page itself */
        l1_page[XTENSA_MMU_L1_POS(domain->reg_ptevaddr)] =
                (uint32_t)l1_page | XTENSA_MMU_PAGE_TABLE_ATTR;
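        /* Pin the L1 page table into the data TLB at the spot in the
         * PTE area that maps the PTE area itself, so refill loads from
         * the page tables can't themselves miss.
         */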
        domain->reg_ptepin_at = (uint32_t)l1_page;
        domain->reg_ptepin_as = XTENSA_MMU_PTE_ENTRY_VADDR(domain->reg_ptevaddr,
                                                           domain->reg_ptevaddr)
                                | XTENSA_MMU_PTE_WAY;

        /* Pin mapping for refilling the vector address into the ITLB
         * (for handling TLB miss exceptions). Note: this is NOT an
         * instruction TLB entry for the vector code itself, it's a
         * DATA TLB entry for the page containing the vector mapping
         * so the refill on instruction fetch can find it. The
         * hardware doesn't have a 4k pinnable instruction TLB way,
         * frustratingly.
         */
        uint32_t vb_pte = l1_page[XTENSA_MMU_L1_POS(vecbase)];

        domain->reg_vecpin_at = vb_pte;
        domain->reg_vecpin_as = XTENSA_MMU_PTE_ENTRY_VADDR(domain->reg_ptevaddr,
                                                           vecbase)
                                | XTENSA_MMU_VECBASE_WAY;
}

/* Switch to a new page table. There are four items we have to set in
 * the hardware: the PTE virtual address, the ring/ASID mapping
 * register, and two pinned entries in the data TLB handling refills
 * for the page tables and the vector handlers.
 *
 * These can be done in any order, provided that we ensure that no
 * memory access which causes a TLB miss can happen during the process.
 * This means that we must work entirely within registers in a single
 * asm block. Also note that instruction fetches are memory accesses
 * too, which means we cannot cross a page boundary which might reach
 * a new page not in the TLB (a single jump to an aligned address that
 * holds our five instructions is sufficient to guarantee that: I
 * couldn't think of a way to do the alignment statically that also
 * interoperated well with inline assembly).
 */
void xtensa_mmu_set_paging(struct arch_mem_domain *domain)
{
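        /* The five aligned instructions: PTE window base, ring->ASID
         * mapping, the two pinned DTLB entries, then isync so later
         * fetches and loads use the new state.
         */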
        __asm__ volatile("j 1f\n"
                         ".align 16\n" /* enough for 5 insns */
                         "1:\n"
                         "wsr %0, PTEVADDR\n"
                         "wsr %1, RASID\n"
                         "wdtlb %2, %3\n"
                         "wdtlb %4, %5\n"
                         "isync"
                         :: "r"(domain->reg_ptevaddr), "r"(domain->reg_asid),
                            "r"(domain->reg_ptepin_at), "r"(domain->reg_ptepin_as),
                            "r"(domain->reg_vecpin_at), "r"(domain->reg_vecpin_as));
}

/* This is effectively the same algorithm as xtensa_mmu_set_paging(),
 * but it also disables the hardware-initialized 512M TLB entries in
 * way 6 (because the hardware disallows duplicate TLB mappings). For
 * instruction fetches this produces a critical ordering constraint:
 * the instruction following the invalidation of the ITLB entry mapping
 * the current PC will by definition create a refill condition, which
 * will (because the data TLB was invalidated) cause a refill
 * exception. Therefore this step must be the very last one, once
 * everything else is set up and working, which includes the
 * invalidation of the virtual PTEVADDR area so that the resulting
 * refill can complete.
 *
 * Note that we can't guarantee that the compiler won't insert a data
 * fetch from our stack memory after exit from the asm block (while it
 * might be double-mapped), so we invalidate that data TLB entry inside
 * the asm for correctness. The other 13 entries get invalidated in a C
 * loop at the end.
 */
void xtensa_mmu_init_paging(void)
{
        extern char z_xt_init_pc; /* defined in asm below */
        unsigned int initial_rasid;

        /* When this is called in xtensa_mmu_init(), the default memory
         * domain struct has not been initialized, and memory domains
         * are not ready to be used. So we need a local copy of
         * struct arch_mem_domain to store the register values to be
         * programmed into hardware.
         */
        struct arch_mem_domain domain;

        /* The initial RASID value after hardware initialization is
         * 0x04030201.  ASID 1 is hardwired to ring 0; the other slots
         * must be different from each other and must not be 0.
         *
         * For our initial implementation we just set the 4th slot
         * (ring 3) to the ASID value used for memory that is shared
         * with all threads.
         */
        initial_rasid = 0xff030201;
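        /* 0xff in the top byte is the shared ASID in the ring 3 slot;
         * rings 2..0 keep their reset values (3, 2, 1).
         */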

#if CONFIG_MP_MAX_NUM_CPUS > 1
        /* The incoherent cache can get into terrible trouble if it's
         * allowed to cache PTEs differently across CPUs. We require
         * that all page tables supplied by the OS have exclusively
         * uncached mappings for page data, but can't do anything
         * about earlier code/firmware. Dump the cache to be safe.
         */
        sys_cache_data_flush_and_invd_all();
#endif

        domain.asid = ASID_INVALID;
        domain.ptables = xtensa_kernel_ptables;
        xtensa_mmu_compute_domain_regs(&domain);
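        /* Note: the asm below programs initial_rasid rather than
         * domain.reg_asid, since the kernel page table set has no user
         * ASID of its own.
         */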
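        /* Invalidation arguments for the three hardware-initialized
         * way-6 (512M spanning) entries that must be dropped from
         * inside the asm block: the DTLB entries covering the PTE
         * window and this function's stack, and the ITLB entry
         * covering the code at z_xt_init_pc.
         */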
        uint32_t idtlb_pte = (domain.reg_ptevaddr & 0xe0000000) | XCHAL_SPANNING_WAY;
        uint32_t idtlb_stk = (((uint32_t)&domain) & ~0xfff) | XCHAL_SPANNING_WAY;
        uint32_t iitlb_pc  = (((uint32_t)&z_xt_init_pc) & ~0xfff) | XCHAL_SPANNING_WAY;

        /* Note: the jump is mostly pedantry, as it's almost
         * inconceivable that a hardware memory region at boot is
         * going to cross a 512M page boundary. But we need the entry
         * symbol to get the address above, so the jump is here for
         * symmetry with the set_paging() code.
         */
        __asm__ volatile("j z_xt_init_pc\n"
                         ".align 32\n" /* room for 10 insns */
                         ".globl z_xt_init_pc\n"
                         "z_xt_init_pc:\n"
                         "wsr %0, PTEVADDR\n"
                         "wsr %1, RASID\n"
                         "wdtlb %2, %3\n"
                         "wdtlb %4, %5\n"
                         "idtlb %6\n" /* invalidate pte */
                         "idtlb %7\n" /* invalidate stk */
                         "isync\n"
                         "iitlb %8\n" /* invalidate pc */
                         "isync\n" /* <--- traps an ITLB miss */
                         :: "r"(domain.reg_ptevaddr), "r"(initial_rasid),
                            "r"(domain.reg_ptepin_at), "r"(domain.reg_ptepin_as),
                            "r"(domain.reg_vecpin_at), "r"(domain.reg_vecpin_as),
                            "r"(idtlb_pte), "r"(idtlb_stk), "r"(iitlb_pc));

        /* Invalidate the remaining (unused by this function)
         * initialization entries. Now we're flying free with our own
         * page table.
         */
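        /* Each of the ITLB and DTLB has eight 512M spanning-way
         * regions covering the 4 GB space; three of those 16 entries
         * were already dropped in the asm above, leaving the 13
         * handled here.
         */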
        for (uint32_t i = 0; i < 8; i++) {
                uint32_t ixtlb = (i * 0x20000000) | XCHAL_SPANNING_WAY;

                if (ixtlb != iitlb_pc) {
                        __asm__ volatile("iitlb %0" :: "r"(ixtlb));
                }
                if (ixtlb != idtlb_stk && ixtlb != idtlb_pte) {
                        __asm__ volatile("idtlb %0" :: "r"(ixtlb));
                }
        }
        __asm__ volatile("isync");
}