// © 2021 Qualcomm Innovation Center, Inc. All rights reserved.
//
// SPDX-License-Identifier: BSD-3-Clause

#include <assert.h>
#include <hyptypes.h>

#include <compiler.h>
#include <idle.h>
#include <object.h>
#include <panic.h>
#include <partition.h>
#include <pgtable.h>
#include <platform_timer.h>
#include <preempt.h>
#include <prng.h>
#include <scheduler.h>
#include <thread.h>
#include <trace.h>

#include <events/thread.h>

#include "event_handlers.h"
#include "thread_arch.h"

typedef register_t (*fptr_t)(register_t arg);
typedef void (*fptr_noreturn_t)(register_t arg);

const size_t thread_stack_min_align = 16;
const size_t thread_stack_alloc_align = PGTABLE_HYP_PAGE_SIZE;
const size_t thread_stack_size_default = PGTABLE_HYP_PAGE_SIZE;

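// Calculate the offset of current_thread from the TLS base register, using
// the TPREL relocations generated for the add instructions below.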
static size_t
thread_get_tls_offset(void)
{
	size_t offset = 0;
	__asm__("add %0, %0, :tprel_hi12:current_thread ;"
		"add %0, %0, :tprel_lo12_nc:current_thread ;"
		: "+r"(offset));
	return offset;
}

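// Calculate the TLS base (TPIDR_EL2 value) for a given thread, such that
// current_thread resolves to that thread's structure.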
static uintptr_t
thread_get_tls_base(thread_t *thread)
{
	return (uintptr_t)thread - thread_get_tls_offset();
}

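// Entry point for newly created threads. The previous thread and the
// scheduling time are received in X0 and X1 from the first context switch
// to this thread.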
static noreturn void
thread_arch_main(thread_t *prev, ticks_t schedtime) LOCK_IMPL
{
	thread_t *thread = thread_get_self();

	trigger_thread_start_event();

	trigger_thread_context_switch_post_event(prev, schedtime, (ticks_t)0UL);
	object_put_thread(prev);

	thread_func_t thread_func =
		trigger_thread_get_entry_fn_event(thread->kind);
	trigger_thread_load_state_event(true);

	if (thread_func != NULL) {
		preempt_enable();
		thread_func(thread->params);
	}

	thread_exit();
}

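// Save the current thread's PC, SP and FP and resume next_thread from its
// saved context. When the calling thread is eventually switched back to,
// this returns the thread that ran before it and updates *schedtime with
// the tick count that thread passed in X1.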
thread_t *
thread_arch_switch_thread(thread_t *next_thread, ticks_t *schedtime)
{
	// The previous thread and the scheduling time must be kept in X0 and X1
	// to ensure that thread_arch_main() receives them as arguments on the
	// first context switch.
	register thread_t *old __asm__("x0") = thread_get_self();
	register ticks_t ticks __asm__("x1") = *schedtime;

	// The remaining hard-coded registers here are only needed to ensure a
	// correct clobber list below. The union of the clobber list, hard-coded
	// registers and explicitly saved registers (x29, sp and pc) must be the
	// entire integer register state.
	register register_t old_pc __asm__("x2");
	register register_t old_sp __asm__("x3");
	register register_t old_fp __asm__("x4");
	register uintptr_t old_context __asm__("x5") =
		(uintptr_t)&old->context.pc;
	static_assert(offsetof(thread_t, context.sp) ==
			      offsetof(thread_t, context.pc) +
				      sizeof(next_thread->context.pc),
		      "PC and SP must be adjacent in context");
	static_assert(offsetof(thread_t, context.fp) ==
			      offsetof(thread_t, context.sp) +
				      sizeof(next_thread->context.sp),
		      "SP and FP must be adjacent in context");

	// The new PC must be in x16 or x17 so ARMv8.5-BTI will treat the BR
	// below as a call trampoline, and thus allow it to jump to the BTI C
	// instruction at a new thread's entry point.
	register register_t new_pc __asm__("x16") = next_thread->context.pc;
	register register_t new_sp __asm__("x6") = next_thread->context.sp;
	register register_t new_fp __asm__("x7") = next_thread->context.fp;
	register uintptr_t new_tls_base __asm__("x8") =
		thread_get_tls_base(next_thread);

	__asm__ volatile(
		"adr %[old_pc], .Lthread_continue.%= ;"
		"mov %[old_sp], sp ;"
		"mov %[old_fp], x29 ;"
		"mov sp, %[new_sp] ;"
		"mov x29, %[new_fp] ;"
		"msr TPIDR_EL2, %[new_tls_base] ;"
		"stp %[old_pc], %[old_sp], [%[old_context]] ;"
		"str %[old_fp], [%[old_context], 16] ;"
		"br %[new_pc] ;"
		".Lthread_continue.%=: ;"
#if defined(ARCH_ARM_FEAT_BTI)
		"bti j ;"
#endif
		: [old] "+r"(old), [old_pc] "=&r"(old_pc),
		  [old_sp] "=&r"(old_sp), [old_fp] "=&r"(old_fp),
		  [old_context] "+r"(old_context), [new_pc] "+r"(new_pc),
		  [new_sp] "+r"(new_sp), [new_fp] "+r"(new_fp),
		  [new_tls_base] "+r"(new_tls_base), [ticks] "+r"(ticks)
		: /* This must not have any inputs */
		: "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x17", "x18",
		  "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27",
		  "x28", "x30", "cc", "memory");

	// Update schedtime from the tick count passed by the previous thread.
	*schedtime = ticks;

	return old;
}

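// Jump directly to the given thread's saved context, discarding the current
// execution state.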
noreturn void
thread_arch_set_thread(thread_t *thread)
{
	// This should only be called on the idle thread during power-up, which
	// should already be the current thread for TLS. It discards the
	// current execution state.
	assert(thread == thread_get_self());
	assert(thread == idle_thread());

	// The previous thread and the scheduling time must be kept in X0 and X1
	// to ensure that thread_arch_main() receives them as arguments on the
	// first context switch during CPU cold boot. The scheduling time is set
	// to 0 because we consider the idle thread to have been scheduled at
	// the epoch. These are unused on warm boot, which is always resuming a
	// thread_freeze() call.
	register thread_t *old __asm__("x0") = thread;
	register ticks_t ticks __asm__("x1") = (ticks_t)0U;

	// The new PC must be in x16 or x17 so ARMv8.5-BTI will treat the BR
	// below as a call trampoline, and thus allow it to jump to the BTI C
	// instruction at a new thread's entry point.
	register register_t new_pc __asm__("x16");
	new_pc = thread->context.pc;
	register_t new_sp = thread->context.sp;
	register_t new_fp = thread->context.fp;

	__asm__ volatile(
		"mov sp, %[new_sp] ;"
		"mov x29, %[new_fp] ;"
		"br %[new_pc] ;"
		:
		: [old] "r"(old), [ticks] "r"(ticks), [new_pc] "r"(new_pc),
		  [new_sp] "r"(new_sp), [new_fp] "r"(new_fp)
		: "memory");
	__builtin_unreachable();
}

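// Save the current thread's context and call fn(param) on the current stack.
// If fn returns, its return value is passed through; if the saved context is
// resumed instead, resumed_result is returned.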
register_t
thread_freeze(fptr_t fn, register_t param, register_t resumed_result)
{
	TRACE(INFO, INFO, "thread_freeze start fn: {:#x} param: {:#x}",
	      (uintptr_t)fn, (uintptr_t)param);

	trigger_thread_save_state_event();

	thread_t *thread = thread_get_self();
	assert(thread != NULL);

	// The parameter must be kept in X0 so the freeze function gets it as an
	// argument.
	register register_t x0 __asm__("x0") = param;

	// The remaining hard-coded registers here are only needed to
	// ensure a correct clobber list below. The union of the clobber
	// list, fixed output registers and explicitly saved registers
	// (x29, sp and pc) must be the entire integer register state.
	register register_t saved_pc __asm__("x1");
	register register_t saved_sp __asm__("x2");
	register uintptr_t context __asm__("x3") =
		(uintptr_t)&thread->context.pc;
	register fptr_t fn_reg __asm__("x4") = fn;
	register bool is_resuming __asm__("x5");

	static_assert(offsetof(thread_t, context.sp) ==
			      offsetof(thread_t, context.pc) +
				      sizeof(thread->context.pc),
		      "PC and SP must be adjacent in context");
	static_assert(offsetof(thread_t, context.fp) ==
			      offsetof(thread_t, context.sp) +
				      sizeof(thread->context.sp),
		      "SP and FP must be adjacent in context");

	__asm__ volatile(
		"adr %[saved_pc], .Lthread_freeze.resumed.%= ;"
		"mov %[saved_sp], sp ;"
		"stp %[saved_pc], %[saved_sp], [%[context]] ;"
		"str x29, [%[context], 16] ;"
		"blr %[fn_reg] ;"
		"mov %[is_resuming], 0 ;"
		"b .Lthread_freeze.done.%= ;"
		".Lthread_freeze.resumed.%=: ;"
#if defined(ARCH_ARM_FEAT_BTI)
		"bti j ;"
#endif
		"mov %[is_resuming], 1 ;"
		".Lthread_freeze.done.%=: ;"
		: [is_resuming] "=%r"(is_resuming), [saved_pc] "=&r"(saved_pc),
		  [saved_sp] "=&r"(saved_sp), [context] "+r"(context),
		  [fn_reg] "+r"(fn_reg), "+r"(x0)
		: /* This must not have any inputs */
		: "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14",
		  "x15", "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
		  "x24", "x25", "x26", "x27", "x28", "x30", "cc", "memory");

	if (is_resuming) {
		x0 = resumed_result;
		trigger_thread_load_state_event(false);

		TRACE(INFO, INFO, "thread_freeze resumed: {:#x}", x0);
	} else {
		TRACE(INFO, INFO, "thread_freeze returned: {:#x}", x0);
	}

	return x0;
}

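// Discard the current stack contents and call fn(param) from the top of the
// thread's stack with a cleared frame pointer. fn must not return; if it
// does, this panics.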
noreturn void
thread_reset_stack(fptr_noreturn_t fn, register_t param)
{
	thread_t *thread = thread_get_self();
	register register_t x0 __asm__("x0") = param;
	uintptr_t new_sp = (uintptr_t)thread->stack_base + thread->stack_size;

	__asm__ volatile("mov sp, %[new_sp] ;"
			 "mov x29, 0 ;"
			 "blr %[new_pc] ;"
			 :
			 : [new_pc] "r"(fn), [new_sp] "r"(new_sp), "r"(x0)
			 : "memory");
	panic("returned to thread_reset_stack()");
}

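// Set up a new thread's initial context so that the first switch to it
// enters thread_arch_main() on its own stack.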
void
thread_arch_init_context(thread_t *thread)
{
	assert(thread != NULL);

	thread->context.pc = (uintptr_t)thread_arch_main;
	thread->context.sp = (uintptr_t)thread->stack_base + thread->stack_size;
	thread->context.fp = (uintptr_t)0;
}

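// Map the thread's stack into the hypervisor address space as read-write,
// writeback, inner-shareable memory.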
error_t
thread_arch_map_stack(thread_t *thread)
{
	error_t err;

	assert(thread != NULL);
	assert(thread->stack_base != 0U);

	partition_t *partition = thread->header.partition;
	paddr_t stack_phys =
		partition_virt_to_phys(partition, thread->stack_mem);

	pgtable_hyp_start();
	err = pgtable_hyp_map(partition, thread->stack_base, thread->stack_size,
			      stack_phys, PGTABLE_HYP_MEMTYPE_WRITEBACK,
			      PGTABLE_ACCESS_RW,
			      VMSA_SHAREABILITY_INNER_SHAREABLE);
	pgtable_hyp_commit();

	return err;
}

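// Remove the hypervisor mapping of the thread's stack.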
void
thread_arch_unmap_stack(thread_t *thread)
{
	pgtable_hyp_start();
	pgtable_hyp_unmap(thread->header.partition, thread->stack_base,
			  thread->stack_size, thread->stack_size);
	pgtable_hyp_commit();
}