// © 2021 Qualcomm Innovation Center, Inc. All rights reserved.
//
// SPDX-License-Identifier: BSD-3-Clause

#include <assert.h>
#include <hyptypes.h>

#include <compiler.h>
#include <idle.h>
#include <object.h>
#include <panic.h>
#include <partition.h>
#include <pgtable.h>
#include <platform_timer.h>
#include <preempt.h>
#include <prng.h>
#include <scheduler.h>
#include <thread.h>
#include <trace.h>

#include <events/thread.h>

#include "event_handlers.h"
#include "thread_arch.h"

typedef register_t (*fptr_t)(register_t arg);
typedef void (*fptr_noreturn_t)(register_t arg);

const size_t thread_stack_min_align    = 16;
const size_t thread_stack_alloc_align  = PGTABLE_HYP_PAGE_SIZE;
const size_t thread_stack_size_default = PGTABLE_HYP_PAGE_SIZE;

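// Return the offset of the thread-local current_thread variable from the TLS
// base held in TPIDR_EL2. The offset is materialised from the TPREL
// relocations, so no memory access is needed.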
static size_t
thread_get_tls_offset(void)
{
	size_t offset = 0;
	__asm__("add     %0, %0, :tprel_hi12:current_thread	;"
		"add     %0, %0, :tprel_lo12_nc:current_thread	;"
		: "+r"(offset));
	return offset;
}

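// Compute the TLS base to load into TPIDR_EL2 for a given thread, chosen so
// that the address of the thread-local current_thread coincides with the
// thread structure itself.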
static uintptr_t
thread_get_tls_base(thread_t *thread)
{
	return (uintptr_t)thread - thread_get_tls_offset();
}

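// Entry point for the first switch to a newly created thread. It completes
// the context switch (dropping the reference to the previous thread), loads
// the thread's state, then enables preemption and calls the entry function
// registered for the thread's kind; if no entry function is registered, or it
// returns, the thread exits.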
static noreturn void
thread_arch_main(thread_t *prev, ticks_t schedtime) LOCK_IMPL
{
	thread_t *thread = thread_get_self();

	trigger_thread_start_event();

	trigger_thread_context_switch_post_event(prev, schedtime, (ticks_t)0UL);
	object_put_thread(prev);

	thread_func_t thread_func =
		trigger_thread_get_entry_fn_event(thread->kind);
	trigger_thread_load_state_event(true);

	if (thread_func != NULL) {
		preempt_enable();
		thread_func(thread->params);
	}

	thread_exit();
}

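// Switch execution to next_thread, saving the current PC, SP and FP into the
// current thread's context and loading next_thread's saved values and TLS
// base. This call returns only when the current thread is switched back to;
// it then returns the thread that was running immediately beforehand, and
// updates *schedtime from the tick count that thread passed in X1.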
thread_t *
thread_arch_switch_thread(thread_t *next_thread, ticks_t *schedtime)
{
	// The previous thread and the scheduling time must be kept in X0 and X1
	// to ensure that thread_arch_main() receives them as arguments on the
	// first context switch.
	register thread_t *old __asm__("x0")   = thread_get_self();
	register ticks_t   ticks __asm__("x1") = *schedtime;

	// The remaining hard-coded registers here are only needed to ensure a
	// correct clobber list below. The union of the clobber list, hard-coded
	// registers and explicitly saved registers (x29, sp and pc) must be the
	// entire integer register state.
	register register_t old_pc __asm__("x2");
	register register_t old_sp __asm__("x3");
	register register_t old_fp __asm__("x4");
	register uintptr_t  old_context __asm__("x5") =
		(uintptr_t)&old->context.pc;
	static_assert(offsetof(thread_t, context.sp) ==
			      offsetof(thread_t, context.pc) +
				      sizeof(next_thread->context.pc),
		      "PC and SP must be adjacent in context");
	static_assert(offsetof(thread_t, context.fp) ==
			      offsetof(thread_t, context.sp) +
				      sizeof(next_thread->context.sp),
		      "SP and FP must be adjacent in context");

	// The new PC must be in x16 or x17 so ARMv8.5-BTI will treat the BR
	// below as a call trampoline, and thus allow it to jump to the BTI C
	// instruction at a new thread's entry point.
	register register_t new_pc __asm__("x16") = next_thread->context.pc;
	register register_t new_sp __asm__("x6")  = next_thread->context.sp;
	register register_t new_fp __asm__("x7")  = next_thread->context.fp;
	register uintptr_t  new_tls_base __asm__("x8") =
		thread_get_tls_base(next_thread);

	__asm__ volatile(
		"adr	%[old_pc], .Lthread_continue.%=		;"
		"mov	%[old_sp], sp				;"
		"mov	%[old_fp], x29				;"
		"mov	sp, %[new_sp]				;"
		"mov	x29, %[new_fp]				;"
		"msr	TPIDR_EL2, %[new_tls_base]		;"
		"stp	%[old_pc], %[old_sp], [%[old_context]]	;"
		"str	%[old_fp], [%[old_context], 16]		;"
		"br	%[new_pc]				;"
		".Lthread_continue.%=:				;"
#if defined(ARCH_ARM_FEAT_BTI)
		"bti	j					;"
#endif
		: [old] "+r"(old), [old_pc] "=&r"(old_pc),
		  [old_sp] "=&r"(old_sp), [old_fp] "=&r"(old_fp),
		  [old_context] "+r"(old_context), [new_pc] "+r"(new_pc),
		  [new_sp] "+r"(new_sp), [new_fp] "+r"(new_fp),
		  [new_tls_base] "+r"(new_tls_base), [ticks] "+r"(ticks)
		: /* This must not have any inputs */
		: "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x17", "x18",
		  "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27",
		  "x28", "x30", "cc", "memory");

	// Update schedtime from the tick count passed by the previous thread
	*schedtime = ticks;

	return old;
}

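// Jump to the given thread's saved context without saving the current
// register state. On cold boot the saved context is the initial one set up by
// thread_arch_init_context(), so execution enters thread_arch_main(); on warm
// boot it resumes a context saved by thread_freeze().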
noreturn void
thread_arch_set_thread(thread_t *thread)
{
	// This should only be called on the idle thread during power-up, which
	// should already be the current thread for TLS. It discards the current
	// execution state.
	assert(thread == thread_get_self());
	assert(thread == idle_thread());

	// The previous thread and the scheduling time must be kept in X0 and X1
	// to ensure that thread_arch_main() receives them as arguments on the
	// first context switch during CPU cold boot. The scheduling time is set
	// to 0 because we consider the idle thread to have been scheduled at
	// the epoch. These are unused on warm boot, which is always resuming a
	// thread_freeze() call.
	register thread_t *old __asm__("x0")   = thread;
	register ticks_t   ticks __asm__("x1") = (ticks_t)0U;

	// The new PC must be in x16 or x17 so ARMv8.5-BTI will treat the BR
	// below as a call trampoline, and thus allow it to jump to the BTI C
	// instruction at a new thread's entry point.
	register register_t new_pc __asm__("x16");
	new_pc		  = thread->context.pc;
	register_t new_sp = thread->context.sp;
	register_t new_fp = thread->context.fp;

	__asm__ volatile(
		"mov	sp, %[new_sp]			;"
		"mov	x29, %[new_fp]			;"
		"br	%[new_pc]			;"
		:
		: [old] "r"(old), [ticks] "r"(ticks), [new_pc] "r"(new_pc),
		  [new_sp] "r"(new_sp), [new_fp] "r"(new_fp)
		: "memory");
	__builtin_unreachable();
}

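// Save a resume point for the current thread, then call fn(param) on the
// current stack. If fn returns, its return value is passed back in X0 and
// returned directly; if the saved context is instead resumed later (e.g. by
// thread_arch_set_thread() on a warm boot), the thread state is reloaded and
// resumed_result is returned.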
register_t
thread_freeze(fptr_t fn, register_t param, register_t resumed_result)
{
	TRACE(INFO, INFO, "thread_freeze start fn: {:#x} param: {:#x}",
	      (uintptr_t)fn, (uintptr_t)param);

	trigger_thread_save_state_event();

	thread_t *thread = thread_get_self();
	assert(thread != NULL);

	// The parameter must be kept in X0 so the freeze function gets it as an
	// argument.
	register register_t x0 __asm__("x0") = param;

	// The remaining hard-coded registers here are only needed to
	// ensure a correct clobber list below. The union of the clobber
	// list, fixed output registers and explicitly saved registers
	// (x29, sp and pc) must be the entire integer register state.
	register register_t saved_pc __asm__("x1");
	register register_t saved_sp __asm__("x2");
	register uintptr_t  context __asm__("x3") =
		(uintptr_t)&thread->context.pc;
	register fptr_t fn_reg __asm__("x4") = fn;
	register bool	is_resuming __asm__("x5");

	static_assert(offsetof(thread_t, context.sp) ==
			      offsetof(thread_t, context.pc) +
				      sizeof(thread->context.pc),
		      "PC and SP must be adjacent in context");
	static_assert(offsetof(thread_t, context.fp) ==
			      offsetof(thread_t, context.sp) +
				      sizeof(thread->context.sp),
		      "SP and FP must be adjacent in context");

	__asm__ volatile(
		"adr	%[saved_pc], .Lthread_freeze.resumed.%=	;"
		"mov	%[saved_sp], sp				;"
		"stp	%[saved_pc], %[saved_sp], [%[context]]	;"
		"str	x29, [%[context], 16]			;"
		"blr	%[fn_reg]				;"
		"mov	%[is_resuming], 0			;"
		"b	.Lthread_freeze.done.%=			;"
		".Lthread_freeze.resumed.%=:			;"
#if defined(ARCH_ARM_FEAT_BTI)
		"bti	j					;"
#endif
		"mov	%[is_resuming], 1			;"
		".Lthread_freeze.done.%=:			;"
		: [is_resuming] "=%r"(is_resuming), [saved_pc] "=&r"(saved_pc),
		  [saved_sp] "=&r"(saved_sp), [context] "+r"(context),
		  [fn_reg] "+r"(fn_reg), "+r"(x0)
		: /* This must not have any inputs */
		: "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14",
		  "x15", "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
		  "x24", "x25", "x26", "x27", "x28", "x30", "cc", "memory");

	if (is_resuming) {
		x0 = resumed_result;
		trigger_thread_load_state_event(false);

		TRACE(INFO, INFO, "thread_freeze resumed: {:#x}", x0);
	} else {
		TRACE(INFO, INFO, "thread_freeze returned: {:#x}", x0);
	}

	return x0;
}

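// Discard the current stack contents and call fn(param) from the top of the
// current thread's stack with a cleared frame pointer. fn must not return.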
noreturn void
thread_reset_stack(fptr_noreturn_t fn, register_t param)
{
	thread_t	   *thread	     = thread_get_self();
	register register_t x0 __asm__("x0") = param;
	uintptr_t new_sp = (uintptr_t)thread->stack_base + thread->stack_size;

	__asm__ volatile("mov	sp, %[new_sp]	;"
			 "mov	x29, 0		;"
			 "blr	%[new_pc]	;"
			 :
			 : [new_pc] "r"(fn), [new_sp] "r"(new_sp), "r"(x0)
			 : "memory");
	panic("returned to thread_reset_stack()");
}

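// Initialise a thread's saved context so that the first switch to it enters
// thread_arch_main() at the top of its stack with a zero frame pointer.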
void
thread_arch_init_context(thread_t *thread)
{
	assert(thread != NULL);

	thread->context.pc = (uintptr_t)thread_arch_main;
	thread->context.sp = (uintptr_t)thread->stack_base + thread->stack_size;
	thread->context.fp = (uintptr_t)0;
}

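// Map the memory backing the thread's stack (stack_mem) into the hypervisor
// address space at stack_base, as writeback, read-write, inner-shareable
// memory.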
error_t
thread_arch_map_stack(thread_t *thread)
{
	error_t err;

	assert(thread != NULL);
	assert(thread->stack_base != 0U);

	partition_t *partition = thread->header.partition;
	paddr_t	     stack_phys =
		partition_virt_to_phys(partition, thread->stack_mem);

	pgtable_hyp_start();
	err = pgtable_hyp_map(partition, thread->stack_base, thread->stack_size,
			      stack_phys, PGTABLE_HYP_MEMTYPE_WRITEBACK,
			      PGTABLE_ACCESS_RW,
			      VMSA_SHAREABILITY_INNER_SHAREABLE);
	pgtable_hyp_commit();

	return err;
}

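// Remove the hypervisor mapping of the thread's stack.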
void
thread_arch_unmap_stack(thread_t *thread)
{
	pgtable_hyp_start();
	pgtable_hyp_unmap(thread->header.partition, thread->stack_base,
			  thread->stack_size, thread->stack_size);
	pgtable_hyp_commit();
}