/*
 * Copyright (c) 2019 Carlo Caione <ccaione@baylibre.com>
 *
 * SPDX-License-Identifier: Apache-2.0
 */

/**
 * @file
 * @brief Kernel fatal error handler for ARM64 Cortex-A
 *
 * This module provides the z_arm64_fatal_error() routine for ARM64 Cortex-A
 * CPUs and the z_arm64_do_kernel_oops() routine that handles software-generated
 * fatal exceptions.
 */

#include <zephyr/debug/symtab.h>
#include <zephyr/drivers/pm_cpu_ops.h>
#include <zephyr/arch/common/exc_handle.h>
#include <zephyr/kernel.h>
#include <zephyr/linker/linker-defs.h>
#include <zephyr/logging/log.h>
#include <zephyr/sys/poweroff.h>
#include <kernel_arch_func.h>
#include <kernel_arch_interface.h>
#include <zephyr/arch/exception.h>

#include "paging.h"

LOG_MODULE_DECLARE(os, CONFIG_KERNEL_LOG_LEVEL);

#ifdef CONFIG_ARM64_SAFE_EXCEPTION_STACK
K_KERNEL_PINNED_STACK_ARRAY_DEFINE(z_arm64_safe_exception_stacks,
				   CONFIG_MP_MAX_NUM_CPUS,
				   CONFIG_ARM64_SAFE_EXCEPTION_STACK_SIZE);

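/*
 * Set up the per-CPU safe exception stack: record its top in the per-CPU
 * arch data and load it into SP_EL0 so the exception entry code can fall
 * back to it, then reset the stack-limit bookkeeping used by stack
 * protection.
 */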
void z_arm64_safe_exception_stack_init(void)
{
	int cpu_id;
	char *safe_exc_sp;

	cpu_id = arch_curr_cpu()->id;
	safe_exc_sp = K_KERNEL_STACK_BUFFER(z_arm64_safe_exception_stacks[cpu_id]) +
		      CONFIG_ARM64_SAFE_EXCEPTION_STACK_SIZE;
	arch_curr_cpu()->arch.safe_exception_stack = (uint64_t)safe_exc_sp;
	write_sp_el0((uint64_t)safe_exc_sp);

	arch_curr_cpu()->arch.current_stack_limit = 0UL;
	arch_curr_cpu()->arch.corrupted_sp = 0UL;
}
#endif

#ifdef CONFIG_USERSPACE
Z_EXC_DECLARE(z_arm64_user_string_nlen);

static const struct z_exc_handle exceptions[] = {
	Z_EXC_HANDLE(z_arm64_user_string_nlen),
};
#endif /* CONFIG_USERSPACE */

#ifdef CONFIG_EXCEPTION_DEBUG
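/*
 * Decode ESR_ELn: print the exception class (EC), instruction length (IL)
 * and syndrome (ISS) fields. *dump_far is set when FAR_ELn holds a valid
 * fault address for the reported exception class.
 */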
static void dump_esr(uint64_t esr, bool *dump_far)
{
	const char *err;

	switch (GET_ESR_EC(esr)) {
	case 0b000000: /* 0x00 */
		err = "Unknown reason";
		break;
	case 0b000001: /* 0x01 */
		err = "Trapped WFI or WFE instruction execution";
		break;
	case 0b000011: /* 0x03 */
		err = "Trapped MCR or MRC access with (coproc==0b1111) that "
		      "is not reported using EC 0b000000";
		break;
	case 0b000100: /* 0x04 */
		err = "Trapped MCRR or MRRC access with (coproc==0b1111) "
		      "that is not reported using EC 0b000000";
		break;
	case 0b000101: /* 0x05 */
		err = "Trapped MCR or MRC access with (coproc==0b1110)";
		break;
	case 0b000110: /* 0x06 */
		err = "Trapped LDC or STC access";
		break;
	case 0b000111: /* 0x07 */
		err = "Trapped access to SVE, Advanced SIMD, or "
		      "floating-point functionality";
		break;
	case 0b001100: /* 0x0c */
		err = "Trapped MRRC access with (coproc==0b1110)";
		break;
	case 0b001101: /* 0x0d */
		err = "Branch Target Exception";
		break;
	case 0b001110: /* 0x0e */
		err = "Illegal Execution state";
		break;
	case 0b010001: /* 0x11 */
		err = "SVC instruction execution in AArch32 state";
		break;
	case 0b011000: /* 0x18 */
		err = "Trapped MSR, MRS or System instruction execution in "
		      "AArch64 state, that is not reported using EC "
		      "0b000000, 0b000001 or 0b000111";
		break;
	case 0b011001: /* 0x19 */
		err = "Trapped access to SVE functionality";
		break;
	case 0b100000: /* 0x20 */
		*dump_far = true;
		err = "Instruction Abort from a lower Exception level, that "
		      "might be using AArch32 or AArch64";
		break;
	case 0b100001: /* 0x21 */
		*dump_far = true;
		err = "Instruction Abort taken without a change in Exception "
		      "level.";
		break;
	case 0b100010: /* 0x22 */
		*dump_far = true;
		err = "PC alignment fault exception.";
		break;
	case 0b100100: /* 0x24 */
		*dump_far = true;
		err = "Data Abort from a lower Exception level, that might "
		      "be using AArch32 or AArch64";
		break;
	case 0b100101: /* 0x25 */
		*dump_far = true;
		err = "Data Abort taken without a change in Exception level";
		break;
	case 0b100110: /* 0x26 */
		err = "SP alignment fault exception";
		break;
	case 0b101000: /* 0x28 */
		err = "Trapped floating-point exception taken from AArch32 "
		      "state";
		break;
	case 0b101100: /* 0x2c */
		err = "Trapped floating-point exception taken from AArch64 "
		      "state.";
		break;
	case 0b101111: /* 0x2f */
		err = "SError interrupt";
		break;
	case 0b110000: /* 0x30 */
		err = "Breakpoint exception from a lower Exception level, "
		      "that might be using AArch32 or AArch64";
		break;
	case 0b110001: /* 0x31 */
		err = "Breakpoint exception taken without a change in "
		      "Exception level";
		break;
	case 0b110010: /* 0x32 */
		err = "Software Step exception from a lower Exception level, "
		      "that might be using AArch32 or AArch64";
		break;
	case 0b110011: /* 0x33 */
		err = "Software Step exception taken without a change in "
		      "Exception level";
		break;
	case 0b110100: /* 0x34 */
		*dump_far = true;
		err = "Watchpoint exception from a lower Exception level, "
		      "that might be using AArch32 or AArch64";
		break;
	case 0b110101: /* 0x35 */
		*dump_far = true;
		err = "Watchpoint exception taken without a change in "
		      "Exception level.";
		break;
	case 0b111000: /* 0x38 */
		err = "BKPT instruction execution in AArch32 state";
		break;
	case 0b111100: /* 0x3c */
		err = "BRK instruction execution in AArch64 state.";
		break;
	default:
		err = "Unknown";
	}

	EXCEPTION_DUMP("ESR_ELn: 0x%016llx", esr);
	EXCEPTION_DUMP("  EC:  0x%llx (%s)", GET_ESR_EC(esr), err);
	EXCEPTION_DUMP("  IL:  0x%llx", GET_ESR_IL(esr));
	EXCEPTION_DUMP("  ISS: 0x%llx", GET_ESR_ISS(esr));
}

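/* Dump the general purpose registers saved in the exception stack frame */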
static void esf_dump(const struct arch_esf *esf)
{
	EXCEPTION_DUMP("x0:  0x%016llx  x1:  0x%016llx", esf->x0, esf->x1);
	EXCEPTION_DUMP("x2:  0x%016llx  x3:  0x%016llx", esf->x2, esf->x3);
	EXCEPTION_DUMP("x4:  0x%016llx  x5:  0x%016llx", esf->x4, esf->x5);
	EXCEPTION_DUMP("x6:  0x%016llx  x7:  0x%016llx", esf->x6, esf->x7);
	EXCEPTION_DUMP("x8:  0x%016llx  x9:  0x%016llx", esf->x8, esf->x9);
	EXCEPTION_DUMP("x10: 0x%016llx  x11: 0x%016llx", esf->x10, esf->x11);
	EXCEPTION_DUMP("x12: 0x%016llx  x13: 0x%016llx", esf->x12, esf->x13);
	EXCEPTION_DUMP("x14: 0x%016llx  x15: 0x%016llx", esf->x14, esf->x15);
	EXCEPTION_DUMP("x16: 0x%016llx  x17: 0x%016llx", esf->x16, esf->x17);
	EXCEPTION_DUMP("x18: 0x%016llx  lr:  0x%016llx", esf->x18, esf->lr);
}
#endif /* CONFIG_EXCEPTION_DEBUG */

#ifdef CONFIG_ARCH_STACKWALK
typedef bool (*arm64_stacktrace_cb)(void *cookie, unsigned long addr, void *fp);

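/*
 * Sanity-check a stack frame address before the stack walker dereferences
 * it: reject NULL or misaligned contents and make sure the address is
 * actually mapped.
 */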
static bool is_address_mapped(uint64_t *addr)
{
	uintptr_t *phys = NULL;

	if (*addr == 0U) {
		return false;
	}

	/* Check alignment. */
	if ((*addr & (sizeof(uint32_t) - 1U)) != 0U) {
		return false;
	}

	return !arch_page_phys_get((void *) addr, phys);
}

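/*
 * A saved LR is only followed by the stack walker if it is non-NULL,
 * 32-bit aligned and points into the kernel text region.
 */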
static bool is_valid_jump_address(uint64_t *addr)
{
	if (*addr == 0U) {
		return false;
	}

	/* Check alignment. */
	if ((*addr & (sizeof(uint32_t) - 1U)) != 0U) {
		return false;
	}

	return ((*addr >= (uint64_t)__text_region_start) &&
		(*addr <= (uint64_t)(__text_region_end)));
}

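/*
 * Walk the GCC-style frame-pointer chain starting from the frame recorded
 * in the exception stack frame, invoking the callback for every valid
 * return address until max_frames frames have been visited or the chain
 * ends.
 */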
static void walk_stackframe(arm64_stacktrace_cb cb, void *cookie, const struct arch_esf *esf,
			    int max_frames)
{
	/*
	 * For GCC:
	 *
	 *  ^  +-----------------+
	 *  |  |                 |
	 *  |  |                 |
	 *  |  |                 |
	 *  |  |                 |
	 *  |  | function stack  |
	 *  |  |                 |
	 *  |  |                 |
	 *  |  |                 |
	 *  |  |                 |
	 *  |  +-----------------+
	 *  |  |       LR        |
	 *  |  +-----------------+
	 *  |  |   previous FP   | <---+ FP
	 *  +  +-----------------+
	 */

	uint64_t *fp;
	uint64_t lr;

	if (esf != NULL) {
		fp = (uint64_t *) esf->fp;
	} else {
		return;
	}

	for (int i = 0; (fp != NULL) && (i < max_frames); i++) {
		if (!is_address_mapped(fp)) {
			break;
		}
		lr = fp[1];
		if (!is_valid_jump_address(&lr)) {
			break;
		}
		if (!cb(cookie, lr, fp)) {
			break;
		}
		fp = (uint64_t *) fp[0];
	}
}

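/*
 * arch_stack_walk() implementation for ARM64: only unwinding from an
 * exception frame is supported, the thread argument is ignored.
 */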
void arch_stack_walk(stack_trace_callback_fn callback_fn, void *cookie,
		     const struct k_thread *thread, const struct arch_esf *esf)
{
	ARG_UNUSED(thread);

	walk_stackframe((arm64_stacktrace_cb)callback_fn, cookie, esf,
			CONFIG_ARCH_STACKWALK_MAX_FRAMES);
}
#endif /* CONFIG_ARCH_STACKWALK */

#ifdef CONFIG_EXCEPTION_STACK_TRACE
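/*
 * Stack walker callback: print one backtrace entry, resolving the symbol
 * name when CONFIG_SYMTAB is enabled.
 */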
static bool print_trace_address(void *arg, unsigned long lr, void *fp)
{
	int *i = arg;
#ifdef CONFIG_SYMTAB
	uint32_t offset = 0;
	const char *name = symtab_find_symbol_name(lr, &offset);

	EXCEPTION_DUMP("     %d: fp: 0x%016llx lr: 0x%016lx [%s+0x%x]",
			(*i)++, (uint64_t)fp, lr, name, offset);
#else
	EXCEPTION_DUMP("     %d: fp: 0x%016llx lr: 0x%016lx",
			(*i)++, (uint64_t)fp, lr);
#endif /* CONFIG_SYMTAB */

	return true;
}

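/* Print a call trace for the faulting context */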
static void esf_unwind(const struct arch_esf *esf)
{
	int i = 0;

	EXCEPTION_DUMP("");
	EXCEPTION_DUMP("call trace:");
	walk_stackframe(print_trace_address, &i, esf, CONFIG_ARCH_STACKWALK_MAX_FRAMES);
	EXCEPTION_DUMP("");
}
#endif /* CONFIG_EXCEPTION_STACK_TRACE */

#ifdef CONFIG_ARM64_STACK_PROTECTION
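/*
 * Check whether a data abort was caused by a stack overflowing into its
 * guard region, either in the kernel (using the per-CPU stack-limit
 * bookkeeping) or, with CONFIG_USERSPACE, in a user thread. Returns true
 * when the fault should be reported as K_ERR_STACK_CHK_FAIL.
 */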
static bool z_arm64_stack_corruption_check(struct arch_esf *esf, uint64_t esr, uint64_t far)
{
	uint64_t sp, sp_limit, guard_start;

	/* 0x25 means data abort from current EL */
	if (GET_ESR_EC(esr) == 0x25) {
		sp_limit = arch_curr_cpu()->arch.current_stack_limit;
		guard_start = sp_limit - Z_ARM64_STACK_GUARD_SIZE;
		sp = arch_curr_cpu()->arch.corrupted_sp;
		if ((sp != 0 && sp <= sp_limit) || (guard_start <= far && far <= sp_limit)) {
#ifdef CONFIG_FPU_SHARING
			/*
			 * We are running on the safe exception stack and the stack has
			 * definitely overflowed, so flush the FPU context to its owner and
			 * then disable the FPU trap to avoid a new nested exception being
			 * triggered by FPU accesses (e.g. through va_args).
			 */
			arch_flush_local_fpu();
			write_cpacr_el1(read_cpacr_el1() | CPACR_EL1_FPEN_NOTRAP);
#endif
			arch_curr_cpu()->arch.corrupted_sp = 0UL;
			EXCEPTION_DUMP("STACK OVERFLOW FROM KERNEL,"
				" SP: 0x%llx OR FAR: 0x%llx INVALID,"
				" SP LIMIT: 0x%llx", sp, far, sp_limit);
			return true;
		}
	}
#ifdef CONFIG_USERSPACE
	else if ((_current->base.user_options & K_USER) != 0 && GET_ESR_EC(esr) == 0x24) {
		sp_limit = (uint64_t)_current->stack_info.start;
		guard_start = sp_limit - Z_ARM64_STACK_GUARD_SIZE;
		sp = esf->sp;
		if (sp <= sp_limit || (guard_start <= far && far <= sp_limit)) {
			EXCEPTION_DUMP("STACK OVERFLOW FROM USERSPACE,"
					" SP: 0x%llx OR FAR: 0x%llx INVALID,"
					" SP LIMIT: 0x%llx", sp, far, sp_limit);
			return true;
		}
	}
#endif
	return false;
}
#endif

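/*
 * Under CONFIG_USERSPACE, check whether the faulting PC lies inside one of
 * the registered fixup regions; if so, redirect ELR to the fixup handler
 * and report the fault as recoverable.
 */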
static bool is_recoverable(struct arch_esf *esf, uint64_t esr, uint64_t far,
			   uint64_t elr)
{
	ARG_UNUSED(esr);
	ARG_UNUSED(far);
	ARG_UNUSED(elr);

	if (!esf) {
		return false;
	}

#ifdef CONFIG_USERSPACE
	for (int i = 0; i < ARRAY_SIZE(exceptions); i++) {
		/* Mask out instruction mode */
		uint64_t start = (uint64_t)exceptions[i].start;
		uint64_t end = (uint64_t)exceptions[i].end;

		if (esf->elr >= start && esf->elr < end) {
			esf->elr = (uint64_t)(exceptions[i].fixup);
			return true;
		}
	}
#endif

	return false;
}

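/**
 * @brief Fatal error handler for ARM64 Cortex-A
 *
 * Collects the syndrome registers for the current exception level, gives
 * stack-overflow detection, demand paging and recoverable-fault fixups a
 * chance to handle the exception, dumps debug information and finally hands
 * over to z_fatal_error().
 *
 * @param reason fatal error reason
 * @param esf exception frame, or NULL if not available
 */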
void z_arm64_fatal_error(unsigned int reason, struct arch_esf *esf)
{
	uint64_t esr = 0;
	uint64_t elr = 0;
	uint64_t far = 0;
	uint64_t el;

	if (reason != K_ERR_SPURIOUS_IRQ) {
		el = read_currentel();

		switch (GET_EL(el)) {
		case MODE_EL1:
			esr = read_esr_el1();
			far = read_far_el1();
			elr = read_elr_el1();
			break;
#if !defined(CONFIG_ARMV8_R)
		case MODE_EL3:
			esr = read_esr_el3();
			far = read_far_el3();
			elr = read_elr_el3();
			break;
#endif /* CONFIG_ARMV8_R */
		}

#ifdef CONFIG_ARM64_STACK_PROTECTION
		if (z_arm64_stack_corruption_check(esf, esr, far)) {
			reason = K_ERR_STACK_CHK_FAIL;
		}
#endif

		if (IS_ENABLED(CONFIG_DEMAND_PAGING) &&
		    reason != K_ERR_STACK_CHK_FAIL &&
		    z_arm64_do_demand_paging(esf, esr, far)) {
			return;
		}

		if (GET_EL(el) != MODE_EL0) {
#ifdef CONFIG_EXCEPTION_DEBUG
			bool dump_far = false;

			EXCEPTION_DUMP("ELR_ELn: 0x%016llx", elr);

			dump_esr(esr, &dump_far);

			if (dump_far) {
				EXCEPTION_DUMP("FAR_ELn: 0x%016llx", far);
			}

			EXCEPTION_DUMP("TPIDRRO: 0x%016llx", read_tpidrro_el0());
#endif /* CONFIG_EXCEPTION_DEBUG */

			if (is_recoverable(esf, esr, far, elr) &&
			    reason != K_ERR_STACK_CHK_FAIL) {
				return;
			}
		}
	}

#ifdef CONFIG_EXCEPTION_DEBUG
	if (esf != NULL) {
		esf_dump(esf);
	}

#ifdef CONFIG_EXCEPTION_STACK_TRACE
	esf_unwind(esf);
#endif /* CONFIG_EXCEPTION_STACK_TRACE */
#endif /* CONFIG_EXCEPTION_DEBUG */

	z_fatal_error(reason, esf);
}

/**
 * @brief Handle a software-generated fatal exception
 * (e.g. kernel oops, panic, etc.).
 *
 * @param esf exception frame
 */
void z_arm64_do_kernel_oops(struct arch_esf *esf)
{
	/* x8 holds the exception reason */
	unsigned int reason = esf->x8;

#if defined(CONFIG_USERSPACE)
	/*
	 * User mode is only allowed to induce oopses and stack check
	 * failures via software-triggered system fatal exceptions.
	 */
	if (((_current->base.user_options & K_USER) != 0) &&
		reason != K_ERR_STACK_CHK_FAIL) {
		reason = K_ERR_KERNEL_OOPS;
	}
#endif

	z_arm64_fatal_error(reason, esf);
}

#ifdef CONFIG_USERSPACE
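/*
 * Syscall-time oops handler: reports a kernel oops using the supplied
 * syscall stack frame as the exception frame.
 */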
FUNC_NORETURN void arch_syscall_oops(void *ssf_ptr)
{
	z_arm64_fatal_error(K_ERR_KERNEL_OOPS, ssf_ptr);
	CODE_UNREACHABLE;
}
#endif

#if defined(CONFIG_PM_CPU_OPS_PSCI)
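/*
 * With PSCI CPU ops available, override arch_system_halt() to request a
 * platform power-off (when CONFIG_POWEROFF is enabled) and spin forever as
 * a fallback.
 */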
FUNC_NORETURN void arch_system_halt(unsigned int reason)
{
	ARG_UNUSED(reason);

	(void)arch_irq_lock();

#ifdef CONFIG_POWEROFF
	sys_poweroff();
#endif /* CONFIG_POWEROFF */

	for (;;) {
		/* Spin endlessly as fallback */
	}
}
#endif