1 /*
2  * Copyright (C) 2015-2018 Alibaba Group Holding Limited
3  */
4 #include <stdarg.h>
5 #include <stdbool.h>
6 #include <time.h>
7 #include "debug_api.h"
8 #include "k_compiler.h"
9 #include "aos/kernel.h"
10 #include "aos/debug.h"
11 #include "aos/errno.h"
12 
13 int backtrace_now(int (*print_func)(const char *fmt, ...));
14 void debug_panic_backtrace(char *PC, int *SP, char *LR,
15                            int (*print_func)(const char *fmt, ...));
16 
17 #if AOS_COMP_CLI
18 #include "aos/cli.h"
19 #endif
20 
21 #ifdef AOS_COMP_KV
22 #include "aos/kv.h"
23 #endif
24 
25 /* reboot reason*/
26 #define DEFAULT_REBOOT_REASON DEBUG_REBOOT_REASON_REPOWER
27 
28 #define SYS_REBOOT_REASON "reboot reason"
29 #ifndef panic_print
30 #define panic_print printk
31 #endif
32 #define panic_print_direct printk_direct
33 
34 #if (RHINO_CONFIG_CPU_NUM > 1)
35 kspinlock_t g_panic_print_lock = {KRHINO_SPINLOCK_FREE_VAL, 0, 0};
36 #endif
37 
38 #if DEBUG_ULOG_FLUSH
/* Implemented outside this file. */
extern void uring_fifo_flush(void);

/*
 * Flush pending log output.
 * Thin wrapper that forwards to uring_fifo_flush(); only built when
 * DEBUG_ULOG_FLUSH is enabled.
 */
void debug_log_flush(void)
{
    uring_fifo_flush();
}
44 #endif
45 
/*
 * Hook called by panic_goto_cli() right before the panic CLI is entered.
 * Whether the CLI can be used during a panic depends on the MCU, so this
 * default implementation does nothing. It is declared weak so that another
 * definition can replace it at link time.
 */
__attribute__((weak)) void alios_cli_panic_hook(void)
{
}
51 
52 extern void hal_reboot(void);
/* the following functions should be defined by arch\...\panic_c.c */
54 extern void panicShowRegs(void *context,
55                           int (*print_func)(const char *fmt, ...));
56 extern void panicGetCtx(void *context, char **pPC, char **pLR, int **pSP);
57 
/* the following functions should be defined by arch\...\backtrace.c */
59 extern int  backtrace_caller(char *PC, int *SP,
60                              int (*print_func)(const char *fmt, ...));
61 extern int  backtrace_callee(char *PC, int *SP, char *LR,
62                              int (*print_func)(const char *fmt, ...));
63 
/* how many dump steps had finished when the crash happened */
65 #define DEBUG_PANIC_STEP_MAX    32
66 volatile uint32_t g_crash_steps = 0;
67 volatile uint32_t g_crash_by_NMI = 0;
68 volatile uint32_t g_crash_not_reboot = 0;
69 
/*
 * Stop normal execution before a dump: on multi-core builds the other cores
 * are frozen first, then the scheduler is disabled.
 */
void debug_cpu_stop(void)
{
#if (RHINO_CONFIG_CPU_NUM > 1)
    cpu_freeze_others();
#endif

    /* The result of krhino_sched_disable() is not used here. */
    (void)krhino_sched_disable();
}
77 
debug_cpu_goto_cli(void)78 void debug_cpu_goto_cli(void)
79 {
80     g_crash_not_reboot = OS_PANIC_NOT_REBOOT;
81 }
82 
debug_cpu_in_crash(void)83 uint32_t debug_cpu_in_crash(void)
84 {
85     return g_crash_steps;
86 }
87 
panicNmiFlagSet()88 void panicNmiFlagSet()
89 {
90     g_crash_by_NMI = OS_PANIC_BY_NMI;
91 }
92 
panicNmiFlagCheck()93 int panicNmiFlagCheck()
94 {
95     return (g_crash_by_NMI == OS_PANIC_BY_NMI);
96 }
97 
/*
 * Enter the panic CLI on the current core and never return.
 *
 * Sequence: call cpu_intrpt_save(), make sure the scheduler lock count of the
 * current core is non-zero, switch the CLI to direct read mode, run the
 * MCU-specific hook, start the panic CLI (when AOS_COMP_CLI is enabled) and
 * finally spin forever.
 */
static void panic_goto_cli(void)
{
    extern uint8_t g_sched_lock[];
    extern int32_t g_cli_direct_read;
#if AOS_COMP_CLI
    extern void cli_main_panic(void);
#endif

    /* The returned value is not kept, so nothing here restores it later. */
    cpu_intrpt_save();

    /* Only raise the lock count when it is currently zero. */
    if (g_sched_lock[cpu_cur_get()] == 0) {
        g_sched_lock[cpu_cur_get()] = 1;
    }

    g_cli_direct_read = 1;

    alios_cli_panic_hook();
#if AOS_COMP_CLI
    cli_main_panic();
#endif

    for (;;) {
        /* stay here */
    }
}
117 
/*
 * Should the exception be restored?
 * Returns 1 for YES, 0 for NO. This implementation always answers NO.
 */
int panicRestoreCheck(void)
{
    const int restore = 0;

    return restore;
}
124 
debug_panic_end(void)125 static void debug_panic_end(void)
126 {
127 #if !DEBUG_PANIC_CLI
128     if (g_crash_not_reboot == OS_PANIC_NOT_REBOOT) {
129         panic_goto_cli();
130     } else if (panicNmiFlagCheck() == 0) {
131         hal_reboot();
132     } else { /* '$' is also effective in release version*/
133         panic_goto_cli();
134     }
135 #else /* debug version*/
136     panic_goto_cli();
137 #endif
138 }
139 
/*
 * Dump a stack region, four words per output line.
 *
 * stack: first word of the region; nothing is printed when it is NULL.
 * size:  number of cpu_stack_t words to dump.
 *
 * A run of consecutive all-zero lines is collapsed into a single
 * "All Zeros" marker line. When size is not a multiple of four, the last
 * line contains only the remaining 1-3 words, so no word beyond
 * stack[size - 1] is ever read.
 */
void stack_dump(cpu_stack_t *stack, uint32_t size)
{
    uint32_t idx;
    bool     in_zero_run = false;

    if (stack == NULL) {
        return;
    }

    /* Full lines; "size - idx" cannot wrap because idx never exceeds size. */
    for (idx = 0; size - idx >= 4; idx += 4) {
        if (stack[idx] == 0 && stack[idx + 1] == 0
            && stack[idx + 2] == 0 && stack[idx + 3] == 0) {
            if (!in_zero_run) {
                panic_print(".........................( All Zeros ).........................\r\n");
                in_zero_run = true;
            }
            continue;
        }

        in_zero_run = false;
        /* %X takes an unsigned int, so convert explicitly instead of passing pointers. */
        panic_print("(0x%08X): 0x%08X 0x%08X 0x%08X 0x%08X\r\n",
                    (unsigned int)(uintptr_t)&stack[idx],
                    (unsigned int)(uintptr_t)stack[idx],
                    (unsigned int)(uintptr_t)stack[idx + 1],
                    (unsigned int)(uintptr_t)stack[idx + 2],
                    (unsigned int)(uintptr_t)stack[idx + 3]);
    }

    /* Partial last line: print only the words that belong to the region. */
    if (idx < size) {
        panic_print("(0x%08X):", (unsigned int)(uintptr_t)&stack[idx]);
        for (; idx < size; idx++) {
            panic_print(" 0x%08X", (unsigned int)(uintptr_t)stack[idx]);
        }
        panic_print("\r\n");
    }
}
164 
debug_cur_task_stack_dump(void)165 void debug_cur_task_stack_dump(void)
166 {
167     cpu_stack_t *stack;
168     uint32_t     stack_size;
169 
170     ktask_t *task = g_active_task[cpu_cur_get()];
171     stack         = task->task_stack_base;
172     stack_size    = task->stack_size;
173 
174     if (stack != NULL) {
175         panic_print("========== Stack info ==========\r\n");
176         stack_dump(stack, stack_size);
177     }
178 }
179 
debug_cur_task_show(void)180 void debug_cur_task_show(void)
181 {
182     ktask_t *task;
183     uint8_t  time_buffer[30];
184 
185     long long ms   = aos_calendar_localtime_get();
186     time_t rawtime = ms / 1000;
187     struct tm *tm  = localtime(&rawtime);
188 
189     memset(time_buffer, 0, sizeof(time_buffer));
190 
191     if (tm) {
192         strftime((char *)time_buffer, sizeof(time_buffer), "%F %H:%M:%S", tm);
193         panic_print("crash   time : %s\r\n", time_buffer);
194     }
195 
196     /* output crash task's name */
197     task = g_active_task[cpu_cur_get()];
198     if (task->task_name != NULL) {
199         panic_print("current task : %s\r\n", task->task_name);
200     } else {
201         panic_print("cur task name is NULL\r\n");
202     }
203 }
204 
205 #if RHINO_CONFIG_MM_DEBUG
/*
 * Print one line describing a heap block.
 *
 * b:          block header to describe; NULL is ignored.
 * print_func: printf-like output routine; panic_print_direct is used when
 *             it is NULL.
 *
 * Columns: address, state ("free"/"used", prefixed with '!' when the dye
 * does not match the state), length (or "sentinel" for a zero-length
 * block), dye check ("OK" or the unexpected dye value), owner and, when
 * RHINO_CONFIG_MM_TRACE_LVL > 0, the recorded trace entries.
 */
static void debug_print_block(k_mm_list_t *b, int (*print_func)(const char *fmt, ...))
{
    int is_free;
    int dye_ok;

    if (b == NULL) {
        return;
    }

    if (print_func == NULL) {
        print_func = panic_print_direct;
    }

    print_func("0x%08x ", (uintptr_t)b);

    /* A free block must carry the FREE dye, a used block the USED dye. */
    is_free = ((b->buf_size & MM_BUFF_FREE) != 0);
    if (is_free) {
        dye_ok = (b->dye == MM_DYE_FREE);
    } else {
        dye_ok = (b->dye == MM_DYE_USED);
    }

    print_func(dye_ok ? " " : "!");
    print_func(is_free ? "free " : "used ");

    if (MM_GET_BUF_SIZE(b) == 0) {
        print_func(" sentinel ");
    } else {
        print_func(" %6lu ", (unsigned long)MM_GET_BUF_SIZE(b));
    }

    if (dye_ok) {
        print_func("  OK ");
    } else {
        print_func(" %8x ", b->dye);
    }

    print_func(" 0x%-8x ", b->owner);

#if (RHINO_CONFIG_MM_TRACE_LVL > 0)
    /* If double free, printing the last alloc trace may be useful.
       This info is not useful if the memory was allocated and freed by
       another module in between. It is printed for free and used blocks alike.
    */
    {
        int idx;

        for (idx = 0; idx < RHINO_CONFIG_MM_TRACE_LVL; idx++) {
            print_func((idx == 0) ? " (%p" : " <- %p", b->trace[idx]);
        }
        print_func(")");
    }
#endif

    print_func("\r\n");
}
273 
274 
/*
 * Decide whether a heap block header looks corrupted.
 *
 * Returns true when
 *   - the block's dye does not match its free/used state (this also covers
 *     a dye that is neither MM_DYE_FREE nor MM_DYE_USED), or
 *   - the block is not the last block and the header found right after it
 *     carries a dye that is neither MM_DYE_USED nor MM_DYE_FREE, which is
 *     how a wrong buf_size shows up.
 * Returns false otherwise, and for a NULL pointer.
 */
static bool debug_blk_damaged(k_mm_list_t *mm_list)
{
    k_mm_list_t *next_b;

    if (mm_list == NULL) {
        return false;
    }

    if (mm_list->buf_size & MM_BUFF_FREE) {
        if (mm_list->dye != MM_DYE_FREE) {
            return true;
        }
    } else if (mm_list->dye != MM_DYE_USED) {
        return true;
    }

    /* detect bufsize, skip last block: it has no successor to inspect */
    if (mm_list->owner == MM_LAST_BLK_MAGIC) {
        return false;
    }

    next_b = MM_GET_NEXT_BLK(mm_list);

    return (next_b->dye != MM_DYE_USED) && (next_b->dye != MM_DYE_FREE);
}
304 
/*
 * Walk every block of every region of a heap and report damaged blocks.
 *
 * mmhead:     heap head to inspect; NULL is ignored.
 * print_func: printf-like output routine; panic_print_direct is used when
 *             it is NULL.
 *
 * For each block flagged by debug_blk_damaged() three lines are printed:
 * the two blocks visited before it and the block itself. The "previous"
 * pointers are not reset between regions, so they can refer to blocks of
 * the preceding region. Walking a region stops at its zero-length block.
 */
void debug_dump_mm_error(k_mm_head *mmhead, int (*print_func)(const char *fmt, ...))
{
    k_mm_region_info_t *reginfo;
    k_mm_list_t        *cur;
    k_mm_list_t        *next;
    k_mm_list_t        *prev  = NULL;
    k_mm_list_t        *pprev = NULL;

    if (mmhead == NULL) {
        return;
    }

    if (print_func == NULL) {
        print_func = panic_print_direct;
    }

    print_func("ALL BLOCKS\r\n");
    print_func("Blk_Addr    Stat     Len  Chk      Caller    Point\r\n");

    for (reginfo = mmhead->regioninfo; reginfo != NULL; reginfo = reginfo->next) {
        for (cur = MM_GET_THIS_BLK(reginfo); cur != NULL; cur = next) {
            if (debug_blk_damaged(cur)) {
                debug_print_block(pprev, print_func);
                debug_print_block(prev, print_func);
                debug_print_block(cur, print_func);
            }

            /* A zero-length block ends the walk of this region. */
            next = NULL;
            if (MM_GET_BUF_SIZE(cur)) {
                next = MM_GET_NEXT_BLK(cur);
            }

            pprev = prev;
            prev  = cur;
        }
    }
}
345 
dump_mm_all_error_block(void * pmm_head,int (* print_func)(const char * fmt,...))346 void dump_mm_all_error_block(void *pmm_head, int (*print_func)(const char *fmt, ...))
347 {
348     if (print_func == NULL) {
349         print_func = panic_print_direct;
350     }
351 
352     print_func("g_kmm_head = %8x\r\n", (unsigned int)pmm_head);
353     /* kernel and user space use the same mm head file */
354     debug_dump_mm_error(pmm_head, print_func);
355 }
356 
dump_mm_sys_error_info(int (* print_func)(const char * fmt,...))357 void dump_mm_sys_error_info(int (*print_func)(const char *fmt, ...))
358 {
359     void *pmm_head = g_kmm_head;
360     print_func("kernel space mem layout:\r\n");
361     dump_mm_all_error_block(pmm_head, print_func);
362 }
363 #endif /* RHINO_CONFIG_MM_DEBUG */
364 
/*
 * Report the state of the current core when it enters FIQ: prints a banner
 * with the core id, the registers and a backtrace.
 *
 * context: saved exception context. When it is NULL the PC/LR/SP values
 *          are not refreshed; because they are static, the values stored by
 *          an earlier call (initially NULL) are used for the backtrace.
 *
 * On multi-core builds the output is serialized with g_panic_print_lock.
 */
void fiqafterpanicHandler(void *context)
{
    static char *PC = NULL;
    static char *LR = NULL;
    static int  *SP = NULL;

#if (RHINO_CONFIG_CPU_NUM > 1)
    krhino_spin_lock(&g_panic_print_lock);
#endif

    panic_print("\r\n!!!!!!!!!! core %d Enter fiq  !!!!!!!!!!\r\n", cpu_cur_get());

    if (context) {
        panicGetCtx(context, &PC, &LR, &SP);
    }

    panicShowRegs(context, panic_print);
    debug_panic_backtrace(PC, SP, LR, panic_print);

#if (RHINO_CONFIG_CPU_NUM > 1)
    krhino_spin_unlock(&g_panic_print_lock);
#endif
}
389 
390 
391 /* fault/exception entry
392    notice: this function maybe reentried by double exception
393    first exception, input context
394    second exception, input NULL  */
panicHandler(void * context)395 void panicHandler(void *context)
396 {
397     static int  *SP = NULL;
398     static char *PC = NULL;
399     static char *LR = NULL;
400     kstat_t stat_save;
401 
402 #if RHINO_CONFIG_MM_DEBUG
403     g_mmlk_cnt = 0;
404 #endif
405 
406 #if (RHINO_CONFIG_CPU_NUM > 1)
407     krhino_spin_lock(&g_panic_print_lock);
408 #endif
409 
410     stat_save  = g_sys_stat;
411     g_sys_stat = RHINO_STOPPED;
412 
413     /* g_crash_steps++ before panicHandler */
414     if (g_crash_steps > 1 && g_crash_steps < DEBUG_PANIC_STEP_MAX) {
415         panic_print("......\r\n");
416     }
417 
418     switch (g_crash_steps) {
419         case 1:
420             if (panicNmiFlagCheck()) { /*for $#@! feature*/
421                 panic_print("\r\n!!!!!!!! Stopped by '$#@!' !!!!!!!!\r\n");
422             }
423             panic_print("\r\n!!!!!!!!!! Exception  !!!!!!!!!!\r\n");
424 
425             debug_cur_task_show();
426 
427             if (context != NULL) {
428                 panicGetCtx(context, &PC, &LR, &SP);
429             }
430 
431             panicShowRegs(context, panic_print);
432             g_crash_steps++;
433         case 2:
434             panic_print("========== Call stack ==========\r\n");
435             debug_panic_backtrace(PC, SP, LR, panic_print);
436             g_crash_steps++;
437         case 3:
438 #if (RHINO_CONFIG_MM_TLF > 0)
439             panic_print("========== Heap Info  ==========\r\n");
440             debug_mm_overview(panic_print);
441 #endif
442             g_crash_steps++;
443         case 4:
444             panic_print("========== Task Info  ==========\r\n");
445             debug_task_overview(panic_print);
446             g_crash_steps++;
447         case 5:
448 #if (RHINO_CONFIG_QUEUE > 0)
449             panic_print("========== Queue Info ==========\r\n");
450             debug_queue_overview(panic_print);
451 #endif
452             g_crash_steps++;
453         case 6:
454 #if (RHINO_CONFIG_BUF_QUEUE > 0)
455             panic_print("======== Buf Queue Info ========\r\n");
456             debug_buf_queue_overview(panic_print);
457 #endif
458             g_crash_steps++;
459         case 7:
460 #if (RHINO_CONFIG_SEM > 0)
461             panic_print("========= Sem Waiting ==========\r\n");
462             debug_sem_overview(panic_print);
463 #endif
464             g_crash_steps++;
465         case 8:
466             panic_print("======== Mutex Waiting =========\r\n");
467             debug_mutex_overview(panic_print);
468             g_crash_steps++;
469         case 9:
470 #if RHINO_CONFIG_MM_DEBUG
471             panic_print("======== all memory error blocks =========\r\n");
472             dump_mm_sys_error_info(panic_print);
473 #endif
474             g_crash_steps++;
475         case 10:
476             if (SP != NULL) {
477                 debug_cur_task_stack_dump();
478             }
479             g_crash_steps++;
480         case 11:
481             panic_print("!!!!!!!!!! dump end   !!!!!!!!!!\r\n");
482             g_crash_steps++;
483         default:
484             break;
485     }
486 
487     g_crash_steps = DEBUG_PANIC_STEP_MAX;
488 
489 #if (RHINO_CONFIG_CPU_NUM > 1)
490     krhino_spin_unlock(&g_panic_print_lock);
491 #endif
492 
493 #if DEBUG_LAST_WORD_ENABLE
494     debug_reboot_reason_update(DEBUG_REBOOT_REASON_PANIC);
495 #endif
496 
497 #if DEBUG_ULOG_FLUSH
498     debug_log_flush();
499 #endif
500 
501     debug_panic_end();
502 
503     g_sys_stat = stat_save;
504     g_crash_steps   = 0;
505     g_crash_by_NMI  = 0;
506 }
507 
/*
 * Report a fatal kernel error, dump the system state, then reboot or enter
 * the CLI through debug_panic_end().
 *
 * err:  error code; RHINO_TASK_STACK_OVF additionally prints the name of
 *       the task active on the current core.
 * file: source file of the reporting site (currently not printed).
 * line: source line of the reporting site (currently not printed).
 *
 * The other cores are frozen and the scheduler disabled first.
 * g_crash_steps is set to 1 for the duration of the dump and cleared again
 * if debug_panic_end() returns.
 */
void debug_fatal_error(kstat_t err, char *file, int line)
{
    /* Kept in the interface for callers; not used by the dump. */
    (void)file;
    (void)line;

    /* Freeze the other cores (multi-core builds) and disable the scheduler. */
    debug_cpu_stop();

    g_crash_steps = 1;

    panic_print("!!!!!!!!!! Fatal Error !!!!!!!!!!\r\n");

    if (err == RHINO_TASK_STACK_OVF) {
        panic_print("Task : %s  Stack Overflow!\r\n", g_active_task[cpu_cur_get()]->task_name);
    }

    debug_cur_task_show();

#if (RHINO_CONFIG_MM_TLF > 0)
    panic_print("========== Heap Info  ==========\r\n");
    debug_mm_overview(panic_print);
#endif

    panic_print("========== Task Info  ==========\r\n");
    debug_task_overview(panic_print);

    backtrace_now(panic_print);

#if RHINO_CONFIG_MM_DEBUG
    panic_print("======== all memory error blocks =========\r\n");
    dump_mm_sys_error_info(panic_print);
#endif

    debug_cur_task_stack_dump();

    panic_print("!!!!!!!!!! dump end   !!!!!!!!!!\r\n");

#if DEBUG_LAST_WORD_ENABLE
    debug_reboot_reason_update(DEBUG_REBOOT_REASON_FATAL_ERR);
#endif

#if DEBUG_ULOG_FLUSH
    debug_log_flush();
#endif

    debug_panic_end();

    /* Only reached when debug_panic_end() returns. */
    g_crash_steps = 0;
}
561 
/*
 * Initialize the debug component.
 * Registers the debug CLI commands when AOS_COMP_CLI is enabled and sets up
 * the last-word feature when DEBUG_LAST_WORD_ENABLE is enabled; with both
 * disabled this function does nothing.
 */
void debug_init(void)
{
#if AOS_COMP_CLI
    /* CLI commands first, same order as before. */
    debug_cli_cmd_init();
#endif

#if DEBUG_LAST_WORD_ENABLE
    debug_lastword_init();
#endif
}
572