1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) 2009 Matt Fleming <matt@console-pimps.org>
4  *
5  * This is an implementation of a DWARF unwinder. Its main purpose is
6  * for generating stacktrace information. Based on the DWARF 3
7  * specification from http://www.dwarfstd.org.
8  *
9  * TODO:
10  *	- DWARF64 doesn't work.
11  *	- Registers with DWARF_VAL_OFFSET rules aren't handled properly.
12  */
13 
14 /* #define DEBUG */
15 #include <linux/kernel.h>
16 #include <linux/io.h>
17 #include <linux/list.h>
18 #include <linux/mempool.h>
19 #include <linux/mm.h>
20 #include <linux/elf.h>
21 #include <linux/ftrace.h>
22 #include <linux/module.h>
23 #include <linux/slab.h>
24 #include <asm/dwarf.h>
25 #include <asm/unwinder.h>
26 #include <asm/sections.h>
27 #include <asm/unaligned.h>
28 #include <asm/stacktrace.h>
29 
30 /* Reserve enough memory for two stack frames */
31 #define DWARF_FRAME_MIN_REQ	2
32 /* ... with 4 registers per frame. */
33 #define DWARF_REG_MIN_REQ	(DWARF_FRAME_MIN_REQ * 4)
34 
35 static struct kmem_cache *dwarf_frame_cachep;
36 static mempool_t *dwarf_frame_pool;
37 
38 static struct kmem_cache *dwarf_reg_cachep;
39 static mempool_t *dwarf_reg_pool;
40 
41 static struct rb_root cie_root;
42 static DEFINE_SPINLOCK(dwarf_cie_lock);
43 
44 static struct rb_root fde_root;
45 static DEFINE_SPINLOCK(dwarf_fde_lock);
46 
47 static struct dwarf_cie *cached_cie;
48 
49 static unsigned int dwarf_unwinder_ready;
50 
51 /**
52  *	dwarf_frame_alloc_reg - allocate memory for a DWARF register
53  *	@frame: the DWARF frame whose list of registers we insert on
54  *	@reg_num: the register number
55  *
56  *	Allocate space for, and initialise, a dwarf reg from
57  *	dwarf_reg_pool and insert it onto the (unsorted) linked-list of
58  *	dwarf registers for @frame.
59  *
60  *	Return the initialised DWARF reg.
61  */
dwarf_frame_alloc_reg(struct dwarf_frame * frame,unsigned int reg_num)62 static struct dwarf_reg *dwarf_frame_alloc_reg(struct dwarf_frame *frame,
63 					       unsigned int reg_num)
64 {
65 	struct dwarf_reg *reg;
66 
67 	reg = mempool_alloc(dwarf_reg_pool, GFP_ATOMIC);
68 	if (!reg) {
69 		printk(KERN_WARNING "Unable to allocate a DWARF register\n");
70 		/*
71 		 * Let's just bomb hard here, we have no way to
72 		 * gracefully recover.
73 		 */
74 		UNWINDER_BUG();
75 	}
76 
77 	reg->number = reg_num;
78 	reg->addr = 0;
79 	reg->flags = 0;
80 
81 	list_add(&reg->link, &frame->reg_list);
82 
83 	return reg;
84 }
85 
dwarf_frame_free_regs(struct dwarf_frame * frame)86 static void dwarf_frame_free_regs(struct dwarf_frame *frame)
87 {
88 	struct dwarf_reg *reg, *n;
89 
90 	list_for_each_entry_safe(reg, n, &frame->reg_list, link) {
91 		list_del(&reg->link);
92 		mempool_free(reg, dwarf_reg_pool);
93 	}
94 }
95 
96 /**
97  *	dwarf_frame_reg - return a DWARF register
98  *	@frame: the DWARF frame to search in for @reg_num
99  *	@reg_num: the register number to search for
100  *
101  *	Lookup and return the dwarf reg @reg_num for this frame. Return
102  *	NULL if @reg_num is an register invalid number.
103  */
dwarf_frame_reg(struct dwarf_frame * frame,unsigned int reg_num)104 static struct dwarf_reg *dwarf_frame_reg(struct dwarf_frame *frame,
105 					 unsigned int reg_num)
106 {
107 	struct dwarf_reg *reg;
108 
109 	list_for_each_entry(reg, &frame->reg_list, link) {
110 		if (reg->number == reg_num)
111 			return reg;
112 	}
113 
114 	return NULL;
115 }
116 
117 /**
118  *	dwarf_read_addr - read dwarf data
119  *	@src: source address of data
120  *	@dst: destination address to store the data to
121  *
122  *	Read 'n' bytes from @src, where 'n' is the size of an address on
123  *	the native machine. We return the number of bytes read, which
124  *	should always be 'n'. We also have to be careful when reading
125  *	from @src and writing to @dst, because they can be arbitrarily
126  *	aligned. Return 'n' - the number of bytes read.
127  */
dwarf_read_addr(unsigned long * src,unsigned long * dst)128 static inline int dwarf_read_addr(unsigned long *src, unsigned long *dst)
129 {
130 	u32 val = get_unaligned(src);
131 	put_unaligned(val, dst);
132 	return sizeof(unsigned long *);
133 }
134 
135 /**
136  *	dwarf_read_uleb128 - read unsigned LEB128 data
137  *	@addr: the address where the ULEB128 data is stored
138  *	@ret: address to store the result
139  *
140  *	Decode an unsigned LEB128 encoded datum. The algorithm is taken
141  *	from Appendix C of the DWARF 3 spec. For information on the
142  *	encodings refer to section "7.6 - Variable Length Data". Return
143  *	the number of bytes read.
144  */
dwarf_read_uleb128(char * addr,unsigned int * ret)145 static inline unsigned long dwarf_read_uleb128(char *addr, unsigned int *ret)
146 {
147 	unsigned int result;
148 	unsigned char byte;
149 	int shift, count;
150 
151 	result = 0;
152 	shift = 0;
153 	count = 0;
154 
155 	while (1) {
156 		byte = __raw_readb(addr);
157 		addr++;
158 		count++;
159 
160 		result |= (byte & 0x7f) << shift;
161 		shift += 7;
162 
163 		if (!(byte & 0x80))
164 			break;
165 	}
166 
167 	*ret = result;
168 
169 	return count;
170 }
171 
172 /**
173  *	dwarf_read_leb128 - read signed LEB128 data
174  *	@addr: the address of the LEB128 encoded data
175  *	@ret: address to store the result
176  *
177  *	Decode signed LEB128 data. The algorithm is taken from Appendix
178  *	C of the DWARF 3 spec. Return the number of bytes read.
179  */
dwarf_read_leb128(char * addr,int * ret)180 static inline unsigned long dwarf_read_leb128(char *addr, int *ret)
181 {
182 	unsigned char byte;
183 	int result, shift;
184 	int num_bits;
185 	int count;
186 
187 	result = 0;
188 	shift = 0;
189 	count = 0;
190 
191 	while (1) {
192 		byte = __raw_readb(addr);
193 		addr++;
194 		result |= (byte & 0x7f) << shift;
195 		shift += 7;
196 		count++;
197 
198 		if (!(byte & 0x80))
199 			break;
200 	}
201 
202 	/* The number of bits in a signed integer. */
203 	num_bits = 8 * sizeof(result);
204 
205 	if ((shift < num_bits) && (byte & 0x40))
206 		result |= (-1 << shift);
207 
208 	*ret = result;
209 
210 	return count;
211 }
212 
213 /**
214  *	dwarf_read_encoded_value - return the decoded value at @addr
215  *	@addr: the address of the encoded value
216  *	@val: where to write the decoded value
217  *	@encoding: the encoding with which we can decode @addr
218  *
219  *	GCC emits encoded address in the .eh_frame FDE entries. Decode
220  *	the value at @addr using @encoding. The decoded value is written
221  *	to @val and the number of bytes read is returned.
222  */
dwarf_read_encoded_value(char * addr,unsigned long * val,char encoding)223 static int dwarf_read_encoded_value(char *addr, unsigned long *val,
224 				    char encoding)
225 {
226 	unsigned long decoded_addr = 0;
227 	int count = 0;
228 
229 	switch (encoding & 0x70) {
230 	case DW_EH_PE_absptr:
231 		break;
232 	case DW_EH_PE_pcrel:
233 		decoded_addr = (unsigned long)addr;
234 		break;
235 	default:
236 		pr_debug("encoding=0x%x\n", (encoding & 0x70));
237 		UNWINDER_BUG();
238 	}
239 
240 	if ((encoding & 0x07) == 0x00)
241 		encoding |= DW_EH_PE_udata4;
242 
243 	switch (encoding & 0x0f) {
244 	case DW_EH_PE_sdata4:
245 	case DW_EH_PE_udata4:
246 		count += 4;
247 		decoded_addr += get_unaligned((u32 *)addr);
248 		__raw_writel(decoded_addr, val);
249 		break;
250 	default:
251 		pr_debug("encoding=0x%x\n", encoding);
252 		UNWINDER_BUG();
253 	}
254 
255 	return count;
256 }
257 
258 /**
259  *	dwarf_entry_len - return the length of an FDE or CIE
260  *	@addr: the address of the entry
261  *	@len: the length of the entry
262  *
263  *	Read the initial_length field of the entry and store the size of
264  *	the entry in @len. We return the number of bytes read. Return a
265  *	count of 0 on error.
266  */
dwarf_entry_len(char * addr,unsigned long * len)267 static inline int dwarf_entry_len(char *addr, unsigned long *len)
268 {
269 	u32 initial_len;
270 	int count;
271 
272 	initial_len = get_unaligned((u32 *)addr);
273 	count = 4;
274 
275 	/*
276 	 * An initial length field value in the range DW_LEN_EXT_LO -
277 	 * DW_LEN_EXT_HI indicates an extension, and should not be
278 	 * interpreted as a length. The only extension that we currently
279 	 * understand is the use of DWARF64 addresses.
280 	 */
281 	if (initial_len >= DW_EXT_LO && initial_len <= DW_EXT_HI) {
282 		/*
283 		 * The 64-bit length field immediately follows the
284 		 * compulsory 32-bit length field.
285 		 */
286 		if (initial_len == DW_EXT_DWARF64) {
287 			*len = get_unaligned((u64 *)addr + 4);
288 			count = 12;
289 		} else {
290 			printk(KERN_WARNING "Unknown DWARF extension\n");
291 			count = 0;
292 		}
293 	} else
294 		*len = initial_len;
295 
296 	return count;
297 }
298 
299 /**
300  *	dwarf_lookup_cie - locate the cie
301  *	@cie_ptr: pointer to help with lookup
302  */
dwarf_lookup_cie(unsigned long cie_ptr)303 static struct dwarf_cie *dwarf_lookup_cie(unsigned long cie_ptr)
304 {
305 	struct rb_node **rb_node = &cie_root.rb_node;
306 	struct dwarf_cie *cie = NULL;
307 	unsigned long flags;
308 
309 	spin_lock_irqsave(&dwarf_cie_lock, flags);
310 
311 	/*
312 	 * We've cached the last CIE we looked up because chances are
313 	 * that the FDE wants this CIE.
314 	 */
315 	if (cached_cie && cached_cie->cie_pointer == cie_ptr) {
316 		cie = cached_cie;
317 		goto out;
318 	}
319 
320 	while (*rb_node) {
321 		struct dwarf_cie *cie_tmp;
322 
323 		cie_tmp = rb_entry(*rb_node, struct dwarf_cie, node);
324 		BUG_ON(!cie_tmp);
325 
326 		if (cie_ptr == cie_tmp->cie_pointer) {
327 			cie = cie_tmp;
328 			cached_cie = cie_tmp;
329 			goto out;
330 		} else {
331 			if (cie_ptr < cie_tmp->cie_pointer)
332 				rb_node = &(*rb_node)->rb_left;
333 			else
334 				rb_node = &(*rb_node)->rb_right;
335 		}
336 	}
337 
338 out:
339 	spin_unlock_irqrestore(&dwarf_cie_lock, flags);
340 	return cie;
341 }
342 
343 /**
344  *	dwarf_lookup_fde - locate the FDE that covers pc
345  *	@pc: the program counter
346  */
dwarf_lookup_fde(unsigned long pc)347 struct dwarf_fde *dwarf_lookup_fde(unsigned long pc)
348 {
349 	struct rb_node **rb_node = &fde_root.rb_node;
350 	struct dwarf_fde *fde = NULL;
351 	unsigned long flags;
352 
353 	spin_lock_irqsave(&dwarf_fde_lock, flags);
354 
355 	while (*rb_node) {
356 		struct dwarf_fde *fde_tmp;
357 		unsigned long tmp_start, tmp_end;
358 
359 		fde_tmp = rb_entry(*rb_node, struct dwarf_fde, node);
360 		BUG_ON(!fde_tmp);
361 
362 		tmp_start = fde_tmp->initial_location;
363 		tmp_end = fde_tmp->initial_location + fde_tmp->address_range;
364 
365 		if (pc < tmp_start) {
366 			rb_node = &(*rb_node)->rb_left;
367 		} else {
368 			if (pc < tmp_end) {
369 				fde = fde_tmp;
370 				goto out;
371 			} else
372 				rb_node = &(*rb_node)->rb_right;
373 		}
374 	}
375 
376 out:
377 	spin_unlock_irqrestore(&dwarf_fde_lock, flags);
378 
379 	return fde;
380 }
381 
382 /**
383  *	dwarf_cfa_execute_insns - execute instructions to calculate a CFA
384  *	@insn_start: address of the first instruction
385  *	@insn_end: address of the last instruction
386  *	@cie: the CIE for this function
387  *	@fde: the FDE for this function
388  *	@frame: the instructions calculate the CFA for this frame
389  *	@pc: the program counter of the address we're interested in
390  *
391  *	Execute the Call Frame instruction sequence starting at
392  *	@insn_start and ending at @insn_end. The instructions describe
393  *	how to calculate the Canonical Frame Address of a stackframe.
394  *	Store the results in @frame.
395  */
dwarf_cfa_execute_insns(unsigned char * insn_start,unsigned char * insn_end,struct dwarf_cie * cie,struct dwarf_fde * fde,struct dwarf_frame * frame,unsigned long pc)396 static int dwarf_cfa_execute_insns(unsigned char *insn_start,
397 				   unsigned char *insn_end,
398 				   struct dwarf_cie *cie,
399 				   struct dwarf_fde *fde,
400 				   struct dwarf_frame *frame,
401 				   unsigned long pc)
402 {
403 	unsigned char insn;
404 	unsigned char *current_insn;
405 	unsigned int count, delta, reg, expr_len, offset;
406 	struct dwarf_reg *regp;
407 
408 	current_insn = insn_start;
409 
410 	while (current_insn < insn_end && frame->pc <= pc) {
411 		insn = __raw_readb(current_insn++);
412 
413 		/*
414 		 * Firstly, handle the opcodes that embed their operands
415 		 * in the instructions.
416 		 */
417 		switch (DW_CFA_opcode(insn)) {
418 		case DW_CFA_advance_loc:
419 			delta = DW_CFA_operand(insn);
420 			delta *= cie->code_alignment_factor;
421 			frame->pc += delta;
422 			continue;
423 			/* NOTREACHED */
424 		case DW_CFA_offset:
425 			reg = DW_CFA_operand(insn);
426 			count = dwarf_read_uleb128(current_insn, &offset);
427 			current_insn += count;
428 			offset *= cie->data_alignment_factor;
429 			regp = dwarf_frame_alloc_reg(frame, reg);
430 			regp->addr = offset;
431 			regp->flags |= DWARF_REG_OFFSET;
432 			continue;
433 			/* NOTREACHED */
434 		case DW_CFA_restore:
435 			reg = DW_CFA_operand(insn);
436 			continue;
437 			/* NOTREACHED */
438 		}
439 
440 		/*
441 		 * Secondly, handle the opcodes that don't embed their
442 		 * operands in the instruction.
443 		 */
444 		switch (insn) {
445 		case DW_CFA_nop:
446 			continue;
447 		case DW_CFA_advance_loc1:
448 			delta = *current_insn++;
449 			frame->pc += delta * cie->code_alignment_factor;
450 			break;
451 		case DW_CFA_advance_loc2:
452 			delta = get_unaligned((u16 *)current_insn);
453 			current_insn += 2;
454 			frame->pc += delta * cie->code_alignment_factor;
455 			break;
456 		case DW_CFA_advance_loc4:
457 			delta = get_unaligned((u32 *)current_insn);
458 			current_insn += 4;
459 			frame->pc += delta * cie->code_alignment_factor;
460 			break;
461 		case DW_CFA_offset_extended:
462 			count = dwarf_read_uleb128(current_insn, &reg);
463 			current_insn += count;
464 			count = dwarf_read_uleb128(current_insn, &offset);
465 			current_insn += count;
466 			offset *= cie->data_alignment_factor;
467 			break;
468 		case DW_CFA_restore_extended:
469 			count = dwarf_read_uleb128(current_insn, &reg);
470 			current_insn += count;
471 			break;
472 		case DW_CFA_undefined:
473 			count = dwarf_read_uleb128(current_insn, &reg);
474 			current_insn += count;
475 			regp = dwarf_frame_alloc_reg(frame, reg);
476 			regp->flags |= DWARF_UNDEFINED;
477 			break;
478 		case DW_CFA_def_cfa:
479 			count = dwarf_read_uleb128(current_insn,
480 						   &frame->cfa_register);
481 			current_insn += count;
482 			count = dwarf_read_uleb128(current_insn,
483 						   &frame->cfa_offset);
484 			current_insn += count;
485 
486 			frame->flags |= DWARF_FRAME_CFA_REG_OFFSET;
487 			break;
488 		case DW_CFA_def_cfa_register:
489 			count = dwarf_read_uleb128(current_insn,
490 						   &frame->cfa_register);
491 			current_insn += count;
492 			frame->flags |= DWARF_FRAME_CFA_REG_OFFSET;
493 			break;
494 		case DW_CFA_def_cfa_offset:
495 			count = dwarf_read_uleb128(current_insn, &offset);
496 			current_insn += count;
497 			frame->cfa_offset = offset;
498 			break;
499 		case DW_CFA_def_cfa_expression:
500 			count = dwarf_read_uleb128(current_insn, &expr_len);
501 			current_insn += count;
502 
503 			frame->cfa_expr = current_insn;
504 			frame->cfa_expr_len = expr_len;
505 			current_insn += expr_len;
506 
507 			frame->flags |= DWARF_FRAME_CFA_REG_EXP;
508 			break;
509 		case DW_CFA_offset_extended_sf:
510 			count = dwarf_read_uleb128(current_insn, &reg);
511 			current_insn += count;
512 			count = dwarf_read_leb128(current_insn, &offset);
513 			current_insn += count;
514 			offset *= cie->data_alignment_factor;
515 			regp = dwarf_frame_alloc_reg(frame, reg);
516 			regp->flags |= DWARF_REG_OFFSET;
517 			regp->addr = offset;
518 			break;
519 		case DW_CFA_val_offset:
520 			count = dwarf_read_uleb128(current_insn, &reg);
521 			current_insn += count;
522 			count = dwarf_read_leb128(current_insn, &offset);
523 			offset *= cie->data_alignment_factor;
524 			regp = dwarf_frame_alloc_reg(frame, reg);
525 			regp->flags |= DWARF_VAL_OFFSET;
526 			regp->addr = offset;
527 			break;
528 		case DW_CFA_GNU_args_size:
529 			count = dwarf_read_uleb128(current_insn, &offset);
530 			current_insn += count;
531 			break;
532 		case DW_CFA_GNU_negative_offset_extended:
533 			count = dwarf_read_uleb128(current_insn, &reg);
534 			current_insn += count;
535 			count = dwarf_read_uleb128(current_insn, &offset);
536 			offset *= cie->data_alignment_factor;
537 
538 			regp = dwarf_frame_alloc_reg(frame, reg);
539 			regp->flags |= DWARF_REG_OFFSET;
540 			regp->addr = -offset;
541 			break;
542 		default:
543 			pr_debug("unhandled DWARF instruction 0x%x\n", insn);
544 			UNWINDER_BUG();
545 			break;
546 		}
547 	}
548 
549 	return 0;
550 }
551 
552 /**
553  *	dwarf_free_frame - free the memory allocated for @frame
554  *	@frame: the frame to free
555  */
dwarf_free_frame(struct dwarf_frame * frame)556 void dwarf_free_frame(struct dwarf_frame *frame)
557 {
558 	dwarf_frame_free_regs(frame);
559 	mempool_free(frame, dwarf_frame_pool);
560 }
561 
562 extern void ret_from_irq(void);
563 
564 /**
565  *	dwarf_unwind_stack - unwind the stack
566  *
567  *	@pc: address of the function to unwind
568  *	@prev: struct dwarf_frame of the previous stackframe on the callstack
569  *
570  *	Return a struct dwarf_frame representing the most recent frame
571  *	on the callstack. Each of the lower (older) stack frames are
572  *	linked via the "prev" member.
573  */
dwarf_unwind_stack(unsigned long pc,struct dwarf_frame * prev)574 struct dwarf_frame *dwarf_unwind_stack(unsigned long pc,
575 				       struct dwarf_frame *prev)
576 {
577 	struct dwarf_frame *frame;
578 	struct dwarf_cie *cie;
579 	struct dwarf_fde *fde;
580 	struct dwarf_reg *reg;
581 	unsigned long addr;
582 
583 	/*
584 	 * If we've been called in to before initialization has
585 	 * completed, bail out immediately.
586 	 */
587 	if (!dwarf_unwinder_ready)
588 		return NULL;
589 
590 	/*
591 	 * If we're starting at the top of the stack we need get the
592 	 * contents of a physical register to get the CFA in order to
593 	 * begin the virtual unwinding of the stack.
594 	 *
595 	 * NOTE: the return address is guaranteed to be setup by the
596 	 * time this function makes its first function call.
597 	 */
598 	if (!pc || !prev)
599 		pc = _THIS_IP_;
600 
601 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
602 	/*
603 	 * If our stack has been patched by the function graph tracer
604 	 * then we might see the address of return_to_handler() where we
605 	 * expected to find the real return address.
606 	 */
607 	if (pc == (unsigned long)&return_to_handler) {
608 		struct ftrace_ret_stack *ret_stack;
609 
610 		ret_stack = ftrace_graph_get_ret_stack(current, 0);
611 		if (ret_stack)
612 			pc = ret_stack->ret;
613 		/*
614 		 * We currently have no way of tracking how many
615 		 * return_to_handler()'s we've seen. If there is more
616 		 * than one patched return address on our stack,
617 		 * complain loudly.
618 		 */
619 		WARN_ON(ftrace_graph_get_ret_stack(current, 1));
620 	}
621 #endif
622 
623 	frame = mempool_alloc(dwarf_frame_pool, GFP_ATOMIC);
624 	if (!frame) {
625 		printk(KERN_ERR "Unable to allocate a dwarf frame\n");
626 		UNWINDER_BUG();
627 	}
628 
629 	INIT_LIST_HEAD(&frame->reg_list);
630 	frame->flags = 0;
631 	frame->prev = prev;
632 	frame->return_addr = 0;
633 
634 	fde = dwarf_lookup_fde(pc);
635 	if (!fde) {
636 		/*
637 		 * This is our normal exit path. There are two reasons
638 		 * why we might exit here,
639 		 *
640 		 *	a) pc has no asscociated DWARF frame info and so
641 		 *	we don't know how to unwind this frame. This is
642 		 *	usually the case when we're trying to unwind a
643 		 *	frame that was called from some assembly code
644 		 *	that has no DWARF info, e.g. syscalls.
645 		 *
646 		 *	b) the DEBUG info for pc is bogus. There's
647 		 *	really no way to distinguish this case from the
648 		 *	case above, which sucks because we could print a
649 		 *	warning here.
650 		 */
651 		goto bail;
652 	}
653 
654 	cie = dwarf_lookup_cie(fde->cie_pointer);
655 
656 	frame->pc = fde->initial_location;
657 
658 	/* CIE initial instructions */
659 	dwarf_cfa_execute_insns(cie->initial_instructions,
660 				cie->instructions_end, cie, fde,
661 				frame, pc);
662 
663 	/* FDE instructions */
664 	dwarf_cfa_execute_insns(fde->instructions, fde->end, cie,
665 				fde, frame, pc);
666 
667 	/* Calculate the CFA */
668 	switch (frame->flags) {
669 	case DWARF_FRAME_CFA_REG_OFFSET:
670 		if (prev) {
671 			reg = dwarf_frame_reg(prev, frame->cfa_register);
672 			UNWINDER_BUG_ON(!reg);
673 			UNWINDER_BUG_ON(reg->flags != DWARF_REG_OFFSET);
674 
675 			addr = prev->cfa + reg->addr;
676 			frame->cfa = __raw_readl(addr);
677 
678 		} else {
679 			/*
680 			 * Again, we're starting from the top of the
681 			 * stack. We need to physically read
682 			 * the contents of a register in order to get
683 			 * the Canonical Frame Address for this
684 			 * function.
685 			 */
686 			frame->cfa = dwarf_read_arch_reg(frame->cfa_register);
687 		}
688 
689 		frame->cfa += frame->cfa_offset;
690 		break;
691 	default:
692 		UNWINDER_BUG();
693 	}
694 
695 	reg = dwarf_frame_reg(frame, DWARF_ARCH_RA_REG);
696 
697 	/*
698 	 * If we haven't seen the return address register or the return
699 	 * address column is undefined then we must assume that this is
700 	 * the end of the callstack.
701 	 */
702 	if (!reg || reg->flags == DWARF_UNDEFINED)
703 		goto bail;
704 
705 	UNWINDER_BUG_ON(reg->flags != DWARF_REG_OFFSET);
706 
707 	addr = frame->cfa + reg->addr;
708 	frame->return_addr = __raw_readl(addr);
709 
710 	/*
711 	 * Ah, the joys of unwinding through interrupts.
712 	 *
713 	 * Interrupts are tricky - the DWARF info needs to be _really_
714 	 * accurate and unfortunately I'm seeing a lot of bogus DWARF
715 	 * info. For example, I've seen interrupts occur in epilogues
716 	 * just after the frame pointer (r14) had been restored. The
717 	 * problem was that the DWARF info claimed that the CFA could be
718 	 * reached by using the value of the frame pointer before it was
719 	 * restored.
720 	 *
721 	 * So until the compiler can be trusted to produce reliable
722 	 * DWARF info when it really matters, let's stop unwinding once
723 	 * we've calculated the function that was interrupted.
724 	 */
725 	if (prev && prev->pc == (unsigned long)ret_from_irq)
726 		frame->return_addr = 0;
727 
728 	return frame;
729 
730 bail:
731 	dwarf_free_frame(frame);
732 	return NULL;
733 }
734 
dwarf_parse_cie(void * entry,void * p,unsigned long len,unsigned char * end,struct module * mod)735 static int dwarf_parse_cie(void *entry, void *p, unsigned long len,
736 			   unsigned char *end, struct module *mod)
737 {
738 	struct rb_node **rb_node = &cie_root.rb_node;
739 	struct rb_node *parent = *rb_node;
740 	struct dwarf_cie *cie;
741 	unsigned long flags;
742 	int count;
743 
744 	cie = kzalloc(sizeof(*cie), GFP_KERNEL);
745 	if (!cie)
746 		return -ENOMEM;
747 
748 	cie->length = len;
749 
750 	/*
751 	 * Record the offset into the .eh_frame section
752 	 * for this CIE. It allows this CIE to be
753 	 * quickly and easily looked up from the
754 	 * corresponding FDE.
755 	 */
756 	cie->cie_pointer = (unsigned long)entry;
757 
758 	cie->version = *(char *)p++;
759 	UNWINDER_BUG_ON(cie->version != 1);
760 
761 	cie->augmentation = p;
762 	p += strlen(cie->augmentation) + 1;
763 
764 	count = dwarf_read_uleb128(p, &cie->code_alignment_factor);
765 	p += count;
766 
767 	count = dwarf_read_leb128(p, &cie->data_alignment_factor);
768 	p += count;
769 
770 	/*
771 	 * Which column in the rule table contains the
772 	 * return address?
773 	 */
774 	if (cie->version == 1) {
775 		cie->return_address_reg = __raw_readb(p);
776 		p++;
777 	} else {
778 		count = dwarf_read_uleb128(p, &cie->return_address_reg);
779 		p += count;
780 	}
781 
782 	if (cie->augmentation[0] == 'z') {
783 		unsigned int length, count;
784 		cie->flags |= DWARF_CIE_Z_AUGMENTATION;
785 
786 		count = dwarf_read_uleb128(p, &length);
787 		p += count;
788 
789 		UNWINDER_BUG_ON((unsigned char *)p > end);
790 
791 		cie->initial_instructions = p + length;
792 		cie->augmentation++;
793 	}
794 
795 	while (*cie->augmentation) {
796 		/*
797 		 * "L" indicates a byte showing how the
798 		 * LSDA pointer is encoded. Skip it.
799 		 */
800 		if (*cie->augmentation == 'L') {
801 			p++;
802 			cie->augmentation++;
803 		} else if (*cie->augmentation == 'R') {
804 			/*
805 			 * "R" indicates a byte showing
806 			 * how FDE addresses are
807 			 * encoded.
808 			 */
809 			cie->encoding = *(char *)p++;
810 			cie->augmentation++;
811 		} else if (*cie->augmentation == 'P') {
812 			/*
813 			 * "R" indicates a personality
814 			 * routine in the CIE
815 			 * augmentation.
816 			 */
817 			UNWINDER_BUG();
818 		} else if (*cie->augmentation == 'S') {
819 			UNWINDER_BUG();
820 		} else {
821 			/*
822 			 * Unknown augmentation. Assume
823 			 * 'z' augmentation.
824 			 */
825 			p = cie->initial_instructions;
826 			UNWINDER_BUG_ON(!p);
827 			break;
828 		}
829 	}
830 
831 	cie->initial_instructions = p;
832 	cie->instructions_end = end;
833 
834 	/* Add to list */
835 	spin_lock_irqsave(&dwarf_cie_lock, flags);
836 
837 	while (*rb_node) {
838 		struct dwarf_cie *cie_tmp;
839 
840 		cie_tmp = rb_entry(*rb_node, struct dwarf_cie, node);
841 
842 		parent = *rb_node;
843 
844 		if (cie->cie_pointer < cie_tmp->cie_pointer)
845 			rb_node = &parent->rb_left;
846 		else if (cie->cie_pointer >= cie_tmp->cie_pointer)
847 			rb_node = &parent->rb_right;
848 		else
849 			WARN_ON(1);
850 	}
851 
852 	rb_link_node(&cie->node, parent, rb_node);
853 	rb_insert_color(&cie->node, &cie_root);
854 
855 #ifdef CONFIG_MODULES
856 	if (mod != NULL)
857 		list_add_tail(&cie->link, &mod->arch.cie_list);
858 #endif
859 
860 	spin_unlock_irqrestore(&dwarf_cie_lock, flags);
861 
862 	return 0;
863 }
864 
dwarf_parse_fde(void * entry,u32 entry_type,void * start,unsigned long len,unsigned char * end,struct module * mod)865 static int dwarf_parse_fde(void *entry, u32 entry_type,
866 			   void *start, unsigned long len,
867 			   unsigned char *end, struct module *mod)
868 {
869 	struct rb_node **rb_node = &fde_root.rb_node;
870 	struct rb_node *parent = *rb_node;
871 	struct dwarf_fde *fde;
872 	struct dwarf_cie *cie;
873 	unsigned long flags;
874 	int count;
875 	void *p = start;
876 
877 	fde = kzalloc(sizeof(*fde), GFP_KERNEL);
878 	if (!fde)
879 		return -ENOMEM;
880 
881 	fde->length = len;
882 
883 	/*
884 	 * In a .eh_frame section the CIE pointer is the
885 	 * delta between the address within the FDE
886 	 */
887 	fde->cie_pointer = (unsigned long)(p - entry_type - 4);
888 
889 	cie = dwarf_lookup_cie(fde->cie_pointer);
890 	fde->cie = cie;
891 
892 	if (cie->encoding)
893 		count = dwarf_read_encoded_value(p, &fde->initial_location,
894 						 cie->encoding);
895 	else
896 		count = dwarf_read_addr(p, &fde->initial_location);
897 
898 	p += count;
899 
900 	if (cie->encoding)
901 		count = dwarf_read_encoded_value(p, &fde->address_range,
902 						 cie->encoding & 0x0f);
903 	else
904 		count = dwarf_read_addr(p, &fde->address_range);
905 
906 	p += count;
907 
908 	if (fde->cie->flags & DWARF_CIE_Z_AUGMENTATION) {
909 		unsigned int length;
910 		count = dwarf_read_uleb128(p, &length);
911 		p += count + length;
912 	}
913 
914 	/* Call frame instructions. */
915 	fde->instructions = p;
916 	fde->end = end;
917 
918 	/* Add to list. */
919 	spin_lock_irqsave(&dwarf_fde_lock, flags);
920 
921 	while (*rb_node) {
922 		struct dwarf_fde *fde_tmp;
923 		unsigned long tmp_start, tmp_end;
924 		unsigned long start, end;
925 
926 		fde_tmp = rb_entry(*rb_node, struct dwarf_fde, node);
927 
928 		start = fde->initial_location;
929 		end = fde->initial_location + fde->address_range;
930 
931 		tmp_start = fde_tmp->initial_location;
932 		tmp_end = fde_tmp->initial_location + fde_tmp->address_range;
933 
934 		parent = *rb_node;
935 
936 		if (start < tmp_start)
937 			rb_node = &parent->rb_left;
938 		else if (start >= tmp_end)
939 			rb_node = &parent->rb_right;
940 		else
941 			WARN_ON(1);
942 	}
943 
944 	rb_link_node(&fde->node, parent, rb_node);
945 	rb_insert_color(&fde->node, &fde_root);
946 
947 #ifdef CONFIG_MODULES
948 	if (mod != NULL)
949 		list_add_tail(&fde->link, &mod->arch.fde_list);
950 #endif
951 
952 	spin_unlock_irqrestore(&dwarf_fde_lock, flags);
953 
954 	return 0;
955 }
956 
dwarf_unwinder_dump(struct task_struct * task,struct pt_regs * regs,unsigned long * sp,const struct stacktrace_ops * ops,void * data)957 static void dwarf_unwinder_dump(struct task_struct *task,
958 				struct pt_regs *regs,
959 				unsigned long *sp,
960 				const struct stacktrace_ops *ops,
961 				void *data)
962 {
963 	struct dwarf_frame *frame, *_frame;
964 	unsigned long return_addr;
965 
966 	_frame = NULL;
967 	return_addr = 0;
968 
969 	while (1) {
970 		frame = dwarf_unwind_stack(return_addr, _frame);
971 
972 		if (_frame)
973 			dwarf_free_frame(_frame);
974 
975 		_frame = frame;
976 
977 		if (!frame || !frame->return_addr)
978 			break;
979 
980 		return_addr = frame->return_addr;
981 		ops->address(data, return_addr, 1);
982 	}
983 
984 	if (frame)
985 		dwarf_free_frame(frame);
986 }
987 
988 static struct unwinder dwarf_unwinder = {
989 	.name = "dwarf-unwinder",
990 	.dump = dwarf_unwinder_dump,
991 	.rating = 150,
992 };
993 
dwarf_unwinder_cleanup(void)994 static void __init dwarf_unwinder_cleanup(void)
995 {
996 	struct dwarf_fde *fde, *next_fde;
997 	struct dwarf_cie *cie, *next_cie;
998 
999 	/*
1000 	 * Deallocate all the memory allocated for the DWARF unwinder.
1001 	 * Traverse all the FDE/CIE lists and remove and free all the
1002 	 * memory associated with those data structures.
1003 	 */
1004 	rbtree_postorder_for_each_entry_safe(fde, next_fde, &fde_root, node)
1005 		kfree(fde);
1006 
1007 	rbtree_postorder_for_each_entry_safe(cie, next_cie, &cie_root, node)
1008 		kfree(cie);
1009 
1010 	mempool_destroy(dwarf_reg_pool);
1011 	mempool_destroy(dwarf_frame_pool);
1012 	kmem_cache_destroy(dwarf_reg_cachep);
1013 	kmem_cache_destroy(dwarf_frame_cachep);
1014 }
1015 
1016 /**
1017  *	dwarf_parse_section - parse DWARF section
1018  *	@eh_frame_start: start address of the .eh_frame section
1019  *	@eh_frame_end: end address of the .eh_frame section
1020  *	@mod: the kernel module containing the .eh_frame section
1021  *
1022  *	Parse the information in a .eh_frame section.
1023  */
dwarf_parse_section(char * eh_frame_start,char * eh_frame_end,struct module * mod)1024 static int dwarf_parse_section(char *eh_frame_start, char *eh_frame_end,
1025 			       struct module *mod)
1026 {
1027 	u32 entry_type;
1028 	void *p, *entry;
1029 	int count, err = 0;
1030 	unsigned long len = 0;
1031 	unsigned int c_entries, f_entries;
1032 	unsigned char *end;
1033 
1034 	c_entries = 0;
1035 	f_entries = 0;
1036 	entry = eh_frame_start;
1037 
1038 	while ((char *)entry < eh_frame_end) {
1039 		p = entry;
1040 
1041 		count = dwarf_entry_len(p, &len);
1042 		if (count == 0) {
1043 			/*
1044 			 * We read a bogus length field value. There is
1045 			 * nothing we can do here apart from disabling
1046 			 * the DWARF unwinder. We can't even skip this
1047 			 * entry and move to the next one because 'len'
1048 			 * tells us where our next entry is.
1049 			 */
1050 			err = -EINVAL;
1051 			goto out;
1052 		} else
1053 			p += count;
1054 
1055 		/* initial length does not include itself */
1056 		end = p + len;
1057 
1058 		entry_type = get_unaligned((u32 *)p);
1059 		p += 4;
1060 
1061 		if (entry_type == DW_EH_FRAME_CIE) {
1062 			err = dwarf_parse_cie(entry, p, len, end, mod);
1063 			if (err < 0)
1064 				goto out;
1065 			else
1066 				c_entries++;
1067 		} else {
1068 			err = dwarf_parse_fde(entry, entry_type, p, len,
1069 					      end, mod);
1070 			if (err < 0)
1071 				goto out;
1072 			else
1073 				f_entries++;
1074 		}
1075 
1076 		entry = (char *)entry + len + 4;
1077 	}
1078 
1079 	printk(KERN_INFO "DWARF unwinder initialised: read %u CIEs, %u FDEs\n",
1080 	       c_entries, f_entries);
1081 
1082 	return 0;
1083 
1084 out:
1085 	return err;
1086 }
1087 
1088 #ifdef CONFIG_MODULES
module_dwarf_finalize(const Elf_Ehdr * hdr,const Elf_Shdr * sechdrs,struct module * me)1089 int module_dwarf_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs,
1090 			  struct module *me)
1091 {
1092 	unsigned int i, err;
1093 	unsigned long start, end;
1094 	char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
1095 
1096 	start = end = 0;
1097 
1098 	for (i = 1; i < hdr->e_shnum; i++) {
1099 		/* Alloc bit cleared means "ignore it." */
1100 		if ((sechdrs[i].sh_flags & SHF_ALLOC)
1101 		    && !strcmp(secstrings+sechdrs[i].sh_name, ".eh_frame")) {
1102 			start = sechdrs[i].sh_addr;
1103 			end = start + sechdrs[i].sh_size;
1104 			break;
1105 		}
1106 	}
1107 
1108 	/* Did we find the .eh_frame section? */
1109 	if (i != hdr->e_shnum) {
1110 		INIT_LIST_HEAD(&me->arch.cie_list);
1111 		INIT_LIST_HEAD(&me->arch.fde_list);
1112 		err = dwarf_parse_section((char *)start, (char *)end, me);
1113 		if (err) {
1114 			printk(KERN_WARNING "%s: failed to parse DWARF info\n",
1115 			       me->name);
1116 			return err;
1117 		}
1118 	}
1119 
1120 	return 0;
1121 }
1122 
1123 /**
1124  *	module_dwarf_cleanup - remove FDE/CIEs associated with @mod
1125  *	@mod: the module that is being unloaded
1126  *
1127  *	Remove any FDEs and CIEs from the global lists that came from
1128  *	@mod's .eh_frame section because @mod is being unloaded.
1129  */
module_dwarf_cleanup(struct module * mod)1130 void module_dwarf_cleanup(struct module *mod)
1131 {
1132 	struct dwarf_fde *fde, *ftmp;
1133 	struct dwarf_cie *cie, *ctmp;
1134 	unsigned long flags;
1135 
1136 	spin_lock_irqsave(&dwarf_cie_lock, flags);
1137 
1138 	list_for_each_entry_safe(cie, ctmp, &mod->arch.cie_list, link) {
1139 		list_del(&cie->link);
1140 		rb_erase(&cie->node, &cie_root);
1141 		kfree(cie);
1142 	}
1143 
1144 	spin_unlock_irqrestore(&dwarf_cie_lock, flags);
1145 
1146 	spin_lock_irqsave(&dwarf_fde_lock, flags);
1147 
1148 	list_for_each_entry_safe(fde, ftmp, &mod->arch.fde_list, link) {
1149 		list_del(&fde->link);
1150 		rb_erase(&fde->node, &fde_root);
1151 		kfree(fde);
1152 	}
1153 
1154 	spin_unlock_irqrestore(&dwarf_fde_lock, flags);
1155 }
1156 #endif /* CONFIG_MODULES */
1157 
1158 /**
1159  *	dwarf_unwinder_init - initialise the dwarf unwinder
1160  *
1161  *	Build the data structures describing the .dwarf_frame section to
1162  *	make it easier to lookup CIE and FDE entries. Because the
1163  *	.eh_frame section is packed as tightly as possible it is not
1164  *	easy to lookup the FDE for a given PC, so we build a list of FDE
1165  *	and CIE entries that make it easier.
1166  */
dwarf_unwinder_init(void)1167 static int __init dwarf_unwinder_init(void)
1168 {
1169 	int err = -ENOMEM;
1170 
1171 	dwarf_frame_cachep = kmem_cache_create("dwarf_frames",
1172 			sizeof(struct dwarf_frame), 0,
1173 			SLAB_PANIC | SLAB_HWCACHE_ALIGN, NULL);
1174 
1175 	dwarf_reg_cachep = kmem_cache_create("dwarf_regs",
1176 			sizeof(struct dwarf_reg), 0,
1177 			SLAB_PANIC | SLAB_HWCACHE_ALIGN, NULL);
1178 
1179 	dwarf_frame_pool = mempool_create_slab_pool(DWARF_FRAME_MIN_REQ,
1180 						    dwarf_frame_cachep);
1181 	if (!dwarf_frame_pool)
1182 		goto out;
1183 
1184 	dwarf_reg_pool = mempool_create_slab_pool(DWARF_REG_MIN_REQ,
1185 						  dwarf_reg_cachep);
1186 	if (!dwarf_reg_pool)
1187 		goto out;
1188 
1189 	err = dwarf_parse_section(__start_eh_frame, __stop_eh_frame, NULL);
1190 	if (err)
1191 		goto out;
1192 
1193 	err = unwinder_register(&dwarf_unwinder);
1194 	if (err)
1195 		goto out;
1196 
1197 	dwarf_unwinder_ready = 1;
1198 
1199 	return 0;
1200 
1201 out:
1202 	printk(KERN_ERR "Failed to initialise DWARF unwinder: %d\n", err);
1203 	dwarf_unwinder_cleanup();
1204 	return err;
1205 }
1206 early_initcall(dwarf_unwinder_init);
1207