#!/usr/bin/env python

# Copyright 2016 The Fuchsia Authors
#
# Use of this source code is governed by a MIT-style
# license that can be found in the LICENSE file or at
# https://opensource.org/licenses/MIT

"""

This tool will symbolize a crash from Zircon's crash logger, adding
function names and, if available, source code locations (filenames and
line numbers from debug info).

Example usage #1:
  ./scripts/run-zircon -a x64 | ./scripts/symbolize devmgr.elf --build-dir=build-x64

Example usage #2:
  ./scripts/symbolize devmgr.elf --build-dir=build-x64
  <copy and paste output from Zircon>

Example usage #3 (for zircon kernel output):
  ./scripts/symbolize --build-dir=build-x64
  <copy and paste output from Zircon>

"""

import argparse
import errno
import os
import re
import subprocess
import sys

SCRIPT_DIR = os.path.abspath(os.path.dirname(__file__))
PREBUILTS_BASE_DIR = os.path.abspath(os.path.join(os.path.dirname(SCRIPT_DIR), "prebuilt",
                                                  "downloads"))
GCC_VERSION = '6.3.0'
# Cache of DSO name -> resolved path on the host, filled by find_dso_full_path().
name_to_full_path = {}
debug_mode = False

# Paths to various external tools can be provided on the command line.
# If these are None then use the prebuilt location.
addr2line_tool_path = None
gdb_tool_path = None


def find_func(find_args, dirname, names):
    """Directory visitor: remember `dirname` if it contains the wanted file.

    Args:
        find_args: dict with keys "name" (file name to look for) and "path"
            (set to the containing directory once found, "" until then).
        dirname: directory currently being visited.
        names: names of the entries (files and directories) in `dirname`.

    Directories whose path contains "sysroot" are never accepted, and the
    first match wins: once "path" is set, later calls do nothing.
    """
    if find_args["path"] != "":  # found something!
        return
    if dirname.find("sysroot") != -1:
        return
    if find_args["name"] in names:
        find_args["path"] = dirname


def find_file_in_build_dir(name, build_dirs):
    """Search the build directories, in order, for an entry called `name`.

    Args:
        name: bare file name to look for (may be None or empty).
        build_dirs: list of directory trees to walk.

    Returns:
        The absolute path of the first match, or None if there is none.
    """
    # Callers pass the optional app name straight through; with nothing to
    # look for there is no point in walking every build tree.
    if not name:
        return None
    find_args = {"name": name, "path": ""}
    for location in build_dirs:
        # os.walk is used because os.path.walk does not exist in Python 3.
        for dirname, subdirs, files in os.walk(location):
            find_func(find_args, dirname, subdirs + files)
            if find_args["path"] != "":
                return os.path.abspath(os.path.join(find_args["path"], name))
            if "sysroot" in dirname:
                # find_func rejects every path containing "sysroot", which
                # includes everything below this directory: don't descend.
                subdirs[:] = []
    return None


def buildid_to_full_path(buildid, build_dirs):
    """Look up `buildid` in the ids.txt file of each build directory.

    Each useful line of ids.txt is "<build id> <path>".  Lines that do not
    have both fields (e.g. blank lines) are skipped.

    Returns:
        The path recorded for `buildid`, or None if no ids.txt lists it.
    """
    for build_dir in build_dirs:
        id_file_path = os.path.join(build_dir, "ids.txt")
        if not os.path.exists(id_file_path):
            continue
        with open(id_file_path) as id_file:
            for line in id_file:
                # Split on the first run of whitespace only, so a path that
                # itself contains spaces is kept intact.
                fields = line.split(None, 1)
                if len(fields) != 2:
                    continue
                if fields[0] == buildid:
                    return fields[1].rstrip()
    return None


def find_file_in_boot_manifest(boot_app_name, build_dirs):
    """Map a path inside bootfs to the host file it was built from.

    Looks for a "bootfs.manifest" file in the build directories and reads
    its "<path in bootfs>=<path on host>" lines.  A trailing ".strip" on the
    host path is removed so that the unstripped binary is returned.

    Returns:
        The host path for `boot_app_name`, or None if it is not listed or no
        manifest was found.
    """
    manifest_path = find_file_in_build_dir("bootfs.manifest", build_dirs)
    if manifest_path:
        with open(manifest_path) as manifest_file:
            for line in manifest_file:
                out_path, sep, in_path = line.rstrip().partition("=")
                if not sep:
                    # Not an "out=in" entry; ignore rather than crash.
                    continue
                if out_path == boot_app_name:
                    if in_path.endswith(".strip"):
                        in_path = in_path[:-len(".strip")]
                    return in_path
    return None


def find_dso_full_path_uncached(dso, exe_name, name_to_buildid, build_dirs):
    """Resolve the name of a DSO from a backtrace to a file on the host.

    Args:
        dso: name as printed in the backtrace (library name, absolute path,
            "app" or "app:<name>").
        exe_name: name of the primary application given on the command line,
            or None.
        name_to_buildid: dict mapping DSO names to build ids.
        build_dirs: list of build directories to search.

    Returns:
        Path of the matching file, or None if nothing was found.
    """
    if dso in name_to_buildid:
        found_path = buildid_to_full_path(name_to_buildid[dso], build_dirs)
        if found_path:
            return found_path
        # This can be a bug in the generation of the ids.txt file, report it.
        # It's not necessarily a bug though, so for now only report in debug mode.
        if debug_mode:
            print("WARNING: Unable to find %s in any ids.txt file." % dso)

    # The name 'app' indicates the real app name is unknown.
    # If the process has a name property that will be printed, but
    # it has a max of 32 characters so it may be insufficient.
    # Crashlogger prefixes such names with "app:" for our benefit.
    if dso == "app" or dso.startswith("app:"):
        # If an executable was passed on the command-line, try using that
        if exe_name:
            found_path = find_file_in_build_dir(exe_name, build_dirs)
            if found_path:
                return found_path

        # If this looks like a program in boot fs, consult the manifest
        if dso.startswith("app:/boot/"):
            boot_app_name = dso[len("app:/boot/"):]
            found_path = find_file_in_boot_manifest(boot_app_name, build_dirs)
            if found_path:
                return found_path
        return None

    # First, try an exact match for the filename
    found_path = find_file_in_build_dir(dso, build_dirs)
    if not found_path and exe_name and not dso.endswith(".so"):
        # If that fails, and this file doesn't end with .so, try the executable
        # name
        found_path = find_file_in_build_dir(exe_name, build_dirs)
    if not found_path and dso.startswith("/"):
        # If that still fails and this looks like an absolute path, try the
        # last path component.  The "+ 1" drops the slash itself; with the
        # slash kept the name could never match a directory entry.
        short_name = dso[dso.rfind("/") + 1:]
        found_path = find_file_in_build_dir(short_name, build_dirs)
    return found_path


def find_dso_full_path(dso, exe_name, name_to_buildid, build_dirs):
    """Cached front end for find_dso_full_path_uncached().

    Successful lookups are stored in the module-level `name_to_full_path`
    dict; failed lookups are not cached and are retried on the next call.
    """
    if dso in name_to_full_path:
        return name_to_full_path[dso]
    found_path = find_dso_full_path_uncached(dso, exe_name, name_to_buildid, build_dirs)
    if found_path:
        name_to_full_path[dso] = found_path
    return found_path


def tool_path(arch, tool, user_provided_path):
    """Return the path of `tool` (e.g. "gdb") to use for `arch`.

    A path supplied by the user takes precedence; otherwise the prebuilt
    "<arch>-elf-<tool>" under PREBUILTS_BASE_DIR is used.
    """
    if user_provided_path is not None:
        return user_provided_path
    return ("%s/gcc/bin/%s-elf-%s" %
            (PREBUILTS_BASE_DIR, arch, tool))


def run_tool(path, *args):
    """Run the program at `path` with `args` and return what it printed.

    Returns:
        The tool's standard output as text with trailing whitespace removed,
        or False if the tool could not be run or exited with an error (a
        message is printed in that case).
    """
    cmd = [path] + list(args)
    if debug_mode:
        print("Running: %s" % " ".join(cmd))
    try:
        # universal_newlines makes the output text (not bytes) on Python 3
        # as well, so callers can format and search it as a string.
        output = subprocess.check_output(cmd, universal_newlines=True)
    except Exception as e:
        # Deliberately broad: a missing or failing external tool must not
        # abort symbolization of the rest of the log.
        tool = os.path.basename(path)
        print("Calling %s failed: command %s error %s" % (tool, cmd, e))
        return False
    return output.rstrip()


# Note:
# addr2line requires hex addresses.
# |addr_as_hex_string| must already be PIE-adjusted.
def run_addr2line(arch, elf_path, addr_as_hex_string):
    """Symbolize one address of `elf_path` with addr2line.

    Returns:
        addr2line's output (function name and source location, including
        inlined callers), or False if the tool failed.
    """
    path = tool_path(arch, "addr2line", addr2line_tool_path)
    return run_tool(path, "-Cipfe", elf_path, addr_as_hex_string)


# The caller passes in a list of arguments, this is not a varargs function.
def run_gdb(arch, arguments):
    """Run gdb for `arch` with the given list of command-line arguments.

    Returns:
        gdb's output, or False if the tool failed.
    """
    path = tool_path(arch, "gdb", gdb_tool_path)
    return run_tool(path, *arguments)


# Maps the architecture names used by this script to gdb's names.
GDB_ARCH_LUT = {
    "x86_64": "i386:x86-64",
    "aarch64": "aarch64",
}


def get_gdb_set_arch_cmd(arch):
    """Return the gdb command that selects the architecture `arch`.

    An architecture that is not in GDB_ARCH_LUT is passed through to gdb
    unchanged instead of raising KeyError, so an unexpected architecture
    name in the input cannot abort symbolization.
    """
    gdb_arch = GDB_ARCH_LUT.get(arch, arch)
    return "set arch %s" % gdb_arch


def get_call_location(arch, elf_path, addr_as_hex_string):
    """Symbolize the call site that belongs to a return address.

    Args:
        arch: target architecture, used to pick the addr2line binary.
        elf_path: ELF file the address belongs to.
        addr_as_hex_string: return address as a hex string.

    Returns:
        addr2line's output for the call site, or False if the tool failed.
    """
    # Subtract 1 to convert from a return address to a call site
    # address. (To be more exact, this converts to an address that
    # is within the call site instruction.) This adjustment gives
    # more correct results in the presence of inlining and
    # 'noreturn' functions. (See ZX-842.)
    # Clamp at zero: an address of 0 would otherwise become "0x-1".
    call_addr = "0x%x" % max(int(addr_as_hex_string, 16) - 1, 0)
    return run_addr2line(arch, elf_path, call_addr)


# On BSD platforms there are cases where writing to stdout can return EAGAIN.
# In that event, retry the line again. This only manifests itself when piping
# qemu's stdout directly to this script.
def writelines(lines):
    """Write every string in `lines` to stdout, one per line."""
    for line in lines:
        writeline(line)


def writeline(line):
    """Write `line` followed by a newline to stdout.

    If the write fails with EAGAIN the whole line is written again until it
    succeeds.  Any other IOError is ignored (best effort output).
    """
    while True:
        try:
            sys.stdout.write(line + "\n")
        except IOError as e:
            if e.errno == errno.EAGAIN:
                continue
        break


def kaslr_offset(addr, code_start, bias):
    """Offset an address based on the binary's code start and load bias.

    Args:
        addr: address, either an int or a hex string.
        code_start: link-time address of the start of the code, or None/0.
        bias: run-time load address, or None/0.

    Returns:
        `addr` unchanged if `code_start` or `bias` is unknown.  Otherwise
        `addr - (bias - code_start)`, in the same type as the input: an int
        for an int, a hex string (without "0x" prefix) for a string.
    """
    if not code_start or not bias:
        return addr
    is_string = isinstance(addr, str)
    if is_string:
        addr = int(addr, 16)
    addr -= bias - code_start
    if is_string:
        return '%x' % addr
    return addr


# Maps architecture names as they appear in logs to build directory suffixes.
ARCH_REMAP_LUT = {
    'x86_64': 'x64',
    'aarch64': 'arm64',
}


def choose_build_dirs(cli_args, arch):
    """Return the ordered list of build directories to search for `arch`.

    Directories given with --build-dir come first, followed by the zircon
    build directory.  Without --build-dir the default Fuchsia output
    directories are searched ahead of the zircon one.
    """
    arch = ARCH_REMAP_LUT.get(arch, arch)
    zircon_build_dir = os.path.join(
        os.path.dirname(SCRIPT_DIR), "build-%s" % (arch, ))
    if not os.path.exists(zircon_build_dir):
        zircon_build_dir = os.path.join(
            os.path.dirname(SCRIPT_DIR), os.pardir, "out", "build-zircon", "build-%s" % (arch, ))
    build_dirs = [zircon_build_dir]
    if cli_args.build_dir:
        build_dirs = cli_args.build_dir + build_dirs
    else:
        # Put the unstripped path ahead of the stripped one, we want the
        # former searched first. This does mean the unstripped directory
        # will get searched twice, but relative to the entire search time,
        # the addition is small.
        # Plus once a file is found its location is cached.
        # Plus this is only used as a fallback in case the file isn't found
        # in ids.txt.
        fuchsia_build_dir = os.path.abspath(os.path.join(
            os.path.dirname(SCRIPT_DIR), os.pardir, "out", arch))
        fuchsia_unstripped_dir = os.path.join(fuchsia_build_dir, "exe.unstripped")
        build_dirs = [fuchsia_unstripped_dir, fuchsia_build_dir] + build_dirs
    return build_dirs


def get_zircon_source_dir():
    """Return the path of the "zircon" directory two levels above this script."""
    zircon_source_dir = os.path.join(
        os.path.dirname(SCRIPT_DIR), os.pardir, "zircon")
    return zircon_source_dir


def parse_args():
    """Parse the command line and return the argparse namespace."""
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument("--file", "-f", nargs="?", type=argparse.FileType("r"),
                        default=sys.stdin,
                        help="File to read from, stdin by default")
    parser.add_argument("--build-dir", "-b", nargs="*",
                        help="List of build directories to search instead of the default (out/x64)")
    parser.add_argument("--disassemble", "-d", action="store_true",
                        help="Show disassembly of each function")
    parser.add_argument("--source", "-S", action="store_true",
                        help="Include source in the disassembly (requires -d)")
    parser.add_argument("--stack_size", "-s", type=int,
                        default=256*1024,
                        help="Change the assumed size of the stack (e.g. use 1048576 for ftl or "
                        "mtl default thread size)")
    parser.add_argument("--echo", dest="echo", action="store_true",
                        help="Echo lines of input (on by default)")
    parser.add_argument("--no-echo", dest="echo", action="store_false",
                        help="Don't echo lines of input")
    parser.add_argument("--debug", "-D", action="store_true",
                        help="Print messages for debugging symbolize")
    parser.add_argument("--addr2line", default=None,
                        help="Path of addr2line program to use")
    parser.add_argument("--gdb", default=None,
                        help="Path of gdb program to use")
    parser.add_argument("app", nargs="?", help="Name of primary application")
    parser.set_defaults(echo=True)
    return parser.parse_args()


class Symbolizer(object):
    """Reads log lines and prints symbolized versions of the backtraces.

    Three kinds of backtrace are recognized: user-space crashlogger
    backtraces ("bt#N: pc ... sp ... (dso,offset)"), Zircon kernel panic
    backtraces ("bt#N: 0x...") and ASAN backtraces ("{{{bt:N:0x...}}}").
    """

    # Regex for parsing
    # Full prefix can be:
    # - nothing
    # - something like "[00007.268] 00304.00325> "
    # - something like "[00041.807507][1105][1119][klog] INFO: "
    # In every regex built on full_prefix, group 1 is the prefix itself.
    full_prefix = r"^(|\[\d+\.\d+\] \d+\.\d+> ?|\[\d{5}\.\d{6}\]\[\d+\]\[\d+\]\[[\w ,]+\] INFO: ?)"
    btre = re.compile(full_prefix + r"bt#(\d+):")
    bt_with_offset_re = re.compile(
        full_prefix +
        r"bt#(\d+): pc 0x[0-9a-f]+ sp (0x[0-9a-f]+) \(([^,]+),(0x[0-9a-f]+)\)$")
    bt_end_re = re.compile(full_prefix + r"bt#(\d+): end")
    arch_re = re.compile(full_prefix + r"arch: ([\S]+)$")
    build_id_re = re.compile(
        full_prefix +
        r"(?:dlsvc: debug: )?dso: id=([0-9a-z]+) base=(0x[0-9a-f]+) name=(.+)$")
    disasm_re = re.compile(r"^ *(0x[0-9a-f]+)( .+)$")

    # Zircon backtraces
    zircon_crash_re = re.compile(full_prefix + r"ZIRCON KERNEL PANIC$")
    # TODO(cja): Add ARM to the regex
    zircon_pc_re = re.compile(r"RIP: (0x[0-9a-z]+)")
    zircon_bt_re = re.compile(full_prefix +
                              r"bt#(\d+): (\dx[0-9a-fA-F]+)$")
    zircon_nm_codestart = re.compile(r'^([a-f0-9]+) t __code_start$', re.M)

    # ASAN backtraces
    asan_bt_re = re.compile(full_prefix + r"\{\{\{bt:(\d+):(0x[0-9a-f]+)\}\}\}")
    asan_bt_end_re = re.compile(full_prefix + r"$")

    def __init__(self, args):
        """Create a symbolizer configured by the parsed command line `args`."""
        self.args = args
        self.arch = "x86_64"
        self.build_dirs = choose_build_dirs(self.args, self.arch)
        self.name_to_buildid = {}
        self.bias_to_name = {}
        self.reset()

    def reset(self):
        """Forget all state that belongs to the backtrace being processed."""
        self.bias = 0
        self.processed_lines = []
        self.prev_sp = None
        self.prev_frame_num = None
        self.frame_sizes = []
        self.total_stack_size = 0
        # If True and we see a dso line, start over collecting the list.
        self.done_dso_list = True

        self.zircon_elf_path = ''
        self.zircon_code_start = None
        self.zircon_bt = False
        self.zircon_pc = ''

        self.asan_bt = False

    def write_processed_lines(self):
        """Print the symbolized backtrace collected so far, then reset.

        A warning with per-frame sizes is added when the measured stack
        usage comes within 8 KiB of the assumed stack size.
        """
        writeline("\nstart of symbolized stack:")
        writelines(self.processed_lines)
        writeline("end of symbolized stack\n")

        if self.total_stack_size > self.args.stack_size - 8*1024:
            if self.total_stack_size >= self.args.stack_size:
                message = "Overflowed stack"
            else:
                message = "Potentially overflowed stack"
            writeline("WARNING: %s (total usage: %d, stack size: %d)" %
                      (message, self.total_stack_size, self.args.stack_size))
            for frame, size in self.frame_sizes:
                writeline("#%s: %d bytes" % (frame, size))
        self.reset()

    # If the architecture has changed, choose our new build dirs and
    # clear our DSO cache. We may be symbolizing the output of
    # logserver, and it may be reasonable for the target architecture to
    # be changing as the user boots and tests on different
    # architectures.
    def update_arch(self, m):
        """Handle an "arch: <name>" line matched by arch_re."""
        new_arch = m.group(2)
        if self.arch != new_arch:
            self.arch = new_arch
            # Clear in place so that every holder of the dict sees it emptied.
            name_to_full_path.clear()
            self.build_dirs = choose_build_dirs(self.args, self.arch)

    def update_build_id(self, m):
        """Handle a "dso: id=... base=... name=..." line matched by build_id_re."""
        if self.done_dso_list:
            # First dso line after something else: a new list starts here.
            self.name_to_buildid = {}
            self.bias_to_name = {}
            self.done_dso_list = False
        buildid = m.group(2)
        self.bias = int(m.group(3), 16)
        name = m.group(4)
        self.name_to_buildid[name] = buildid
        self.bias_to_name[self.bias] = name

        if self.zircon_code_start and self.zircon_code_start != self.bias:
            if self.zircon_code_start < self.bias:
                diff = self.bias - self.zircon_code_start
                c = '+'
            else:
                diff = self.zircon_code_start - self.bias
                c = '-'
            writeline('kaslr offset is %c0x%x' % (c, diff))

    def process_bt(self, m, frame_num):
        """Symbolize one user-space frame matched by bt_with_offset_re.

        Also records the distance between this frame's stack pointer and the
        previous one as the size of the previous frame.
        """
        sp = int(m.group(3), 16)
        if self.prev_sp is not None:
            frame_size = sp - self.prev_sp
            self.total_stack_size += frame_size
            self.frame_sizes.append((self.prev_frame_num, frame_size))
        self.prev_sp = sp
        self.prev_frame_num = frame_num

        dso = m.group(4)
        off = m.group(5)

        # Adapt offset for KASLR move
        off = kaslr_offset(off, self.zircon_code_start, self.bias)

        dso_full_path = find_dso_full_path(
            dso, self.args.app, self.name_to_buildid, self.build_dirs)
        if not dso_full_path:
            # can't find dso_full_path
            self.processed_lines.append("#%s: unknown, can't find full path for %s" %
                                        (frame_num, dso))
            return

        call_loc = get_call_location(self.arch, dso_full_path, off)
        if not call_loc:
            return
        self.processed_lines.append("#%s: %s" % (frame_num, call_loc))
        if self.args.disassemble:
            self._append_disassembly(dso_full_path, int(off, 16))

    def _append_disassembly(self, dso_full_path, pc):
        """Append gdb's disassembly of the function containing `pc`."""
        # GDB can get confused what the default arch should be.
        # Cope by explicitly setting it.
        gdb_set_arch_cmd = get_gdb_set_arch_cmd(self.arch)
        run_gdb_options = ["--nx", "--batch",
                           "-ex", gdb_set_arch_cmd]
        if self.args.source:
            # Build a new list: appending to self.build_dirs itself would
            # add one more entry to the search path for every frame.
            gdb_source_search_dirs = self.build_dirs + [get_zircon_source_dir()]
            run_gdb_options += ["-ex", "dir %s" % (
                ":".join(gdb_source_search_dirs))]
        disassemble_cmd = "disassemble %s %#x" % (
            "/s" if self.args.source else "", pc)
        run_gdb_options += ["-ex", disassemble_cmd, dso_full_path]
        disassembly = run_gdb(self.arch, run_gdb_options)
        if not disassembly:
            return
        for line in disassembly.splitlines():
            insn = Symbolizer.disasm_re.match(line)
            if not insn:
                # If we're printing source, include these lines.
                if self.args.source:
                    self.processed_lines.append(line)
                continue
            addr, rest = insn.groups()
            addr = int(addr, 16)
            # Mark the instruction at pc; pad the other lines to the same
            # width so the address columns line up.
            if addr == pc:
                prefix = "=> "
            else:
                prefix = "   "
            line = "%s%#.16x%s" % (prefix, self.bias + addr, rest)
            self.processed_lines.append(line)

    def update_zircon(self):
        """Handle a kernel panic line: locate zircon.elf and its code start."""
        self.zircon_elf_path = find_file_in_build_dir("zircon.elf", self.build_dirs)
        if not self.zircon_elf_path:
            sys.stderr.write("Symbolize could not find the zircon elf binary. Perhaps you need "
                             "to build zircon or specify the build directory with -b?\n")
            return
        self.zircon_bt = True
        # run_tool expects the path of the executable as its first argument.
        nm_result = run_tool(tool_path(self.arch, "nm", None), self.zircon_elf_path)
        if not nm_result:
            # nm could not be run (run_tool already reported it) or printed
            # nothing; continue without a KASLR adjustment.
            sys.stderr.write("Failed to run nm on %s\n" % self.zircon_elf_path)
            return
        m = Symbolizer.zircon_nm_codestart.search(nm_result)
        if not m:
            sys.stderr.write("Failed to find __code_start from nm\n")
            return
        self.zircon_code_start = int(m.group(1), 16)

    # In the case of inlined methods, it is more readable if the
    # inlined lines are aligned to be to the right of "=>".
    @staticmethod
    def align_inlined(prefix, s):
        """Return prefix + s with each "(inlined" indented by len(prefix)."""
        return prefix + s.replace("(inlined", (" " * len(prefix)) + "(inlined")

    def process_zircon_bt(self, m):
        """Symbolize one kernel frame matched by zircon_bt_re."""
        frame_num = m.group(2)
        addr = m.group(3)
        # If we saw the instruction pointer for the fault/panic then use it once
        if self.zircon_pc:
            prefix = " pc: %s => " % self.zircon_pc
            a2l_out = run_addr2line(self.arch, self.zircon_elf_path, self.zircon_pc)
            # run_addr2line returns False when the tool fails.
            if a2l_out is False:
                self.processed_lines.append(prefix + "unknown")
            else:
                self.processed_lines.append(Symbolizer.align_inlined(prefix, a2l_out))
            self.zircon_pc = None

        # Adapt offset for KASLR move
        addr = kaslr_offset(addr, self.zircon_code_start, self.bias)

        prefix = "bt#%s: %s => " % (frame_num, addr)
        call_loc = get_call_location(self.arch, self.zircon_elf_path, addr)
        if call_loc is False:
            self.processed_lines.append(prefix + "unknown")
        else:
            self.processed_lines.append(Symbolizer.align_inlined(prefix, call_loc))

    def process_asan_bt(self, m):
        """Symbolize one ASAN frame matched by asan_bt_re.

        The frame carries an absolute address; the DSO is taken to be the
        one with the highest load address that is not above it.
        """
        self.asan_bt = True
        frame_num = m.group(2)
        addr = int(m.group(3), 16)
        offset = None
        dso = None
        for bias, candidate_dso in self.bias_to_name.items():
            if addr >= bias:
                candidate_offset = addr - bias
                if offset is None or candidate_offset < offset:
                    offset = candidate_offset
                    dso = candidate_dso
        if offset is None:
            self.processed_lines.append("#%s: unknown, can't find DSO for addr 0x%x" %
                                        (frame_num, addr))
            return
        dso_full_path = find_dso_full_path(dso, self.args.app, self.name_to_buildid,
                                           self.build_dirs)
        if not dso_full_path:
            self.processed_lines.append("#%s: unknown, can't find full path for %s" %
                                        (frame_num, dso))
            return

        # Adapt offset for KASLR move
        offset = kaslr_offset(offset, self.zircon_code_start, self.bias)

        prefix = "bt#%s: 0x%x => " % (frame_num, addr)
        call_loc = run_addr2line(self.arch, dso_full_path, "0x%x" % offset)
        if call_loc:
            self.processed_lines.append(Symbolizer.align_inlined(prefix, call_loc))

    def run(self):
        """Read the input line by line until end of file and symbolize it."""
        while True:
            line = self.args.file.readline()
            end_of_file = (line == '')
            line = line.rstrip()
            if self.args.echo and not self.args.file.isatty():
                writeline(line)

            bt_end = Symbolizer.bt_end_re.match(line)
            # An ASAN backtrace has no end marker; it ends at the first line
            # that consists of nothing but the log prefix.
            self.asan_bt_end = self.asan_bt and Symbolizer.asan_bt_end_re.match(line)
            if bt_end or self.asan_bt_end or end_of_file:
                if self.processed_lines:
                    self.write_processed_lines()
                if end_of_file:
                    break
                continue

            m = Symbolizer.arch_re.match(line)
            if m:
                self.update_arch(m)
                continue

            m = Symbolizer.build_id_re.match(line)
            if m:
                self.update_build_id(m)
                continue

            # We didn't see a dso line, so we're done with this list.
            # The next time we see one means we're starting a new list.
            self.done_dso_list = True

            m = Symbolizer.btre.match(line)
            if m and not self.zircon_bt:
                frame_num = m.group(2)
                m = Symbolizer.bt_with_offset_re.match(line)
                if not m:
                    self.processed_lines.append(
                        "#%s: unknown, can't find pc, sp or app/library in line" %
                        frame_num)
                    continue
                self.process_bt(m, frame_num)
                continue

            # Zircon Specific Handling
            if Symbolizer.zircon_crash_re.search(line):
                self.update_zircon()
                continue

            m = Symbolizer.zircon_pc_re.search(line)
            if m:
                self.zircon_pc = kaslr_offset(m.group(1), self.zircon_code_start, self.bias)
                continue

            m = Symbolizer.zircon_bt_re.match(line)
            if m and self.zircon_bt:
                self.process_zircon_bt(m)
                continue

            # ASAN Specific Handling
            m = Symbolizer.asan_bt_re.match(line)
            if m and not self.zircon_bt:
                self.process_asan_bt(m)
                continue


def main():
    """Entry point: parse the command line and symbolize the input."""
    args = parse_args()
    global debug_mode
    debug_mode = args.debug
    global addr2line_tool_path
    addr2line_tool_path = args.addr2line
    global gdb_tool_path
    gdb_tool_path = args.gdb

    symbolizer = Symbolizer(args)
    symbolizer.run()


if __name__ == '__main__':
    sys.exit(main())