#!/usr/bin/env python

# Copyright 2016 The Fuchsia Authors
#
# Use of this source code is governed by a MIT-style
# license that can be found in the LICENSE file or at
# https://opensource.org/licenses/MIT

"""
This tool will symbolize a crash from Zircon's crash logger, adding
function names and, if available, source code locations (filenames and
line numbers from debug info).

Example usage #1:
  ./scripts/run-zircon -a x64 | ./scripts/symbolize devmgr.elf --build-dir=build-x64

Example usage #2:
  ./scripts/symbolize devmgr.elf --build-dir=build-x64

Example usage #3 (for zircon kernel output):
  ./scripts/symbolize --build-dir=build-x64
"""

import argparse
import errno
import os
import re
import subprocess
import sys

SCRIPT_DIR = os.path.abspath(os.path.dirname(__file__))
PREBUILTS_BASE_DIR = os.path.abspath(
    os.path.join(os.path.dirname(SCRIPT_DIR), "prebuilt", "downloads"))
GCC_VERSION = '6.3.0'

# Cache of dso name -> full path on the host, filled by find_dso_full_path().
name_to_full_path = {}
debug_mode = False

# Paths to various external tools can be provided on the command line.
# If these are None then use the prebuilt location.
addr2line_tool_path = None
gdb_tool_path = None


def find_func(find_args, dirname, names):
    """Directory-visit callback that records where a wanted entry lives.

    Args:
        find_args: dict with the keys "name" (the entry being looked for)
            and "path" ("" until a match is found, then the directory that
            contains the entry).
        dirname: the directory being visited.
        names: the entries of |dirname|.

    Once a match has been recorded, later calls are no-ops. Directories
    whose path contains "sysroot" are never searched.
    """
    if find_args["path"] != "":
        # found something!
        return
    if dirname.find("sysroot") != -1:
        return
    if find_args["name"] in names:
        find_args["path"] = dirname


def find_file_in_build_dir(name, build_dirs):
    """Search |build_dirs| recursively for a directory entry called |name|.

    The directories are searched in order, top-down, and the walk stops at
    the first match. Anything at or below a path containing "sysroot" is
    skipped.

    Returns:
        The absolute path of the first match, or None if there is none.
    """
    find_args = {"name": name, "path": ""}
    for location in build_dirs:
        for dirname, dirnames, filenames in os.walk(location):
            if "sysroot" in dirname:
                # Every descendant would be rejected by find_func as well,
                # so don't bother descending.
                dirnames[:] = []
                continue
            find_func(find_args, dirname, dirnames + filenames)
            if find_args["path"] != "":
                return os.path.abspath(os.path.join(find_args["path"], name))
    return None


def buildid_to_full_path(buildid, build_dirs):
    """Look |buildid| up in the ids.txt file of each build directory.

    Each useful line of ids.txt is "<build id> <path>". Blank or malformed
    lines are ignored, and the path may itself contain spaces.

    Returns:
        The path recorded for |buildid|, or None if no ids.txt lists it.
    """
    for build_dir in build_dirs:
        id_file_path = os.path.join(build_dir, "ids.txt")
        if not os.path.exists(id_file_path):
            continue
        with open(id_file_path) as id_file:
            for line in id_file:
                fields = line.split(None, 1)
                if len(fields) != 2:
                    continue
                if fields[0] == buildid:
                    return fields[1].strip()
    return None


def find_file_in_boot_manifest(boot_app_name, build_dirs):
    """Map a bootfs path to its host path using bootfs.manifest.

    Manifest lines have the form "<path in bootfs>=<path on host>"; lines
    without an "=" are ignored. A trailing ".strip" on the host path is
    removed so that the unstripped binary is returned.

    Returns:
        The host path for |boot_app_name|, or None if the manifest cannot
        be found or has no entry for it.
    """
    manifest_path = find_file_in_build_dir("bootfs.manifest", build_dirs)
    if not manifest_path:
        return None
    with open(manifest_path) as manifest_file:
        for line in manifest_file:
            out_path, sep, in_path = line.rstrip().partition("=")
            if not sep or out_path != boot_app_name:
                continue
            if in_path.endswith(".strip"):
                in_path = in_path[:-len(".strip")]
            return in_path
    return None


def find_dso_full_path_uncached(dso, exe_name, name_to_buildid, build_dirs):
    """Locate the host file for |dso| without consulting the cache.

    Args:
        dso: name of the module as printed in the crash log.
        exe_name: name of the primary application given on the command
            line, or None.
        name_to_buildid: dict mapping dso names to build ids.
        build_dirs: list of directories to search.

    Returns:
        The path of the file, or None if it cannot be found.
    """
    if dso in name_to_buildid:
        found_path = buildid_to_full_path(name_to_buildid[dso], build_dirs)
        if found_path:
            return found_path
        # This can be a bug in the generation of the ids.txt file, report it.
        # It's not necessarily a bug though, so for now only report in debug mode.
        if debug_mode:
            print("WARNING: Unable to find %s in any ids.txt file." % dso)

    # The name 'app' indicates the real app name is unknown.
    # If the process has a name property that will be printed, but
    # it has a max of 32 characters so it may be insufficient.
    # Crashlogger prefixes such names with "app:" for our benefit.
    if dso == "app" or dso.startswith("app:"):
        # If an executable was passed on the command-line, try using that
        if exe_name:
            found_path = find_file_in_build_dir(exe_name, build_dirs)
            if found_path:
                return found_path

        # If this looks like a program in boot fs, consult the manifest
        if dso.startswith("app:/boot/"):
            boot_app_name = dso[len("app:/boot/"):]
            found_path = find_file_in_boot_manifest(boot_app_name, build_dirs)
            if found_path:
                return found_path
        return None

    # First, try an exact match for the filename
    found_path = find_file_in_build_dir(dso, build_dirs)
    if not found_path and exe_name and not dso.endswith(".so"):
        # If that fails, and this file doesn't end with .so, try the
        # executable name (when one was given).
        found_path = find_file_in_build_dir(exe_name, build_dirs)
    if not found_path and dso.startswith("/"):
        # If that still fails and this looks like an absolute path, try the
        # last path component. The separator itself must be dropped: a name
        # with a leading "/" can never equal a directory entry.
        short_name = dso[dso.rfind("/") + 1:]
        if short_name:
            found_path = find_file_in_build_dir(short_name, build_dirs)
    return found_path


def find_dso_full_path(dso, exe_name, name_to_buildid, build_dirs):
    """Cached front end of find_dso_full_path_uncached().

    Successful lookups are remembered in the module-level
    name_to_full_path dict; failed lookups are not cached.
    """
    if dso in name_to_full_path:
        return name_to_full_path[dso]
    found_path = find_dso_full_path_uncached(dso, exe_name, name_to_buildid, build_dirs)
    if found_path:
        name_to_full_path[dso] = found_path
    return found_path


def tool_path(arch, tool, user_provided_path):
    """Return |user_provided_path| if given, else the prebuilt tool's path."""
    if user_provided_path is not None:
        return user_provided_path
    return ("%s/gcc/bin/%s-elf-%s" % (PREBUILTS_BASE_DIR, arch, tool))


def run_tool(path, *args):
    """Run the program at |path| with |args| and return its output.

    Returns:
        The program's stdout as text with trailing whitespace removed, or
        False if the program could not be run or exited with an error (a
        message is printed in that case).
    """
    cmd = [path] + list(args)
    if debug_mode:
        print("Running: %s" % " ".join(cmd))
    try:
        # universal_newlines makes the output text (not bytes) on Python 3.
        output = subprocess.check_output(cmd, universal_newlines=True)
    except Exception as e:
        # Deliberately broad: symbolization is best effort, so any failure
        # to run the tool is reported and turned into a False result.
        tool = os.path.basename(path)
        print("Calling %s failed: command %s error %s" % (tool, cmd, e))
        return False
    return output.rstrip()


# Note: addr2line requires hex addresses.
# |addr_as_hex_string| must already be PIE-adjusted.
def run_addr2line(arch, elf_path, addr_as_hex_string):
    """Run addr2line on |elf_path| for one hex address; return its output.

    The result is whatever run_tool() returns for the addr2line command.
    """
    addr2line = tool_path(arch, "addr2line", addr2line_tool_path)
    addr2line_args = ("-Cipfe", elf_path, addr_as_hex_string)
    return run_tool(addr2line, *addr2line_args)


def run_gdb(arch, arguments):
    """Run gdb with |arguments| and return run_tool()'s result.

    |arguments| is a single list of command-line arguments; this is not a
    varargs function.
    """
    gdb = tool_path(arch, "gdb", gdb_tool_path)
    return run_tool(gdb, *arguments)


# Architecture name as used by this script -> name understood by gdb.
GDB_ARCH_LUT = {
    "x86_64": "i386:x86-64",
    "aarch64": "aarch64",
}


def get_gdb_set_arch_cmd(arch):
    """Return the gdb "set arch" command for |arch|.

    Raises KeyError if |arch| has no entry in GDB_ARCH_LUT.
    """
    return "set arch %s" % GDB_ARCH_LUT[arch]


def get_call_location(arch, elf_path, addr_as_hex_string):
    """Symbolize the call site that belongs to a return address.

    One is subtracted from the address to convert from a return address to
    a call site address. (To be more exact, this converts to an address
    that is within the call site instruction.) This adjustment gives more
    correct results in the presence of inlining and 'noreturn' functions.
    (See ZX-842.)
    """
    return_addr = int(addr_as_hex_string, 16)
    return run_addr2line(arch, elf_path, "0x%x" % (return_addr - 1))


# On BSD platforms there are cases where writing to stdout can return EAGAIN.
# In that event, retry the line again. This only manifests itself when piping
# qemu's stdout directly to this script.
def writelines(lines):
    """Write every string in |lines| to stdout, one per line."""
    for line in lines:
        writeline(line)


def writeline(line):
    """Write |line| followed by a newline to stdout.

    A write that fails with EAGAIN is retried until it succeeds.
    NOTE(review): any other IOError is dropped silently and the line is
    lost; presumably only the EAGAIN retry was intended - confirm before
    changing this to re-raise.
    """
    while True:
        try:
            sys.stdout.write(line + "\n")
        except IOError as e:
            if e.errno == errno.EAGAIN:
                continue
        break


def kaslr_offset(addr, code_start, bias):
    """Offset |addr| based on the binary's code start and the load bias.

    |addr| may be an int or a hex string; the result has the same type as
    the input (a hex string result has no "0x" prefix). If either
    |code_start| or |bias| is unset or zero, |addr| is returned unchanged.
    """
    if not code_start or not bias:
        return addr
    is_string = isinstance(addr, str)
    if is_string:
        addr = int(addr, 16)
    addr -= bias - code_start
    if is_string:
        return '%x' % addr
    return addr


# Architecture name as seen in the log -> name used in build directory names.
ARCH_REMAP_LUT = {
    'x86_64': 'x64',
    'aarch64': 'arm64',
}


def choose_build_dirs(cli_args, arch):
    """Return the ordered list of directories to search for binaries.

    Directories given with --build-dir come first; otherwise the default
    Fuchsia output directories for |arch| are used. The zircon build
    directory is always searched last.
    """
    arch = ARCH_REMAP_LUT.get(arch, arch)
    source_root = os.path.dirname(SCRIPT_DIR)
    zircon_build_dir = os.path.join(source_root, "build-%s" % (arch, ))
    if not os.path.exists(zircon_build_dir):
        zircon_build_dir = os.path.join(
            source_root, os.pardir, "out", "build-zircon", "build-%s" % (arch, ))
    build_dirs = [zircon_build_dir]
    if cli_args.build_dir:
        build_dirs = cli_args.build_dir + build_dirs
    else:
        # Put the unstripped path ahead of the stripped one, we want the
        # former searched first. This does mean the unstripped directory
        # will get searched twice, but relative to the entire search time,
        # the addition is small.
        # Plus once a file is found its location is cached.
        # Plus this is only used as a fallback in case the file isn't found
        # in ids.txt.
        fuchsia_build_dir = os.path.abspath(os.path.join(
            source_root, os.pardir, "out", arch))
        fuchsia_unstripped_dir = os.path.join(fuchsia_build_dir, "exe.unstripped")
        build_dirs = [fuchsia_unstripped_dir, fuchsia_build_dir] + build_dirs
    return build_dirs


def get_zircon_source_dir():
    """Return the path "<parent of SCRIPT_DIR>/../zircon" (not normalized)."""
    zircon_source_dir = os.path.join(
        os.path.dirname(SCRIPT_DIR), os.pardir, "zircon")
    return zircon_source_dir


def parse_args():
    """Parse the command line and return the argparse namespace."""
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument("--file", "-f", nargs="?", type=argparse.FileType("r"),
                        default=sys.stdin,
                        help="File to read from, stdin by default")
    parser.add_argument("--build-dir", "-b", nargs="*",
                        help="List of build directories to search instead of the default (out/x64)")
    parser.add_argument("--disassemble", "-d", action="store_true",
                        help="Show disassembly of each function")
    parser.add_argument("--source", "-S", action="store_true",
                        help="Include source in the disassembly (requires -d)")
    parser.add_argument("--stack_size", "-s", type=int, default=256*1024,
                        help="Change the assumed size of the stack (e.g. use 1048576 for ftl or "
                             "mtl default thread size)")
    parser.add_argument("--echo", dest="echo", action="store_true",
                        help="Echo lines of input (on by default)")
    parser.add_argument("--no-echo", dest="echo", action="store_false",
                        help="Don't echo lines of input")
    parser.add_argument("--debug", "-D", action="store_true",
                        help="Print messages for debugging symbolize")
    parser.add_argument("--addr2line", default=None,
                        help="Path of addr2line program to use")
    parser.add_argument("--gdb", default=None,
                        help="Path of gdb program to use")
    parser.add_argument("app", nargs="?", help="Name of primary application")
    parser.set_defaults(echo=True)
    return parser.parse_args()


class Symbolizer(object):
    """Reads a log line by line and emits symbolized backtraces.

    Handles three backtrace formats: user-space "bt#NN: pc ... sp ..."
    lines, Zircon kernel panic backtraces, and ASAN "{{{bt:N:ADDR}}}" lines.
    """

    # Regex for parsing
    # Full prefix can be:
    # - nothing
    # - something like "[00007.268] 00304.00325> "
    # - something like "[00041.807507][1105][1119][klog] INFO: "
    # The prefix is capture group 1, so payload groups start at 2.
    full_prefix = r"^(|\[\d+\.\d+\] \d+\.\d+> ?|\[\d{5}\.\d{6}\]\[\d+\]\[\d+\]\[[\w ,]+\] INFO: ?)"
    btre = re.compile(full_prefix + r"bt#(\d+):")
    bt_with_offset_re = re.compile(
        full_prefix +
        r"bt#(\d+): pc 0x[0-9a-f]+ sp (0x[0-9a-f]+) \(([^,]+),(0x[0-9a-f]+)\)$")
    bt_end_re = re.compile(full_prefix + r"bt#(\d+): end")
    arch_re = re.compile(full_prefix + r"arch: ([\S]+)$")
    build_id_re = re.compile(
        full_prefix +
        r"(?:dlsvc: debug: )?dso: id=([0-9a-z]+) base=(0x[0-9a-f]+) name=(.+)$")
    disasm_re = re.compile(r"^ *(0x[0-9a-f]+)( .+)$")

    # Zircon backtraces
    zircon_crash_re = re.compile(full_prefix + r"ZIRCON KERNEL PANIC$")
    # TODO(cja): Add ARM to the regex
    zircon_pc_re = re.compile(r"RIP: (0x[0-9a-z]+)")
    # NOTE(review): "\dx" accepts any digit before the "x"; presumably "0x"
    # was intended. Left unchanged.
    zircon_bt_re = re.compile(full_prefix + r"bt#(\d+): (\dx[0-9a-fA-F]+)$")
    zircon_nm_codestart = re.compile(r'^([a-f0-9]+) t __code_start$', re.M)

    # ASAN backtraces
    asan_bt_re = re.compile(full_prefix + r"\{\{\{bt:(\d+):(0x[0-9a-f]+)\}\}\}")
    asan_bt_end_re = re.compile(full_prefix + r"$")

    def __init__(self, args):
        """Create a symbolizer driven by the parsed command line |args|."""
        self.args = args
        self.arch = "x86_64"
        self.build_dirs = choose_build_dirs(self.args, self.arch)
        self.name_to_buildid = {}
        self.bias_to_name = {}
        self.reset()

    def reset(self):
        """Forget all state that belongs to the backtrace in progress."""
        self.bias = 0
        self.processed_lines = []
        self.prev_sp = None
        self.prev_frame_num = None
        self.frame_sizes = []
        self.total_stack_size = 0
        # If True and we see a dso line, start over collecting the list.
        self.done_dso_list = True
        self.zircon_elf_path = ''
        self.zircon_code_start = None
        self.zircon_bt = False
        self.zircon_pc = ''
        self.asan_bt = False

    def write_processed_lines(self):
        """Print the collected symbolized stack, then reset().

        When the total stack usage is within 8KB of (or beyond) the assumed
        stack size, a warning and the size of every frame are printed too.
        """
        writeline("\nstart of symbolized stack:")
        writelines(self.processed_lines)
        writeline("end of symbolized stack\n")
        if self.total_stack_size > self.args.stack_size - 8*1024:
            if self.total_stack_size >= self.args.stack_size:
                message = "Overflowed stack"
            else:
                message = "Potentially overflowed stack"
            writeline("WARNING: %s (total usage: %d, stack size: %d)" %
                      (message, self.total_stack_size, self.args.stack_size))
            for frame, size in self.frame_sizes:
                writeline("#%s: %d bytes" % (frame, size))
        self.reset()

    def update_arch(self, m):
        """Handle an "arch: ..." line matched by arch_re.

        If the architecture has changed, choose our new build dirs and
        clear our DSO cache. We may be symbolizing the output of
        logserver, and it may be reasonable for the target architecture to
        be changing as the user boots and tests on different architectures.
        """
        global name_to_full_path
        new_arch = m.group(2)
        if self.arch != new_arch:
            self.arch = new_arch
            name_to_full_path = {}
            self.build_dirs = choose_build_dirs(self.args, self.arch)

    def update_build_id(self, m):
        """Handle a "dso: id=... base=... name=..." line (build_id_re)."""
        if self.done_dso_list:
            # First dso line of a new list: drop the previous list.
            self.name_to_buildid = {}
            self.bias_to_name = {}
            self.done_dso_list = False
        buildid = m.group(2)
        self.bias = int(m.group(3), 16)
        name = m.group(4)
        self.name_to_buildid[name] = buildid
        self.bias_to_name[self.bias] = name

        if self.zircon_code_start and self.zircon_code_start != self.bias:
            if self.zircon_code_start < self.bias:
                diff = self.bias - self.zircon_code_start
                c = '+'
            else:
                diff = self.zircon_code_start - self.bias
                c = '-'
            writeline('kaslr offset is %c0x%x' % (c, diff))

    def process_bt(self, m, frame_num):
        """Symbolize one user-space frame matched by bt_with_offset_re.

        Also accumulates frame sizes from successive stack pointers and,
        with --disassemble, appends the disassembly around the frame's pc.
        """
        sp = int(m.group(3), 16)
        if self.prev_sp is not None:
            frame_size = sp - self.prev_sp
            self.total_stack_size += frame_size
            self.frame_sizes.append((self.prev_frame_num, frame_size))
        self.prev_sp = sp
        self.prev_frame_num = frame_num

        dso = m.group(4)
        off = m.group(5)
        # Adapt offset for KASLR move
        off = kaslr_offset(off, self.zircon_code_start, self.bias)

        dso_full_path = find_dso_full_path(
            dso, self.args.app, self.name_to_buildid, self.build_dirs)
        if not dso_full_path:
            # can't find dso_full_path
            self.processed_lines.append(
                "#%s: unknown, can't find full path for %s" % (frame_num, dso))
            return

        call_loc = get_call_location(self.arch, dso_full_path, off)
        if call_loc:
            self.processed_lines.append("#%s: %s" % (frame_num, call_loc))
            if self.args.disassemble:
                self._disassemble(dso_full_path, int(off, 16))

    def _disassemble(self, dso_full_path, pc):
        """Append gdb's disassembly of the function containing |pc|."""
        # GDB can get confused what the default arch should be.
        # Cope by explicitly setting it.
        gdb_set_arch_cmd = get_gdb_set_arch_cmd(self.arch)
        run_gdb_options = ["--nx", "--batch", "-ex", gdb_set_arch_cmd]
        if self.args.source:
            # Work on a copy: appending to self.build_dirs itself would add
            # one more entry to the search list for every frame.
            gdb_source_search_dirs = list(self.build_dirs)
            gdb_source_search_dirs.append(get_zircon_source_dir())
            run_gdb_options += ["-ex", "dir %s" % (":".join(gdb_source_search_dirs))]
        disassemble_cmd = "disassemble %s %#x" % (
            "/s" if self.args.source else "", pc)
        run_gdb_options += ["-ex", disassemble_cmd, dso_full_path]

        disassembly = run_gdb(self.arch, run_gdb_options)
        if not disassembly:
            return
        for line in disassembly.splitlines():
            m = Symbolizer.disasm_re.match(line)
            if not m:
                # If we're printing source, include these lines.
                if self.args.source:
                    self.processed_lines.append(line)
                continue
            addr, rest = m.groups()
            addr = int(addr, 16)
            if addr == pc:
                prefix = "=> "
            else:
                prefix = " "
            line = "%s%#.16x%s" % (prefix, self.bias + addr, rest)
            self.processed_lines.append(line)

    def update_zircon(self):
        """Prepare for a kernel backtrace after a "ZIRCON KERNEL PANIC" line.

        Finds zircon.elf in the build dirs and reads the address of
        __code_start from nm's output. Problems are reported on stderr.
        """
        self.zircon_elf_path = find_file_in_build_dir("zircon.elf", self.build_dirs)
        if not self.zircon_elf_path:
            sys.stderr.write("Symbolize could not find the zircon elf binary. Perhaps you need "
                             "to build zircon or specify the build directory with -b?\n")
            return
        self.zircon_bt = True
        # run_tool() takes the path of the program as its first argument.
        nm_result = run_tool(tool_path(self.arch, "nm", None), self.zircon_elf_path)
        # run_tool() returns False when nm could not be run.
        m = Symbolizer.zircon_nm_codestart.search(nm_result) if nm_result else None
        if not m:
            sys.stderr.write("Failed to find __code_start from nm\n")
            return
        self.zircon_code_start = int(m.group(1), 16)

    # In the case of inlined methods, it is more readable if the
    # inlined lines are aligned to be to the right of "=>".
    @staticmethod
    def align_inlined(prefix, s):
        """Return prefix + s with each "(inlined" indented by len(prefix)."""
        return prefix + s.replace("(inlined", (" " * len(prefix)) + "(inlined")

    def process_zircon_bt(self, m):
        """Symbolize one kernel frame matched by zircon_bt_re.

        A frame (or the saved pc) whose symbolization fails is skipped, as
        the other backtrace handlers do.
        """
        frame_num = m.group(2)
        addr = m.group(3)
        # If we saw the instruction pointer for the fault/panic then use it once
        if self.zircon_pc:
            prefix = " pc: %s => " % self.zircon_pc
            a2l_out = run_addr2line(self.arch, self.zircon_elf_path, self.zircon_pc)
            if a2l_out:
                self.processed_lines.append(Symbolizer.align_inlined(prefix, a2l_out))
            self.zircon_pc = None

        # Adapt offset for KASLR move
        addr = kaslr_offset(addr, self.zircon_code_start, self.bias)

        prefix = "bt#%s: %s => " % (frame_num, addr)
        call_loc = get_call_location(self.arch, self.zircon_elf_path, addr)
        if call_loc:
            self.processed_lines.append(Symbolizer.align_inlined(prefix, call_loc))

    def process_asan_bt(self, m):
        """Symbolize one ASAN frame matched by asan_bt_re.

        The frame's module is the one with the highest load bias that is
        not above the address.
        """
        self.asan_bt = True
        frame_num = m.group(2)
        addr = int(m.group(3), 16)
        offset = None
        dso = None
        for bias, candidate_dso in self.bias_to_name.items():
            if addr >= bias:
                candidate_offset = addr - bias
                if offset is None or candidate_offset < offset:
                    offset = candidate_offset
                    dso = candidate_dso
        if offset is None:
            self.processed_lines.append(
                "#%s: unknown, can't find DSO for addr 0x%x" % (frame_num, addr))
            return

        dso_full_path = find_dso_full_path(
            dso, self.args.app, self.name_to_buildid, self.build_dirs)
        if not dso_full_path:
            self.processed_lines.append(
                "#%s: unknown, can't find full path for %s" % (frame_num, dso))
            return

        # Adapt offset for KASLR move
        offset = kaslr_offset(offset, self.zircon_code_start, self.bias)

        prefix = "bt#%s: 0x%x => " % (frame_num, addr)
        call_loc = run_addr2line(self.arch, dso_full_path, "0x%x" % offset)
        if call_loc:
            self.processed_lines.append(Symbolizer.align_inlined(prefix, call_loc))

    def run(self):
        """Read the input until EOF, symbolizing every backtrace found."""
        while True:
            line = self.args.file.readline()
            end_of_file = (line == '')
            line = line.rstrip()
            if self.args.echo and not self.args.file.isatty():
                writeline(line)

            bt_end = Symbolizer.bt_end_re.match(line)
            self.asan_bt_end = self.asan_bt and Symbolizer.asan_bt_end_re.match(line)
            if bt_end or self.asan_bt_end or end_of_file:
                if self.processed_lines:
                    self.write_processed_lines()
                if end_of_file:
                    break
                continue

            m = Symbolizer.arch_re.match(line)
            if m:
                self.update_arch(m)
                continue

            m = Symbolizer.build_id_re.match(line)
            if m:
                self.update_build_id(m)
                continue

            # We didn't see a dso line, so we're done with this list.
            # The next time we see one means we're starting a new list.
            self.done_dso_list = True

            m = Symbolizer.btre.match(line)
            if m and not self.zircon_bt:
                frame_num = m.group(2)
                m = Symbolizer.bt_with_offset_re.match(line)
                if not m:
                    self.processed_lines.append(
                        "#%s: unknown, can't find pc, sp or app/library in line" % frame_num)
                    continue
                self.process_bt(m, frame_num)
                continue

            # Zircon Specific Handling
            if Symbolizer.zircon_crash_re.search(line):
                self.update_zircon()
                continue

            m = Symbolizer.zircon_pc_re.search(line)
            if m:
                self.zircon_pc = kaslr_offset(m.group(1), self.zircon_code_start, self.bias)
                continue

            m = Symbolizer.zircon_bt_re.match(line)
            if m and self.zircon_bt:
                self.process_zircon_bt(m)
                continue

            # ASAN Specific Handling
            m = Symbolizer.asan_bt_re.match(line)
            if m and not self.zircon_bt:
                self.process_asan_bt(m)
                continue


def main():
    """Parse the command line, publish the global options, and symbolize."""
    args = parse_args()
    global debug_mode
    debug_mode = args.debug
    global addr2line_tool_path
    addr2line_tool_path = args.addr2line
    global gdb_tool_path
    gdb_tool_path = args.gdb
    symbolizer = Symbolizer(args)
    symbolizer.run()


if __name__ == '__main__':
    sys.exit(main())