1#!/usr/bin/env python
2
3# Copyright 2016 The Fuchsia Authors
4#
5# Use of this source code is governed by a MIT-style
6# license that can be found in the LICENSE file or at
7# https://opensource.org/licenses/MIT
8
9"""
10
11This tool will symbolize a crash from Zircon's crash logger, adding
12function names and, if available, source code locations (filenames and
13line numbers from debug info).
14
15Example usage #1:
16  ./scripts/run-zircon -a x64 | ./scripts/symbolize devmgr.elf --build-dir=build-x64
17
18Example usage #2:
19  ./scripts/symbolize devmgr.elf --build-dir=build-x64
20  <copy and paste output from Zircon>
21
22Example usage #3 (for zircon kernel output):
23  ./scripts/symbolize --build-dir=build-x64
24  <copy and paste output from Zircon>
25
26"""
27
28import argparse
29import errno
30import os
31import re
32import subprocess
33import sys
34
# Absolute path of the directory that holds this script.
SCRIPT_DIR = os.path.abspath(os.path.dirname(__file__))

# Prebuilt downloads live in <parent of SCRIPT_DIR>/prebuilt/downloads.
PREBUILTS_BASE_DIR = os.path.abspath(
    os.path.join(os.path.dirname(SCRIPT_DIR), "prebuilt", "downloads"))

GCC_VERSION = "6.3.0"

# Cache mapping a DSO name to the host path it was resolved to.
name_to_full_path = dict()

# Set from the --debug command-line flag in main().
debug_mode = False

# Paths to various external tools can be provided on the command line.
# While these are None the prebuilt location is used instead.
addr2line_tool_path = None
gdb_tool_path = None
46
47
def find_func(find_args, dirname, names):
    """Directory-visitor callback that records where a wanted entry lives.

    Args:
        find_args: dict with keys "name" (the entry to look for) and "path"
            (empty string until a match is found, then the matching
            directory).
        dirname: directory currently being visited.
        names: names of the entries inside |dirname|.

    The first match wins: once find_args["path"] is set, later calls do
    nothing.  Directories whose path contains "sysroot" are never matched.
    """
    already_found = find_args["path"] != ""
    if already_found or "sysroot" in dirname:
        return
    if find_args["name"] in names:
        find_args["path"] = dirname
57
58
def find_file_in_build_dir(name, build_dirs):
    """Search the build directories for an entry called |name|.

    Each directory in |build_dirs| is walked top-down, in order, and the
    first directory entry (file or subdirectory) whose name equals |name|
    wins.  Any directory whose path contains "sysroot" is ignored.

    Args:
        name: bare file name to look for (no directory components).
        build_dirs: list of directory paths to search; missing directories
            are silently skipped.

    Returns:
        The absolute path of the first match, or None if nothing matched.
    """
    for location in build_dirs:
        # os.walk replaces os.path.walk, which no longer exists in Python 3,
        # and lets us stop as soon as a match is found.
        for dirpath, dirnames, filenames in os.walk(location):
            if "sysroot" in dirpath:
                # Every descendant path would contain "sysroot" too, so
                # there is no point descending any further.
                del dirnames[:]
                continue
            if name in filenames or name in dirnames:
                return os.path.abspath(os.path.join(dirpath, name))
    return None
66
67
def buildid_to_full_path(buildid, build_dirs):
    """Look up a build id in the ids.txt file of each build directory.

    Each useful line of ids.txt has the form "<build-id> <path>".  Blank
    lines and lines without a path are skipped, and the path may itself
    contain whitespace.

    Args:
        buildid: build id string to look for.
        build_dirs: list of directories that may contain an ids.txt file.

    Returns:
        The path recorded for |buildid| in the first ids.txt that lists it,
        or None if no file lists it.
    """
    for build_dir in build_dirs:
        id_file_path = os.path.join(build_dir, "ids.txt")
        if not os.path.exists(id_file_path):
            continue
        with open(id_file_path) as id_file:
            for line in id_file:
                # Split only once so a path containing spaces stays intact.
                fields = line.strip().split(None, 1)
                if len(fields) != 2:
                    continue
                entry_id, path = fields
                if entry_id == buildid:
                    return path
    return None
78
79
def find_file_in_boot_manifest(boot_app_name, build_dirs):
    """Map a name inside bootfs to the host file it was built from.

    The first "bootfs.manifest" found under |build_dirs| is read; each
    useful line has the form "<path-in-bootfs>=<path-on-host>".  Lines
    without an "=" are skipped.

    Args:
        boot_app_name: path of the program relative to the bootfs root.
        build_dirs: list of build directories to search for the manifest.

    Returns:
        The host path from the manifest with a trailing ".strip" removed,
        or None if there is no manifest or no matching entry.
    """
    manifest_path = find_file_in_build_dir("bootfs.manifest", build_dirs)
    if not manifest_path:
        return None
    with open(manifest_path) as manifest_file:
        for line in manifest_file:
            # Split on the first "=" only; anything after it is the host path.
            out_path, separator, in_path = line.rstrip().partition("=")
            if not separator or out_path != boot_app_name:
                continue
            # Drop the ".strip" suffix so the result names the sibling file
            # without it.
            if in_path.endswith(".strip"):
                in_path = in_path[:-len(".strip")]
            return in_path
    return None
91
92
def find_dso_full_path_uncached(dso, exe_name, name_to_buildid, build_dirs):
    """Resolve a DSO name taken from a backtrace to a file on the host.

    The lookup order is:
      1. the build id recorded for |dso|, via the ids.txt files;
      2. for "app" / "app:..." names: the executable given on the command
         line, then the boot manifest for "app:/boot/..." names;
      3. otherwise: an exact file-name match, then the command-line
         executable (for names not ending in ".so"), then the last path
         component of an absolute name.

    Args:
        dso: name of the DSO as printed in the log.
        exe_name: name of the primary application, or None.
        name_to_buildid: dict mapping DSO names to build ids.
        build_dirs: list of build directories to search.

    Returns:
        The path of the matching file, or None if it cannot be found.
    """
    if dso in name_to_buildid:
        found_path = buildid_to_full_path(name_to_buildid[dso], build_dirs)
        if found_path:
            return found_path
        # This can be a bug in the generation of the ids.txt file, report it.
        # It's not necessarily a bug though, so for now only report in debug mode.
        if debug_mode:
            print("WARNING: Unable to find %s in any ids.txt file." % dso)

    # The name 'app' indicates the real app name is unknown.
    # If the process has a name property that will be printed, but
    # it has a max of 32 characters so it may be insufficient.
    # Crashlogger prefixes such names with "app:" for our benefit.
    if dso == "app" or dso.startswith("app:"):
        # If an executable was passed on the command-line, try using that
        if exe_name:
            found_path = find_file_in_build_dir(exe_name, build_dirs)
            if found_path:
                return found_path

        # If this looks like a program in boot fs, consult the manifest
        if dso.startswith("app:/boot/"):
            boot_app_name = dso[len("app:/boot/"):]
            found_path = find_file_in_boot_manifest(boot_app_name, build_dirs)
            if found_path:
                return found_path
        return None

    # First, try an exact match for the filename
    found_path = find_file_in_build_dir(dso, build_dirs)
    # If that fails, and this file doesn't end with .so, try the executable
    # name.  Without an executable name the search could never match, so
    # skip the (expensive) directory walk entirely.
    if not found_path and exe_name and not dso.endswith(".so"):
        found_path = find_file_in_build_dir(exe_name, build_dirs)
    # If that still fails and this looks like an absolute path, try the
    # last path component.  basename() drops the leading "/", which a
    # directory entry name can never contain.
    if not found_path and dso.startswith("/"):
        found_path = find_file_in_build_dir(os.path.basename(dso), build_dirs)
    return found_path
136
137
def find_dso_full_path(dso, exe_name, name_to_buildid, build_dirs):
    """Cached front end for find_dso_full_path_uncached().

    Successful lookups are remembered in the module-level name_to_full_path
    dict, keyed by |dso|; failed lookups are not cached and are retried on
    the next call.

    Returns:
        The resolved path, or whatever falsy value the uncached lookup
        returned.
    """
    try:
        return name_to_full_path[dso]
    except KeyError:
        pass
    resolved = find_dso_full_path_uncached(dso, exe_name, name_to_buildid, build_dirs)
    if resolved:
        name_to_full_path[dso] = resolved
    return resolved
145
146
def tool_path(arch, tool, user_provided_path):
    """Return the path of an external tool.

    Args:
        arch: architecture prefix of the prebuilt tool name.
        tool: tool name suffix, e.g. "addr2line" or "gdb".
        user_provided_path: explicit path to use, or None.

    Returns:
        |user_provided_path| when it is not None, otherwise
        "<PREBUILTS_BASE_DIR>/gcc/bin/<arch>-elf-<tool>".
    """
    if user_provided_path is None:
        return "%s/gcc/bin/%s-elf-%s" % (PREBUILTS_BASE_DIR, arch, tool)
    return user_provided_path
152
153
def run_tool(path, *args):
    """Run the program at |path| with |args| and capture its stdout.

    When debug_mode is set the command line is printed first.

    Returns:
        The program's output with trailing whitespace removed, or False if
        starting the program or the program itself failed (a message
        describing the failure is printed in that case).
    """
    cmd = [path]
    cmd.extend(args)
    if debug_mode:
        print("Running: %s" % " ".join(cmd))
    try:
        output = subprocess.check_output(cmd)
    except Exception as e:
        print("Calling %s failed: command %s error %s" % (os.path.basename(path), cmd, e))
        return False
    else:
        return output.rstrip()
165
166
167# Note: addr2line requires hex addresses.
168# |addr_as_hex_string| must already be PIE-adjusted.
def run_addr2line(arch, elf_path, addr_as_hex_string):
    """Run addr2line with the "-Cipfe" options on one address of |elf_path|.

    The tool is the user-provided addr2line if one was given, otherwise the
    prebuilt one for |arch|.

    Returns:
        Whatever run_tool() returns: the tool's output, or False on failure.
    """
    return run_tool(tool_path(arch, "addr2line", addr2line_tool_path),
                    "-Cipfe", elf_path, addr_as_hex_string)
172
173
174# The caller passes in a list of arguments, this is not a varargs function.
def run_gdb(arch, arguments):
    """Run gdb with the given list of command-line arguments.

    The tool is the user-provided gdb if one was given, otherwise the
    prebuilt one for |arch|.

    Returns:
        Whatever run_tool() returns: gdb's output, or False on failure.
    """
    return run_tool(tool_path(arch, "gdb", gdb_tool_path), *arguments)
178
179
# Maps the architecture names used by this script to the names gdb's
# "set arch" command is given.
GDB_ARCH_LUT = {
    "x86_64": "i386:x86-64",
    "aarch64": "aarch64",
}


def get_gdb_set_arch_cmd(arch):
    """Return the gdb command that selects the architecture |arch|.

    The architecture name comes from the log being symbolized, so an
    unknown name must not raise: it is passed through to gdb unchanged.
    """
    return "set arch %s" % GDB_ARCH_LUT.get(arch, arch)
185
186
def get_call_location(arch, elf_path, addr_as_hex_string):
    """Symbolize the call site that belongs to a return address.

    Args:
        arch: architecture used to pick the addr2line tool.
        elf_path: ELF file to look the address up in.
        addr_as_hex_string: return address as a hex string.

    Returns:
        The result of run_addr2line() for the adjusted address.
    """
    # Subtract 1 to convert from a return address to a call site
    # address.  (To be more exact, this converts to an address that
    # is within the call site instruction.)  This adjustment gives
    # more correct results in the presence of inlining and
    # 'noreturn' functions.  (See ZX-842.)
    return_addr = int(addr_as_hex_string, 16)
    call_site = "0x%x" % (return_addr - 1)
    return run_addr2line(arch, elf_path, call_site)
195
196
197# On BSD platforms there are cases where writing to stdout can return EAGAIN.
# In that event, retry writing the line. This only manifests itself when
# piping qemu's stdout directly to this script.
def writelines(lines):
    """Write every item of |lines| to stdout through writeline()."""
    for text in lines:
        writeline(text)
203
204
def writeline(line):
    """Write |line| plus a newline to stdout, retrying on EAGAIN.

    An IOError whose errno is EAGAIN makes the write start over; any other
    IOError ends the attempt without being reported.
    """
    text = line + "\n"
    finished = False
    while not finished:
        try:
            sys.stdout.write(text)
            finished = True
        except IOError as err:
            # Only EAGAIN is worth another attempt.
            finished = err.errno != errno.EAGAIN
213
214
215# Offset the address based on binary code start and bias
# Return the same type as the input
def kaslr_offset(addr, code_start, bias):
    """Shift |addr| back by the distance between |bias| and |code_start|.

    Args:
        addr: address as an int, or as a hex string.
        code_start: start address of the code in the binary.
        bias: address the code was actually loaded at.

    Returns:
        |addr| unchanged when |code_start| or |bias| is falsy.  Otherwise
        addr - (bias - code_start), as an int for int input or as a hex
        string without a "0x" prefix for string input.
    """
    if not (code_start and bias):
        return addr
    slide = bias - code_start
    if isinstance(addr, str):
        return '%x' % (int(addr, 16) - slide)
    return addr - slide
227
# Maps architecture names seen in logs to the names used in build
# directory paths.
ARCH_REMAP_LUT = {
    'x86_64': 'x64',
    'aarch64': 'arm64',
}


def choose_build_dirs(cli_args, arch):
    """Build the ordered list of directories to search for binaries.

    Args:
        cli_args: parsed command-line arguments; only .build_dir is read.
        arch: architecture name; remapped through ARCH_REMAP_LUT when known.

    Returns:
        The directories from --build-dir followed by the zircon build
        directory, or, without --build-dir, the default fuchsia unstripped
        and build directories followed by the zircon build directory.
    """
    arch = ARCH_REMAP_LUT.get(arch, arch)
    source_root = os.path.dirname(SCRIPT_DIR)
    build_subdir = "build-%s" % (arch, )

    zircon_build_dir = os.path.join(source_root, build_subdir)
    if not os.path.exists(zircon_build_dir):
        zircon_build_dir = os.path.join(
            source_root, os.pardir, "out", "build-zircon", build_subdir)

    if cli_args.build_dir:
        return cli_args.build_dir + [zircon_build_dir]

    # Put the unstripped path ahead of the stripped one, we want the
    # former searched first. This does mean the unstripped directory
    # will get searched twice, but relative to the entire search time,
    # the addition is small.
    # Plus once a file is found its location is cached.
    # Plus this is only used as a fallback in case the file isn't found
    # in ids.txt.
    fuchsia_build_dir = os.path.abspath(
        os.path.join(source_root, os.pardir, "out", arch))
    fuchsia_unstripped_dir = os.path.join(fuchsia_build_dir, "exe.unstripped")
    return [fuchsia_unstripped_dir, fuchsia_build_dir, zircon_build_dir]
254
def get_zircon_source_dir():
    """Return <parent of SCRIPT_DIR>/../zircon (the path is not normalized)."""
    return os.path.join(os.path.dirname(SCRIPT_DIR), os.pardir, "zircon")
259
def parse_args():
    """Parse the command line (sys.argv) of the symbolizer.

    Returns:
        An argparse.Namespace with the attributes file, build_dir,
        disassemble, source, stack_size, echo, debug, addr2line, gdb and
        app.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    # const makes a bare "--file" mean stdin; without it argparse would
    # store None and reading the input would fail later.
    parser.add_argument("--file", "-f", nargs="?", type=argparse.FileType("r"),
                        default=sys.stdin, const=sys.stdin,
                        help="File to read from, stdin by default")
    parser.add_argument("--build-dir", "-b", nargs="*",
                        help="List of build directories to search instead of the default (out/x64)")
    parser.add_argument("--disassemble", "-d", action="store_true",
                        help="Show disassembly of each function")
    parser.add_argument("--source", "-S", action="store_true",
                        help="Include source in the disassembly (requires -d)")
    parser.add_argument("--stack_size", "-s", type=int,
                        default=256*1024,
                        help="Change the assumed size of the stack (e.g. use 1048576 for ftl or "
                             "mtl default thread size)")
    # --echo / --no-echo share one destination; echo is on by default.
    parser.add_argument("--echo", dest="echo", action="store_true",
                        help="Echo lines of input (on by default)")
    parser.add_argument("--no-echo", dest="echo", action="store_false",
                        help="Don't echo lines of input")
    parser.add_argument("--debug", "-D", action="store_true",
                        help="Print messages for debugging symbolize")
    parser.add_argument("--addr2line", default=None,
                        help="Path of addr2line program to use")
    parser.add_argument("--gdb", default=None,
                        help="Path of gdb program to use")
    parser.add_argument("app", nargs="?", help="Name of primary application")
    parser.set_defaults(echo=True)
    return parser.parse_args()
290
class Symbolizer(object):
    """Reads a log line by line and symbolizes the backtraces found in it.

    Three kinds of backtrace lines are recognized: userspace crash logger
    lines ("bt#N: pc ... sp ... (dso,offset)"), zircon kernel panic lines
    ("bt#N: 0x...", only after a "ZIRCON KERNEL PANIC" line) and ASAN lines
    ("{{{bt:N:0x...}}}").  Symbolized lines are collected in
    self.processed_lines and written out when the backtrace ends.
    """

    # Regex for parsing
    # Full prefix can be:
    #   - nothing
    #   - something like "[00007.268] 00304.00325> "
    #   - something like "[00041.807507][1105][1119][klog] INFO: "
    # The prefix is capture group 1 of every regex built from it, so the
    # interesting groups start at 2.
    full_prefix = r"^(|\[\d+\.\d+\] \d+\.\d+> ?|\[\d{5}\.\d{6}\]\[\d+\]\[\d+\]\[[\w ,]+\] INFO: ?)"
    btre = re.compile(full_prefix + r"bt#(\d+):")
    bt_with_offset_re = re.compile(full_prefix +
        r"bt#(\d+): pc 0x[0-9a-f]+ sp (0x[0-9a-f]+) \(([^,]+),(0x[0-9a-f]+)\)$")
    bt_end_re = re.compile(full_prefix + r"bt#(\d+): end")
    arch_re = re.compile(full_prefix + r"arch: ([\S]+)$")
    build_id_re = re.compile(full_prefix +
        "(?:dlsvc: debug: )?dso: id=([0-9a-z]+) base=(0x[0-9a-f]+) name=(.+)$")
    disasm_re = re.compile("^ *(0x[0-9a-f]+)( .+)$")

    # Zircon backtraces
    zircon_crash_re = re.compile(full_prefix + "ZIRCON KERNEL PANIC$")
    # TODO(cja): Add ARM to the regex
    zircon_pc_re = re.compile("RIP: (0x[0-9a-z]+)")
    # NOTE(review): "\dx" accepts any digit before the "x"; presumably "0x"
    # was intended.  Left unchanged to keep the accepted input the same.
    zircon_bt_re = re.compile(full_prefix +
        r"bt#(\d+): (\dx[0-9a-fA-F]+)$")
    zircon_nm_codestart = re.compile('^([a-f0-9]+) t __code_start$', re.M)

    # ASAN backtraces
    asan_bt_re = re.compile(full_prefix + r"\{\{\{bt:(\d+):(0x[0-9a-f]+)\}\}\}")
    asan_bt_end_re = re.compile(full_prefix + "$")

    def __init__(self, args):
        """Create a symbolizer driven by the parsed command-line |args|."""
        self.args = args
        # Assumed architecture until an "arch:" line says otherwise.
        self.arch = "x86_64"
        self.build_dirs = choose_build_dirs(self.args, self.arch)
        self.name_to_buildid = {}
        self.bias_to_name = {}
        self.reset()

    def reset(self):
        """Forget all per-backtrace state (the DSO tables are kept)."""
        # Base address from the most recent dso line.
        self.bias = 0
        self.processed_lines = []
        self.prev_sp = None
        self.prev_frame_num = None
        self.frame_sizes = []
        self.total_stack_size = 0
        # If True and we see a dso line, start over collecting the list.
        self.done_dso_list = True

        self.zircon_elf_path = ''
        self.zircon_code_start = None
        self.zircon_bt = False
        self.zircon_pc = ''

        self.asan_bt = False

    def write_processed_lines(self):
        """Print the collected symbolized stack, then reset the state.

        A warning with per-frame sizes is added when the total stack usage
        comes within 8 KiB of (or exceeds) the assumed stack size.
        """
        writeline("\nstart of symbolized stack:")
        writelines(self.processed_lines)
        writeline("end of symbolized stack\n")

        if self.total_stack_size > self.args.stack_size - 8*1024:
            if self.total_stack_size >= self.args.stack_size:
                message = "Overflowed stack"
            else:
                message = "Potentially overflowed stack"
            writeline("WARNING: %s (total usage: %d, stack size: %d)" %
                (message, self.total_stack_size, self.args.stack_size))
            for frame, size in self.frame_sizes:
                writeline("#%s: %d bytes" % (frame, size))
        self.reset()

    # If the architecture has changed, choose our new build dirs and
    # clear our DSO cache.  We may be symbolizing the output of
    # logserver, and it may be reasonable for the target architecture to
    # be changing as the user boots and tests on different
    # architectures.
    def update_arch(self, m):
        """Handle an "arch:" line; |m| is a match of arch_re."""
        new_arch = m.group(2)
        if (self.arch != new_arch):
            self.arch = new_arch
            global name_to_full_path
            name_to_full_path = {}
            self.build_dirs = choose_build_dirs(self.args, self.arch)

    def update_build_id(self, m):
        """Handle a "dso:" line; |m| is a match of build_id_re.

        Records the build id and base address of the DSO.  The first dso
        line after any other kind of line starts a fresh list.
        """
        if self.done_dso_list:
            self.name_to_buildid = {}
            self.bias_to_name = {}
            self.done_dso_list = False
        buildid = m.group(2)
        self.bias = int(m.group(3), 16)
        name = m.group(4)
        self.name_to_buildid[name] = buildid
        self.bias_to_name[self.bias] = name

        if self.zircon_code_start and self.zircon_code_start != self.bias:
            if self.zircon_code_start < self.bias:
                diff = self.bias - self.zircon_code_start
                c = '+'
            else:
                diff = self.zircon_code_start - self.bias
                c = '-'
            writeline('kaslr offset is %c0x%x' % (c, diff))

    def process_bt(self, m, frame_num):
        """Symbolize one userspace backtrace frame.

        Args:
            m: match of bt_with_offset_re (group 3 is sp, group 4 the DSO
                name, group 5 the offset inside the DSO).
            frame_num: frame number string used to label the output.
        """
        # The difference between consecutive stack pointers is recorded as
        # the size of the previous frame.
        sp = int(m.group(3), 16)
        if self.prev_sp is not None:
            frame_size = sp - self.prev_sp
            self.total_stack_size += frame_size
            self.frame_sizes.append((self.prev_frame_num, frame_size))
        self.prev_sp = sp
        self.prev_frame_num = frame_num

        dso = m.group(4)
        off = m.group(5)

        # Adapt offset for KASLR move
        off = kaslr_offset(off, self.zircon_code_start, self.bias)

        dso_full_path = find_dso_full_path(
            dso, self.args.app, self.name_to_buildid, self.build_dirs)
        if not dso_full_path:
            # can't find dso_full_path
            self.processed_lines.append("#%s: unknown, can't find full path for %s" %
                (frame_num, dso))
            return

        call_loc = get_call_location(self.arch, dso_full_path, off)
        if call_loc:
            self.processed_lines.append(
                "#%s: %s" % (frame_num, call_loc))
        if self.args.disassemble:
            pc = int(off, 16)
            # GDB can get confused what the default arch should be.
            # Cope by explicitly setting it.
            gdb_set_arch_cmd = get_gdb_set_arch_cmd(self.arch)
            run_gdb_options = [ "--nx", "--batch",
                                "-ex", gdb_set_arch_cmd ]
            if self.args.source:
                # Build a new list: appending to self.build_dirs itself
                # would grow the binary search path on every frame.
                gdb_source_search_dirs = self.build_dirs + [get_zircon_source_dir()]
                run_gdb_options += [ "-ex", "dir %s" % (
                    ":".join(gdb_source_search_dirs)) ]
            disassemble_cmd = "disassemble %s %#x" % (
                "/s" if self.args.source else "", pc)
            run_gdb_options += [ "-ex", disassemble_cmd, dso_full_path ]
            disassembly = run_gdb(self.arch, run_gdb_options)
            if disassembly:
                for line in disassembly.splitlines():
                    disasm_match = Symbolizer.disasm_re.match(line)
                    if not disasm_match:
                        # If we're printing source, include these lines.
                        if self.args.source:
                            self.processed_lines.append(line)
                        continue
                    addr, rest = disasm_match.groups()
                    addr = int(addr, 16)
                    # Mark the instruction the frame's address points at.
                    if addr == pc:
                        prefix = "=> "
                    else:
                        prefix = "   "
                    line = "%s%#.16x%s" % (prefix, self.bias + addr, rest)
                    self.processed_lines.append(line)

    def update_zircon(self):
        """Switch to kernel-backtrace mode after a "ZIRCON KERNEL PANIC" line.

        Locates zircon.elf in the build directories and reads the address of
        __code_start from nm's output.  Problems are reported on stderr.
        """
        self.zircon_elf_path = find_file_in_build_dir("zircon.elf", self.build_dirs)
        if not self.zircon_elf_path:
            sys.stderr.write("Symbolize could not find the zircon elf binary. Perhaps you need "
                              "to build zircon or specify the build directory with -b?\n")
            return
        self.zircon_bt = True
        # run_tool() takes the path of the program as its first argument, so
        # resolve the prebuilt nm for this architecture first.
        nm_path = tool_path(self.arch, "nm", None)
        nm_result = run_tool(nm_path, self.zircon_elf_path)
        if not nm_result:
            sys.stderr.write("Failed to run nm on %s\n" % self.zircon_elf_path)
            return
        m = Symbolizer.zircon_nm_codestart.search(nm_result)
        if not m:
            sys.stderr.write("Failed to find __code_start from nm\n")
            return
        self.zircon_code_start = int(m.group(1), 16)

    # In the case of inlined methods, it is more readable if the
    # inlined lines are aligned to be to the right of "=>".
    @staticmethod
    def align_inlined(prefix, s):
        """Return prefix + s with every "(inlined" indented by len(prefix)."""
        return prefix + s.replace("(inlined", (" " * len(prefix)) + "(inlined")

    def process_zircon_bt(self, m):
        """Symbolize one kernel backtrace frame; |m| matches zircon_bt_re.

        The faulting pc, if one was seen, is symbolized once ahead of the
        first frame.  Frames addr2line fails on produce no output line.
        """
        frame_num = m.group(2)
        addr = m.group(3)
        # If we saw the instruction pointer for the fault/panic then use it once
        if self.zircon_pc:
            prefix = "   pc: %s => " % self.zircon_pc
            a2l_out = run_addr2line(self.arch, self.zircon_elf_path, self.zircon_pc)
            # run_addr2line() returns False when the tool fails.
            if a2l_out:
                self.processed_lines.append(Symbolizer.align_inlined(prefix, a2l_out))
            self.zircon_pc = None

        # Adapt offset for KASLR move
        addr = kaslr_offset(addr, self.zircon_code_start, self.bias)

        prefix = "bt#%s: %s => " % (frame_num, addr)
        call_loc = get_call_location(self.arch, self.zircon_elf_path, addr)
        if call_loc:
            self.processed_lines.append(Symbolizer.align_inlined(prefix, call_loc))

    def process_asan_bt(self, m):
        """Symbolize one ASAN backtrace frame; |m| matches asan_bt_re.

        The frame holds an absolute address, so the DSO is chosen as the
        one with the highest base address that is not above the address.
        """
        self.asan_bt = True
        frame_num = m.group(2)
        addr = int(m.group(3), 16)
        offset = None
        dso = None
        for bias, candidate_dso in self.bias_to_name.items():
            if addr >= bias:
                candidate_offset = addr - bias
                if offset is None or candidate_offset < offset:
                    offset = candidate_offset
                    dso = candidate_dso
        if offset is None:
            self.processed_lines.append("#%s: unknown, can't find DSO for addr 0x%x" %
                (frame_num, addr))
            return
        dso_full_path = find_dso_full_path(dso, self.args.app, self.name_to_buildid, self.build_dirs)
        if not dso_full_path:
            self.processed_lines.append("#%s: unknown, can't find full path for %s" %
                (frame_num, dso))
            return

        # Adapt offset for KASLR move
        offset = kaslr_offset(offset, self.zircon_code_start, self.bias)

        prefix = "bt#%s: 0x%x => " % (frame_num, addr)
        call_loc = run_addr2line(self.arch, dso_full_path, "0x%x" % offset)
        if call_loc:
            self.processed_lines.append(Symbolizer.align_inlined(prefix, call_loc))

    def run(self):
        """Read self.args.file until end of file, symbolizing as it goes."""
        while True:
            line = self.args.file.readline()
            # readline() returns an empty string only at end of file.
            end_of_file = (line == '')
            line = line.rstrip()
            if self.args.echo and not self.args.file.isatty():
                writeline(line)

            # A backtrace ends at "bt#N: end", at a prefix-only line while
            # in an ASAN backtrace, or at end of file.
            bt_end = Symbolizer.bt_end_re.match(line)
            self.asan_bt_end = self.asan_bt and Symbolizer.asan_bt_end_re.match(line)
            if bt_end or self.asan_bt_end or end_of_file:
                if len(self.processed_lines) != 0:
                    self.write_processed_lines()
                if end_of_file:
                    break
                else:
                    continue

            m = Symbolizer.arch_re.match(line)
            if m:
                self.update_arch(m)
                continue

            m = Symbolizer.build_id_re.match(line)
            if m:
                self.update_build_id(m)
                continue

            # We didn't see a dso line, so we're done with this list.
            # The next time we see one means we're starting a new list.
            self.done_dso_list = True

            m = Symbolizer.btre.match(line)
            if m and not self.zircon_bt:
                frame_num = m.group(2)
                m = Symbolizer.bt_with_offset_re.match(line)
                if not m:
                    self.processed_lines.append("#%s: unknown, can't find pc, sp or app/library in line" %
                        frame_num)
                    continue
                self.process_bt(m, frame_num)
                continue

            # Zircon Specific Handling
            if Symbolizer.zircon_crash_re.search(line):
                self.update_zircon()
                continue

            m = Symbolizer.zircon_pc_re.search(line)
            if m:
                self.zircon_pc = kaslr_offset(m.group(1), self.zircon_code_start, self.bias)
                continue

            m = Symbolizer.zircon_bt_re.match(line)
            if m and self.zircon_bt:
                self.process_zircon_bt(m)
                continue

            # ASAN Specific Handling
            m = Symbolizer.asan_bt_re.match(line)
            if m and not self.zircon_bt:
                self.process_asan_bt(m)
                continue
584
585
def main():
    """Parse the command line, publish the tool settings, run the symbolizer."""
    global debug_mode, addr2line_tool_path, gdb_tool_path

    args = parse_args()
    # These module-level settings are read by the tool-running helpers.
    debug_mode = args.debug
    addr2line_tool_path = args.addr2line
    gdb_tool_path = args.gdb

    Symbolizer(args).run()


if __name__ == '__main__':
    sys.exit(main())
601