1#!/usr/bin/env python3
2#
3# Copyright (c) 2020 Intel Corporation
4#
5# SPDX-License-Identifier: Apache-2.0
6
7"""
8Dictionary-based Logging Database Generator
9
10This takes the built Zephyr ELF binary and produces a JSON database
11file for dictionary-based logging. This database is used together
12with the parser to decode binary log messages.
13"""
14
15import argparse
16import logging
17import os
18import re
19import string
20import struct
21import sys
22
23import dictionary_parser.log_database
24import elftools
25from dictionary_parser.log_database import LogDatabase
26from dictionary_parser.utils import extract_one_string_in_section, find_string_in_mappings
27from elftools.dwarf.descriptions import describe_DWARF_expr
28from elftools.dwarf.locationlists import LocationExpr, LocationParser
29from elftools.elf.constants import SH_FLAGS
30from elftools.elf.descriptions import describe_ei_data
31from elftools.elf.elffile import ELFFile
32from elftools.elf.sections import SymbolTableSection
33
34LOGGER_FORMAT = "%(name)s: %(levelname)s: %(message)s"
35logger = logging.getLogger(os.path.basename(sys.argv[0]))
36
37
38# Sections that contains static strings
39STATIC_STRING_SECTIONS = [
40    'rodata',
41    '.rodata',
42    'pinned.rodata',
43]
44
45# Sections that contains static strings but are not part of the binary (allocable).
46REMOVED_STRING_SECTIONS = [
47    'log_strings'
48]
49
50
51# Regulation expression to match DWARF location
52DT_LOCATION_REGEX = re.compile(r"\(DW_OP_addr: ([0-9a-f]+)")
53
54
55# Format string for pointers (default for 32-bit pointers)
56PTR_FMT = '0x%08x'
57
58
59# Potential string encodings. Add as needed.
60STR_ENCODINGS = [
61    'ascii',
62    'iso-8859-1',
63]
64
65
66# List of acceptable escape character
67ACCEPTABLE_ESCAPE_CHARS = [
68    b'\r',
69    b'\n',
70]
71
72
73def parse_args():
74    """Parse command line arguments"""
75    argparser = argparse.ArgumentParser(allow_abbrev=False)
76
77    argparser.add_argument("elffile", help="Zephyr ELF binary")
78    argparser.add_argument("--build", help="Build ID")
79    argparser.add_argument("--build-header",
80                           help="Header file containing BUILD_VERSION define")
81    argparser.add_argument("--debug", action="store_true",
82                           help="Print extra debugging information")
83    argparser.add_argument("-v", "--verbose", action="store_true",
84                           help="Print more information")
85
86    outfile_grp = argparser.add_mutually_exclusive_group(required=True)
87    outfile_grp.add_argument("--json",
88                             help="Output Dictionary Logging Database file in JSON")
89    outfile_grp.add_argument("--syst",
90                             help="Output MIPI Sys-T Collateral XML file")
91
92    return argparser.parse_args()
93
94
95def extract_elf_code_data_sections(elf, wildcards = None):
96    """Find all sections in ELF file"""
97    sections = {}
98
99    for sect in elf.iter_sections():
100        # Only Allocated sections with PROGBITS are included
101        # since they actually have code/data.
102        #
103        # On contrary, BSS is allocated but NOBITS.
104        if (((wildcards is not None) and (sect.name in wildcards)) or
105            ((sect['sh_flags'] & SH_FLAGS.SHF_ALLOC) == SH_FLAGS.SHF_ALLOC
106            and sect['sh_type'] == 'SHT_PROGBITS')
107        ):
108            sections[sect.name] = {
109                    'name'    : sect.name,
110                    'size'    : sect['sh_size'],
111                    'start'   : sect['sh_addr'],
112                    'end'     : sect['sh_addr'] + sect['sh_size'] - 1,
113                    'data'    : sect.data(),
114                }
115
116    return sections
117
118
119def find_elf_sections(elf, sh_name):
120    """Find all sections in ELF file"""
121    for section in elf.iter_sections():
122        if section.name == sh_name:
123            ret = {
124                'name'    : section.name,
125                'size'    : section['sh_size'],
126                'start'   : section['sh_addr'],
127                'end'     : section['sh_addr'] + section['sh_size'] - 1,
128                'data'    : section.data(),
129            }
130
131            return ret
132
133    return None
134
135
136def get_kconfig_symbols(elf):
137    """Get kconfig symbols from the ELF file"""
138    for section in elf.iter_sections():
139        if isinstance(section, SymbolTableSection) and section['sh_type'] != 'SHT_DYNSYM':
140            return {sym.name: sym.entry.st_value
141                    for sym in section.iter_symbols()
142                       if sym.name.startswith("CONFIG_")}
143
144    raise LookupError("Could not find symbol table")
145
146
147def find_log_const_symbols(elf):
148    """Extract all "log_const_*" symbols from ELF file"""
149    symbol_tables = [s for s in elf.iter_sections()
150                     if isinstance(s, elftools.elf.sections.SymbolTableSection)]
151
152    ret_list = []
153
154    for section in symbol_tables:
155        if not isinstance(section, elftools.elf.sections.SymbolTableSection):
156            continue
157
158        if section['sh_entsize'] == 0:
159            continue
160
161        for symbol in section.iter_symbols():
162            if symbol.name.startswith("log_const_") or symbol.name.startswith("_log_const_"):
163                ret_list.append(symbol)
164
165    return ret_list
166
167
168def parse_log_const_symbols(database, log_const_area, log_const_symbols, string_mappings):
169    """Find the log instances and map source IDs to names"""
170    if database.is_tgt_little_endian():
171        formatter = "<"
172    else:
173        formatter = ">"
174
175    if database.is_tgt_64bit():
176        # 64-bit pointer to string
177        formatter += "Q"
178    else:
179        # 32-bit pointer to string
180        formatter += "L"
181
182    # log instance level
183    formatter += "B"
184
185    datum_size = struct.calcsize(formatter)
186
187    # Get the address of first log instance
188    first_offset = log_const_symbols[0].entry['st_value']
189    for sym in log_const_symbols:
190        if sym.entry['st_value'] < first_offset:
191            first_offset = sym.entry['st_value']
192
193    first_offset -= log_const_area['start']
194
195    # find all log_const_*
196    for sym in log_const_symbols:
197        # Find data offset in log_const_area for this symbol
198        offset = sym.entry['st_value'] - log_const_area['start']
199
200        idx_s = offset
201        idx_e = offset + datum_size
202
203        datum = log_const_area['data'][idx_s:idx_e]
204
205        if len(datum) != datum_size:
206            # Not enough data to unpack
207            continue
208
209        if sym.entry['st_size'] == 0:
210            # Empty entry
211            continue
212
213        str_ptr, level = struct.unpack(formatter, datum)
214
215        # Offset to rodata section for string
216        instance_name = find_string_in_mappings(string_mappings, str_ptr)
217        if instance_name is None:
218            instance_name = "unknown"
219
220        logger.info("Found Log Instance: %s, level: %d", instance_name, level)
221
222        # source ID is simply the element index in the log instance array
223        source_id = int((offset - first_offset) / sym.entry['st_size'])
224
225        database.add_log_instance(source_id, instance_name, level, sym.entry['st_value'])
226
227
228def extract_elf_information(elf, database):
229    """Extract information from ELF file and store in database"""
230    e_ident = elf.header['e_ident']
231    elf_data = describe_ei_data(e_ident['EI_DATA'])
232
233    if elf_data == elftools.elf.descriptions._DESCR_EI_DATA['ELFDATA2LSB']:
234        database.set_tgt_endianness(LogDatabase.LITTLE_ENDIAN)
235    elif elf_data == elftools.elf.descriptions._DESCR_EI_DATA['ELFDATA2MSB']:
236        database.set_tgt_endianness(LogDatabase.BIG_ENDIAN)
237    else:
238        logger.error("Cannot determine endianness from ELF file, exiting...")
239        sys.exit(1)
240
241
242def process_kconfigs(elf, database):
243    """Process kconfigs to extract information"""
244    kconfigs = get_kconfig_symbols(elf)
245
246    # 32 or 64-bit target
247    database.set_tgt_bits(64 if "CONFIG_64BIT" in kconfigs else 32)
248
249    # Architecture
250    for name, arch in dictionary_parser.log_database.ARCHS.items():
251        if arch['kconfig'] in kconfigs:
252            database.set_arch(name)
253            break
254    else:
255        logger.error("Did not found architecture")
256        sys.exit(1)
257
258    # Put some kconfigs into the database
259    #
260    # Use 32-bit timestamp? or 64-bit?
261    if "CONFIG_LOG_TIMESTAMP_64BIT" in kconfigs:
262        database.add_kconfig("CONFIG_LOG_TIMESTAMP_64BIT",
263                             kconfigs['CONFIG_LOG_TIMESTAMP_64BIT'])
264
265
266def extract_logging_subsys_information(elf, database, string_mappings):
267    """
268    Extract logging subsys related information and store in database.
269
270    For example, this extracts the list of log instances to establish
271    mapping from source ID to name.
272    """
273    # Extract log constant section for module names
274    section_log_const = find_elf_sections(elf, "log_const_area")
275    if section_log_const is None:
276        # ESP32 puts "log_const_*" info log_static_section instead of log_const_areas
277        section_log_const = find_elf_sections(elf, "log_static_section")
278
279    if section_log_const is None:
280        logger.error("Cannot find section 'log_const_areas' in ELF file, exiting...")
281        sys.exit(1)
282
283    # Find all "log_const_*" symbols and parse them
284    log_const_symbols = find_log_const_symbols(elf)
285    parse_log_const_symbols(database, section_log_const, log_const_symbols, string_mappings)
286
287
288def is_die_attr_ref(attr):
289    """
290    Returns True if the DIE attribute is a reference.
291    """
292    return bool(attr.form in ('DW_FORM_ref1', 'DW_FORM_ref2',
293                              'DW_FORM_ref4', 'DW_FORM_ref8',
294                              'DW_FORM_ref'))
295
296
297def find_die_var_base_type(compile_unit, die, is_const):
298    """
299    Finds the base type of a DIE and returns the name.
300    If DW_AT_type is a reference, it will recursively go through
301    the references to find the base type. Returns None is no
302    base type is found.
303    """
304    # DIE is of base type. So extract the name.
305    if die.tag == 'DW_TAG_base_type':
306        return die.attributes['DW_AT_name'].value.decode('ascii'), is_const
307
308    # Not a type, cannot continue
309    if 'DW_AT_type' not in die.attributes:
310        return None, None
311
312    if die.tag == 'DW_TAG_const_type':
313        is_const = True
314
315    # DIE is probably a reference to another.
316    # If so, check if the reference is a base type.
317    type_attr = die.attributes['DW_AT_type']
318    if is_die_attr_ref(type_attr):
319        ref_addr = compile_unit.cu_offset + type_attr.raw_value
320        ref_die = compile_unit.get_DIE_from_refaddr(ref_addr)
321
322        return find_die_var_base_type(compile_unit, ref_die, is_const)
323
324    # Not a base type, and not reference
325    return None, None
326
327
328def is_die_var_const_char(compile_unit, die):
329    """
330    Returns True if DIE of type variable is const char.
331    """
332    var_type, is_const = find_die_var_base_type(compile_unit, die, False)
333
334    return bool(var_type is not None and var_type.endswith('char') and is_const)
335
336
337def extract_string_variables(elf):
338    """
339    Find all string variables (char) in all Compilation Units and
340    Debug information Entry (DIE) in ELF file.
341    """
342    dwarf_info = elf.get_dwarf_info()
343    loc_lists = dwarf_info.location_lists()
344    loc_parser = LocationParser(loc_lists)
345
346    strings = []
347
348    # Loop through all Compilation Units and
349    # Debug information Entry (DIE) to extract all string variables
350    for compile_unit in dwarf_info.iter_CUs():
351        for die in compile_unit.iter_DIEs():
352            # Only care about variables with location information
353            # and of type "char"
354            if die.tag == 'DW_TAG_variable' and ('DW_AT_type' in die.attributes
355                and 'DW_AT_location' in die.attributes
356                and is_die_var_const_char(compile_unit, die)
357            ):
358                # Extract location information, which is
359                # its address in memory.
360                loc_attr = die.attributes['DW_AT_location']
361                if loc_parser.attribute_has_location(loc_attr, die.cu['version']):
362                    loc = loc_parser.parse_from_attribute(loc_attr, die.cu['version'], die)
363                    if isinstance(loc, LocationExpr):
364                        try:
365                            addr = describe_DWARF_expr(loc.loc_expr,
366                                                    dwarf_info.structs)
367
368                            matcher = DT_LOCATION_REGEX.match(addr)
369                            if matcher:
370                                addr = int(matcher.group(1), 16)
371                                if addr > 0:
372                                    strings.append({
373                                        'name': die.attributes['DW_AT_name'].value,
374                                        'addr': addr,
375                                        'die': die
376                                    })
377                        except KeyError:
378                            pass
379
380    return strings
381
382def try_decode_string(str_maybe):
383    """Check if it is a printable string"""
384    for encoding in STR_ENCODINGS:
385        try:
386            return str_maybe.decode(encoding)
387        except UnicodeDecodeError:
388            pass
389
390    return None
391
392def is_printable(b):
393    # Check if string is printable according to Python
394    # since the parser (written in Python) will need to
395    # print the string.
396    #
397    # Note that '\r' and '\n' are not included in
398    # string.printable so they need to be checked separately.
399    return (b in string.printable) or (b in ACCEPTABLE_ESCAPE_CHARS)
400
401def extract_strings_in_one_section(section, str_mappings):
402    """Extract NULL-terminated strings in one ELF section"""
403    data = section['data']
404    start = None
405    for idx, x in enumerate(data):
406        if is_printable(chr(x)):
407            # Printable character, potential part of string
408            if start is None:
409                # Beginning of potential string
410                start = idx
411        elif x == 0:
412            # End of possible string
413            if start is not None:
414                # Found potential string
415                str_maybe = data[start : idx]
416                decoded_str = try_decode_string(str_maybe)
417
418                if decoded_str is not None:
419                    addr = section['start'] + start
420
421                    if addr not in str_mappings:
422                        str_mappings[addr] = decoded_str
423
424                        # Decoded string may contain un-printable characters
425                        # (e.g. extended ASC-II characters) or control
426                        # characters (e.g. '\r' or '\n'), so simply print
427                        # the byte string instead.
428                        logger.debug('Found string via extraction at ' + PTR_FMT + ': %s',
429                                     addr, str_maybe)
430
431                        # GCC-based toolchain will reuse the NULL character
432                        # for empty strings. There is no way to know which
433                        # one is being reused, so just treat all NULL character
434                        # at the end of legitimate strings as empty strings.
435                        null_addr = section['start'] + idx
436                        str_mappings[null_addr] = ''
437
438                        logger.debug('Found null string via extraction at ' + PTR_FMT,
439                                     null_addr)
440                start = None
441        else:
442            # Non-printable byte, remove start location
443            start = None
444
445    return str_mappings
446
447
448def extract_static_strings(elf, database, section_extraction=False):
449    """
450    Extract static strings from ELF file using DWARF,
451    and also extraction from binary data.
452    """
453    string_mappings = {}
454
455    elf_sections = extract_elf_code_data_sections(elf, REMOVED_STRING_SECTIONS)
456
457    # Extract strings using ELF DWARF information
458    str_vars = extract_string_variables(elf)
459    for str_var in str_vars:
460        for _, sect in elf_sections.items():
461            one_str = extract_one_string_in_section(sect, str_var['addr'])
462            if one_str is not None:
463                string_mappings[str_var['addr']] = one_str
464                logger.debug('Found string variable at ' + PTR_FMT + ': %s',
465                             str_var['addr'], one_str)
466                break
467
468    if section_extraction:
469        # Extract strings from ELF sections
470        string_sections = STATIC_STRING_SECTIONS
471        rawstr_map = {}
472
473        # Some architectures may put static strings into additional sections.
474        # So need to extract them too.
475        arch_data = dictionary_parser.log_database.ARCHS[database.get_arch()]
476        if "extra_string_section" in arch_data:
477            string_sections.extend(arch_data['extra_string_section'])
478
479        for sect_name in string_sections:
480            if sect_name in elf_sections:
481                rawstr_map = extract_strings_in_one_section(elf_sections[sect_name],
482                                                                rawstr_map)
483
484        for one_str in rawstr_map:
485            if one_str not in string_mappings:
486                string_mappings[one_str] = rawstr_map[one_str]
487
488    return string_mappings
489
490
491def main():
492    """Main function of database generator"""
493    args = parse_args()
494
495    # Setup logging
496    logging.basicConfig(format=LOGGER_FORMAT, level=logging.WARNING)
497    if args.debug:
498        logger.setLevel(logging.DEBUG)
499    elif args.verbose:
500        logger.setLevel(logging.INFO)
501
502    elffile = open(args.elffile, "rb")  # noqa: SIM115
503    if not elffile:
504        logger.error("ERROR: Cannot open ELF file: %s, exiting...", args.elffile)
505        sys.exit(1)
506
507    logger.info("ELF file %s", args.elffile)
508
509    if args.json:
510        logger.info("JSON Database file %s", args.json)
511        section_extraction = True
512
513    if args.syst:
514        logger.info("MIPI Sys-T Collateral file %s", args.syst)
515        section_extraction = False
516
517    elf = ELFFile(elffile)
518
519    database = LogDatabase()
520
521    if args.build_header:
522        with open(args.build_header) as f:
523            for line in f:
524                match = re.match(r'\s*#define\s+BUILD_VERSION\s+(.*)', line)
525                if match:
526                    database.set_build_id(match.group(1))
527                    break
528
529    if args.build:
530        database.set_build_id(args.build)
531        logger.info("Build ID: %s", args.build)
532
533    extract_elf_information(elf, database)
534
535    process_kconfigs(elf, database)
536
537    logger.info("Target: %s, %d-bit", database.get_arch(), database.get_tgt_bits())
538    if database.is_tgt_little_endian():
539        logger.info("Endianness: Little")
540    else:
541        logger.info("Endianness: Big")
542
543    if database.is_tgt_64bit():
544        global PTR_FMT
545        PTR_FMT = '0x%016x'
546
547    # Extract strings from ELF files
548    string_mappings = extract_static_strings(elf, database, section_extraction)
549    if len(string_mappings) > 0:
550        database.set_string_mappings(string_mappings)
551        logger.info("Found %d strings", len(string_mappings))
552
553    # Extract information related to logging subsystem
554    if not section_extraction:
555        # The logging subsys information (e.g. log module names)
556        # may require additional strings outside of those extracted
557        # via ELF DWARF variables. So generate a new string mappings
558        # with strings in various ELF sections.
559        string_mappings = extract_static_strings(elf, database, section_extraction=True)
560
561    extract_logging_subsys_information(elf, database, string_mappings)
562
563    # Write database file
564    if args.json and not LogDatabase.write_json_database(args.json, database):
565        logger.error("ERROR: Cannot open database file for write: %s, exiting...",
566                     args.json)
567        sys.exit(1)
568
569    if args.syst and not LogDatabase.write_syst_database(args.syst, database):
570        logger.error("ERROR: Cannot open database file for write: %s, exiting...",
571                     args.syst)
572        sys.exit(1)
573
574    elffile.close()
575
576
577if __name__ == "__main__":
578    main()
579