1#!/usr/bin/env python3 2# 3# Copyright (c) 2020 Intel Corporation 4# 5# SPDX-License-Identifier: Apache-2.0 6 7""" 8Dictionary-based Logging Database Generator 9 10This takes the built Zephyr ELF binary and produces a JSON database 11file for dictionary-based logging. This database is used together 12with the parser to decode binary log messages. 13""" 14 15import argparse 16import logging 17import os 18import re 19import string 20import struct 21import sys 22 23import dictionary_parser.log_database 24import elftools 25from dictionary_parser.log_database import LogDatabase 26from dictionary_parser.utils import extract_one_string_in_section, find_string_in_mappings 27from elftools.dwarf.descriptions import describe_DWARF_expr 28from elftools.dwarf.locationlists import LocationExpr, LocationParser 29from elftools.elf.constants import SH_FLAGS 30from elftools.elf.descriptions import describe_ei_data 31from elftools.elf.elffile import ELFFile 32from elftools.elf.sections import SymbolTableSection 33 34LOGGER_FORMAT = "%(name)s: %(levelname)s: %(message)s" 35logger = logging.getLogger(os.path.basename(sys.argv[0])) 36 37 38# Sections that contains static strings 39STATIC_STRING_SECTIONS = [ 40 'rodata', 41 '.rodata', 42 'pinned.rodata', 43] 44 45# Sections that contains static strings but are not part of the binary (allocable). 46REMOVED_STRING_SECTIONS = [ 47 'log_strings' 48] 49 50 51# Regulation expression to match DWARF location 52DT_LOCATION_REGEX = re.compile(r"\(DW_OP_addr: ([0-9a-f]+)") 53 54 55# Format string for pointers (default for 32-bit pointers) 56PTR_FMT = '0x%08x' 57 58 59# Potential string encodings. Add as needed. 60STR_ENCODINGS = [ 61 'ascii', 62 'iso-8859-1', 63] 64 65 66# List of acceptable escape character 67ACCEPTABLE_ESCAPE_CHARS = [ 68 b'\r', 69 b'\n', 70] 71 72 73def parse_args(): 74 """Parse command line arguments""" 75 argparser = argparse.ArgumentParser(allow_abbrev=False) 76 77 argparser.add_argument("elffile", help="Zephyr ELF binary") 78 argparser.add_argument("--build", help="Build ID") 79 argparser.add_argument("--build-header", 80 help="Header file containing BUILD_VERSION define") 81 argparser.add_argument("--debug", action="store_true", 82 help="Print extra debugging information") 83 argparser.add_argument("-v", "--verbose", action="store_true", 84 help="Print more information") 85 86 outfile_grp = argparser.add_mutually_exclusive_group(required=True) 87 outfile_grp.add_argument("--json", 88 help="Output Dictionary Logging Database file in JSON") 89 outfile_grp.add_argument("--syst", 90 help="Output MIPI Sys-T Collateral XML file") 91 92 return argparser.parse_args() 93 94 95def extract_elf_code_data_sections(elf, wildcards = None): 96 """Find all sections in ELF file""" 97 sections = {} 98 99 for sect in elf.iter_sections(): 100 # Only Allocated sections with PROGBITS are included 101 # since they actually have code/data. 102 # 103 # On contrary, BSS is allocated but NOBITS. 104 if (((wildcards is not None) and (sect.name in wildcards)) or 105 ((sect['sh_flags'] & SH_FLAGS.SHF_ALLOC) == SH_FLAGS.SHF_ALLOC 106 and sect['sh_type'] == 'SHT_PROGBITS') 107 ): 108 sections[sect.name] = { 109 'name' : sect.name, 110 'size' : sect['sh_size'], 111 'start' : sect['sh_addr'], 112 'end' : sect['sh_addr'] + sect['sh_size'] - 1, 113 'data' : sect.data(), 114 } 115 116 return sections 117 118 119def find_elf_sections(elf, sh_name): 120 """Find all sections in ELF file""" 121 for section in elf.iter_sections(): 122 if section.name == sh_name: 123 ret = { 124 'name' : section.name, 125 'size' : section['sh_size'], 126 'start' : section['sh_addr'], 127 'end' : section['sh_addr'] + section['sh_size'] - 1, 128 'data' : section.data(), 129 } 130 131 return ret 132 133 return None 134 135 136def get_kconfig_symbols(elf): 137 """Get kconfig symbols from the ELF file""" 138 for section in elf.iter_sections(): 139 if isinstance(section, SymbolTableSection) and section['sh_type'] != 'SHT_DYNSYM': 140 return {sym.name: sym.entry.st_value 141 for sym in section.iter_symbols() 142 if sym.name.startswith("CONFIG_")} 143 144 raise LookupError("Could not find symbol table") 145 146 147def find_log_const_symbols(elf): 148 """Extract all "log_const_*" symbols from ELF file""" 149 symbol_tables = [s for s in elf.iter_sections() 150 if isinstance(s, elftools.elf.sections.SymbolTableSection)] 151 152 ret_list = [] 153 154 for section in symbol_tables: 155 if not isinstance(section, elftools.elf.sections.SymbolTableSection): 156 continue 157 158 if section['sh_entsize'] == 0: 159 continue 160 161 for symbol in section.iter_symbols(): 162 if symbol.name.startswith("log_const_") or symbol.name.startswith("_log_const_"): 163 ret_list.append(symbol) 164 165 return ret_list 166 167 168def parse_log_const_symbols(database, log_const_area, log_const_symbols, string_mappings): 169 """Find the log instances and map source IDs to names""" 170 if database.is_tgt_little_endian(): 171 formatter = "<" 172 else: 173 formatter = ">" 174 175 if database.is_tgt_64bit(): 176 # 64-bit pointer to string 177 formatter += "Q" 178 else: 179 # 32-bit pointer to string 180 formatter += "L" 181 182 # log instance level 183 formatter += "B" 184 185 datum_size = struct.calcsize(formatter) 186 187 # Get the address of first log instance 188 first_offset = log_const_symbols[0].entry['st_value'] 189 for sym in log_const_symbols: 190 if sym.entry['st_value'] < first_offset: 191 first_offset = sym.entry['st_value'] 192 193 first_offset -= log_const_area['start'] 194 195 # find all log_const_* 196 for sym in log_const_symbols: 197 # Find data offset in log_const_area for this symbol 198 offset = sym.entry['st_value'] - log_const_area['start'] 199 200 idx_s = offset 201 idx_e = offset + datum_size 202 203 datum = log_const_area['data'][idx_s:idx_e] 204 205 if len(datum) != datum_size: 206 # Not enough data to unpack 207 continue 208 209 if sym.entry['st_size'] == 0: 210 # Empty entry 211 continue 212 213 str_ptr, level = struct.unpack(formatter, datum) 214 215 # Offset to rodata section for string 216 instance_name = find_string_in_mappings(string_mappings, str_ptr) 217 if instance_name is None: 218 instance_name = "unknown" 219 220 logger.info("Found Log Instance: %s, level: %d", instance_name, level) 221 222 # source ID is simply the element index in the log instance array 223 source_id = int((offset - first_offset) / sym.entry['st_size']) 224 225 database.add_log_instance(source_id, instance_name, level, sym.entry['st_value']) 226 227 228def extract_elf_information(elf, database): 229 """Extract information from ELF file and store in database""" 230 e_ident = elf.header['e_ident'] 231 elf_data = describe_ei_data(e_ident['EI_DATA']) 232 233 if elf_data == elftools.elf.descriptions._DESCR_EI_DATA['ELFDATA2LSB']: 234 database.set_tgt_endianness(LogDatabase.LITTLE_ENDIAN) 235 elif elf_data == elftools.elf.descriptions._DESCR_EI_DATA['ELFDATA2MSB']: 236 database.set_tgt_endianness(LogDatabase.BIG_ENDIAN) 237 else: 238 logger.error("Cannot determine endianness from ELF file, exiting...") 239 sys.exit(1) 240 241 242def process_kconfigs(elf, database): 243 """Process kconfigs to extract information""" 244 kconfigs = get_kconfig_symbols(elf) 245 246 # 32 or 64-bit target 247 database.set_tgt_bits(64 if "CONFIG_64BIT" in kconfigs else 32) 248 249 # Architecture 250 for name, arch in dictionary_parser.log_database.ARCHS.items(): 251 if arch['kconfig'] in kconfigs: 252 database.set_arch(name) 253 break 254 else: 255 logger.error("Did not found architecture") 256 sys.exit(1) 257 258 # Put some kconfigs into the database 259 # 260 # Use 32-bit timestamp? or 64-bit? 261 if "CONFIG_LOG_TIMESTAMP_64BIT" in kconfigs: 262 database.add_kconfig("CONFIG_LOG_TIMESTAMP_64BIT", 263 kconfigs['CONFIG_LOG_TIMESTAMP_64BIT']) 264 265 266def extract_logging_subsys_information(elf, database, string_mappings): 267 """ 268 Extract logging subsys related information and store in database. 269 270 For example, this extracts the list of log instances to establish 271 mapping from source ID to name. 272 """ 273 # Extract log constant section for module names 274 section_log_const = find_elf_sections(elf, "log_const_area") 275 if section_log_const is None: 276 # ESP32 puts "log_const_*" info log_static_section instead of log_const_areas 277 section_log_const = find_elf_sections(elf, "log_static_section") 278 279 if section_log_const is None: 280 logger.error("Cannot find section 'log_const_areas' in ELF file, exiting...") 281 sys.exit(1) 282 283 # Find all "log_const_*" symbols and parse them 284 log_const_symbols = find_log_const_symbols(elf) 285 parse_log_const_symbols(database, section_log_const, log_const_symbols, string_mappings) 286 287 288def is_die_attr_ref(attr): 289 """ 290 Returns True if the DIE attribute is a reference. 291 """ 292 return bool(attr.form in ('DW_FORM_ref1', 'DW_FORM_ref2', 293 'DW_FORM_ref4', 'DW_FORM_ref8', 294 'DW_FORM_ref')) 295 296 297def find_die_var_base_type(compile_unit, die, is_const): 298 """ 299 Finds the base type of a DIE and returns the name. 300 If DW_AT_type is a reference, it will recursively go through 301 the references to find the base type. Returns None is no 302 base type is found. 303 """ 304 # DIE is of base type. So extract the name. 305 if die.tag == 'DW_TAG_base_type': 306 return die.attributes['DW_AT_name'].value.decode('ascii'), is_const 307 308 # Not a type, cannot continue 309 if 'DW_AT_type' not in die.attributes: 310 return None, None 311 312 if die.tag == 'DW_TAG_const_type': 313 is_const = True 314 315 # DIE is probably a reference to another. 316 # If so, check if the reference is a base type. 317 type_attr = die.attributes['DW_AT_type'] 318 if is_die_attr_ref(type_attr): 319 ref_addr = compile_unit.cu_offset + type_attr.raw_value 320 ref_die = compile_unit.get_DIE_from_refaddr(ref_addr) 321 322 return find_die_var_base_type(compile_unit, ref_die, is_const) 323 324 # Not a base type, and not reference 325 return None, None 326 327 328def is_die_var_const_char(compile_unit, die): 329 """ 330 Returns True if DIE of type variable is const char. 331 """ 332 var_type, is_const = find_die_var_base_type(compile_unit, die, False) 333 334 return bool(var_type is not None and var_type.endswith('char') and is_const) 335 336 337def extract_string_variables(elf): 338 """ 339 Find all string variables (char) in all Compilation Units and 340 Debug information Entry (DIE) in ELF file. 341 """ 342 dwarf_info = elf.get_dwarf_info() 343 loc_lists = dwarf_info.location_lists() 344 loc_parser = LocationParser(loc_lists) 345 346 strings = [] 347 348 # Loop through all Compilation Units and 349 # Debug information Entry (DIE) to extract all string variables 350 for compile_unit in dwarf_info.iter_CUs(): 351 for die in compile_unit.iter_DIEs(): 352 # Only care about variables with location information 353 # and of type "char" 354 if die.tag == 'DW_TAG_variable' and ('DW_AT_type' in die.attributes 355 and 'DW_AT_location' in die.attributes 356 and is_die_var_const_char(compile_unit, die) 357 ): 358 # Extract location information, which is 359 # its address in memory. 360 loc_attr = die.attributes['DW_AT_location'] 361 if loc_parser.attribute_has_location(loc_attr, die.cu['version']): 362 loc = loc_parser.parse_from_attribute(loc_attr, die.cu['version'], die) 363 if isinstance(loc, LocationExpr): 364 try: 365 addr = describe_DWARF_expr(loc.loc_expr, 366 dwarf_info.structs) 367 368 matcher = DT_LOCATION_REGEX.match(addr) 369 if matcher: 370 addr = int(matcher.group(1), 16) 371 if addr > 0: 372 strings.append({ 373 'name': die.attributes['DW_AT_name'].value, 374 'addr': addr, 375 'die': die 376 }) 377 except KeyError: 378 pass 379 380 return strings 381 382def try_decode_string(str_maybe): 383 """Check if it is a printable string""" 384 for encoding in STR_ENCODINGS: 385 try: 386 return str_maybe.decode(encoding) 387 except UnicodeDecodeError: 388 pass 389 390 return None 391 392def is_printable(b): 393 # Check if string is printable according to Python 394 # since the parser (written in Python) will need to 395 # print the string. 396 # 397 # Note that '\r' and '\n' are not included in 398 # string.printable so they need to be checked separately. 399 return (b in string.printable) or (b in ACCEPTABLE_ESCAPE_CHARS) 400 401def extract_strings_in_one_section(section, str_mappings): 402 """Extract NULL-terminated strings in one ELF section""" 403 data = section['data'] 404 start = None 405 for idx, x in enumerate(data): 406 if is_printable(chr(x)): 407 # Printable character, potential part of string 408 if start is None: 409 # Beginning of potential string 410 start = idx 411 elif x == 0: 412 # End of possible string 413 if start is not None: 414 # Found potential string 415 str_maybe = data[start : idx] 416 decoded_str = try_decode_string(str_maybe) 417 418 if decoded_str is not None: 419 addr = section['start'] + start 420 421 if addr not in str_mappings: 422 str_mappings[addr] = decoded_str 423 424 # Decoded string may contain un-printable characters 425 # (e.g. extended ASC-II characters) or control 426 # characters (e.g. '\r' or '\n'), so simply print 427 # the byte string instead. 428 logger.debug('Found string via extraction at ' + PTR_FMT + ': %s', 429 addr, str_maybe) 430 431 # GCC-based toolchain will reuse the NULL character 432 # for empty strings. There is no way to know which 433 # one is being reused, so just treat all NULL character 434 # at the end of legitimate strings as empty strings. 435 null_addr = section['start'] + idx 436 str_mappings[null_addr] = '' 437 438 logger.debug('Found null string via extraction at ' + PTR_FMT, 439 null_addr) 440 start = None 441 else: 442 # Non-printable byte, remove start location 443 start = None 444 445 return str_mappings 446 447 448def extract_static_strings(elf, database, section_extraction=False): 449 """ 450 Extract static strings from ELF file using DWARF, 451 and also extraction from binary data. 452 """ 453 string_mappings = {} 454 455 elf_sections = extract_elf_code_data_sections(elf, REMOVED_STRING_SECTIONS) 456 457 # Extract strings using ELF DWARF information 458 str_vars = extract_string_variables(elf) 459 for str_var in str_vars: 460 for _, sect in elf_sections.items(): 461 one_str = extract_one_string_in_section(sect, str_var['addr']) 462 if one_str is not None: 463 string_mappings[str_var['addr']] = one_str 464 logger.debug('Found string variable at ' + PTR_FMT + ': %s', 465 str_var['addr'], one_str) 466 break 467 468 if section_extraction: 469 # Extract strings from ELF sections 470 string_sections = STATIC_STRING_SECTIONS 471 rawstr_map = {} 472 473 # Some architectures may put static strings into additional sections. 474 # So need to extract them too. 475 arch_data = dictionary_parser.log_database.ARCHS[database.get_arch()] 476 if "extra_string_section" in arch_data: 477 string_sections.extend(arch_data['extra_string_section']) 478 479 for sect_name in string_sections: 480 if sect_name in elf_sections: 481 rawstr_map = extract_strings_in_one_section(elf_sections[sect_name], 482 rawstr_map) 483 484 for one_str in rawstr_map: 485 if one_str not in string_mappings: 486 string_mappings[one_str] = rawstr_map[one_str] 487 488 return string_mappings 489 490 491def main(): 492 """Main function of database generator""" 493 args = parse_args() 494 495 # Setup logging 496 logging.basicConfig(format=LOGGER_FORMAT, level=logging.WARNING) 497 if args.debug: 498 logger.setLevel(logging.DEBUG) 499 elif args.verbose: 500 logger.setLevel(logging.INFO) 501 502 elffile = open(args.elffile, "rb") # noqa: SIM115 503 if not elffile: 504 logger.error("ERROR: Cannot open ELF file: %s, exiting...", args.elffile) 505 sys.exit(1) 506 507 logger.info("ELF file %s", args.elffile) 508 509 if args.json: 510 logger.info("JSON Database file %s", args.json) 511 section_extraction = True 512 513 if args.syst: 514 logger.info("MIPI Sys-T Collateral file %s", args.syst) 515 section_extraction = False 516 517 elf = ELFFile(elffile) 518 519 database = LogDatabase() 520 521 if args.build_header: 522 with open(args.build_header) as f: 523 for line in f: 524 match = re.match(r'\s*#define\s+BUILD_VERSION\s+(.*)', line) 525 if match: 526 database.set_build_id(match.group(1)) 527 break 528 529 if args.build: 530 database.set_build_id(args.build) 531 logger.info("Build ID: %s", args.build) 532 533 extract_elf_information(elf, database) 534 535 process_kconfigs(elf, database) 536 537 logger.info("Target: %s, %d-bit", database.get_arch(), database.get_tgt_bits()) 538 if database.is_tgt_little_endian(): 539 logger.info("Endianness: Little") 540 else: 541 logger.info("Endianness: Big") 542 543 if database.is_tgt_64bit(): 544 global PTR_FMT 545 PTR_FMT = '0x%016x' 546 547 # Extract strings from ELF files 548 string_mappings = extract_static_strings(elf, database, section_extraction) 549 if len(string_mappings) > 0: 550 database.set_string_mappings(string_mappings) 551 logger.info("Found %d strings", len(string_mappings)) 552 553 # Extract information related to logging subsystem 554 if not section_extraction: 555 # The logging subsys information (e.g. log module names) 556 # may require additional strings outside of those extracted 557 # via ELF DWARF variables. So generate a new string mappings 558 # with strings in various ELF sections. 559 string_mappings = extract_static_strings(elf, database, section_extraction=True) 560 561 extract_logging_subsys_information(elf, database, string_mappings) 562 563 # Write database file 564 if args.json and not LogDatabase.write_json_database(args.json, database): 565 logger.error("ERROR: Cannot open database file for write: %s, exiting...", 566 args.json) 567 sys.exit(1) 568 569 if args.syst and not LogDatabase.write_syst_database(args.syst, database): 570 logger.error("ERROR: Cannot open database file for write: %s, exiting...", 571 args.syst) 572 sys.exit(1) 573 574 elffile.close() 575 576 577if __name__ == "__main__": 578 main() 579