# SPDX-License-Identifier: GPL-2.0+
# Copyright (c) 2016 Google, Inc
# Written by Simon Glass <sjg@chromium.org>
#
# Handle various things related to ELF images
#

from collections import namedtuple, OrderedDict
import io
import os
import re
import shutil
import struct
import tempfile

from u_boot_pylib import command
from u_boot_pylib import tools
from u_boot_pylib import tout

ELF_TOOLS = True
try:
    from elftools.elf.elffile import ELFFile
    from elftools.elf.elffile import ELFError
    from elftools.elf.sections import SymbolTableSection
except ImportError:  # pragma: no cover
    ELF_TOOLS = False

# BSYM in little endian, keep in sync with include/binman_sym.h
BINMAN_SYM_MAGIC_VALUE = 0x4d595342

# Information about an ELF symbol:
# section (str): Name of the section containing this symbol
# address (int): Address of the symbol (its value)
# size (int): Size of the symbol in bytes
# weak (bool): True if the symbol is weak
# offset (int or None): Offset of the symbol's data in the ELF file, or None if
#   not known
Symbol = namedtuple('Symbol', ['section', 'address', 'size', 'weak', 'offset'])

# Information about an ELF file:
#    data: Extracted program contents of ELF file (this would be loaded by an
#           ELF loader when reading this file)
#    load: Load address of code
#    entry: Entry address of code
#    memsize: Number of bytes in memory occupied by loading this ELF file
ElfInfo = namedtuple('ElfInfo', ['data', 'load', 'entry', 'memsize'])


def GetSymbols(fname, patterns):
    """Get the symbols from an ELF file, using 'objdump -t'

    Args:
        fname: Filename of the ELF file to read
        patterns: List of regex patterns to search for, each a string. If
            None or empty, every symbol line is accepted

    Returns:
        OrderedDict, sorted by symbol address:
            key: Name of symbol
            value: Symbol object (the 'offset' member is always None)
    """
    stdout = tools.run('objdump', '-t', fname)
    lines = stdout.splitlines()
    if patterns:
        re_syms = re.compile('|'.join(patterns))
    else:
        re_syms = None
    syms = {}
    syms_started = False
    for line in lines:
        # Skip everything up to and including the 'SYMBOL TABLE' heading, as
        # well as any blank lines
        if not line or not syms_started:
            if 'SYMBOL TABLE' in line:
                syms_started = True
            line = None  # Otherwise code coverage complains about 'continue'
            continue
        if re_syms and not re_syms.search(line):
            continue

        # Each line is: <value> <7 flag characters> <section> <size> [.hidden]
        # <name>
        space_pos = line.find(' ')
        value, rest = line[:space_pos], line[space_pos + 1:]
        flags = rest[:7]
        parts = rest[7:].split()
        section, size = parts[:2]
        if len(parts) > 2:
            name = parts[2] if parts[2] != '.hidden' else parts[3]
            syms[name] = Symbol(section, int(value, 16), int(size, 16),
                                flags[1] == 'w', None)

    # Sort dict by address
    return OrderedDict(sorted(syms.items(), key=lambda x: x[1].address))


def _GetFileOffset(elf, addr):
    """Get the file offset for an address

    Args:
        elf (ELFFile): ELF file to check
        addr (int): Address to search for

    Returns:
        int: Offset of that address in the ELF file, or None if the address
            is not within the file-backed part of any PT_LOAD segment
    """
    for seg in elf.iter_segments():
        seg_end = seg['p_vaddr'] + seg['p_filesz']
        if seg.header['p_type'] == 'PT_LOAD':
            if addr >= seg['p_vaddr'] and addr < seg_end:
                return addr - seg['p_vaddr'] + seg['p_offset']
    return None


def GetFileOffset(fname, addr):
    """Get the file offset for an address

    Args:
        fname (str): Filename of ELF file to check
        addr (int): Address to search for

    Returns:
        int: Offset of that address in the ELF file, or None if not valid

    Raises:
        ValueError: elftools is not available
    """
    if not ELF_TOOLS:
        raise ValueError("Python: No module named 'elftools'")
    with open(fname, 'rb') as fd:
        elf = ELFFile(fd)
        return _GetFileOffset(elf, addr)


def GetSymbolFromAddress(fname, addr):
    """Get the symbol at a particular address

    Args:
        fname (str): Filename of ELF file to check
        addr (int): Address to search for

    Returns:
        str: Symbol name, or None if no symbol at that address

    Raises:
        ValueError: elftools is not available
    """
    if not ELF_TOOLS:
        raise ValueError("Python: No module named 'elftools'")
    with open(fname, 'rb') as fd:
        # The file is parsed with elftools as in the original code; the
        # parsed object is not used further since the symbols themselves
        # come from GetSymbols() below
        ELFFile(fd)
        syms = GetSymbols(fname, None)
    for name, sym in syms.items():
        if sym.address == addr:
            return name
    return None


def GetSymbolFileOffset(fname, patterns):
    """Get the symbols from an ELF file, including their file offsets

    Args:
        fname: Filename of the ELF file to read
        patterns: List of regex patterns to search for, each a string. If
            None or empty, all symbols are returned

    Returns:
        OrderedDict, sorted by symbol address:
            key: Name of symbol
            value: Symbol object, with 'offset' set to the file offset of the
                symbol's address (None if it is not in a loadable segment)

    Raises:
        ValueError: elftools is not available
    """
    if not ELF_TOOLS:
        raise ValueError("Python: No module named 'elftools'")

    syms = {}
    with open(fname, 'rb') as fd:
        elf = ELFFile(fd)

        # An empty pattern list previously compiled to '', which matches
        # everything; None previously crashed in join(). Treat both as 'no
        # filter'
        re_syms = re.compile('|'.join(patterns)) if patterns else None
        for section in elf.iter_sections():
            if isinstance(section, SymbolTableSection):
                for symbol in section.iter_symbols():
                    if not re_syms or re_syms.search(symbol.name):
                        addr = symbol.entry['st_value']
                        syms[symbol.name] = Symbol(
                            section.name, addr, symbol.entry['st_size'],
                            symbol.entry['st_info']['bind'] == 'STB_WEAK',
                            _GetFileOffset(elf, addr))

    # Sort dict by address
    return OrderedDict(sorted(syms.items(), key=lambda x: x[1].address))


def GetSymbolAddress(fname, sym_name):
    """Get a value of a symbol from an ELF file

    Args:
        fname: Filename of the ELF file to read
        sym_name: Name of the symbol to look up. It is passed to GetSymbols()
            as a regex pattern and then looked up by exact name

    Returns:
        Symbol value (as an integer) or None if not found
    """
    syms = GetSymbols(fname, [sym_name])
    sym = syms.get(sym_name)
    if not sym:
        return None
    return sym.address


def GetPackString(sym, msg):
    """Get the struct.pack/unpack string to use with a given symbol

    Args:
        sym (Symbol): Symbol to check. Only the size member is checked
        msg (str): String which indicates the entry being processed, used for
            errors

    Returns:
        str: struct string to use, e.g. '<I'

    Raises:
        ValueError: Symbol has an unexpected size
    """
    if sym.size == 4:
        return '<I'
    elif sym.size == 8:
        return '<Q'
    else:
        raise ValueError('%s has size %d: only 4 and 8 are supported' %
                         (msg, sym.size))


def GetSymbolOffset(elf_fname, sym_name, base_sym=None):
    """Read the offset of a symbol compared to base symbol

    This is useful for obtaining the value of a single symbol relative to the
    base of a binary blob.

    Args:
        elf_fname: Filename of the ELF file to read
        sym_name (str): Name of symbol to read
        base_sym (str): Base symbol to use to calculate the offset (or None to
            use '__image_copy_start')

    Returns:
        int: Offset of the symbol relative to the base symbol
    """
    if not base_sym:
        base_sym = '__image_copy_start'
    fname = tools.get_input_filename(elf_fname)
    syms = GetSymbols(fname, [base_sym, sym_name])
    base = syms[base_sym].address
    val = syms[sym_name].address
    return val - base


def LookupAndWriteSymbols(elf_fname, entry, section, is_elf=False,
                          base_sym=None, base_addr=None):
    """Replace all symbols in an entry with their correct values

    The entry contents is updated so that values for referenced symbols will be
    visible at run time. This is done by finding out the symbols offsets in the
    entry (using the ELF file) and replacing them with values from binman's data
    structures.

    Args:
        elf_fname: Filename of ELF image containing the symbol information for
            entry
        entry: Entry to process
        section: Section which can be used to lookup symbol values
        is_elf (bool): True if the entry contents is an ELF file, in which
            case each symbol is written at its offset within the ELF file;
            False if it is a blob, in which case the offset is the symbol
            address minus the address of base_sym
        base_sym: Base symbol marking the start of the image (__image_copy_start
            by default)
        base_addr (int): Base address to use for the entry being written. If
            None then the value of base_sym is used (or 0 if is_elf)

    Returns:
        int: Number of symbols written

    Raises:
        ValueError: is_elf is True but elftools is not available, a symbol
            lies outside the entry contents (blob case only), or a symbol
            has an unsupported size
    """
    if not base_sym:
        base_sym = '__image_copy_start'
    fname = tools.get_input_filename(elf_fname)
    syms = GetSymbols(fname, ['image', 'binman'])
    if is_elf:
        if not ELF_TOOLS:
            msg = ("Section '%s': entry '%s'" %
                   (section.GetPath(), entry.GetPath()))
            raise ValueError(f'{msg}: Cannot write symbols to an ELF file without Python elftools')
        # GetSymbols() does not know file offsets, so add them here
        new_syms = {}
        with open(fname, 'rb') as fd:
            elf = ELFFile(fd)
            for name, sym in syms.items():
                offset = _GetFileOffset(elf, sym.address)
                new_syms[name] = Symbol(sym.section, sym.address, sym.size,
                                        sym.weak, offset)
        syms = new_syms

    if not syms:
        tout.debug('LookupAndWriteSymbols: no syms')
        return 0
    base = syms.get(base_sym)
    if not base and not is_elf:
        tout.debug(f'LookupAndWriteSymbols: no base: elf_fname={elf_fname}, base_sym={base_sym}, is_elf={is_elf}')
        return 0
    if base_addr is None:
        base_addr = 0 if is_elf else base.address
    count = 0
    for name, sym in syms.items():
        if name.startswith('_binman'):
            msg = ("Section '%s': Symbol '%s'\n in entry '%s'" %
                   (section.GetPath(), name, entry.GetPath()))
            if is_elf:
                # For ELF files, use the file offset
                offset = sym.offset
            else:
                # For blobs use the offset of the symbol, calculated by
                # subtracting the base address which by definition is at the
                # start
                offset = sym.address - base.address
                if offset < 0 or offset + sym.size > entry.contents_size:
                    raise ValueError('%s has offset %x (size %x) but the contents '
                                     'size is %x' % (entry.GetPath(), offset,
                                                     sym.size,
                                                     entry.contents_size))
            pack_string = GetPackString(sym, msg)
            if name == '_binman_sym_magic':
                value = BINMAN_SYM_MAGIC_VALUE
            else:
                # Look up the symbol in our entry tables.
                value = section.GetImage().GetImageSymbolValue(name, sym.weak,
                                                               msg, base_addr)
            if value is None:
                # Write -1 for a missing value; this needs the signed form of
                # the pack string ('<i' / '<q')
                value = -1
                pack_string = pack_string.lower()
            value_bytes = struct.pack(pack_string, value)
            tout.debug('%s:\n insert %s, offset %x, value %x, length %d' %
                       (msg, name, offset, value, len(value_bytes)))
            entry.data = (entry.data[:offset] + value_bytes +
                          entry.data[offset + sym.size:])
            count += 1
    if count:
        tout.detail(
            f"Section '{section.GetPath()}': entry '{entry.GetPath()}' : {count} symbols")
    return count


def GetSymbolValue(sym, data, msg):
    """Get the value of a symbol

    This can only be used on symbols with an integer value.

    Args:
        sym (Symbol): Symbol to check
        data (bytes): Data for the ELF file - the symbol data appears at offset
            sym.offset
        msg (str): String which indicates the entry being processed, used for
            errors

    Returns:
        int: Value of the symbol

    Raises:
        ValueError: Symbol has an unexpected size
    """
    pack_string = GetPackString(sym, msg)
    value = struct.unpack(pack_string, data[sym.offset:sym.offset + sym.size])
    return value[0]


def MakeElf(elf_fname, text, data):
    """Make an elf file with the given data in a single section

    The output file has several sections including '.text' and '.data',
    containing the info provided in arguments.

    The assembly source and linker script are written to a temporary
    directory, which is removed again even if the compiler fails.

    Args:
        elf_fname: Output filename
        text: Text (code) to put in the file's .text section
        data: Data to put in the file's .data section
    """
    outdir = tempfile.mkdtemp(prefix='binman.elf.')
    try:
        s_file = os.path.join(outdir, 'elf.S')

        # Split the text into two parts so that we can make the entry point two
        # bytes after the start of the text section
        text_bytes1 = ['\t.byte\t%#x' % byte for byte in text[:2]]
        text_bytes2 = ['\t.byte\t%#x' % byte for byte in text[2:]]
        data_bytes = ['\t.byte\t%#x' % byte for byte in data]
        with open(s_file, 'w') as fd:
            print('''/* Auto-generated C program to produce an ELF file for testing */

.section .text
.code32
.globl _start
.type _start, @function
%s
_start:
%s
.ident "comment"

.comm fred,8,4

.section .empty
.globl _empty
_empty:
.byte 1

.globl ernie
.data
.type ernie, @object
.size ernie, 4
ernie:
%s
''' % ('\n'.join(text_bytes1), '\n'.join(text_bytes2), '\n'.join(data_bytes)),
                  file=fd)
        lds_file = os.path.join(outdir, 'elf.lds')

        # Use a linker script to set the alignment and text address.
        with open(lds_file, 'w') as fd:
            print('''/* Auto-generated linker script to produce an ELF file for testing */

PHDRS
{
    text PT_LOAD ;
    data PT_LOAD ;
    empty PT_LOAD FLAGS ( 6 ) ;
    note PT_NOTE ;
}

SECTIONS
{
    . = 0xfef20000;
    ENTRY(_start)
    .text . : SUBALIGN(0)
    {
        *(.text)
    } :text
    .data : {
        *(.data)
    } :data
    _bss_start = .;
    .empty : {
        *(.empty)
    } :empty
    /DISCARD/ : {
        *(.note.gnu.property)
    }
    .note : {
        *(.comment)
    } :note
    .bss _bss_start (OVERLAY) : {
        *(.bss)
    }
}
''', file=fd)
        # -static: Avoid requiring any shared libraries
        # -nostdlib: Don't link with C library
        # -Wl,--build-id=none: Don't generate a build ID, so that we just get the
        #   text section at the start
        # -m32: Build for 32-bit x86
        # -T...: Specifies the link script, which sets the start address
        cc, args = tools.get_target_compile_tool('cc')
        args += ['-static', '-nostdlib', '-Wl,--build-id=none', '-m32', '-T',
                 lds_file, '-o', elf_fname, s_file]
        command.output(cc, *args)
    finally:
        # Always remove the temporary directory, even if the build failed
        shutil.rmtree(outdir)


def DecodeElf(data, location):
    """Decode an ELF file and return information about it

    Args:
        data: Data from ELF file
        location: Start address of data to return

    Returns:
        ElfInfo object containing information about the decoded ELF file

    Raises:
        ValueError: elftools is not available
    """
    if not ELF_TOOLS:
        raise ValueError("Python: No module named 'elftools'")
    file_size = len(data)
    with io.BytesIO(data) as fd:
        elf = ELFFile(fd)
        data_start = 0xffffffff
        data_end = 0
        mem_end = 0
        virt_to_phys = 0

        # First pass: work out the physical address range covered by the
        # loadable segments
        for segment in elf.iter_segments():
            if segment['p_type'] != 'PT_LOAD' or not segment['p_memsz']:
                skipped = 1  # To make code-coverage see this line
                continue
            start = segment['p_paddr']
            mend = start + segment['p_memsz']
            rend = start + segment['p_filesz']
            data_start = min(data_start, start)
            data_end = max(data_end, rend)
            mem_end = max(mem_end, mend)
            if not virt_to_phys:
                virt_to_phys = segment['p_paddr'] - segment['p_vaddr']

        # Second pass: copy the contents of each segment into place
        output = bytearray(data_end - data_start)
        for segment in elf.iter_segments():
            if segment['p_type'] != 'PT_LOAD' or not segment['p_memsz']:
                skipped = 1  # To make code-coverage see this line
                continue
            start = segment['p_paddr']
            offset = 0
            if start < location:
                offset = location - start
                start = location
            # A legal ELF file can have a program header with non-zero length
            # but zero-length file size and a non-zero offset which, added
            # together, are greater than input->size (i.e. the total file size).
            #  So we need to not even test in the case that p_filesz is zero.
            # Note: All of this code is commented out since we don't have a test
            # case for it.
            size = segment['p_filesz']
            #if not size:
                #continue
            #end = segment['p_offset'] + segment['p_filesz']
            #if end > file_size:
                #raise ValueError('Underflow copying out the segment. File has %#x bytes left, segment end is %#x\n',
                                 #file_size, end)
            output[start - data_start:start - data_start + size] = (
                segment.data()[offset:])
    return ElfInfo(output, data_start, elf.header['e_entry'] + virt_to_phys,
                   mem_end - data_start)


def UpdateFile(infile, outfile, start_sym, end_sym, insert):
    """Write a copy of an ELF file with data inserted between two symbols

    The region between the file offsets of start_sym and end_sym is replaced
    with the inserted data, padded with zero bytes up to the size of that
    region. The rest of the file is copied unchanged.

    Args:
        infile (str): Filename of the input ELF file
        outfile (str): Filename of the output file to write
        start_sym (str): Name of the symbol marking the start of the region
        end_sym (str): Name of the symbol marking the end of the region
        insert (bytes): Data to place at the start of the region

    Raises:
        ValueError: The two symbols were not both found, the data is too
            large for the region, or elftools is not available
    """
    tout.notice("Creating file '%s' with data length %#x (%d) between symbols '%s' and '%s'" %
                (outfile, len(insert), len(insert), start_sym, end_sym))
    syms = GetSymbolFileOffset(infile, [start_sym, end_sym])
    if len(syms) != 2:
        raise ValueError("Expected two symbols '%s' and '%s': got %d: %s" %
                         (start_sym, end_sym, len(syms),
                          ','.join(syms.keys())))

    size = syms[end_sym].offset - syms[start_sym].offset
    if len(insert) > size:
        raise ValueError("Not enough space in '%s' for data length %#x (%d); size is %#x (%d)" %
                         (infile, len(insert), len(insert), size, size))

    data = tools.read_file(infile)
    newdata = data[:syms[start_sym].offset]
    newdata += insert + tools.get_bytes(0, size - len(insert))
    newdata += data[syms[end_sym].offset:]
    tools.write_file(outfile, newdata)
    tout.info('Written to offset %#x' % syms[start_sym].offset)


def read_loadable_segments(data):
    """Read segments from an ELF file

    Args:
        data (bytes): Contents of file

    Returns:
        tuple:
            list of segments, each:
                int: Segment number (0 = first)
                int: Start address of segment in memory
                bytes: Contents of segment
            int: entry address for image

    Raises:
        ValueError: elftools is not available, or the data could not be
            parsed as an ELF file
    """
    if not ELF_TOOLS:
        raise ValueError("Python: No module named 'elftools'")
    with io.BytesIO(data) as inf:
        try:
            elf = ELFFile(inf)
        except ELFError as err:
            raise ValueError(err) from err
        entry = elf.header['e_entry']
        segments = []
        for i, segment in enumerate(elf.iter_segments()):
            if segment['p_type'] != 'PT_LOAD' or not segment['p_memsz']:
                skipped = 1  # To make code-coverage see this line
                continue
            start = segment['p_offset']
            rend = start + segment['p_filesz']
            segments.append((i, segment['p_paddr'], data[start:rend]))
    return segments, entry


def is_valid(data):
    """Check if some binary data is a valid ELF file

    Args:
        data (bytes): Bytes to check

    Returns:
        bool: True if a valid Elf file, False if not

    Raises:
        ValueError: elftools is not available
    """
    # Without elftools the name ELFError does not exist, so the 'except'
    # clause below would itself fail with NameError. Report the missing
    # module in the same way as the other functions instead
    if not ELF_TOOLS:
        raise ValueError("Python: No module named 'elftools'")
    try:
        DecodeElf(data, 0)
    except ELFError:
        return False
    return True