1# SPDX-License-Identifier: GPL-2.0+
2# Copyright (c) 2016 Google, Inc
3# Written by Simon Glass <sjg@chromium.org>
4#
5# Handle various things related to ELF images
6#
7
8from collections import namedtuple, OrderedDict
9import io
10import os
11import re
12import shutil
13import struct
14import tempfile
15
16from u_boot_pylib import command
17from u_boot_pylib import tools
18from u_boot_pylib import tout
19
# elftools is optional. ELF_TOOLS records whether it could be imported, so that
# functions which need it can raise a clear error instead of a NameError.
ELF_TOOLS = True
try:
    from elftools.elf.elffile import ELFFile
    from elftools.elf.elffile import ELFError
    from elftools.elf.sections import SymbolTableSection
except ImportError:  # pragma: no cover
    # Only a failed import means 'not available'; anything else (for example
    # KeyboardInterrupt) must not be silently swallowed
    ELF_TOOLS = False
27
# Value written in place of the '_binman_sym_magic' symbol. The bytes spell
# 'BSYM' when stored in little-endian order. Keep in sync with
# include/binman_sym.h
BINMAN_SYM_MAGIC_VALUE = 0x4d595342

# Describes a single ELF symbol:
#   section (str): Name of the section containing this symbol
#   address (int): Address of the symbol (its value)
#   size (int): Size of the symbol in bytes
#   weak (bool): True if the symbol is weak
#   offset (int or None): Offset of the symbol's data in the ELF file, or None
#       if not known
Symbol = namedtuple('Symbol', 'section address size weak offset')

# Describes a decoded ELF file:
#   data: Extracted program contents of the ELF file (this is what would be
#       loaded by an ELF loader when reading this file)
#   load: Load address of code
#   entry: Entry address of code
#   memsize: Number of bytes in memory occupied by loading this ELF file
ElfInfo = namedtuple('ElfInfo', 'data load entry memsize')
47
48
def GetSymbols(fname, patterns):
    """Get the symbols from an ELF file

    The symbol table is obtained by running 'objdump -t' on the file and
    parsing the lines which follow the 'SYMBOL TABLE' header.

    Args:
        fname: Filename of the ELF file to read
        patterns: List of regex patterns to search for, each a string. A
            symbol-table line is used if any pattern matches anywhere in the
            line. If None or empty, every line is used

    Returns:
        OrderedDict, sorted by symbol address:
          key: Name of symbol
          value: Symbol object for that symbol. The offset member is always
            None since objdump does not provide it
    """
    stdout = tools.run('objdump', '-t', fname)
    re_syms = re.compile('|'.join(patterns)) if patterns else None
    syms = {}
    syms_started = False
    for line in stdout.splitlines():
        if not syms_started:
            # Everything up to and including the header line is skipped
            syms_started = 'SYMBOL TABLE' in line
            continue
        if not line:
            continue
        if re_syms and not re_syms.search(line):
            continue

        # Each line is: value, a space, 7 flag characters, then
        # whitespace-separated section, size and name
        value, _, rest = line.partition(' ')
        flags = rest[:7]
        parts = rest[7:].split()
        if len(parts) < 3:
            # Not a named symbol, e.g. the 'no symbols' line which objdump
            # prints for a stripped file. Skip it rather than failing to
            # unpack the section and size
            continue
        section, size = parts[:2]
        name = parts[2] if parts[2] != '.hidden' else parts[3]
        syms[name] = Symbol(section, int(value, 16), int(size, 16),
                            flags[1] == 'w', None)

    # Sort dict by address
    return OrderedDict(sorted(syms.items(), key=lambda x: x[1].address))
90
91def _GetFileOffset(elf, addr):
92    """Get the file offset for an address
93
94    Args:
95        elf (ELFFile): ELF file to check
96        addr (int): Address to search for
97
98    Returns
99        int: Offset of that address in the ELF file, or None if not valid
100    """
101    for seg in elf.iter_segments():
102        seg_end = seg['p_vaddr'] + seg['p_filesz']
103        if seg.header['p_type'] == 'PT_LOAD':
104            if addr >= seg['p_vaddr'] and addr < seg_end:
105                return addr - seg['p_vaddr'] + seg['p_offset']
106
def GetFileOffset(fname, addr):
    """Convert an address into an offset within a named ELF file

    Args:
        fname (str): Filename of ELF file to check
        addr (int): Address to search for

    Returns:
        int: Offset of that address in the ELF file, or None if not valid

    Raises:
        ValueError: elftools is not available
    """
    if ELF_TOOLS:
        with open(fname, 'rb') as fd:
            return _GetFileOffset(ELFFile(fd), addr)
    raise ValueError("Python: No module named 'elftools'")
122
def GetSymbolFromAddress(fname, addr):
    """Find the name of the symbol located at an address

    Symbols are checked in order of increasing address and the first one whose
    address matches exactly is used.

    Args:
        fname (str): Filename of ELF file to check
        addr (int): Address to search for

    Returns:
        str: Symbol name, or None if no symbol at that address

    Raises:
        ValueError: elftools is not available
    """
    if not ELF_TOOLS:
        raise ValueError("Python: No module named 'elftools'")
    with open(fname, 'rb') as fd:
        # The parsed file is not used below; the symbols come from objdump.
        # NOTE(review): presumably this is here to reject non-ELF input early
        ELFFile(fd)
        syms = GetSymbols(fname, None)
    matches = (name for name, sym in syms.items() if sym.address == addr)
    return next(matches, None)
141
def GetSymbolFileOffset(fname, patterns):
    """Get the symbols from an ELF file, including their file offsets

    The symbols are read from every symbol-table section in the file using
    elftools.

    Args:
        fname: Filename of the ELF file to read
        patterns: List of regex patterns to search for, each a string. A
            symbol is used if any pattern matches anywhere in its name. If
            None or empty, every symbol is used

    Returns:
        OrderedDict, sorted by symbol address:
          key: Name of symbol
          value: Symbol object for that symbol. The section member is the
            name of the symbol-table section in which the symbol was found.
            The offset member is None if the address is not within the file
            data of a PT_LOAD segment

    Raises:
        ValueError: elftools is not available
    """
    if not ELF_TOOLS:
        raise ValueError("Python: No module named 'elftools'")

    syms = {}
    with open(fname, 'rb') as fd:
        elf = ELFFile(fd)

        # As with GetSymbols(), no patterns means 'match everything'. A
        # compiled regex is always truthy, so this must be decided here
        re_syms = re.compile('|'.join(patterns)) if patterns else None
        for section in elf.iter_sections():
            if not isinstance(section, SymbolTableSection):
                continue
            for symbol in section.iter_symbols():
                if re_syms and not re_syms.search(symbol.name):
                    continue
                addr = symbol.entry['st_value']
                syms[symbol.name] = Symbol(
                    section.name, addr, symbol.entry['st_size'],
                    symbol.entry['st_info']['bind'] == 'STB_WEAK',
                    _GetFileOffset(elf, addr))

    # Sort dict by address
    return OrderedDict(sorted(syms.items(), key=lambda x: x[1].address))
174
def GetSymbolAddress(fname, sym_name):
    """Get a value of a symbol from an ELF file

    Args:
        fname: Filename of the ELF file to read
        sym_name (str): Exact name of the symbol to look up

    Returns:
        Symbol value (as an integer) or None if not found
    """
    # GetSymbols() treats its patterns as regular expressions, so escape the
    # name. Otherwise a name containing characters such as '$' or '+' may fail
    # to match its own line in the symbol table
    syms = GetSymbols(fname, [re.escape(sym_name)])
    sym = syms.get(sym_name)
    if sym is None:
        return None
    return sym.address
190
def GetPackString(sym, msg):
    """Work out the struct format needed to pack/unpack a symbol's value

    Args:
        sym (Symbol): Symbol to check. Only the size member is used
        msg (str): String which indicates the entry being processed, used for
            errors

    Returns:
        str: Little-endian unsigned struct format: '<I' for a 4-byte symbol
            or '<Q' for an 8-byte symbol

    Raises:
        ValueError: Symbol has an unexpected size
    """
    formats = {4: '<I', 8: '<Q'}
    pack_string = formats.get(sym.size)
    if pack_string is None:
        raise ValueError('%s has size %d: only 4 and 8 are supported' %
                         (msg, sym.size))
    return pack_string
212
def GetSymbolOffset(elf_fname, sym_name, base_sym=None):
    """Read the offset of a symbol relative to a base symbol

    This is useful for obtaining the value of a single symbol relative to the
    base of a binary blob.

    Args:
        elf_fname: Filename of the ELF file to read; it is looked up using
            tools.get_input_filename()
        sym_name (str): Name of symbol to read
        base_sym (str): Base symbol to use to calculate the offset (or None to
            use '__image_copy_start')

    Returns:
        int: Address of the symbol minus the address of the base symbol

    Raises:
        KeyError: Either symbol was not found in the file
    """
    base_name = base_sym or '__image_copy_start'
    fname = tools.get_input_filename(elf_fname)
    syms = GetSymbols(fname, [base_name, sym_name])
    base_addr = syms[base_name].address
    return syms[sym_name].address - base_addr
235
def LookupAndWriteSymbols(elf_fname, entry, section, is_elf=False,
                          base_sym=None, base_addr=None):
    """Replace all symbols in an entry with their correct values

    The entry contents is updated so that values for referenced symbols will be
    visible at run time. This is done by finding out the symbols offsets in the
    entry (using the ELF file) and replacing them with values from binman's data
    structures.

    Only symbols whose name starts with '_binman' are written. The value of
    '_binman_sym_magic' is always BINMAN_SYM_MAGIC_VALUE; other values come
    from the image which contains the section.

    Args:
        elf_fname: Filename of ELF image containing the symbol information for
            entry
        entry: Entry to process; its data member is updated
        section: Section which can be used to lookup symbol values
        is_elf (bool): True if the entry contents is itself an ELF file, in
            which case each symbol is written at its offset within that file;
            False if it is a binary blob starting at base_sym
        base_sym: Base symbol marking the start of the image (__image_copy_start
            by default)
        base_addr (int): Base address to use for the entry being written. If
            None then the value of base_sym is used (or 0 if is_elf)

    Returns:
        int: Number of symbols written

    Raises:
        ValueError: elftools is needed but not available, a symbol lies
            outside the entry contents, or a symbol has an unsupported size
    """
    base_sym = base_sym or '__image_copy_start'
    fname = tools.get_input_filename(elf_fname)
    syms = GetSymbols(fname, ['image', 'binman'])
    if is_elf:
        if not ELF_TOOLS:
            msg = ("Section '%s': entry '%s'" %
                   (section.GetPath(), entry.GetPath()))
            raise ValueError(f'{msg}: Cannot write symbols to an ELF file without Python elftools')
        # Fill in the file offset of each symbol, since objdump does not
        # provide it
        with open(fname, 'rb') as fd:
            elf = ELFFile(fd)
            syms = {name: sym._replace(offset=_GetFileOffset(elf, sym.address))
                    for name, sym in syms.items()}

    if not syms:
        tout.debug('LookupAndWriteSymbols: no syms')
        return 0
    base = syms.get(base_sym)
    if not (base or is_elf):
        # A blob cannot be updated without knowing where it starts
        tout.debug(f'LookupAndWriteSymbols: no base: elf_fname={elf_fname}, base_sym={base_sym}, is_elf={is_elf}')
        return 0
    if base_addr is None:
        base_addr = 0 if is_elf else base.address

    count = 0
    for name, sym in syms.items():
        if not name.startswith('_binman'):
            continue
        msg = ("Section '%s': Symbol '%s'\n   in entry '%s'" %
               (section.GetPath(), name, entry.GetPath()))
        if is_elf:
            # For ELF files, use the file offset.
            # NOTE(review): this is None if the symbol is not within the file
            # data of a PT_LOAD segment, which the code below does not handle
            offset = sym.offset
        else:
            # For blobs use the distance from the base symbol, which by
            # definition is at the start of the contents
            offset = sym.address - base.address
            if offset < 0 or offset + sym.size > entry.contents_size:
                raise ValueError('%s has offset %x (size %x) but the contents '
                                 'size is %x' % (entry.GetPath(), offset,
                                                 sym.size,
                                                 entry.contents_size))
        pack_string = GetPackString(sym, msg)
        if name == '_binman_sym_magic':
            value = BINMAN_SYM_MAGIC_VALUE
        else:
            # Look up the symbol in our entry tables.
            image = section.GetImage()
            value = image.GetImageSymbolValue(name, sym.weak, msg, base_addr)
        if value is None:
            # No value available: write -1, which needs the signed form of
            # the format (e.g. '<i' rather than '<I')
            value = -1
            pack_string = pack_string.lower()
        value_bytes = struct.pack(pack_string, value)
        tout.debug('%s:\n   insert %s, offset %x, value %x, length %d' %
                   (msg, name, offset, value, len(value_bytes)))
        # Splice the new value over the old symbol contents
        entry.data = (entry.data[:offset] + value_bytes +
                      entry.data[offset + sym.size:])
        count += 1

    if count:
        tout.detail(
            f"Section '{section.GetPath()}': entry '{entry.GetPath()}' : {count} symbols")
    return count
323
def GetSymbolValue(sym, data, msg):
    """Read the integer value of a symbol from the file data

    This can only be used on symbols with an integer value, i.e. those with a
    size of 4 or 8 bytes. The value is read as little-endian and unsigned.

    Args:
        sym (Symbol): Symbol to check
        data (bytes): Data for the ELF file - the symbol data appears at offset
            sym.offset
        msg (str): String which indicates the entry being processed, used for
            errors

    Returns:
        int: Value of the symbol

    Raises:
        ValueError: Symbol has an unexpected size
    """
    fmt = GetPackString(sym, msg)
    start = sym.offset
    (value,) = struct.unpack(fmt, data[start:start + sym.size])
    return value
345
def MakeElf(elf_fname, text, data):
    """Make an ELF file with the given text and data, for use in tests

    An assembly file and a linker script are written to a temporary directory
    and then built into a 32-bit x86 ELF file using the target's C compiler.
    The output file has several sections including '.text' and '.data',
    containing the info provided in arguments. The entry point (_start) is
    placed two bytes after the start of the text.

    The temporary directory is always removed, even if writing the files or
    running the compiler fails.

    Args:
        elf_fname: Output filename
        text: Text (code) to put in the file's .text section, as a sequence of
            byte values, e.g. bytes
        data: Data to put in the file's .data section, as a sequence of byte
            values, e.g. bytes
    """
    outdir = tempfile.mkdtemp(prefix='binman.elf.')
    try:
        s_file = os.path.join(outdir, 'elf.S')

        # Split the text into two parts so that we can make the entry point
        # two bytes after the start of the text section
        text_bytes1 = ['\t.byte\t%#x' % byte for byte in text[:2]]
        text_bytes2 = ['\t.byte\t%#x' % byte for byte in text[2:]]
        data_bytes = ['\t.byte\t%#x' % byte for byte in data]
        with open(s_file, 'w') as fd:
            print('''/* Auto-generated C program to produce an ELF file for testing */

.section .text
.code32
.globl _start
.type _start, @function
%s
_start:
%s
.ident "comment"

.comm fred,8,4

.section .empty
.globl _empty
_empty:
.byte 1

.globl ernie
.data
.type ernie, @object
.size ernie, 4
ernie:
%s
''' % ('\n'.join(text_bytes1), '\n'.join(text_bytes2), '\n'.join(data_bytes)),
            file=fd)
        lds_file = os.path.join(outdir, 'elf.lds')

        # Use a linker script to set the alignment and text address.
        with open(lds_file, 'w') as fd:
            print('''/* Auto-generated linker script to produce an ELF file for testing */

PHDRS
{
    text PT_LOAD ;
    data PT_LOAD ;
    empty PT_LOAD FLAGS ( 6 ) ;
    note PT_NOTE ;
}

SECTIONS
{
    . = 0xfef20000;
    ENTRY(_start)
    .text . : SUBALIGN(0)
    {
        *(.text)
    } :text
    .data : {
        *(.data)
    } :data
    _bss_start = .;
    .empty : {
        *(.empty)
    } :empty
    /DISCARD/ : {
        *(.note.gnu.property)
    }
    .note : {
        *(.comment)
    } :note
    .bss _bss_start  (OVERLAY) : {
        *(.bss)
    }
}
''', file=fd)
        # -static: Avoid requiring any shared libraries
        # -nostdlib: Don't link with C library
        # -Wl,--build-id=none: Don't generate a build ID, so that we just get
        #   the text section at the start
        # -m32: Build for 32-bit x86
        # -T...: Specifies the link script, which sets the start address
        cc, args = tools.get_target_compile_tool('cc')
        args += ['-static', '-nostdlib', '-Wl,--build-id=none', '-m32', '-T',
                 lds_file, '-o', elf_fname, s_file]
        command.output(cc, *args)
    finally:
        shutil.rmtree(outdir)
443
def DecodeElf(data, location):
    """Decode an ELF file and return information about it

    The file contents of every PT_LOAD segment with a non-zero memory size are
    copied into a single buffer, positioned according to each segment's
    physical address.

    Args:
        data: Data from ELF file
        location: Start address of data to return. For a segment which starts
            below this address, the file data before this address is skipped

    Returns:
        ElfInfo object containing information about the decoded ELF file. The
        entry address is converted from virtual to physical using the first
        non-zero difference found between a segment's p_paddr and p_vaddr

    Raises:
        ValueError: elftools is not available
    """
    if not ELF_TOOLS:
        raise ValueError("Python: No module named 'elftools'")
    with io.BytesIO(data) as fd:
        elf = ELFFile(fd)

        # Only PT_LOAD segments which occupy memory contribute to the output
        loadable = []
        for seg_num in range(elf.num_segments()):
            seg = elf.get_segment(seg_num)
            if seg['p_type'] == 'PT_LOAD' and seg['p_memsz']:
                loadable.append(seg)

        # First pass: work out the address range covered by the segments
        data_start = 0xffffffff
        data_end = 0
        mem_end = 0
        virt_to_phys = 0
        for seg in loadable:
            paddr = seg['p_paddr']
            data_start = min(data_start, paddr)
            data_end = max(data_end, paddr + seg['p_filesz'])
            mem_end = max(mem_end, paddr + seg['p_memsz'])
            if not virt_to_phys:
                virt_to_phys = paddr - seg['p_vaddr']

        # Second pass: copy the file data of each segment into place.
        # NOTE: No check is made that a segment lies within the file. A legal
        # ELF file can have a program header with a zero file size and a
        # non-zero offset which together exceed the file size
        output = bytearray(data_end - data_start)
        for seg in loadable:
            dest = seg['p_paddr']
            skip = 0
            if dest < location:
                skip = location - dest
                dest = location
            pos = dest - data_start
            output[pos:pos + seg['p_filesz']] = seg.data()[skip:]
    return ElfInfo(output, data_start, elf.header['e_entry'] + virt_to_phys,
                   mem_end - data_start)
506
def UpdateFile(infile, outfile, start_sym, end_sym, insert):
    """Write a copy of an ELF file with data inserted between two symbols

    The region of the file between the file offsets of the two symbols is
    replaced with the data to insert, padded with zero bytes to fill the
    region. The rest of the file is copied unchanged.

    Args:
        infile (str): Filename of the ELF file to read
        outfile (str): Filename to write the updated file to
        start_sym (str): Name of the symbol marking the start of the region
        end_sym (str): Name of the symbol marking the end of the region
        insert (bytes): Data to place at the start of the region

    Raises:
        ValueError: The symbol lookup did not find exactly two symbols, or the
            data is too large for the region
    """
    insert_len = len(insert)
    tout.notice("Creating file '%s' with data length %#x (%d) between symbols '%s' and '%s'" %
                (outfile, insert_len, insert_len, start_sym, end_sym))
    syms = GetSymbolFileOffset(infile, [start_sym, end_sym])
    found = len(syms)
    if found != 2:
        raise ValueError("Expected two symbols '%s' and '%s': got %d: %s" %
                         (start_sym, end_sym, found,
                          ','.join(syms.keys())))

    end_offset = syms[end_sym].offset
    start_offset = syms[start_sym].offset
    size = end_offset - start_offset
    if insert_len > size:
        raise ValueError("Not enough space in '%s' for data length %#x (%d); size is %#x (%d)" %
                         (infile, insert_len, insert_len, size, size))

    data = tools.read_file(infile)
    padding = tools.get_bytes(0, size - insert_len)
    newdata = data[:start_offset] + (insert + padding) + data[end_offset:]
    tools.write_file(outfile, newdata)
    tout.info('Written to offset %#x' % start_offset)
527
def read_loadable_segments(data):
    """Read the loadable segments from an ELF file

    Only PT_LOAD segments with a non-zero memory size are returned.

    Args:
        data (bytes): Contents of file

    Returns:
        tuple:
            list of segments, each:
                int: Segment number (0 = first)
                int: Start address of segment in memory (its p_paddr)
                bytes: Contents of segment, taken from the file data
            int: entry address for image

    Raises:
        ValueError: elftools is not available, or it reported an error when
            opening the data as an ELF file
    """
    if not ELF_TOOLS:
        raise ValueError("Python: No module named 'elftools'")
    with io.BytesIO(data) as inf:
        try:
            elf = ELFFile(inf)
        except ELFError as err:
            raise ValueError(err)
        entry = elf.header['e_entry']
        segments = []
        for seg_num in range(elf.num_segments()):
            seg = elf.get_segment(seg_num)
            if seg['p_type'] == 'PT_LOAD' and seg['p_memsz']:
                file_start = seg['p_offset']
                file_end = file_start + seg['p_filesz']
                segments.append(
                    (seg_num, seg['p_paddr'], data[file_start:file_end]))
    return segments, entry
563
def is_valid(data):
    """Check if some binary data is a valid ELF file

    The data is valid if DecodeElf() can process it without elftools
    reporting an ELF error.

    Args:
        data (bytes): Bytes to check

    Returns:
        bool: True if a valid Elf file, False if not

    Raises:
        ValueError: elftools is not available
    """
    # ELFError is only defined when elftools was imported, so check first.
    # Otherwise evaluating the except clause below raises a NameError
    if not ELF_TOOLS:
        raise ValueError("Python: No module named 'elftools'")
    try:
        DecodeElf(data, 0)
    except ELFError:
        return False
    return True
578