1# SPDX-License-Identifier: GPL-2.0+
2# Copyright (c) 2016 Google, Inc
3# Written by Simon Glass <sjg@chromium.org>
4#
5# Handle various things related to ELF images
6#
7
8from collections import namedtuple, OrderedDict
9import io
10import os
11import re
12import shutil
13import struct
14import tempfile
15
16from u_boot_pylib import command
17from u_boot_pylib import tools
18from u_boot_pylib import tout
19
# Set to False below if the elftools module cannot be imported. Several
# functions in this file check this flag and raise ValueError when it is False
ELF_TOOLS = True
try:
    from elftools.elf.elffile import ELFFile
    from elftools.elf.elffile import ELFError
    from elftools.elf.sections import SymbolTableSection
except ImportError:  # pragma: no cover
    # Only a missing module is tolerated; anything else (e.g. Ctrl-C during
    # import) is allowed to propagate
    ELF_TOOLS = False
27
28# BSYM in little endian, keep in sync with include/binman_sym.h
29BINMAN_SYM_MAGIC_VALUE = 0x4d595342
30
# Information about an ELF symbol:
32# section (str): Name of the section containing this symbol
33# address (int): Address of the symbol (its value)
34# size (int): Size of the symbol in bytes
35# weak (bool): True if the symbol is weak
36# offset (int or None): Offset of the symbol's data in the ELF file, or None if
37#   not known
38Symbol = namedtuple('Symbol', ['section', 'address', 'size', 'weak', 'offset'])
39
40# Information about an ELF file:
#    data: Extracted program contents of ELF file (this would be loaded by an
#           ELF loader when reading this file)
43#    load: Load address of code
44#    entry: Entry address of code
45#    memsize: Number of bytes in memory occupied by loading this ELF file
46ElfInfo = namedtuple('ElfInfo', ['data', 'load', 'entry', 'memsize'])
47
48
def GetSymbols(fname, patterns):
    """Read the symbol table of an ELF file using 'objdump -t'

    Args:
        fname: Filename of the ELF file to read
        patterns: List of regex patterns (each a string) used to select lines
            of the objdump symbol table. If this is empty or None, every
            symbol is returned

    Returns:
        OrderedDict, sorted by symbol address:
          key: Name of symbol
          value: Symbol object; its offset member is always None
    """
    output = tools.run('objdump', '-t', fname)
    matcher = re.compile('|'.join(patterns)) if patterns else None
    found = {}
    in_table = False
    for text in output.splitlines():
        if not in_table:
            # Skip everything up to and including the table header
            in_table = 'SYMBOL TABLE' in text
        elif text and (matcher is None or matcher.search(text)):
            # The value is everything before the first space. It is followed
            # by seven columns of flags, then the section, size and name
            split_at = text.find(' ')
            value = text[:split_at]
            remainder = text[split_at + 1:]
            flags = remainder[:7]
            fields = remainder[7:].split()
            section, size = fields[:2]
            if len(fields) > 2:
                # A hidden symbol has '.hidden' in front of its name
                name = fields[3] if fields[2] == '.hidden' else fields[2]
                found[name] = Symbol(section, int(value, 16), int(size, 16),
                                     flags[1] == 'w', None)

    def by_address(item):
        return item[1].address

    return OrderedDict(sorted(found.items(), key=by_address))
90
def _GetFileOffset(elf, addr):
    """Convert a virtual address into an offset within the ELF file

    Only PT_LOAD segments are considered, and only the part of each segment
    which is present in the file (p_filesz bytes starting at p_vaddr).

    Args:
        elf (ELFFile): ELF file to check
        addr (int): Address to search for

    Returns
        int: Offset of that address in the ELF file, or None if not valid
    """
    for segment in elf.iter_segments():
        vaddr = segment['p_vaddr']
        limit = vaddr + segment['p_filesz']
        if segment.header['p_type'] != 'PT_LOAD':
            continue
        if vaddr <= addr < limit:
            return segment['p_offset'] + (addr - vaddr)
    return None
106
def GetFileOffset(fname, addr):
    """Look up the file offset of an address in a named ELF file

    Args:
        fname (str): Filename of ELF file to check
        addr (int): Address to search for

    Returns
        int: Offset of that address in the ELF file, or None if not valid

    Raises:
        ValueError: elftools is not available
    """
    if not ELF_TOOLS:
        raise ValueError("Python: No module named 'elftools'")
    with open(fname, 'rb') as inf:
        return _GetFileOffset(ELFFile(inf), addr)
122
def GetSymbolFromAddress(fname, addr):
    """Find the name of a symbol which has a particular address

    If several symbols share the address, the first one found in the
    address-sorted symbol list is returned.

    Args:
        fname (str): Filename of ELF file to check
        addr (int): Address to search for

    Returns:
        str: Symbol name, or None if no symbol at that address

    Raises:
        ValueError: elftools is not available
    """
    if not ELF_TOOLS:
        raise ValueError("Python: No module named 'elftools'")
    with open(fname, 'rb') as inf:
        # The file is parsed with elftools before objdump is run, but the
        # parsed result is not used for the lookup itself
        ELFFile(inf)
        symbols = GetSymbols(fname, None)
    matches = (name for name, sym in symbols.items() if sym.address == addr)
    return next(matches, None)
141
def GetSymbolFileOffset(fname, patterns):
    """Get the symbols from an ELF file, including their file offsets

    The symbol tables are read with elftools (rather than objdump) so that the
    offset of each symbol within the file can be filled in.

    Args:
        fname: Filename of the ELF file to read
        patterns: List of regex patterns to search for, each a string. If this
            is empty or None, all symbols are returned

    Returns:
        OrderedDict, sorted by symbol address:
          key: Name of symbol
          value: Symbol object. Its section member holds the name of the
            symbol-table section in which the symbol was found and its offset
            member holds the file offset of the symbol's address, or None if
            the address is not within the file data of a PT_LOAD segment

    Raises:
        ValueError: elftools is not available
    """
    if not ELF_TOOLS:
        raise ValueError("Python: No module named 'elftools'")

    syms = {}
    with open(fname, 'rb') as fd:
        elf = ELFFile(fd)

        # No patterns means 'match everything'; joining None would raise
        re_syms = re.compile('|'.join(patterns)) if patterns else None
        for section in elf.iter_sections():
            if isinstance(section, SymbolTableSection):
                for symbol in section.iter_symbols():
                    if not re_syms or re_syms.search(symbol.name):
                        addr = symbol.entry['st_value']
                        syms[symbol.name] = Symbol(
                            section.name, addr, symbol.entry['st_size'],
                            symbol.entry['st_info']['bind'] == 'STB_WEAK',
                            _GetFileOffset(elf, addr))

    # Sort dict by address
    return OrderedDict(sorted(syms.items(), key=lambda x: x[1].address))
174
def GetSymbolAddress(fname, sym_name):
    """Get the address (value) of a symbol from an ELF file

    Args:
        fname: Filename of the ELF file to read
        sym_name (str): Exact name of the symbol to look up

    Returns:
        Symbol value (as an integer) or None if not found
    """
    # GetSymbols() treats its patterns as regular expressions, so escape the
    # name to make sure that characters such as '$' or '+' match literally
    syms = GetSymbols(fname, [re.escape(sym_name)])
    sym = syms.get(sym_name)
    if sym is None:
        return None
    return sym.address
190
def GetPackString(sym, msg):
    """Choose the struct.pack/unpack format which matches a symbol's size

    Both formats are little-endian and unsigned.

    Args:
        sym (Symbol): Symbol to check. Only the size member is checked
        msg (str): String which indicates the entry being processed, used for
            errors

    Returns:
        str: struct string to use, e.g. '<I'

    Raises:
        ValueError: Symbol has an unexpected size
    """
    formats = {4: '<I', 8: '<Q'}
    pack_string = formats.get(sym.size)
    if pack_string is None:
        raise ValueError('%s has size %d: only 4 and 8 are supported' %
                         (msg, sym.size))
    return pack_string
212
def GetSymbolOffset(elf_fname, sym_name, base_sym=None):
    """Calculate how far a symbol is from a base symbol

    This is useful for obtaining the value of a single symbol relative to the
    base of a binary blob.

    Args:
        elf_fname: Filename of the ELF file to read; this is passed through
            tools.get_input_filename() to locate the file
        sym_name (str): Name of symbol to read
        base_sym (str): Base symbol to use to calculate the offset (or None to
            use '__image_copy_start')

    Returns:
        int: Offset of the symbol relative to the base symbol
    """
    base_name = base_sym if base_sym else '__image_copy_start'
    path = tools.get_input_filename(elf_fname)
    symbols = GetSymbols(path, [base_name, sym_name])
    base_addr = symbols[base_name].address
    return symbols[sym_name].address - base_addr
235
def LookupAndWriteSymbols(elf_fname, entry, section, is_elf=False,
                          base_sym=None):
    """Replace all symbols in an entry with their correct values

    The entry contents is updated so that values for referenced symbols will be
    visible at run time. This is done by finding out the symbols offsets in the
    entry (using the ELF file) and replacing them with values from binman's data
    structures.

    Only symbols whose name starts with '_binman' are written. The special
    symbol '_binman_sym_magic' is set to BINMAN_SYM_MAGIC_VALUE; all others are
    looked up in the image. A symbol for which the lookup returns None is
    written as -1.

    Args:
        elf_fname: Filename of ELF image containing the symbol information for
            entry
        entry: Entry to process
        section: Section which can be used to lookup symbol values
        is_elf (bool): True to write each symbol at its file offset within the
            ELF file (this needs elftools); False to write it at its address
            relative to the base symbol
        base_sym: Base symbol marking the start of the image (None to use
            '__image_copy_start')

    Raises:
        ValueError: is_elf is True but elftools is not available; or a symbol
            lies outside the entry contents (non-ELF case only); or a symbol
            has a size other than 4 or 8
    """
    if not base_sym:
        base_sym = '__image_copy_start'
    fname = tools.get_input_filename(elf_fname)
    syms = GetSymbols(fname, ['image', 'binman'])
    if is_elf:
        if not ELF_TOOLS:
            msg = ("Section '%s': entry '%s'" %
                   (section.GetPath(), entry.GetPath()))
            raise ValueError(f'{msg}: Cannot write symbols to an ELF file without Python elftools')
        # GetSymbols() leaves the offset member as None, so rebuild each symbol
        # with its file offset filled in
        new_syms = {}
        with open(fname, 'rb') as fd:
            elf = ELFFile(fd)
            for name, sym in syms.items():
                offset = _GetFileOffset(elf, sym.address)
                new_syms[name] = Symbol(sym.section, sym.address, sym.size,
                                        sym.weak, offset)
            syms = new_syms

    if not syms:
        tout.debug('LookupAndWriteSymbols: no syms')
        return
    base = syms.get(base_sym)
    # The base symbol is only needed for the non-ELF case
    if not base and not is_elf:
        tout.debug('LookupAndWriteSymbols: no base')
        return
    base_addr = 0 if is_elf else base.address
    for name, sym in syms.items():
        if name.startswith('_binman'):
            msg = ("Section '%s': Symbol '%s'\n   in entry '%s'" %
                   (section.GetPath(), name, entry.GetPath()))
            if is_elf:
                # For ELF files, use the file offset
                # NOTE(review): this is None if _GetFileOffset() found no
                # PT_LOAD segment containing the address; that case is not
                # checked before the offset is used below
                offset = sym.offset
            else:
                # For blobs use the offset of the symbol, calculated by
                # subtracting the base address which by definition is at the
                # start
                offset = sym.address - base.address
                if offset < 0 or offset + sym.size > entry.contents_size:
                    raise ValueError('%s has offset %x (size %x) but the contents '
                                     'size is %x' % (entry.GetPath(), offset,
                                                     sym.size,
                                                     entry.contents_size))
            pack_string = GetPackString(sym, msg)
            if name == '_binman_sym_magic':
                value = BINMAN_SYM_MAGIC_VALUE
            else:
                # Look up the symbol in our entry tables.
                value = section.GetImage().LookupImageSymbol(name, sym.weak,
                                                             msg, base_addr)
            if value is None:
                # Switch to the signed format ('<i' or '<q') so that -1 can be
                # packed
                value = -1
                pack_string = pack_string.lower()
            value_bytes = struct.pack(pack_string, value)
            tout.debug('%s:\n   insert %s, offset %x, value %x, length %d' %
                       (msg, name, offset, value, len(value_bytes)))
            # Splice the packed value over the symbol's bytes in the entry data
            entry.data = (entry.data[:offset] + value_bytes +
                        entry.data[offset + sym.size:])
310
def GetSymbolValue(sym, data, msg):
    """Read the integer stored at a symbol's location in some data

    This can only be used on symbols with an integer value.

    Args:
        sym (Symbol): Symbol to check
        data (bytes): Data for the ELF file - the symbol data appears at offset
            sym.offset
        msg (str): String which indicates the entry being processed, used for
            errors

    Returns:
        int: Value of the symbol

    Raises:
        ValueError: Symbol has an unexpected size
    """
    fmt = GetPackString(sym, msg)
    begin = sym.offset
    raw = data[begin:begin + sym.size]
    (value,) = struct.unpack(fmt, raw)
    return value
332
def MakeElf(elf_fname, text, data):
    """Make an ELF file containing the given text and data

    The output file has several sections including '.text' and '.data',
    containing the info provided in arguments. An assembly file and a linker
    script are written to a temporary directory and built with the target
    compiler. The temporary directory is removed afterwards, even if the build
    fails.

    Args:
        elf_fname: Output filename
        text: Text (code) to put in the file's .text section
        data: Data to put in the file's .data section
    """
    outdir = tempfile.mkdtemp(prefix='binman.elf.')
    try:
        s_file = os.path.join(outdir, 'elf.S')

        # Split the text into two parts so that we can make the entry point two
        # bytes after the start of the text section
        text_bytes1 = ['\t.byte\t%#x' % byte for byte in text[:2]]
        text_bytes2 = ['\t.byte\t%#x' % byte for byte in text[2:]]
        data_bytes = ['\t.byte\t%#x' % byte for byte in data]
        with open(s_file, 'w') as fd:
            print('''/* Auto-generated C program to produce an ELF file for testing */

.section .text
.code32
.globl _start
.type _start, @function
%s
_start:
%s
.ident "comment"

.comm fred,8,4

.section .empty
.globl _empty
_empty:
.byte 1

.globl ernie
.data
.type ernie, @object
.size ernie, 4
ernie:
%s
''' % ('\n'.join(text_bytes1), '\n'.join(text_bytes2), '\n'.join(data_bytes)),
            file=fd)
        lds_file = os.path.join(outdir, 'elf.lds')

        # Use a linker script to set the alignment and text address.
        with open(lds_file, 'w') as fd:
            print('''/* Auto-generated linker script to produce an ELF file for testing */

PHDRS
{
    text PT_LOAD ;
    data PT_LOAD ;
    empty PT_LOAD FLAGS ( 6 ) ;
    note PT_NOTE ;
}

SECTIONS
{
    . = 0xfef20000;
    ENTRY(_start)
    .text . : SUBALIGN(0)
    {
        *(.text)
    } :text
    .data : {
        *(.data)
    } :data
    _bss_start = .;
    .empty : {
        *(.empty)
    } :empty
    /DISCARD/ : {
        *(.note.gnu.property)
    }
    .note : {
        *(.comment)
    } :note
    .bss _bss_start  (OVERLAY) : {
        *(.bss)
    }
}
''', file=fd)
        # -static: Avoid requiring any shared libraries
        # -nostdlib: Don't link with C library
        # -Wl,--build-id=none: Don't generate a build ID, so that we just get
        #   the text section at the start
        # -m32: Build for 32-bit x86
        # -T...: Specifies the link script, which sets the start address
        cc, args = tools.get_target_compile_tool('cc')
        args += ['-static', '-nostdlib', '-Wl,--build-id=none', '-m32', '-T',
                 lds_file, '-o', elf_fname, s_file]
        command.output(cc, *args)
    finally:
        # Always clean up, so that a failed build does not leave the temporary
        # directory behind
        shutil.rmtree(outdir)
430
def DecodeElf(data, location):
    """Decode an ELF file and return information about it

    The file contents of every PT_LOAD segment with a non-zero memory size are
    copied into a single buffer, positioned according to the segment's
    physical address (p_paddr).

    Args:
        data: Data from ELF file
        location: Start address of data to return. For a segment whose
            physical address is below this, the first (location - p_paddr)
            bytes of the segment contents are skipped

    Returns:
        ElfInfo object containing information about the decoded ELF file

    Raises:
        ValueError: elftools is not available
    """
    if not ELF_TOOLS:
        raise ValueError("Python: No module named 'elftools'")
    with io.BytesIO(data) as fd:
        elf = ELFFile(fd)

        # Only loadable segments which occupy memory are of interest
        loadable = [segment for segment in elf.iter_segments()
                    if segment['p_type'] == 'PT_LOAD' and segment['p_memsz']]

        # First pass: work out the address range covered by the segments
        data_start = 0xffffffff
        data_end = 0
        mem_end = 0
        virt_to_phys = 0
        for segment in loadable:
            start = segment['p_paddr']
            data_start = min(data_start, start)
            data_end = max(data_end, start + segment['p_filesz'])
            mem_end = max(mem_end, start + segment['p_memsz'])
            # Use the first non-zero physical/virtual difference found
            if not virt_to_phys:
                virt_to_phys = start - segment['p_vaddr']

        # Second pass: copy the file contents of each segment into place
        output = bytearray(data_end - data_start)
        for segment in loadable:
            start = segment['p_paddr']
            offset = 0
            if start < location:
                offset = location - start
                start = location
            # A legal ELF file can have a program header with non-zero length
            # but zero-length file size and a non-zero offset which, added
            # together, are greater than the total file size. No check is made
            # here that the segment lies within the file, since there is no
            # test case for it.
            size = segment['p_filesz']
            pos = start - data_start
            output[pos:pos + size] = segment.data()[offset:]
        entry = elf.header['e_entry']
    return ElfInfo(output, data_start, entry + virt_to_phys,
                   mem_end - data_start)
491
def UpdateFile(infile, outfile, start_sym, end_sym, insert):
    """Write a copy of an ELF file with the data between two symbols replaced

    The region from the file offset of start_sym up to the file offset of
    end_sym is overwritten with the new data. If the new data is shorter than
    the region, the remainder is filled using tools.get_bytes(0, ...).

    Args:
        infile (str): Filename of the ELF file to read
        outfile (str): Filename of the file to write
        start_sym (str): Name of the symbol marking the start of the region
        end_sym (str): Name of the symbol marking the end of the region
        insert (bytes): Data to place at the start of the region

    Raises:
        ValueError: The symbol search did not produce exactly two symbols, or
            the data is too large for the region
    """
    insert_len = len(insert)
    tout.notice("Creating file '%s' with data length %#x (%d) between symbols '%s' and '%s'" %
                (outfile, insert_len, insert_len, start_sym, end_sym))
    syms = GetSymbolFileOffset(infile, [start_sym, end_sym])
    count = len(syms)
    if count != 2:
        raise ValueError("Expected two symbols '%s' and '%s': got %d: %s" %
                         (start_sym, end_sym, count, ','.join(syms.keys())))

    end_offset = syms[end_sym].offset
    start_offset = syms[start_sym].offset
    size = end_offset - start_offset
    if insert_len > size:
        raise ValueError("Not enough space in '%s' for data length %#x (%d); size is %#x (%d)" %
                         (infile, insert_len, insert_len, size, size))

    data = tools.read_file(infile)
    padding = tools.get_bytes(0, size - insert_len)
    newdata = data[:start_offset] + insert + padding + data[end_offset:]
    tools.write_file(outfile, newdata)
    tout.info('Written to offset %#x' % start_offset)
512
def read_loadable_segments(data):
    """Extract the loadable segments and entry address from an ELF file

    Only PT_LOAD segments with a non-zero memory size are returned.

    Args:
        data (bytes): Contents of file

    Returns:
        tuple:
            list of segments, each:
                int: Segment number (0 = first)
                int: Start address of segment in memory
                bytes: Contents of segment
            int: entry address for image

    Raises:
        ValueError: elftools is not available, or it reported an error when
            parsing the data
    """
    if not ELF_TOOLS:
        raise ValueError("Python: No module named 'elftools'")
    with io.BytesIO(data) as inf:
        try:
            elf = ELFFile(inf)
        except ELFError as err:
            raise ValueError(err)
        entry = elf.header['e_entry']
        # The segment number counts every segment, not just the loadable ones
        segments = [
            (seq, seg['p_paddr'],
             data[seg['p_offset']:seg['p_offset'] + seg['p_filesz']])
            for seq, seg in enumerate(elf.iter_segments())
            if seg['p_type'] == 'PT_LOAD' and seg['p_memsz']
        ]
    return segments, entry
548
def is_valid(data):
    """Check if some binary data is a valid ELF file

    The data is considered valid if DecodeElf() can process it without
    elftools reporting an error.

    Args:
        data (bytes): Bytes to check

    Returns:
        bool: True if a valid Elf file, False if not

    Raises:
        ValueError: elftools is not available
    """
    # Without elftools neither ELFFile nor ELFError is defined, so fail with
    # the same error as the other functions here rather than a NameError
    if not ELF_TOOLS:
        raise ValueError("Python: No module named 'elftools'")
    try:
        DecodeElf(data, 0)
    except ELFError:
        return False
    return True
563