1#!/usr/bin/env python3
2#
3# Copyright (c) 2021, 2024 Intel Corporation
4#
5# SPDX-License-Identifier: Apache-2.0
6
7# Parsers are gonig to have very similar code.
8# So tell pylint not to care.
9# pylint: disable=duplicate-code
10
11"""
12Dictionary-based Logging Parser Version 3
13
14This contains the implementation of the parser for
15version 3 databases.
16"""
17
18import logging
19import struct
20
21import colorama
22from colorama import Fore
23
24from .data_types import DataTypes
25from .log_parser import LogParser, formalize_fmt_string, get_log_level_str_color
26
27HEX_BYTES_IN_LINE = 16
28
29# Need to keep sync with struct log_dict_output_msg_hdr in
30# include/logging/log_output_dict.h.
31#
32# struct log_dict_output_normal_msg_hdr_t {
33#     uint8_t type;
34#     uint32_t domain:4;
35#     uint32_t level:4;
36#     uint32_t package_len:16;
37#     uint32_t data_len:16;
38#     uintptr_t source;
39#     log_timestamp_t timestamp;
40# } __packed;
41#
42# Note "type" and "timestamp" are encoded separately below.
43FMT_MSG_HDR_32 = "BHHI"
44FMT_MSG_HDR_64 = "BHHQ"
45
46# Message type
47# 0: normal message
48# 1: number of dropped messages
49FMT_MSG_TYPE = "B"
50
51# Depends on CONFIG_LOG_TIMESTAMP_64BIT
52FMT_MSG_TIMESTAMP_32 = "I"
53FMT_MSG_TIMESTAMP_64 = "Q"
54
55# Keep message types in sync with include/logging/log_output_dict.h
56MSG_TYPE_NORMAL = 0
57MSG_TYPE_DROPPED = 1
58
59# Number of dropped messages
60FMT_DROPPED_CNT = "H"
61
62
63logger = logging.getLogger("parser")
64
65
66class LogParserV3(LogParser):
67    """Log Parser V1"""
68    def __init__(self, database):
69        super().__init__(database=database)
70
71        if self.database.is_tgt_little_endian():
72            endian = "<"
73            self.is_big_endian = False
74        else:
75            endian = ">"
76            self.is_big_endian = True
77
78        self.fmt_msg_type = endian + FMT_MSG_TYPE
79        self.fmt_dropped_cnt = endian + FMT_DROPPED_CNT
80
81        if self.database.is_tgt_64bit():
82            self.fmt_msg_hdr = endian + FMT_MSG_HDR_64
83        else:
84            self.fmt_msg_hdr = endian + FMT_MSG_HDR_32
85
86        if "CONFIG_LOG_TIMESTAMP_64BIT" in self.database.get_kconfigs():
87            self.fmt_msg_timestamp = endian + FMT_MSG_TIMESTAMP_64
88        else:
89            self.fmt_msg_timestamp = endian + FMT_MSG_TIMESTAMP_32
90
91
92    def __get_string(self, arg, arg_offset, string_tbl):
93        one_str = self.database.find_string(arg)
94        if one_str is not None:
95            ret = one_str
96        else:
97            # The index from the string table is basically
98            # the order in va_list. Need to add to the index
99            # to skip the packaged string header and
100            # the format string.
101            str_idx = arg_offset + self.data_types.get_sizeof(DataTypes.PTR) * 2
102            str_idx /= self.data_types.get_sizeof(DataTypes.INT)
103
104            ret = string_tbl.get(int(str_idx), f"<string@0x{arg:x}>")
105
106        return ret
107
108
109    def process_one_fmt_str(self, fmt_str, arg_list, string_tbl):
110        """Parse the format string to extract arguments from
111        the binary arglist and return a tuple usable with
112        Python's string formatting"""
113        idx = 0
114        arg_offset = 0
115        arg_data_type = None
116        is_parsing = False
117        do_extract = False
118
119        args = []
120
121        # Translated from cbvprintf_package()
122        for idx, fmt in enumerate(fmt_str):
123            if not is_parsing:
124                if fmt == '%':
125                    is_parsing = True
126                    arg_data_type = DataTypes.INT
127                    continue
128
129            elif fmt == '%':
130                # '%%' -> literal percentage sign
131                is_parsing = False
132                continue
133
134            elif fmt == '*':
135                pass
136
137            elif fmt.isdecimal() or str.lower(fmt) == 'l' \
138                or fmt in (' ', '#', '-', '+', '.', 'h'):
139                # formatting modifiers, just ignore
140                continue
141
142            elif fmt in ('j', 'z', 't'):
143                # intmax_t, size_t or ptrdiff_t
144                arg_data_type = DataTypes.LONG
145
146            elif fmt in ('c', 'd', 'i', 'o', 'u', 'x', 'X'):
147                unsigned = fmt in ('c', 'o', 'u', 'x', 'X')
148
149                if fmt_str[idx - 1] == 'l':
150                    if fmt_str[idx - 2] == 'l':
151                        arg_data_type = DataTypes.ULONG_LONG if unsigned else DataTypes.LONG_LONG
152                    else:
153                        arg_data_type = DataTypes.ULONG if unsigned else DataTypes.LONG
154                else:
155                    arg_data_type = DataTypes.UINT if unsigned else DataTypes.INT
156
157                is_parsing = False
158                do_extract = True
159
160            elif fmt in ('s', 'p', 'n'):
161                arg_data_type = DataTypes.PTR
162
163                is_parsing = False
164                do_extract = True
165
166            elif str.lower(fmt) in ('a', 'e', 'f', 'g'):
167                # Python doesn't do"long double".
168                #
169                # Parse it as double (probably incorrect), but
170                # still have to skip enough bytes.
171                if fmt_str[idx - 1] == 'L':
172                    arg_data_type = DataTypes.LONG_DOUBLE
173                else:
174                    arg_data_type = DataTypes.DOUBLE
175
176                is_parsing = False
177                do_extract = True
178
179            else:
180                is_parsing = False
181                continue
182
183            if do_extract:
184                do_extract = False
185
186                align = self.data_types.get_alignment(arg_data_type)
187                size = self.data_types.get_sizeof(arg_data_type)
188                unpack_fmt = self.data_types.get_formatter(arg_data_type)
189
190                # Align the argument list by rounding up
191                stack_align = self.data_types.get_stack_alignment(arg_data_type)
192                if stack_align > 1:
193                    arg_offset = int((arg_offset + (align - 1)) / align) * align
194
195                one_arg = struct.unpack_from(unpack_fmt, arg_list, arg_offset)[0]
196
197                if fmt == 's':
198                    one_arg = self.__get_string(one_arg, arg_offset, string_tbl)
199
200                args.append(one_arg)
201                arg_offset += size
202
203                # Align the offset
204                if stack_align > 1:
205                    arg_offset = int((arg_offset + align - 1) / align) * align
206
207        return tuple(args)
208
209
210    @staticmethod
211    def extract_string_table(str_tbl):
212        """Extract string table in a packaged log message"""
213        tbl = {}
214
215        one_str = ""
216        next_new_string = True
217        # Translated from cbvprintf_package()
218        for one_ch in str_tbl:
219            if next_new_string:
220                str_idx = one_ch
221                next_new_string = False
222                continue
223
224            if one_ch == 0:
225                tbl[str_idx] = one_str
226                one_str = ""
227                next_new_string = True
228                continue
229
230            one_str += chr(one_ch)
231
232        return tbl
233
234
235    @staticmethod
236    def print_hexdump(hex_data, prefix_len, color):
237        """Print hex dump"""
238        hex_vals = ""
239        chr_vals = ""
240        chr_done = 0
241
242        for one_hex in hex_data:
243            hex_vals += f'{one_hex:02x} '
244            chr_vals += chr(one_hex)
245            chr_done += 1
246
247            if chr_done == HEX_BYTES_IN_LINE / 2:
248                hex_vals += " "
249                chr_vals += " "
250
251            elif chr_done == HEX_BYTES_IN_LINE:
252                print(f"{color}%s%s|%s{Fore.RESET}" % ((" " * prefix_len),
253                      hex_vals, chr_vals))
254                hex_vals = ""
255                chr_vals = ""
256                chr_done = 0
257
258        if len(chr_vals) > 0:
259            hex_padding = "   " * (HEX_BYTES_IN_LINE - chr_done)
260            print(f"{color}%s%s%s|%s{Fore.RESET}" % ((" " * prefix_len),
261                  hex_vals, hex_padding, chr_vals))
262
263    def get_full_msg_hdr_size(self):
264        """Get the size of the full message header"""
265        return struct.calcsize(self.fmt_msg_type) + \
266            struct.calcsize(self.fmt_msg_hdr) + \
267            struct.calcsize(self.fmt_msg_timestamp)
268
269    def get_normal_msg_size(self, logdata, offset):
270        """Get the needed size of the normal log message at offset"""
271        _, pkg_len, data_len, _ = struct.unpack_from(
272            self.fmt_msg_hdr,
273            logdata,
274            offset + struct.calcsize(self.fmt_msg_type),
275        )
276        return self.get_full_msg_hdr_size() + pkg_len + data_len
277
278    def parse_one_normal_msg(self, logdata, offset):
279        """Parse one normal log message and print the encoded message"""
280        # Parse log message header
281        domain_lvl, pkg_len, data_len, source_id = struct.unpack_from(self.fmt_msg_hdr,
282                                                                      logdata, offset)
283        offset += struct.calcsize(self.fmt_msg_hdr)
284
285        timestamp = struct.unpack_from(self.fmt_msg_timestamp, logdata, offset)[0]
286        offset += struct.calcsize(self.fmt_msg_timestamp)
287
288        # domain_id, level
289        if self.is_big_endian:
290            level = domain_lvl & 0x0F
291            domain_id = (domain_lvl >> 4) & 0x0F
292        else:
293            domain_id = domain_lvl & 0x0F
294            level = (domain_lvl >> 4) & 0x0F
295
296        level_str, color = get_log_level_str_color(level)
297        source_id_str = self.database.get_log_source_string(domain_id, source_id)
298
299        # Skip over data to point to next message (save as return value)
300        next_msg_offset = offset + pkg_len + data_len
301
302        # Offset from beginning of cbprintf_packaged data to end of va_list arguments
303        offset_end_of_args = struct.unpack_from("B", logdata, offset)[0]
304        offset_end_of_args *= self.data_types.get_sizeof(DataTypes.INT)
305        offset_end_of_args += offset
306
307        # Extra data after packaged log
308        extra_data = logdata[(offset + pkg_len):next_msg_offset]
309
310        # Number of appended strings in package
311        num_packed_strings = struct.unpack_from("B", logdata, offset+1)[0]
312
313        # Number of read-only string indexes
314        num_ro_str_indexes = struct.unpack_from("B", logdata, offset+2)[0]
315        offset_end_of_args += num_ro_str_indexes
316
317        # Number of read-write string indexes
318        num_rw_str_indexes = struct.unpack_from("B", logdata, offset+3)[0]
319        offset_end_of_args += num_rw_str_indexes
320
321        # Extract the string table in the packaged log message
322        string_tbl = self.extract_string_table(logdata[offset_end_of_args:(offset + pkg_len)])
323
324        if len(string_tbl) != num_packed_strings:
325            logger.error("------ Error extracting string table")
326            return None
327
328        # Skip packaged string header
329        offset += self.data_types.get_sizeof(DataTypes.PTR)
330
331        # Grab the format string
332        #
333        # Note the negative offset to __get_string(). It is because
334        # the offset begins at 0 for va_list. However, the format string
335        # itself is before the va_list, so need to go back the width of
336        # a pointer.
337        fmt_str_ptr = struct.unpack_from(self.data_types.get_formatter(DataTypes.PTR),
338                                         logdata, offset)[0]
339        fmt_str = self.__get_string(fmt_str_ptr,
340                                    -self.data_types.get_sizeof(DataTypes.PTR),
341                                    string_tbl)
342        offset += self.data_types.get_sizeof(DataTypes.PTR)
343
344        if not fmt_str:
345            logger.error("------ Error getting format string at 0x%x", fmt_str_ptr)
346            return None
347
348        args = self.process_one_fmt_str(fmt_str, logdata[offset:offset_end_of_args], string_tbl)
349
350        fmt_str = formalize_fmt_string(fmt_str)
351        log_msg = fmt_str % args
352
353        if level == 0:
354            print(f"{log_msg}", end='')
355            log_prefix = ""
356        else:
357            log_prefix = f"[{timestamp:>10}] <{level_str}> {source_id_str}: "
358            print(f"{color}%s%s{Fore.RESET}" % (log_prefix, log_msg))
359
360        if data_len > 0:
361            # Has hexdump data
362            self.print_hexdump(extra_data, len(log_prefix), color)
363
364        # Point to next message
365        return next_msg_offset
366
367    def parse_one_msg(self, logdata, offset):
368        if offset + struct.calcsize(self.fmt_msg_type) > len(logdata):
369            return False, offset
370
371        # Get message type
372        msg_type = struct.unpack_from(self.fmt_msg_type, logdata, offset)[0]
373
374        if msg_type == MSG_TYPE_DROPPED:
375
376            if offset + struct.calcsize(self.fmt_dropped_cnt) > len(logdata):
377                return False, offset
378
379            offset += struct.calcsize(self.fmt_msg_type)
380
381            num_dropped = struct.unpack_from(self.fmt_dropped_cnt, logdata, offset)
382            offset += struct.calcsize(self.fmt_dropped_cnt)
383
384            print(f"--- {num_dropped} messages dropped ---")
385
386        elif msg_type == MSG_TYPE_NORMAL:
387
388            if ((offset + self.get_full_msg_hdr_size() > len(logdata)) or
389                (offset + self.get_normal_msg_size(logdata, offset) > len(logdata))):
390                return False, offset
391
392            offset += struct.calcsize(self.fmt_msg_type)
393
394            ret = self.parse_one_normal_msg(logdata, offset)
395            if ret is None:
396                raise ValueError("Error parsing normal log message")
397
398            offset = ret
399
400        else:
401            logger.error("------ Unknown message type: %s", msg_type)
402            raise ValueError(f"Unknown message type: {msg_type}")
403
404        return True, offset
405
406    def parse_log_data(self, logdata, debug=False):
407        """Parse binary log data and print the encoded log messages"""
408        offset = 0
409        still_parsing = True
410
411        while offset < len(logdata) and still_parsing:
412            still_parsing, offset = self.parse_one_msg(logdata, offset)
413
414        return offset
415
416
417colorama.init()
418