1#!/usr/bin/env python3
2#
3# Copyright (c) 2021 Intel Corporation
4#
5# SPDX-License-Identifier: Apache-2.0
6
7"""
8Dictionary-based Logging Parser Version 1
9
10This contains the implementation of the parser for
11version 1 databases.
12"""
13
14import logging
15import math
16import struct
17
18import colorama
19from colorama import Fore
20
21from .data_types import DataTypes
22from .log_parser import LogParser, formalize_fmt_string, get_log_level_str_color
23
24HEX_BYTES_IN_LINE = 16
25
26# Need to keep sync with struct log_dict_output_msg_hdr in
27# include/logging/log_output_dict.h.
28#
29# struct log_dict_output_normal_msg_hdr_t {
30#     uint8_t type;
31#     uint32_t domain:3;
32#     uint32_t level:3;
33#     uint32_t package_len:10;
34#     uint32_t data_len:12;
35#     uintptr_t source;
36#     log_timestamp_t timestamp;
37# } __packed;
38#
39# Note "type" and "timestamp" are encoded separately below.
40FMT_MSG_HDR_32 = "II"
41FMT_MSG_HDR_64 = "IQ"
42
43# Message type
44# 0: normal message
45# 1: number of dropped messages
46FMT_MSG_TYPE = "B"
47
48# Depends on CONFIG_LOG_TIMESTAMP_64BIT
49FMT_MSG_TIMESTAMP_32 = "I"
50FMT_MSG_TIMESTAMP_64 = "Q"
51
52# Keep message types in sync with include/logging/log_output_dict.h
53MSG_TYPE_NORMAL = 0
54MSG_TYPE_DROPPED = 1
55
56# Number of dropped messages
57FMT_DROPPED_CNT = "H"
58
59
60logger = logging.getLogger("parser")
61
62
63class LogParserV1(LogParser):
64    """Log Parser V1"""
65    def __init__(self, database):
66        super().__init__(database=database)
67
68        if self.database.is_tgt_little_endian():
69            endian = "<"
70        else:
71            endian = ">"
72
73        self.fmt_msg_type = endian + FMT_MSG_TYPE
74        self.fmt_dropped_cnt = endian + FMT_DROPPED_CNT
75
76        if self.database.is_tgt_64bit():
77            self.fmt_msg_hdr = endian + FMT_MSG_HDR_64
78        else:
79            self.fmt_msg_hdr = endian + FMT_MSG_HDR_32
80
81        if "CONFIG_LOG_TIMESTAMP_64BIT" in self.database.get_kconfigs():
82            self.fmt_msg_timestamp = endian + FMT_MSG_TIMESTAMP_64
83        else:
84            self.fmt_msg_timestamp = endian + FMT_MSG_TIMESTAMP_32
85
86
87    def __get_string(self, arg, arg_offset, string_tbl):
88        one_str = self.database.find_string(arg)
89        if one_str is not None:
90            ret = one_str
91        else:
92            # The index from the string table is basically
93            # the order in va_list. Need to add to the index
94            # to skip the packaged string header and
95            # the format string.
96            str_idx = arg_offset + self.data_types.get_sizeof(DataTypes.PTR) * 2
97            str_idx /= self.data_types.get_sizeof(DataTypes.INT)
98
99            ret = string_tbl.get(int(str_idx), f"<string@0x{arg:x}>")
100
101        return ret
102
103
104    def process_one_fmt_str(self, fmt_str, arg_list, string_tbl):
105        """Parse the format string to extract arguments from
106        the binary arglist and return a tuple usable with
107        Python's string formatting"""
108        idx = 0
109        arg_offset = 0
110        arg_data_type = None
111        is_parsing = False
112        do_extract = False
113
114        args = []
115
116        # Translated from cbvprintf_package()
117        for idx, fmt in enumerate(fmt_str):
118            if not is_parsing:
119                if fmt == '%':
120                    is_parsing = True
121                    arg_data_type = DataTypes.INT
122                    continue
123
124            elif fmt == '%':
125                # '%%' -> literal percentage sign
126                is_parsing = False
127                continue
128
129            elif fmt == '*':
130                pass
131
132            elif fmt.isdecimal() or str.lower(fmt) == 'l' \
133                or fmt in (' ', '#', '-', '+', '.', 'h'):
134                # formatting modifiers, just ignore
135                continue
136
137            elif fmt in ('j', 'z', 't'):
138                # intmax_t, size_t or ptrdiff_t
139                arg_data_type = DataTypes.LONG
140
141            elif fmt in ('c', 'd', 'i', 'o', 'u', 'x', 'X'):
142                unsigned = fmt in ('c', 'o', 'u', 'x', 'X')
143
144                if fmt_str[idx - 1] == 'l':
145                    if fmt_str[idx - 2] == 'l':
146                        arg_data_type = DataTypes.ULONG_LONG if unsigned else DataTypes.LONG_LONG
147                    else:
148                        arg_data_type = DataTypes.ULONG if unsigned else DataTypes.LONG
149                else:
150                    arg_data_type = DataTypes.UINT if unsigned else DataTypes.INT
151
152                is_parsing = False
153                do_extract = True
154
155            elif fmt in ('s', 'p', 'n'):
156                arg_data_type = DataTypes.PTR
157
158                is_parsing = False
159                do_extract = True
160
161            elif str.lower(fmt) in ('a', 'e', 'f', 'g'):
162                # Python doesn't do"long double".
163                #
164                # Parse it as double (probably incorrect), but
165                # still have to skip enough bytes.
166                if fmt_str[idx - 1] == 'L':
167                    arg_data_type = DataTypes.LONG_DOUBLE
168                else:
169                    arg_data_type = DataTypes.DOUBLE
170
171                is_parsing = False
172                do_extract = True
173
174            else:
175                is_parsing = False
176                continue
177
178            if do_extract:
179                do_extract = False
180
181                align = self.data_types.get_alignment(arg_data_type)
182                size = self.data_types.get_sizeof(arg_data_type)
183                unpack_fmt = self.data_types.get_formatter(arg_data_type)
184
185                # Align the argument list by rounding up
186                stack_align = self.data_types.get_stack_alignment(arg_data_type)
187                if stack_align > 1:
188                    arg_offset = int((arg_offset + (align - 1)) / align) * align
189
190                one_arg = struct.unpack_from(unpack_fmt, arg_list, arg_offset)[0]
191
192                if fmt == 's':
193                    one_arg = self.__get_string(one_arg, arg_offset, string_tbl)
194
195                args.append(one_arg)
196                arg_offset += size
197
198                # Align the offset
199                if stack_align > 1:
200                    arg_offset = int((arg_offset + align - 1) / align) * align
201
202        return tuple(args)
203
204
205    @staticmethod
206    def extract_string_table(str_tbl):
207        """Extract string table in a packaged log message"""
208        tbl = {}
209
210        one_str = ""
211        next_new_string = True
212        # Translated from cbvprintf_package()
213        for one_ch in str_tbl:
214            if next_new_string:
215                str_idx = one_ch
216                next_new_string = False
217                continue
218
219            if one_ch == 0:
220                tbl[str_idx] = one_str
221                one_str = ""
222                next_new_string = True
223                continue
224
225            one_str += chr(one_ch)
226
227        return tbl
228
229
230    @staticmethod
231    def print_hexdump(hex_data, prefix_len, color):
232        """Print hex dump"""
233        hex_vals = ""
234        chr_vals = ""
235        chr_done = 0
236
237        for one_hex in hex_data:
238            hex_vals += f"{one_hex:x} "
239            chr_vals += chr(one_hex)
240            chr_done += 1
241
242            if chr_done == HEX_BYTES_IN_LINE / 2:
243                hex_vals += " "
244                chr_vals += " "
245
246            elif chr_done == HEX_BYTES_IN_LINE:
247                print(f"{color}%s%s|%s{Fore.RESET}" % ((" " * prefix_len),
248                      hex_vals, chr_vals))
249                hex_vals = ""
250                chr_vals = ""
251                chr_done = 0
252
253        if len(chr_vals) > 0:
254            hex_padding = "   " * (HEX_BYTES_IN_LINE - chr_done)
255            print(f"{color}%s%s%s|%s{Fore.RESET}" % ((" " * prefix_len),
256                  hex_vals, hex_padding, chr_vals))
257
258    def get_full_msg_hdr_size(self):
259        """Get the size of the full message header"""
260        return struct.calcsize(self.fmt_msg_type) + \
261            struct.calcsize(self.fmt_msg_hdr) + \
262            struct.calcsize(self.fmt_msg_timestamp)
263
264    def get_normal_msg_size(self, logdata, offset):
265        """Get the needed size of the normal log message at offset"""
266        log_desc, _ = struct.unpack_from(self.fmt_msg_hdr, logdata, offset)
267        pkg_len = (log_desc >> 6) & int(math.pow(2, 10) - 1)
268        data_len = (log_desc >> 16) & int(math.pow(2, 12) - 1)
269        return self.get_full_msg_hdr_size() + pkg_len + data_len
270
271    def parse_one_normal_msg(self, logdata, offset):
272        """Parse one normal log message and print the encoded message"""
273        # Parse log message header
274        log_desc, source_id = struct.unpack_from(self.fmt_msg_hdr, logdata, offset)
275        offset += struct.calcsize(self.fmt_msg_hdr)
276
277        timestamp = struct.unpack_from(self.fmt_msg_timestamp, logdata, offset)[0]
278        offset += struct.calcsize(self.fmt_msg_timestamp)
279
280        # domain_id, level, pkg_len, data_len
281        domain_id = log_desc & 0x07
282        level = (log_desc >> 3) & 0x07
283        pkg_len = (log_desc >> 6) & int(math.pow(2, 10) - 1)
284        data_len = (log_desc >> 16) & int(math.pow(2, 12) - 1)
285
286        level_str, color = get_log_level_str_color(level)
287        source_id_str = self.database.get_log_source_string(domain_id, source_id)
288
289        # Skip over data to point to next message (save as return value)
290        next_msg_offset = offset + pkg_len + data_len
291
292        # Offset from beginning of cbprintf_packaged data to end of va_list arguments
293        offset_end_of_args = struct.unpack_from("B", logdata, offset)[0]
294        offset_end_of_args *= self.data_types.get_sizeof(DataTypes.INT)
295        offset_end_of_args += offset
296
297        # Extra data after packaged log
298        extra_data = logdata[(offset + pkg_len):next_msg_offset]
299
300        # Number of appended strings in package
301        num_packed_strings = struct.unpack_from("B", logdata, offset+1)[0]
302
303        # Number of read-only string indexes
304        num_ro_str_indexes = struct.unpack_from("B", logdata, offset+2)[0]
305        offset_end_of_args += num_ro_str_indexes
306
307        # Number of read-write string indexes
308        num_rw_str_indexes = struct.unpack_from("B", logdata, offset+3)[0]
309        offset_end_of_args += num_rw_str_indexes
310
311        # Extract the string table in the packaged log message
312        string_tbl = self.extract_string_table(logdata[offset_end_of_args:(offset + pkg_len)])
313
314        if len(string_tbl) != num_packed_strings:
315            logger.error("------ Error extracting string table")
316            return None
317
318        # Skip packaged string header
319        offset += self.data_types.get_sizeof(DataTypes.PTR)
320
321        # Grab the format string
322        #
323        # Note the negative offset to __get_string(). It is because
324        # the offset begins at 0 for va_list. However, the format string
325        # itself is before the va_list, so need to go back the width of
326        # a pointer.
327        fmt_str_ptr = struct.unpack_from(self.data_types.get_formatter(DataTypes.PTR),
328                                         logdata, offset)[0]
329        fmt_str = self.__get_string(fmt_str_ptr,
330                                    -self.data_types.get_sizeof(DataTypes.PTR),
331                                    string_tbl)
332        offset += self.data_types.get_sizeof(DataTypes.PTR)
333
334        if not fmt_str:
335            logger.error("------ Error getting format string at 0x%x", fmt_str_ptr)
336            return None
337
338        args = self.process_one_fmt_str(fmt_str, logdata[offset:offset_end_of_args], string_tbl)
339
340        fmt_str = formalize_fmt_string(fmt_str)
341        log_msg = fmt_str % args
342
343        if level == 0:
344            print(f"{log_msg}", end='')
345        else:
346            log_prefix = f"[{timestamp:>10}] <{level_str}> {source_id_str}: "
347            print(f"{color}%s%s{Fore.RESET}" % (log_prefix, log_msg))
348
349        if data_len > 0:
350            # Has hexdump data
351            self.print_hexdump(extra_data, len(log_prefix), color)
352
353        # Point to next message
354        return next_msg_offset
355
356    def parse_one_msg(self, logdata, offset):
357        if offset + struct.calcsize(self.fmt_msg_type) > len(logdata):
358            return False, offset
359
360        # Get message type
361        msg_type = struct.unpack_from(self.fmt_msg_type, logdata, offset)[0]
362
363        if msg_type == MSG_TYPE_DROPPED:
364
365            if offset + struct.calcsize(self.fmt_dropped_cnt) > len(logdata):
366                return False, offset
367            offset += struct.calcsize(self.fmt_msg_type)
368
369            num_dropped = struct.unpack_from(self.fmt_dropped_cnt, logdata, offset)
370            offset += struct.calcsize(self.fmt_dropped_cnt)
371
372            print(f"--- {num_dropped} messages dropped ---")
373
374        elif msg_type == MSG_TYPE_NORMAL:
375
376            if ((offset + self.get_full_msg_hdr_size() > len(logdata)) or
377                (offset + self.get_normal_msg_size(logdata, offset) > len(logdata))):
378                return False, offset
379
380            offset += struct.calcsize(self.fmt_msg_type)
381
382            ret = self.parse_one_normal_msg(logdata, offset)
383            if ret is None:
384                raise ValueError("Error parsing normal log message")
385
386            offset = ret
387
388        else:
389            logger.error("------ Unknown message type: %s", msg_type)
390            raise ValueError(f"Unknown message type: {msg_type}")
391
392        return True, offset
393
394    def parse_log_data(self, logdata, debug=False):
395        """Parse binary log data and print the encoded log messages"""
396        offset = 0
397        still_parsing = True
398
399        while offset < len(logdata) and still_parsing:
400            still_parsing, offset = self.parse_one_msg(logdata, offset)
401
402        return offset
403
404
405colorama.init()
406