1#!/usr/bin/env python3 2# 3# Copyright (c) 2021, 2024 Intel Corporation 4# 5# SPDX-License-Identifier: Apache-2.0 6 7# Parsers are gonig to have very similar code. 8# So tell pylint not to care. 9# pylint: disable=duplicate-code 10 11""" 12Dictionary-based Logging Parser Version 3 13 14This contains the implementation of the parser for 15version 3 databases. 16""" 17 18import logging 19import struct 20 21import colorama 22from colorama import Fore 23 24from .data_types import DataTypes 25from .log_parser import LogParser, formalize_fmt_string, get_log_level_str_color 26 27HEX_BYTES_IN_LINE = 16 28 29# Need to keep sync with struct log_dict_output_msg_hdr in 30# include/logging/log_output_dict.h. 31# 32# struct log_dict_output_normal_msg_hdr_t { 33# uint8_t type; 34# uint32_t domain:4; 35# uint32_t level:4; 36# uint32_t package_len:16; 37# uint32_t data_len:16; 38# uintptr_t source; 39# log_timestamp_t timestamp; 40# } __packed; 41# 42# Note "type" and "timestamp" are encoded separately below. 43FMT_MSG_HDR_32 = "BHHI" 44FMT_MSG_HDR_64 = "BHHQ" 45 46# Message type 47# 0: normal message 48# 1: number of dropped messages 49FMT_MSG_TYPE = "B" 50 51# Depends on CONFIG_LOG_TIMESTAMP_64BIT 52FMT_MSG_TIMESTAMP_32 = "I" 53FMT_MSG_TIMESTAMP_64 = "Q" 54 55# Keep message types in sync with include/logging/log_output_dict.h 56MSG_TYPE_NORMAL = 0 57MSG_TYPE_DROPPED = 1 58 59# Number of dropped messages 60FMT_DROPPED_CNT = "H" 61 62 63logger = logging.getLogger("parser") 64 65 66class LogParserV3(LogParser): 67 """Log Parser V1""" 68 def __init__(self, database): 69 super().__init__(database=database) 70 71 if self.database.is_tgt_little_endian(): 72 endian = "<" 73 self.is_big_endian = False 74 else: 75 endian = ">" 76 self.is_big_endian = True 77 78 self.fmt_msg_type = endian + FMT_MSG_TYPE 79 self.fmt_dropped_cnt = endian + FMT_DROPPED_CNT 80 81 if self.database.is_tgt_64bit(): 82 self.fmt_msg_hdr = endian + FMT_MSG_HDR_64 83 else: 84 self.fmt_msg_hdr = endian + FMT_MSG_HDR_32 85 86 if "CONFIG_LOG_TIMESTAMP_64BIT" in self.database.get_kconfigs(): 87 self.fmt_msg_timestamp = endian + FMT_MSG_TIMESTAMP_64 88 else: 89 self.fmt_msg_timestamp = endian + FMT_MSG_TIMESTAMP_32 90 91 92 def __get_string(self, arg, arg_offset, string_tbl): 93 one_str = self.database.find_string(arg) 94 if one_str is not None: 95 ret = one_str 96 else: 97 # The index from the string table is basically 98 # the order in va_list. Need to add to the index 99 # to skip the packaged string header and 100 # the format string. 101 str_idx = arg_offset + self.data_types.get_sizeof(DataTypes.PTR) * 2 102 str_idx /= self.data_types.get_sizeof(DataTypes.INT) 103 104 ret = string_tbl.get(int(str_idx), f"<string@0x{arg:x}>") 105 106 return ret 107 108 109 def process_one_fmt_str(self, fmt_str, arg_list, string_tbl): 110 """Parse the format string to extract arguments from 111 the binary arglist and return a tuple usable with 112 Python's string formatting""" 113 idx = 0 114 arg_offset = 0 115 arg_data_type = None 116 is_parsing = False 117 do_extract = False 118 119 args = [] 120 121 # Translated from cbvprintf_package() 122 for idx, fmt in enumerate(fmt_str): 123 if not is_parsing: 124 if fmt == '%': 125 is_parsing = True 126 arg_data_type = DataTypes.INT 127 continue 128 129 elif fmt == '%': 130 # '%%' -> literal percentage sign 131 is_parsing = False 132 continue 133 134 elif fmt == '*': 135 pass 136 137 elif fmt.isdecimal() or str.lower(fmt) == 'l' \ 138 or fmt in (' ', '#', '-', '+', '.', 'h'): 139 # formatting modifiers, just ignore 140 continue 141 142 elif fmt in ('j', 'z', 't'): 143 # intmax_t, size_t or ptrdiff_t 144 arg_data_type = DataTypes.LONG 145 146 elif fmt in ('c', 'd', 'i', 'o', 'u', 'x', 'X'): 147 unsigned = fmt in ('c', 'o', 'u', 'x', 'X') 148 149 if fmt_str[idx - 1] == 'l': 150 if fmt_str[idx - 2] == 'l': 151 arg_data_type = DataTypes.ULONG_LONG if unsigned else DataTypes.LONG_LONG 152 else: 153 arg_data_type = DataTypes.ULONG if unsigned else DataTypes.LONG 154 else: 155 arg_data_type = DataTypes.UINT if unsigned else DataTypes.INT 156 157 is_parsing = False 158 do_extract = True 159 160 elif fmt in ('s', 'p', 'n'): 161 arg_data_type = DataTypes.PTR 162 163 is_parsing = False 164 do_extract = True 165 166 elif str.lower(fmt) in ('a', 'e', 'f', 'g'): 167 # Python doesn't do"long double". 168 # 169 # Parse it as double (probably incorrect), but 170 # still have to skip enough bytes. 171 if fmt_str[idx - 1] == 'L': 172 arg_data_type = DataTypes.LONG_DOUBLE 173 else: 174 arg_data_type = DataTypes.DOUBLE 175 176 is_parsing = False 177 do_extract = True 178 179 else: 180 is_parsing = False 181 continue 182 183 if do_extract: 184 do_extract = False 185 186 align = self.data_types.get_alignment(arg_data_type) 187 size = self.data_types.get_sizeof(arg_data_type) 188 unpack_fmt = self.data_types.get_formatter(arg_data_type) 189 190 # Align the argument list by rounding up 191 stack_align = self.data_types.get_stack_alignment(arg_data_type) 192 if stack_align > 1: 193 arg_offset = int((arg_offset + (align - 1)) / align) * align 194 195 one_arg = struct.unpack_from(unpack_fmt, arg_list, arg_offset)[0] 196 197 if fmt == 's': 198 one_arg = self.__get_string(one_arg, arg_offset, string_tbl) 199 200 args.append(one_arg) 201 arg_offset += size 202 203 # Align the offset 204 if stack_align > 1: 205 arg_offset = int((arg_offset + align - 1) / align) * align 206 207 return tuple(args) 208 209 210 @staticmethod 211 def extract_string_table(str_tbl): 212 """Extract string table in a packaged log message""" 213 tbl = {} 214 215 one_str = "" 216 next_new_string = True 217 # Translated from cbvprintf_package() 218 for one_ch in str_tbl: 219 if next_new_string: 220 str_idx = one_ch 221 next_new_string = False 222 continue 223 224 if one_ch == 0: 225 tbl[str_idx] = one_str 226 one_str = "" 227 next_new_string = True 228 continue 229 230 one_str += chr(one_ch) 231 232 return tbl 233 234 235 @staticmethod 236 def print_hexdump(hex_data, prefix_len, color): 237 """Print hex dump""" 238 hex_vals = "" 239 chr_vals = "" 240 chr_done = 0 241 242 for one_hex in hex_data: 243 hex_vals += f'{one_hex:02x} ' 244 chr_vals += chr(one_hex) 245 chr_done += 1 246 247 if chr_done == HEX_BYTES_IN_LINE / 2: 248 hex_vals += " " 249 chr_vals += " " 250 251 elif chr_done == HEX_BYTES_IN_LINE: 252 print(f"{color}%s%s|%s{Fore.RESET}" % ((" " * prefix_len), 253 hex_vals, chr_vals)) 254 hex_vals = "" 255 chr_vals = "" 256 chr_done = 0 257 258 if len(chr_vals) > 0: 259 hex_padding = " " * (HEX_BYTES_IN_LINE - chr_done) 260 print(f"{color}%s%s%s|%s{Fore.RESET}" % ((" " * prefix_len), 261 hex_vals, hex_padding, chr_vals)) 262 263 def get_full_msg_hdr_size(self): 264 """Get the size of the full message header""" 265 return struct.calcsize(self.fmt_msg_type) + \ 266 struct.calcsize(self.fmt_msg_hdr) + \ 267 struct.calcsize(self.fmt_msg_timestamp) 268 269 def get_normal_msg_size(self, logdata, offset): 270 """Get the needed size of the normal log message at offset""" 271 _, pkg_len, data_len, _ = struct.unpack_from( 272 self.fmt_msg_hdr, 273 logdata, 274 offset + struct.calcsize(self.fmt_msg_type), 275 ) 276 return self.get_full_msg_hdr_size() + pkg_len + data_len 277 278 def parse_one_normal_msg(self, logdata, offset): 279 """Parse one normal log message and print the encoded message""" 280 # Parse log message header 281 domain_lvl, pkg_len, data_len, source_id = struct.unpack_from(self.fmt_msg_hdr, 282 logdata, offset) 283 offset += struct.calcsize(self.fmt_msg_hdr) 284 285 timestamp = struct.unpack_from(self.fmt_msg_timestamp, logdata, offset)[0] 286 offset += struct.calcsize(self.fmt_msg_timestamp) 287 288 # domain_id, level 289 if self.is_big_endian: 290 level = domain_lvl & 0x0F 291 domain_id = (domain_lvl >> 4) & 0x0F 292 else: 293 domain_id = domain_lvl & 0x0F 294 level = (domain_lvl >> 4) & 0x0F 295 296 level_str, color = get_log_level_str_color(level) 297 source_id_str = self.database.get_log_source_string(domain_id, source_id) 298 299 # Skip over data to point to next message (save as return value) 300 next_msg_offset = offset + pkg_len + data_len 301 302 # Offset from beginning of cbprintf_packaged data to end of va_list arguments 303 offset_end_of_args = struct.unpack_from("B", logdata, offset)[0] 304 offset_end_of_args *= self.data_types.get_sizeof(DataTypes.INT) 305 offset_end_of_args += offset 306 307 # Extra data after packaged log 308 extra_data = logdata[(offset + pkg_len):next_msg_offset] 309 310 # Number of appended strings in package 311 num_packed_strings = struct.unpack_from("B", logdata, offset+1)[0] 312 313 # Number of read-only string indexes 314 num_ro_str_indexes = struct.unpack_from("B", logdata, offset+2)[0] 315 offset_end_of_args += num_ro_str_indexes 316 317 # Number of read-write string indexes 318 num_rw_str_indexes = struct.unpack_from("B", logdata, offset+3)[0] 319 offset_end_of_args += num_rw_str_indexes 320 321 # Extract the string table in the packaged log message 322 string_tbl = self.extract_string_table(logdata[offset_end_of_args:(offset + pkg_len)]) 323 324 if len(string_tbl) != num_packed_strings: 325 logger.error("------ Error extracting string table") 326 return None 327 328 # Skip packaged string header 329 offset += self.data_types.get_sizeof(DataTypes.PTR) 330 331 # Grab the format string 332 # 333 # Note the negative offset to __get_string(). It is because 334 # the offset begins at 0 for va_list. However, the format string 335 # itself is before the va_list, so need to go back the width of 336 # a pointer. 337 fmt_str_ptr = struct.unpack_from(self.data_types.get_formatter(DataTypes.PTR), 338 logdata, offset)[0] 339 fmt_str = self.__get_string(fmt_str_ptr, 340 -self.data_types.get_sizeof(DataTypes.PTR), 341 string_tbl) 342 offset += self.data_types.get_sizeof(DataTypes.PTR) 343 344 if not fmt_str: 345 logger.error("------ Error getting format string at 0x%x", fmt_str_ptr) 346 return None 347 348 args = self.process_one_fmt_str(fmt_str, logdata[offset:offset_end_of_args], string_tbl) 349 350 fmt_str = formalize_fmt_string(fmt_str) 351 log_msg = fmt_str % args 352 353 if level == 0: 354 print(f"{log_msg}", end='') 355 log_prefix = "" 356 else: 357 log_prefix = f"[{timestamp:>10}] <{level_str}> {source_id_str}: " 358 print(f"{color}%s%s{Fore.RESET}" % (log_prefix, log_msg)) 359 360 if data_len > 0: 361 # Has hexdump data 362 self.print_hexdump(extra_data, len(log_prefix), color) 363 364 # Point to next message 365 return next_msg_offset 366 367 def parse_one_msg(self, logdata, offset): 368 if offset + struct.calcsize(self.fmt_msg_type) > len(logdata): 369 return False, offset 370 371 # Get message type 372 msg_type = struct.unpack_from(self.fmt_msg_type, logdata, offset)[0] 373 374 if msg_type == MSG_TYPE_DROPPED: 375 376 if offset + struct.calcsize(self.fmt_dropped_cnt) > len(logdata): 377 return False, offset 378 379 offset += struct.calcsize(self.fmt_msg_type) 380 381 num_dropped = struct.unpack_from(self.fmt_dropped_cnt, logdata, offset) 382 offset += struct.calcsize(self.fmt_dropped_cnt) 383 384 print(f"--- {num_dropped} messages dropped ---") 385 386 elif msg_type == MSG_TYPE_NORMAL: 387 388 if ((offset + self.get_full_msg_hdr_size() > len(logdata)) or 389 (offset + self.get_normal_msg_size(logdata, offset) > len(logdata))): 390 return False, offset 391 392 offset += struct.calcsize(self.fmt_msg_type) 393 394 ret = self.parse_one_normal_msg(logdata, offset) 395 if ret is None: 396 raise ValueError("Error parsing normal log message") 397 398 offset = ret 399 400 else: 401 logger.error("------ Unknown message type: %s", msg_type) 402 raise ValueError(f"Unknown message type: {msg_type}") 403 404 return True, offset 405 406 def parse_log_data(self, logdata, debug=False): 407 """Parse binary log data and print the encoded log messages""" 408 offset = 0 409 still_parsing = True 410 411 while offset < len(logdata) and still_parsing: 412 still_parsing, offset = self.parse_one_msg(logdata, offset) 413 414 return offset 415 416 417colorama.init() 418