1#!/usr/bin/env python3 2# 3# Copyright (c) 2021 Intel Corporation 4# 5# SPDX-License-Identifier: Apache-2.0 6 7""" 8Dictionary-based Logging Parser Version 1 9 10This contains the implementation of the parser for 11version 1 databases. 12""" 13 14import logging 15import math 16import struct 17 18import colorama 19from colorama import Fore 20 21from .data_types import DataTypes 22from .log_parser import LogParser, formalize_fmt_string, get_log_level_str_color 23 24HEX_BYTES_IN_LINE = 16 25 26# Need to keep sync with struct log_dict_output_msg_hdr in 27# include/logging/log_output_dict.h. 28# 29# struct log_dict_output_normal_msg_hdr_t { 30# uint8_t type; 31# uint32_t domain:3; 32# uint32_t level:3; 33# uint32_t package_len:10; 34# uint32_t data_len:12; 35# uintptr_t source; 36# log_timestamp_t timestamp; 37# } __packed; 38# 39# Note "type" and "timestamp" are encoded separately below. 40FMT_MSG_HDR_32 = "II" 41FMT_MSG_HDR_64 = "IQ" 42 43# Message type 44# 0: normal message 45# 1: number of dropped messages 46FMT_MSG_TYPE = "B" 47 48# Depends on CONFIG_LOG_TIMESTAMP_64BIT 49FMT_MSG_TIMESTAMP_32 = "I" 50FMT_MSG_TIMESTAMP_64 = "Q" 51 52# Keep message types in sync with include/logging/log_output_dict.h 53MSG_TYPE_NORMAL = 0 54MSG_TYPE_DROPPED = 1 55 56# Number of dropped messages 57FMT_DROPPED_CNT = "H" 58 59 60logger = logging.getLogger("parser") 61 62 63class LogParserV1(LogParser): 64 """Log Parser V1""" 65 def __init__(self, database): 66 super().__init__(database=database) 67 68 if self.database.is_tgt_little_endian(): 69 endian = "<" 70 else: 71 endian = ">" 72 73 self.fmt_msg_type = endian + FMT_MSG_TYPE 74 self.fmt_dropped_cnt = endian + FMT_DROPPED_CNT 75 76 if self.database.is_tgt_64bit(): 77 self.fmt_msg_hdr = endian + FMT_MSG_HDR_64 78 else: 79 self.fmt_msg_hdr = endian + FMT_MSG_HDR_32 80 81 if "CONFIG_LOG_TIMESTAMP_64BIT" in self.database.get_kconfigs(): 82 self.fmt_msg_timestamp = endian + FMT_MSG_TIMESTAMP_64 83 else: 84 self.fmt_msg_timestamp = endian + FMT_MSG_TIMESTAMP_32 85 86 87 def __get_string(self, arg, arg_offset, string_tbl): 88 one_str = self.database.find_string(arg) 89 if one_str is not None: 90 ret = one_str 91 else: 92 # The index from the string table is basically 93 # the order in va_list. Need to add to the index 94 # to skip the packaged string header and 95 # the format string. 96 str_idx = arg_offset + self.data_types.get_sizeof(DataTypes.PTR) * 2 97 str_idx /= self.data_types.get_sizeof(DataTypes.INT) 98 99 ret = string_tbl.get(int(str_idx), f"<string@0x{arg:x}>") 100 101 return ret 102 103 104 def process_one_fmt_str(self, fmt_str, arg_list, string_tbl): 105 """Parse the format string to extract arguments from 106 the binary arglist and return a tuple usable with 107 Python's string formatting""" 108 idx = 0 109 arg_offset = 0 110 arg_data_type = None 111 is_parsing = False 112 do_extract = False 113 114 args = [] 115 116 # Translated from cbvprintf_package() 117 for idx, fmt in enumerate(fmt_str): 118 if not is_parsing: 119 if fmt == '%': 120 is_parsing = True 121 arg_data_type = DataTypes.INT 122 continue 123 124 elif fmt == '%': 125 # '%%' -> literal percentage sign 126 is_parsing = False 127 continue 128 129 elif fmt == '*': 130 pass 131 132 elif fmt.isdecimal() or str.lower(fmt) == 'l' \ 133 or fmt in (' ', '#', '-', '+', '.', 'h'): 134 # formatting modifiers, just ignore 135 continue 136 137 elif fmt in ('j', 'z', 't'): 138 # intmax_t, size_t or ptrdiff_t 139 arg_data_type = DataTypes.LONG 140 141 elif fmt in ('c', 'd', 'i', 'o', 'u', 'x', 'X'): 142 unsigned = fmt in ('c', 'o', 'u', 'x', 'X') 143 144 if fmt_str[idx - 1] == 'l': 145 if fmt_str[idx - 2] == 'l': 146 arg_data_type = DataTypes.ULONG_LONG if unsigned else DataTypes.LONG_LONG 147 else: 148 arg_data_type = DataTypes.ULONG if unsigned else DataTypes.LONG 149 else: 150 arg_data_type = DataTypes.UINT if unsigned else DataTypes.INT 151 152 is_parsing = False 153 do_extract = True 154 155 elif fmt in ('s', 'p', 'n'): 156 arg_data_type = DataTypes.PTR 157 158 is_parsing = False 159 do_extract = True 160 161 elif str.lower(fmt) in ('a', 'e', 'f', 'g'): 162 # Python doesn't do"long double". 163 # 164 # Parse it as double (probably incorrect), but 165 # still have to skip enough bytes. 166 if fmt_str[idx - 1] == 'L': 167 arg_data_type = DataTypes.LONG_DOUBLE 168 else: 169 arg_data_type = DataTypes.DOUBLE 170 171 is_parsing = False 172 do_extract = True 173 174 else: 175 is_parsing = False 176 continue 177 178 if do_extract: 179 do_extract = False 180 181 align = self.data_types.get_alignment(arg_data_type) 182 size = self.data_types.get_sizeof(arg_data_type) 183 unpack_fmt = self.data_types.get_formatter(arg_data_type) 184 185 # Align the argument list by rounding up 186 stack_align = self.data_types.get_stack_alignment(arg_data_type) 187 if stack_align > 1: 188 arg_offset = int((arg_offset + (align - 1)) / align) * align 189 190 one_arg = struct.unpack_from(unpack_fmt, arg_list, arg_offset)[0] 191 192 if fmt == 's': 193 one_arg = self.__get_string(one_arg, arg_offset, string_tbl) 194 195 args.append(one_arg) 196 arg_offset += size 197 198 # Align the offset 199 if stack_align > 1: 200 arg_offset = int((arg_offset + align - 1) / align) * align 201 202 return tuple(args) 203 204 205 @staticmethod 206 def extract_string_table(str_tbl): 207 """Extract string table in a packaged log message""" 208 tbl = {} 209 210 one_str = "" 211 next_new_string = True 212 # Translated from cbvprintf_package() 213 for one_ch in str_tbl: 214 if next_new_string: 215 str_idx = one_ch 216 next_new_string = False 217 continue 218 219 if one_ch == 0: 220 tbl[str_idx] = one_str 221 one_str = "" 222 next_new_string = True 223 continue 224 225 one_str += chr(one_ch) 226 227 return tbl 228 229 230 @staticmethod 231 def print_hexdump(hex_data, prefix_len, color): 232 """Print hex dump""" 233 hex_vals = "" 234 chr_vals = "" 235 chr_done = 0 236 237 for one_hex in hex_data: 238 hex_vals += f"{one_hex:x} " 239 chr_vals += chr(one_hex) 240 chr_done += 1 241 242 if chr_done == HEX_BYTES_IN_LINE / 2: 243 hex_vals += " " 244 chr_vals += " " 245 246 elif chr_done == HEX_BYTES_IN_LINE: 247 print(f"{color}%s%s|%s{Fore.RESET}" % ((" " * prefix_len), 248 hex_vals, chr_vals)) 249 hex_vals = "" 250 chr_vals = "" 251 chr_done = 0 252 253 if len(chr_vals) > 0: 254 hex_padding = " " * (HEX_BYTES_IN_LINE - chr_done) 255 print(f"{color}%s%s%s|%s{Fore.RESET}" % ((" " * prefix_len), 256 hex_vals, hex_padding, chr_vals)) 257 258 def get_full_msg_hdr_size(self): 259 """Get the size of the full message header""" 260 return struct.calcsize(self.fmt_msg_type) + \ 261 struct.calcsize(self.fmt_msg_hdr) + \ 262 struct.calcsize(self.fmt_msg_timestamp) 263 264 def get_normal_msg_size(self, logdata, offset): 265 """Get the needed size of the normal log message at offset""" 266 log_desc, _ = struct.unpack_from(self.fmt_msg_hdr, logdata, offset) 267 pkg_len = (log_desc >> 6) & int(math.pow(2, 10) - 1) 268 data_len = (log_desc >> 16) & int(math.pow(2, 12) - 1) 269 return self.get_full_msg_hdr_size() + pkg_len + data_len 270 271 def parse_one_normal_msg(self, logdata, offset): 272 """Parse one normal log message and print the encoded message""" 273 # Parse log message header 274 log_desc, source_id = struct.unpack_from(self.fmt_msg_hdr, logdata, offset) 275 offset += struct.calcsize(self.fmt_msg_hdr) 276 277 timestamp = struct.unpack_from(self.fmt_msg_timestamp, logdata, offset)[0] 278 offset += struct.calcsize(self.fmt_msg_timestamp) 279 280 # domain_id, level, pkg_len, data_len 281 domain_id = log_desc & 0x07 282 level = (log_desc >> 3) & 0x07 283 pkg_len = (log_desc >> 6) & int(math.pow(2, 10) - 1) 284 data_len = (log_desc >> 16) & int(math.pow(2, 12) - 1) 285 286 level_str, color = get_log_level_str_color(level) 287 source_id_str = self.database.get_log_source_string(domain_id, source_id) 288 289 # Skip over data to point to next message (save as return value) 290 next_msg_offset = offset + pkg_len + data_len 291 292 # Offset from beginning of cbprintf_packaged data to end of va_list arguments 293 offset_end_of_args = struct.unpack_from("B", logdata, offset)[0] 294 offset_end_of_args *= self.data_types.get_sizeof(DataTypes.INT) 295 offset_end_of_args += offset 296 297 # Extra data after packaged log 298 extra_data = logdata[(offset + pkg_len):next_msg_offset] 299 300 # Number of appended strings in package 301 num_packed_strings = struct.unpack_from("B", logdata, offset+1)[0] 302 303 # Number of read-only string indexes 304 num_ro_str_indexes = struct.unpack_from("B", logdata, offset+2)[0] 305 offset_end_of_args += num_ro_str_indexes 306 307 # Number of read-write string indexes 308 num_rw_str_indexes = struct.unpack_from("B", logdata, offset+3)[0] 309 offset_end_of_args += num_rw_str_indexes 310 311 # Extract the string table in the packaged log message 312 string_tbl = self.extract_string_table(logdata[offset_end_of_args:(offset + pkg_len)]) 313 314 if len(string_tbl) != num_packed_strings: 315 logger.error("------ Error extracting string table") 316 return None 317 318 # Skip packaged string header 319 offset += self.data_types.get_sizeof(DataTypes.PTR) 320 321 # Grab the format string 322 # 323 # Note the negative offset to __get_string(). It is because 324 # the offset begins at 0 for va_list. However, the format string 325 # itself is before the va_list, so need to go back the width of 326 # a pointer. 327 fmt_str_ptr = struct.unpack_from(self.data_types.get_formatter(DataTypes.PTR), 328 logdata, offset)[0] 329 fmt_str = self.__get_string(fmt_str_ptr, 330 -self.data_types.get_sizeof(DataTypes.PTR), 331 string_tbl) 332 offset += self.data_types.get_sizeof(DataTypes.PTR) 333 334 if not fmt_str: 335 logger.error("------ Error getting format string at 0x%x", fmt_str_ptr) 336 return None 337 338 args = self.process_one_fmt_str(fmt_str, logdata[offset:offset_end_of_args], string_tbl) 339 340 fmt_str = formalize_fmt_string(fmt_str) 341 log_msg = fmt_str % args 342 343 if level == 0: 344 print(f"{log_msg}", end='') 345 else: 346 log_prefix = f"[{timestamp:>10}] <{level_str}> {source_id_str}: " 347 print(f"{color}%s%s{Fore.RESET}" % (log_prefix, log_msg)) 348 349 if data_len > 0: 350 # Has hexdump data 351 self.print_hexdump(extra_data, len(log_prefix), color) 352 353 # Point to next message 354 return next_msg_offset 355 356 def parse_one_msg(self, logdata, offset): 357 if offset + struct.calcsize(self.fmt_msg_type) > len(logdata): 358 return False, offset 359 360 # Get message type 361 msg_type = struct.unpack_from(self.fmt_msg_type, logdata, offset)[0] 362 363 if msg_type == MSG_TYPE_DROPPED: 364 365 if offset + struct.calcsize(self.fmt_dropped_cnt) > len(logdata): 366 return False, offset 367 offset += struct.calcsize(self.fmt_msg_type) 368 369 num_dropped = struct.unpack_from(self.fmt_dropped_cnt, logdata, offset) 370 offset += struct.calcsize(self.fmt_dropped_cnt) 371 372 print(f"--- {num_dropped} messages dropped ---") 373 374 elif msg_type == MSG_TYPE_NORMAL: 375 376 if ((offset + self.get_full_msg_hdr_size() > len(logdata)) or 377 (offset + self.get_normal_msg_size(logdata, offset) > len(logdata))): 378 return False, offset 379 380 offset += struct.calcsize(self.fmt_msg_type) 381 382 ret = self.parse_one_normal_msg(logdata, offset) 383 if ret is None: 384 raise ValueError("Error parsing normal log message") 385 386 offset = ret 387 388 else: 389 logger.error("------ Unknown message type: %s", msg_type) 390 raise ValueError(f"Unknown message type: {msg_type}") 391 392 return True, offset 393 394 def parse_log_data(self, logdata, debug=False): 395 """Parse binary log data and print the encoded log messages""" 396 offset = 0 397 still_parsing = True 398 399 while offset < len(logdata) and still_parsing: 400 still_parsing, offset = self.parse_one_msg(logdata, offset) 401 402 return offset 403 404 405colorama.init() 406