#!/usr/bin/env python3

import json
import re


class TagDatabaseError(Exception):
    """Raised for any problem loading the tag database or rewriting tags."""


# Dictionary of the rules that translate a Xen tag into the proprietary
# in-code comment understood by each analyzer ("VID" is replaced with the
# analyzer specific violation id):
#  - cppcheck: /* cppcheck-suppress[id] */
#  - coverity: /* coverity[id] */
#  - eclair:   /* -E> hide id 1 "" */
# Add entries to support more analyzers
tool_syntax = {
    "cppcheck": "cppcheck-suppress[VID]",
    "coverity": "coverity[VID]",
    "eclair": "-E> hide VID 1 \"\""
}


def get_xen_tag_regex(tool):
    """Return the regex source matching a Xen tag for `tool`.

    The pattern exposes the named groups 'tag' (the whole tag), 'id' (the
    numeric index) and 'type' ('safe' or 'false-positive-<tool>').
    """
    # The tool name is matched literally, so that the captured 'type' is
    # always exactly "false-positive-" + tool (the key used by the database).
    return (rf'(?P<tag>SAF-(?P<id>\d+)-'
            rf'(?P<type>safe|false-positive-{re.escape(tool)}))')


def get_xen_tag_index_type_regex(tool):
    """Return the regex source matching a string that is only a Xen tag."""
    return rf'^{get_xen_tag_regex(tool)}$'


def get_xen_tag_comment_regex(tool):
    """Return the regex source matching a line ending with a Xen tag comment.

    On top of the groups of get_xen_tag_regex(), the pattern exposes
    'full_line', 'before' (everything preceding the comment) and 'comment'
    (the /* ... */ comment carrying the tag).
    """
    before_comment = r'(?P<before>.*)'
    comment = rf'(?P<comment>/\* +{get_xen_tag_regex(tool)}.*\*/)'
    return rf'^(?P<full_line>{before_comment}{comment})$'


# Returns a data structure containing dictionaries for safe and false-positive-*
# Xen tags, the key is the unique index of the tag and the content is the
# proprietary in-code comment to be used when the tag is found in the codebase
def load_tag_database(tool, input_files, data_struct = None, schema = "safe"):
    """Load the tag database JSON files into a substitution-rule structure.

    Parameters:
        tool: analyzer name, a key of tool_syntax.
        input_files: iterable of paths of JSON files having a 'content' list.
        data_struct: optional structure returned by a previous call; it is
            updated in place and returned.
        schema: "false-positive" to read the proprietary id from the
            'violation-id' field of every entry, anything else to read it
            from entry['analyser'][tool].

    Returns:
        {"safe": {index: comment}, "false-positive-<tool>": {index: comment}}

    Raises:
        TagDatabaseError: a file can't be read or decoded, or an entry id is
            not a valid Xen tag.
    """
    ret = data_struct if data_struct is not None else {
        "safe": {},
        "false-positive-" + tool: {}
    }
    database = []

    # Open all input files
    for file in input_files:
        try:
            with open(file, "rt") as handle:
                content = json.load(handle)
                database = database + content['content']
        except json.JSONDecodeError as e:
            raise TagDatabaseError("JSON decoding error in file {}: {}"
                                   .format(file, e)) from e
        except Exception as e:
            raise TagDatabaseError("Can't open file {}: {}"
                                   .format(file, e)) from e

    # Regex to capture the index of the Xen tag and the schema, compiled once
    tag_regex = re.compile(get_xen_tag_index_type_regex(tool))

    for entry in database:
        # If the false-positive schema is used, check the proprietary id in the
        # 'violation-id' field, otherwise rely on the "safe" schema.
        if schema == "false-positive":
            proprietary_id = entry['violation-id']
        elif tool in entry['analyser']:
            proprietary_id = entry['analyser'][tool]
        else:
            proprietary_id = ""

        # Entries without an id for this tool have nothing to translate
        if proprietary_id == "":
            continue

        comment = tool_syntax[tool].replace("VID", proprietary_id)
        xen_tag = tag_regex.match(entry["id"])
        if xen_tag is None:
            raise TagDatabaseError(
                "Error in database file, entry {} has unexpected "
                "format.".format(entry["id"])
            )

        # Save in safe or false-positive-* the key {#id: "comment"}; the
        # bucket is created on demand so that a caller supplied data_struct
        # doesn't need to be pre-populated for this tool.
        id_number = int(xen_tag.group('id'))
        key = xen_tag.group('type')
        ret.setdefault(key, {})[id_number] = "/* {} */".format(comment)

    return ret


def _apply_tag_substitutions(tool, lines, matches, subs_rules):
    """Replace in `lines` (in place) the Xen tag comments listed in `matches`."""
    tag_regex = re.compile(get_xen_tag_index_type_regex(tool))

    # matches contains the line number where the comments are, the line number
    # starts from 1 but in the array the first line is zero.
    # For every line where there is a Xen tag comment, extract from the Xen
    # tag the unique index and the type (safe, false-positive-*) and with
    # those information access the subs_rules dictionary to see if there is
    # a match
    for line_number, match in matches.items():
        xen_tag = tag_regex.match(match['tag'])
        if xen_tag is None:
            raise TagDatabaseError(
                "Tag {} at line {} has unexpected format."
                .format(match['tag'], line_number)
            )
        id_number = int(xen_tag.group('id'))
        key = xen_tag.group('type')
        if id_number not in subs_rules[key]:
            continue

        # A line number below 1 would silently index from the end of the file
        if not 1 <= line_number <= len(lines):
            raise TagDatabaseError(
                "Line number {} of tag {} is outside of the file."
                .format(line_number, match['tag'])
            )

        comment_in = match['comment']
        before = match['before']
        comment_out = subs_rules[key][id_number]
        if before != '' and not re.match(r'^[ \t]+$', before):
            # The comment is at the end of some line with some code
            if tool == "eclair":
                # Eclair supports comment at the end of the line, so the only
                # thing to do is use the right syntax in the comment, the
                # default version of it is deviating the current line and the
                # next one
                comment_out = re.sub(r'\d+ ""', '0 ""', comment_out)
            else:
                # Other tools do not support deviating the same line of the
                # comment, so we use a trick and we put the comment at the end
                # of the previous line
                if line_number < 2:
                    raise TagDatabaseError(
                        "The comment {} using the tool '{}' can't "
                        "stay at the end of the line 1."
                        .format(comment_in, tool)
                    )
                lines[line_number-2] = \
                    lines[line_number-2].replace("\n", comment_out + '\n')
                comment_out = ''

        # Literal replacement: the proprietary comment must not be interpreted
        # as a regex replacement template (backslashes, group references).
        lines[line_number-1] = \
            lines[line_number-1].replace(comment_in, comment_out)


def substitute_tags(tool, input_file, grep_struct, subs_rules):
    """Write to grep_struct["file"] a copy of input_file with translated tags.

    Parameters:
        tool: analyzer name, a key of tool_syntax.
        input_file: path of the file to read.
        grep_struct: dictionary with the output path in "file" and, in
            "matches", a dictionary {line number (from 1): {'tag': ...,
            'comment': ..., 'before': ...}} describing every Xen tag comment.
        subs_rules: structure returned by load_tag_database().

    Raises:
        TagDatabaseError: on any failure reading, translating or writing.
    """
    # Read the input before touching the output: opening the output first
    # would truncate it even when the operation fails later, and would destroy
    # the content when input and output are the same file.
    try:
        with open(input_file, "rt") as infile:
            parsed_content = infile.readlines()
    except Exception as e:
        raise TagDatabaseError("Issue with reading file {}: {}"
                               .format(input_file, e)) from e

    try:
        _apply_tag_substitutions(tool, parsed_content, grep_struct["matches"],
                                 subs_rules)
        with open(grep_struct["file"], "wt") as outfile:
            outfile.writelines(parsed_content)
    except TagDatabaseError:
        # Already carries a precise message, don't bury it in a generic one
        raise
    except Exception as e:
        raise TagDatabaseError("Issue with writing file {}: {}"
                               .format(grep_struct["file"], e)) from e