1#!/usr/bin/env python3
2
3import re, json
4
class TagDatabaseError(Exception):
    """Error raised when the tag database or a tagged file can't be handled."""
7
# This dictionary holds, for each analyzer, the rule that translates a
# violation id into the proprietary in-code comment:
#  - cppcheck: /* cppcheck-suppress[id] */
#  - coverity: /* coverity[id] */
#  - eclair:   /* -E> hide id 1 "" */
# Add entries to support more analyzers.
# "VID" is the placeholder that gets replaced with the analyser-specific
# violation id when the proprietary comment is generated.
tool_syntax = dict(
    cppcheck="cppcheck-suppress[VID]",
    coverity="coverity[VID]",
    eclair='-E> hide VID 1 ""',
)
18
def get_xen_tag_regex(tool):
    """Return the (unanchored) regex source matching a Xen tag for `tool`.

    The pattern defines three named groups:
      - tag:  the whole tag, "SAF-<index>-<type>"
      - id:   the decimal index of the tag
      - type: either "safe" or "false-positive-<tool>"
    """
    tag_types = 'safe|false-positive-{}'.format(tool)
    return r'(?P<tag>SAF-(?P<id>\d+)-(?P<type>' + tag_types + '))'
21
22
def get_xen_tag_index_type_regex(tool):
    """Return the Xen tag regex for `tool` anchored with '^' and '$'.

    Used to check that a string is exactly one Xen tag and to extract its
    'id' and 'type' named groups.
    """
    return '^' + get_xen_tag_regex(tool) + '$'
25
26
def get_xen_tag_comment_regex(tool):
    """Return the regex matching a line that ends with a Xen tag comment.

    On top of the groups of the Xen tag regex, the pattern defines:
      - full_line: the whole matched line
      - before:    everything that precedes the comment on the line
      - comment:   the "/* <Xen tag> ... */" comment itself
    """
    pieces = [
        '^(?P<full_line>',
        r'(?P<before>.*)',
        r'(?P<comment>/\* +',
        get_xen_tag_regex(tool),
        r'.*\*/)',
        ')$',
    ]
    return ''.join(pieces)
31
32
# Returns a data structure containing dictionaries for safe and false-positive-*
# Xen tags. In each dictionary the key is the unique index of the tag and the
# value is the proprietary in-code comment to be used when the tag is found in
# the codebase.
def load_tag_database(tool, input_files, data_struct = None, schema = "safe"):
    """Load the Xen tag database and build the tag -> proprietary comment map.

    Args:
        tool: analyser name; it selects the template in `tool_syntax`, the
            key looked up in each entry's 'analyser' mapping and the name of
            the "false-positive-<tool>" category.
        input_files: iterable of paths of JSON files, each one holding an
            object with a 'content' list of entries.
        data_struct: optional structure to update in place (same layout as
            the return value); a new one is created when it is None.
        schema: when "false-positive" the proprietary id is read from the
            'violation-id' field of the entry, otherwise it is read from
            entry['analyser'][tool] (an entry without `tool` is skipped).

    Returns:
        A dict with the "safe" and "false-positive-<tool>" dictionaries,
        each mapping the integer index of a Xen tag to the proprietary
        comment "/* ... */" that replaces it.

    Raises:
        TagDatabaseError: a file can't be read or decoded, it has no
            'content' list, or an entry is malformed.
    """
    ret = data_struct if data_struct is not None else {
        "safe": {},
        "false-positive-" + tool: {}
    }
    database = []

    # Open all input files
    for file in input_files:
        try:
            with open(file, "rt") as handle:
                content = json.load(handle)
        except json.JSONDecodeError as e:
            raise TagDatabaseError("JSON decoding error in file {}: {}"
                                   .format(file, e)) from e
        except Exception as e:
            raise TagDatabaseError("Can't open file {}: {}"
                                   .format(file, e)) from e

        # A readable file with the wrong layout is a database error, not an
        # I/O error: report it as such.
        entries = content.get('content') if isinstance(content, dict) else None
        if not isinstance(entries, list):
            raise TagDatabaseError(
                "Error in database file {}: missing or invalid 'content' "
                "list.".format(file)
            )
        database.extend(entries)

    # Regex to capture the index of the Xen tag and the schema, built once
    # because it only depends on the tool.
    tag_regex = re.compile(get_xen_tag_index_type_regex(tool))

    for entry in database:
        # If the false-positive schema is used, check the proprietary id in the
        # 'violation-id' field, otherwise rely on the "safe" schema.
        try:
            if schema == "false-positive":
                proprietary_id = entry['violation-id']
            elif tool in entry['analyser']:
                proprietary_id = entry['analyser'][tool]
            else:
                proprietary_id = ""
            if proprietary_id == "":
                # Nothing to translate for this tool
                continue
            entry_id = entry["id"]
        except (KeyError, TypeError) as e:
            raise TagDatabaseError(
                "Error in database file, malformed entry {!r}: {!r}"
                .format(entry, e)
            ) from e

        comment = tool_syntax[tool].replace("VID", proprietary_id)
        xen_tag = tag_regex.match(entry_id)
        if xen_tag is None:
            raise TagDatabaseError(
                    "Error in database file, entry {} has unexpected "
                    "format.".format(entry_id)
                )
        # Save in safe or false-positive-* the key {#id: "comment"}
        id_number = int(xen_tag.group('id'))
        key = xen_tag.group('type')
        ret[key][id_number] = "/* {} */".format(comment)

    return ret
81
82
def substitute_tags(tool, input_file, grep_struct, subs_rules):
    """Write a copy of `input_file` with Xen tag comments translated.

    Args:
        tool: analyser name, used to build the Xen tag regex and to pick the
            handling of comments placed at the end of a line of code.
        input_file: path of the file to read.
        grep_struct: dict with the output path in 'file' and, in 'matches',
            a mapping from the 1-based line number to a dict holding the
            'tag', 'comment' and 'before' strings found on that line.
        subs_rules: mapping from tag type to {tag index: proprietary
            comment} (presumably the structure built by load_tag_database).

    Raises:
        TagDatabaseError: the input can't be read, a tag is malformed or
            can't be placed, or the output can't be written.
    """
    # Read the input before touching the output, so that a failure here does
    # not leave behind a truncated output file and is reported as a read
    # error.
    try:
        with open(input_file, "rt") as infile:
            parsed_content = infile.readlines()
    except Exception as e:
        raise TagDatabaseError("Issue with reading file {}: {}"
                               .format(input_file, e)) from e

    try:
        tag_regex = re.compile(get_xen_tag_index_type_regex(tool))

        # grep_struct contains the line number where the comments are, the
        # line number starts from 1 but in the array the first line is zero.
        # For every line where there is a Xen tag comment, extract from the
        # Xen tag the unique index and the type (safe, false-positive-*) and
        # with those information access the subs_rules dictionary to see if
        # there is a match
        for line_number, match in grep_struct["matches"].items():
            xen_tag = tag_regex.match(match['tag'])
            if xen_tag is None:
                raise TagDatabaseError(
                    "Unexpected Xen tag {} at line {} of file {}."
                    .format(match['tag'], line_number, grep_struct["file"])
                )
            id_number = int(xen_tag.group('id'))
            key = xen_tag.group('type')
            if id_number not in subs_rules[key]:
                continue

            comment_in = match['comment']
            before = match['before']
            comment_out = subs_rules[key][id_number]
            if before != '' and not re.match(r'^[ \t]+$', before):
                # The comment is at the end of some line with some code
                if tool == "eclair":
                    # Eclair supports comment at the end of the line, so the
                    # only thing to do is use the right syntax in the
                    # comment, the default version of it is deviating the
                    # current line and the next one
                    comment_out = re.sub(r'\d+ ""', '0 ""', comment_out)
                else:
                    # Other tools do not support deviating the same line of
                    # the comment, so the proprietary comment is appended at
                    # the end of the previous line and the Xen one is removed
                    if line_number-2 < 0:
                        raise TagDatabaseError(
                            "The comment {} using the tool '{}' can't "
                            "stay at the end of the line 1 of file {}."
                            .format(comment_in, tool, grep_struct["file"])
                        )
                    parsed_content[line_number-2] = \
                        parsed_content[line_number-2].replace(
                            "\n", comment_out + '\n')
                    comment_out = ''
            # Plain string replacement: comment_out must be inserted as is,
            # without being interpreted as a regex replacement template.
            parsed_content[line_number-1] = \
                parsed_content[line_number-1].replace(comment_in, comment_out)

        # Open the output only once the whole content is ready
        with open(grep_struct["file"], "wt") as outfile:
            outfile.writelines(parsed_content)
    except TagDatabaseError:
        # Already meaningful, don't relabel it as a write error
        raise
    except Exception as e:
        raise TagDatabaseError("Issue with writing file {}: {}"
                               .format(grep_struct["file"], e)) from e
141