1#!/usr/bin/env python3
2
3import re
4import sys
5
6try:
7    from enum import Enum
8except Exception:
9    if sys.version_info[0] == 2:
10        print("Please install enum34 package when using python 2.")
11    else:
12        print("Please use python version 3.5 or above.")
13    sys.exit(1)
14
15try:
16    from typing import Tuple
17except Exception:
18    if sys.version_info[0] == 2:
19        print("Please install typing package when using python 2.")
20    else:
21        print("Please use python version 3.5 or above.")
22    sys.exit(1)
23
24
class UnifiedFormatParseError(Exception):
    """Error raised by UnifiedFormatParser.

    It is raised when the diff file cannot be read or when the parser is
    constructed with an argument that is neither a str nor a list.
    """
27
28
class ParserState(Enum):
    """States of the line-by-line state machine in UnifiedFormatParser."""

    # Scanning for a "diff --git a/<path> b/<path>" header; every other
    # line is ignored in this state.
    FIND_DIFF_HEADER = 0
    # Inside a hunk body: "+", "-" and " " lines are consumed until the
    # line counts announced by the hunk header are used up.
    REGISTER_CHANGES = 1
    # After a diff header or a finished hunk: looking at the extended header
    # lines (deleted/new file, rename to, copy to), at the next hunk header
    # or at the diff header of the next file.
    FIND_HUNK_OR_DIFF_HEADER = 2
33
34
class ChangeSet(object):
    """Changes recorded for one "a file -> b file" pair of a diff.

    A change set keeps the two file paths, a change mode and an ordered
    list of (line_number, ChangeType) entries.
    """

    class ChangeType(Enum):
        """Kind of a single recorded line change."""
        REMOVE = 0
        ADD = 1

    class ChangeMode(Enum):
        """What happened to the file as a whole; NONE is the initial mode."""
        NONE = 0
        CHANGE = 1
        RENAME = 2
        DELETE = 3
        COPY = 4

    def __init__(self, a_file, b_file):
        # type: (str, str) -> None
        """Create an empty change set for the given pair of paths.

        a_file is stored as orig_file and b_file as dst_file. The change
        mode starts as ChangeMode.NONE and no line change is recorded.
        """
        self.orig_file, self.dst_file = a_file, b_file
        self.change_mode = ChangeSet.ChangeMode.NONE
        self.__changes = []

    def __str__(self):
        # type: () -> str
        """Return "<mode>: <orig> -> <dst>:" followed by the change list."""
        mode_text = str(self.change_mode)
        changes_text = str(self.__changes)
        template = "{}: {} -> {}:\n{}\n"
        return template.format(
            mode_text, self.orig_file, self.dst_file, changes_text
        )

    def set_change_mode(self, change_mode):
        # type: (ChangeSet.ChangeMode) -> None
        """Replace the current change mode with change_mode."""
        self.change_mode = change_mode

    def is_change_mode(self, change_mode):
        # type: (ChangeSet.ChangeMode) -> bool
        """Tell whether the current change mode equals change_mode."""
        same_mode = (self.change_mode == change_mode)
        return same_mode

    def add_change(self, line_number, change_type):
        # type: (int, ChangeSet.ChangeType) -> None
        """Append one (line_number, change_type) entry to the change list."""
        entry = (line_number, change_type)
        self.__changes.append(entry)

    def get_change_set(self):
        # type: () -> list
        """Return the internal list of (line_number, ChangeType) tuples.

        The list itself is returned, not a copy.
        """
        return self.__changes
77
78
class UnifiedFormatParser(object):
    """Parser for git diffs in unified format.

    The diff is taken either from a file or from a list of lines and it is
    parsed while the object is constructed. Every file of the diff that is
    changed, renamed, deleted or copied is stored as a ChangeSet and can be
    retrieved with get_change_sets(). Newly created files are skipped.
    """

    def __init__(self, args):
        # type: (str | list) -> None
        """Load the diff lines and parse them.

        args is either the path of a file containing the diff (str) or the
        diff itself as a list of lines.

        Raises UnifiedFormatParseError when the file cannot be read or when
        args is neither a str nor a list.
        """
        if isinstance(args, str):
            self.__diff_file = args
            try:
                with open(self.__diff_file, "rt") as infile:
                    self.__diff_lines = infile.readlines()
            except (IOError, OSError) as e:
                # IOError is listed explicitly because on python 2 open()
                # raises IOError, which is not an OSError there
                raise UnifiedFormatParseError(
                    "Issue with reading file {}: {}"
                    .format(self.__diff_file, e)
                )
        elif isinstance(args, list):
            self.__diff_file = "git-diff-local.txt"
            self.__diff_lines = args
        else:
            raise UnifiedFormatParseError(
                "UnifiedFormatParser constructor called with wrong arguments")

        self.__git_diff_header = re.compile(r'^diff --git a/(.*) b/(.*)$')
        # The ",<count>" part of each range is optional: it is omitted when
        # the hunk side is exactly one line long, e.g. "@@ -3 +3 @@"
        self.__git_hunk_header = \
            re.compile(r'^@@ -\d+(?:,(\d+))? \+(\d+)(?:,(\d+))? @@.*$')
        self.__diff_set = {}
        self.__parse()

    def get_diff_path(self):
        # type: () -> str
        """Return the diff file path.

        When the parser was built from a list of lines this is the
        placeholder name "git-diff-local.txt".
        """
        return self.__diff_file

    def add_change_set(self, change_set):
        # type: (ChangeSet) -> None
        """Store change_set, unless its change mode is still NONE.

        The dictionary key is the destination path for copies and the
        original path for every other change mode. An entry already stored
        under the same key is replaced.
        """
        if not change_set.is_change_mode(ChangeSet.ChangeMode.NONE):
            if change_set.is_change_mode(ChangeSet.ChangeMode.COPY):
                # Add copy change mode items using the dst_file key, because
                # there might be other changes for the orig_file in this diff
                self.__diff_set[change_set.dst_file] = change_set
            else:
                self.__diff_set[change_set.orig_file] = change_set

    def __parse(self):
        # type: () -> None
        """Run the state machine over the diff lines and fill the diff set."""
        def parse_diff_header(line):
            # type: (str) -> ChangeSet | None
            # Return a new ChangeSet for a "diff --git" line, None otherwise
            change_item = None
            diff_head = self.__git_diff_header.match(line)
            if diff_head and diff_head.group(1) and diff_head.group(2):
                change_item = ChangeSet(diff_head.group(1), diff_head.group(2))

            return change_item

        def parse_hunk_header(line):
            # type: (str) -> Tuple[int, int, int]
            # Return (first line of the hunk in the b file, number of a file
            # lines in the hunk, number of b file lines in the hunk), or
            # (-1, -1, -1) when the line is not a hunk header
            hunk_head = self.__git_hunk_header.match(line)
            if hunk_head is None:
                return (-1, -1, -1)

            a_count = hunk_head.group(1)
            b_count = hunk_head.group(3)
            # A missing count means that the range is one line long
            return (
                int(hunk_head.group(2)),
                int(a_count) if a_count is not None else 1,
                int(b_count) if b_count is not None else 1
            )

        file_linenum = 0
        hunk_a_linemax = 0
        hunk_b_linemax = 0
        consecutive_remove = 0
        diff_elem = None
        parse_state = ParserState.FIND_DIFF_HEADER
        ChangeMode = ChangeSet.ChangeMode
        ChangeType = ChangeSet.ChangeType

        for line in self.__diff_lines:
            if parse_state == ParserState.FIND_DIFF_HEADER:
                # No change item is pending in this state: it has either
                # been registered or dropped before getting here
                diff_elem = parse_diff_header(line)
                if diff_elem is not None:
                    # Found the diff header, go to the next stage
                    parse_state = ParserState.FIND_HUNK_OR_DIFF_HEADER
            elif parse_state == ParserState.FIND_HUNK_OR_DIFF_HEADER:
                # Here only these change modalities will be registered:
                # deleted file mode <mode>
                # rename from <path>
                # rename to <path>
                # copy from <path>
                # copy to <path>
                #
                # These will be ignored:
                # old mode <mode>
                # new mode <mode>
                # new file mode <mode>
                #
                # Also these info will be ignored
                # similarity index <number>
                # dissimilarity index <number>
                # index <hash>..<hash> <mode>
                if line.startswith("deleted file"):
                    # If the file is deleted, register it but don't go through
                    # the changes that will be only a set of lines removed.
                    # Register it right now: the lines that follow are not
                    # diff headers, so the item would be lost otherwise
                    diff_elem.set_change_mode(ChangeMode.DELETE)
                    self.add_change_set(diff_elem)
                    diff_elem = None
                    parse_state = ParserState.FIND_DIFF_HEADER
                elif line.startswith("new file"):
                    # If the file is new, skip it, as it doesn't give any
                    # useful information on the report translation
                    diff_elem = None
                    parse_state = ParserState.FIND_DIFF_HEADER
                elif line.startswith("rename to"):
                    # Renaming operation can be a pure renaming or a rename
                    # and a set of change, so keep looking for the hunk
                    # header
                    diff_elem.set_change_mode(ChangeMode.RENAME)
                elif line.startswith("copy to"):
                    # This is a copy operation, mark it
                    diff_elem.set_change_mode(ChangeMode.COPY)
                else:
                    # Look for the hunk header
                    (file_linenum, hunk_a_linemax, hunk_b_linemax) = \
                        parse_hunk_header(line)
                    if file_linenum >= 0:
                        if diff_elem.is_change_mode(ChangeMode.NONE):
                            # The file has only changes
                            diff_elem.set_change_mode(ChangeMode.CHANGE)
                        # A run of removed lines never spans two hunks
                        consecutive_remove = 0
                        parse_state = ParserState.REGISTER_CHANGES
                    else:
                        # ... or there could be a diff header
                        new_diff_elem = parse_diff_header(line)
                        if new_diff_elem is not None:
                            # Found a diff header, register the last change
                            # item
                            self.add_change_set(diff_elem)
                            diff_elem = new_diff_elem
            elif parse_state == ParserState.REGISTER_CHANGES:
                if (hunk_b_linemax > 0) and line.startswith("+"):
                    diff_elem.add_change(file_linenum, ChangeType.ADD)
                    hunk_b_linemax -= 1
                    consecutive_remove = 0
                elif (hunk_a_linemax > 0) and line.startswith("-"):
                    # Each removed line of a run is recorded one line after
                    # the previous one, starting from the current position
                    diff_elem.add_change(file_linenum + consecutive_remove,
                                         ChangeType.REMOVE)
                    hunk_a_linemax -= 1
                    # A removed line is not part of the b file: compensate
                    # the increment done at the end of this branch
                    file_linenum -= 1
                    consecutive_remove += 1
                elif ((hunk_a_linemax + hunk_b_linemax) > 0) and \
                        line.startswith(" "):
                    hunk_a_linemax -= 1 if (hunk_a_linemax > 0) else 0
                    hunk_b_linemax -= 1 if (hunk_b_linemax > 0) else 0
                    consecutive_remove = 0

                if (hunk_a_linemax + hunk_b_linemax) <= 0:
                    parse_state = ParserState.FIND_HUNK_OR_DIFF_HEADER

                # "\ No newline at end of file" only annotates the previous
                # line, it must not move the position in the b file
                if not line.startswith("\\"):
                    file_linenum += 1

        if diff_elem is not None:
            self.add_change_set(diff_elem)

    def get_change_sets(self):
        # type: () -> dict
        """Return the dictionary that maps a file path to its ChangeSet.

        The key is the destination path for copies and the original path
        for the other change modes. The internal dictionary is returned,
        not a copy.
        """
        return self.__diff_set
238