#!/usr/bin/env python3
"""Parser for git diffs written in the unified format.

The parser walks the lines of a ``git diff`` output and builds, for every
file touched by the diff, a ChangeSet that records how the file was affected
(changed, renamed, deleted, copied) and which lines were added or removed.
"""

import re
import sys

try:
    from enum import Enum
except ImportError:
    if sys.version_info[0] == 2:
        print("Please install enum34 package when using python 2.")
    else:
        print("Please use python version 3.5 or above.")
    sys.exit(1)

try:
    from typing import Tuple
except ImportError:
    if sys.version_info[0] == 2:
        print("Please install typing package when using python 2.")
    else:
        print("Please use python version 3.5 or above.")
    sys.exit(1)


class UnifiedFormatParseError(Exception):
    """Raised when the diff cannot be read or the parser is misused."""
    pass


class ParserState(Enum):
    """States of the line-by-line diff parser."""
    # Looking for a "diff --git a/<path> b/<path>" line
    FIND_DIFF_HEADER = 0
    # Inside a hunk, consuming "+", "-" and " " lines
    REGISTER_CHANGES = 1
    # After a diff header (or a finished hunk): looking for extended header
    # lines, a hunk header or the next diff header
    FIND_HUNK_OR_DIFF_HEADER = 2


class ChangeSet(object):
    """Changes applied by a diff to one file.

    Holds the original and destination paths, the kind of operation applied
    to the file (ChangeMode) and the ordered list of
    (line_number, ChangeType) tuples registered through add_change().
    """

    class ChangeType(Enum):
        """Kind of a single line change."""
        REMOVE = 0
        ADD = 1

    class ChangeMode(Enum):
        """Kind of operation applied to the whole file."""
        NONE = 0
        CHANGE = 1
        RENAME = 2
        DELETE = 3
        COPY = 4

    def __init__(self, a_file, b_file):
        # type: (str, str) -> None
        """Create an empty change set for the a_file -> b_file pair."""
        self.orig_file = a_file
        self.dst_file = b_file
        self.change_mode = ChangeSet.ChangeMode.NONE
        self.__changes = []

    def __str__(self):
        # type: () -> str
        str_out = "{}: {} -> {}:\n{}\n".format(
            str(self.change_mode), self.orig_file, self.dst_file,
            str(self.__changes)
        )
        return str_out

    def set_change_mode(self, change_mode):
        # type: (ChangeMode) -> None
        """Set the operation applied to the file."""
        self.change_mode = change_mode

    def is_change_mode(self, change_mode):
        # type: (ChangeMode) -> bool
        """Return True when the current change mode equals change_mode."""
        return self.change_mode == change_mode

    def add_change(self, line_number, change_type):
        # type: (int, ChangeType) -> None
        """Append a (line_number, change_type) tuple to the change list."""
        self.__changes.append((line_number, change_type))

    def get_change_set(self):
        # type: () -> list
        """Return the list of (line_number, ChangeType) tuples, in the
        order they were registered."""
        return self.__changes


class UnifiedFormatParser(object):
    """Parse a git diff (unified format) into per-file ChangeSet objects.

    The diff is parsed at construction time; results are available through
    get_change_sets().
    """

    def __init__(self, args):
        # type: (str | list) -> None
        """Load and parse a diff.

        args is either the path (str) of a file holding the diff, or a list
        of diff lines. Raises UnifiedFormatParseError if the file cannot be
        read or args has any other type.
        """
        if isinstance(args, str):
            self.__diff_file = args
            try:
                with open(self.__diff_file, "rt") as infile:
                    self.__diff_lines = infile.readlines()
            except (IOError, OSError) as e:
                # IOError is not a subclass of OSError on python 2
                raise UnifiedFormatParseError(
                    "Issue with reading file {}: {}"
                    .format(self.__diff_file, e)
                )
        elif isinstance(args, list):
            self.__diff_file = "git-diff-local.txt"
            self.__diff_lines = args
        else:
            raise UnifiedFormatParseError(
                "UnifiedFormatParser constructor called with wrong arguments")

        self.__git_diff_header = re.compile(r'^diff --git a/(.*) b/(.*)$')
        # The line count of either side is omitted by git when it is 1,
        # e.g. "@@ -3 +3 @@" or "@@ -5,2 +5 @@", so both are optional
        self.__git_hunk_header = \
            re.compile(r'^@@ -\d+(?:,(\d+))? \+(\d+)(?:,(\d+))? @@.*$')
        self.__diff_set = {}
        self.__parse()

    def get_diff_path(self):
        # type: () -> str
        """Return the diff file path, or "git-diff-local.txt" when the
        parser was built from a list of lines."""
        return self.__diff_file

    def add_change_set(self, change_set):
        # type: (ChangeSet) -> None
        """Register change_set, unless its change mode is still NONE.

        The dictionary key is the original file path, except for copies
        that are keyed by the destination path.
        """
        if not change_set.is_change_mode(ChangeSet.ChangeMode.NONE):
            if change_set.is_change_mode(ChangeSet.ChangeMode.COPY):
                # Add copy change mode items using the dst_file key, because
                # there might be other changes for the orig_file in this diff
                self.__diff_set[change_set.dst_file] = change_set
            else:
                self.__diff_set[change_set.orig_file] = change_set

    def __parse(self):
        # type: () -> None
        """Run the state machine over the diff lines and fill the diff set."""
        def parse_diff_header(line):
            # type: (str) -> ChangeSet | None
            # Return a new ChangeSet if line is a "diff --git" header with
            # non-empty paths, None otherwise
            diff_head = self.__git_diff_header.match(line)
            if diff_head and diff_head.group(1) and diff_head.group(2):
                return ChangeSet(diff_head.group(1), diff_head.group(2))

            return None

        def parse_hunk_header(line):
            # type: (str) -> Tuple[int, int, int]
            # Return (b-side start line, a-side line count, b-side line
            # count), or (-1, -1, -1) if line is not a hunk header
            hunk_head = self.__git_hunk_header.match(line)
            if not hunk_head:
                return (-1, -1, -1)

            a_count, b_start, b_count = hunk_head.groups()
            # A missing count means the hunk side is exactly one line long
            return (
                int(b_start),
                1 if a_count is None else int(a_count),
                1 if b_count is None else int(b_count)
            )

        file_linenum = 0
        hunk_a_linemax = 0
        hunk_b_linemax = 0
        consecutive_remove = 0
        diff_elem = None
        parse_state = ParserState.FIND_DIFF_HEADER
        ChangeMode = ChangeSet.ChangeMode
        ChangeType = ChangeSet.ChangeType

        for line in self.__diff_lines:
            if parse_state == ParserState.FIND_DIFF_HEADER:
                diff_elem = parse_diff_header(line)
                if diff_elem:
                    # Found the diff header, go to the next stage
                    parse_state = ParserState.FIND_HUNK_OR_DIFF_HEADER
            elif parse_state == ParserState.FIND_HUNK_OR_DIFF_HEADER:
                # Here only these change modalities will be registered:
                # deleted file mode <mode>
                # rename from <path>
                # rename to <path>
                # copy from <path>
                # copy to <path>
                #
                # These will be ignored:
                # old mode <mode>
                # new mode <mode>
                # new file mode <mode>
                #
                # Also these info will be ignored
                # similarity index <number>
                # dissimilarity index <number>
                # index <hash>..<hash> <mode>
                if line.startswith("deleted file"):
                    # If the file is deleted, register it but don't go through
                    # the changes that will be only a set of lines removed.
                    # Register it right now: the FIND_DIFF_HEADER state
                    # overwrites diff_elem on the very next line.
                    diff_elem.set_change_mode(ChangeMode.DELETE)
                    self.add_change_set(diff_elem)
                    diff_elem = None
                    parse_state = ParserState.FIND_DIFF_HEADER
                elif line.startswith("new file"):
                    # If the file is new, skip it, as it doesn't give any
                    # useful information on the report translation
                    parse_state = ParserState.FIND_DIFF_HEADER
                elif line.startswith("rename to"):
                    # Renaming operation can be a pure renaming or a rename
                    # and a set of change, so keep looking for the hunk
                    # header
                    diff_elem.set_change_mode(ChangeMode.RENAME)
                elif line.startswith("copy to"):
                    # This is a copy operation, mark it
                    diff_elem.set_change_mode(ChangeMode.COPY)
                else:
                    # Look for the hunk header
                    (file_linenum, hunk_a_linemax, hunk_b_linemax) = \
                        parse_hunk_header(line)
                    if file_linenum >= 0:
                        if diff_elem.is_change_mode(ChangeMode.NONE):
                            # The file has only changes
                            diff_elem.set_change_mode(ChangeMode.CHANGE)
                        # Removals counted in a previous hunk must not shift
                        # the line numbers of this one
                        consecutive_remove = 0
                        parse_state = ParserState.REGISTER_CHANGES
                    else:
                        # ... or there could be a diff header
                        new_diff_elem = parse_diff_header(line)
                        if new_diff_elem:
                            # Found a diff header, register the last change
                            # item
                            self.add_change_set(diff_elem)
                            diff_elem = new_diff_elem
            elif parse_state == ParserState.REGISTER_CHANGES:
                if line.startswith("\\"):
                    # "\ No newline at end of file" is a marker, not a line
                    # of either file: it must not advance the line counter
                    continue

                if (hunk_b_linemax > 0) and line.startswith("+"):
                    diff_elem.add_change(file_linenum, ChangeType.ADD)
                    hunk_b_linemax -= 1
                    consecutive_remove = 0
                elif (hunk_a_linemax > 0) and line.startswith("-"):
                    diff_elem.add_change(file_linenum + consecutive_remove,
                                         ChangeType.REMOVE)
                    hunk_a_linemax -= 1
                    # A removed line doesn't exist in the new file, cancel
                    # the increment done at the end of this branch
                    file_linenum -= 1
                    consecutive_remove += 1
                elif ((hunk_a_linemax + hunk_b_linemax) > 0) and \
                        line.startswith(" "):
                    # Context line, it belongs to both sides of the hunk
                    hunk_a_linemax -= 1 if (hunk_a_linemax > 0) else 0
                    hunk_b_linemax -= 1 if (hunk_b_linemax > 0) else 0
                    consecutive_remove = 0

                if (hunk_a_linemax + hunk_b_linemax) <= 0:
                    # Hunk fully consumed
                    parse_state = ParserState.FIND_HUNK_OR_DIFF_HEADER

                file_linenum += 1

        if diff_elem is not None:
            self.add_change_set(diff_elem)

    def get_change_sets(self):
        # type: () -> dict
        """Return the dictionary mapping a file path to its ChangeSet."""
        return self.__diff_set