#!/usr/bin/env python3

"""Assemble Mbed TLS change log entries into the change log file.

Add changelog entries to the first level-2 section.
Create a new level-2 section for unreleased changes if needed.
Remove the input files unless --keep-entries is specified.

In each level-3 section, entries are sorted in chronological order
(oldest first). From oldest to newest:
* Merged entry files are sorted according to their merge date (date of
  the merge commit that brought the commit that created the file into
  the target branch).
* Committed but unmerged entry files are sorted according to the date
  of the commit that adds them.
* Uncommitted entry files are sorted according to their modification time.

You must run this program from within a git working directory.
"""

# Copyright The Mbed TLS Contributors
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse
from collections import OrderedDict, namedtuple
import datetime
import functools
import glob
import os
import re
import subprocess
import sys

class InputFormatError(Exception):
    """An input file (changelog or entry file) is malformed.

    The exception message has the form ``FILENAME:LINE: MESSAGE`` where
    ``MESSAGE`` is `message` formatted with ``str.format`` using the
    remaining positional and keyword arguments.
    """

    def __init__(self, filename, line_number, message, *args, **kwargs):
        message = '{}:{}: {}'.format(filename, line_number,
                                     message.format(*args, **kwargs))
        super().__init__(message)

class CategoryParseError(Exception):
    """A version section body or entry file could not be split into categories.

    `line_offset` is relative to the start of the parsed text; callers
    that know the file name and absolute position convert this exception
    into an `InputFormatError`.
    """

    def __init__(self, line_offset, error_message):
        self.line_offset = line_offset
        self.error_message = error_message
        super().__init__('{}: {}'.format(line_offset, error_message))

class LostContent(Exception):
    """A line from an input file is missing from the generated output."""

    def __init__(self, filename, line):
        message = ('Lost content from {}: "{}"'.format(filename, line))
        super().__init__(message)

# The category names we use in the changelog.
# If you edit this, update ChangeLog.d/README.md.
STANDARD_CATEGORIES = (
    'API changes',
    'Default behavior changes',
    'Requirement changes',
    'New deprecations',
    'Removals',
    'Features',
    'Security',
    'Bugfix',
    'Changes',
)

# The maximum line length for an entry
MAX_LINE_LENGTH = 80

CategoryContent = namedtuple('CategoryContent', [
    'name', 'title_line', # Title text and line number of the title
    'body', 'body_line', # Body text and starting line number of the body
])

class ChangelogFormat:
    """Virtual class documenting how to write a changelog format class."""

    @classmethod
    def extract_top_version(cls, changelog_file_content):
        """Split out the top version section.

        If the top version is already released, create a new top
        version section for an unreleased version.

        Return ``(header, top_version_title, top_version_body, trailer)``
        where the "top version" is the existing top version section if it's
        for unreleased changes, and a newly created section otherwise.
        To assemble the changelog after modifying top_version_body,
        concatenate the four pieces.
        """
        raise NotImplementedError

    @classmethod
    def version_title_text(cls, version_title):
        """Return the text of a formatted version section title."""
        raise NotImplementedError

    @classmethod
    def split_categories(cls, version_body):
        """Split a changelog version section body into categories.

        Return a list of `CategoryContent`. The name of each element is
        the category title without any formatting.
        """
        raise NotImplementedError

    @classmethod
    def format_category(cls, title, body):
        """Construct the text of a category section from its title and body."""
        raise NotImplementedError

class TextChangelogFormat(ChangelogFormat):
    """The traditional Mbed TLS changelog format."""

    _unreleased_version_text = '= mbed TLS x.x.x branch released xxxx-xx-xx'
    @classmethod
    def is_released_version(cls, title):
        """Return true unless the title contains an incomplete release date."""
        # Look for an incomplete release date
        return not re.search(r'[0-9x]{4}-[0-9x]{2}-[0-9x]?x', title)

    _top_version_re = re.compile(r'(?:\A|\n)(=[^\n]*\n+)(.*?\n)(?:=|$)',
                                 re.DOTALL)
    @classmethod
    def extract_top_version(cls, changelog_file_content):
        """A version section starts with a line starting with '='.

        Raise ValueError if the content has no version section at all.
        """
        m = cls._top_version_re.search(changelog_file_content)
        if m is None:
            # Fail with an explicit message rather than an AttributeError
            # on None further down.
            raise ValueError('No version section found in the changelog')
        top_version_start = m.start(1)
        top_version_end = m.end(2)
        top_version_title = m.group(1)
        top_version_body = m.group(2)
        if cls.is_released_version(top_version_title):
            # The top section is a released version: leave it untouched in
            # the trailer and insert a fresh, empty unreleased section
            # in front of it.
            top_version_end = top_version_start
            top_version_title = cls._unreleased_version_text + '\n\n'
            top_version_body = ''
        return (changelog_file_content[:top_version_start],
                top_version_title, top_version_body,
                changelog_file_content[top_version_end:])

    @classmethod
    def version_title_text(cls, version_title):
        """Return the first line of the title, without the line break."""
        # Strip everything from the first newline onwards. The flags must
        # be passed by keyword: the fourth positional argument of re.sub
        # is `count`, not `flags`.
        return re.sub(r'\n.*', '', version_title, flags=re.DOTALL)

    _category_title_re = re.compile(r'(^\w.*)\n+', re.MULTILINE)
    @classmethod
    def split_categories(cls, version_body):
        """A category title is a line with the title in column 0.

        Line numbers in the returned `CategoryContent` objects are
        0-based offsets from the first line of `version_body`.
        Raise CategoryParseError if the text does not start with a title.
        """
        if not version_body:
            return []
        title_matches = list(cls._category_title_re.finditer(version_body))
        if not title_matches or title_matches[0].start() != 0:
            # There is junk before the first category.
            raise CategoryParseError(0, 'Junk found where category expected')
        title_starts = [m.start(1) for m in title_matches]
        body_starts = [m.end(0) for m in title_matches]
        # Each body runs up to the next title, or to the end of the text.
        body_ends = title_starts[1:] + [len(version_body)]
        bodies = [version_body[body_start:body_end].rstrip('\n') + '\n'
                  for (body_start, body_end) in zip(body_starts, body_ends)]
        title_lines = [version_body[:pos].count('\n') for pos in title_starts]
        body_lines = [version_body[:pos].count('\n') for pos in body_starts]
        return [CategoryContent(title_match.group(1), title_line,
                                body, body_line)
                for title_match, title_line, body, body_line
                in zip(title_matches, title_lines, bodies, body_lines)]

    @classmethod
    def format_category(cls, title, body):
        """Return the title line followed by the body and a blank line."""
        # `split_categories` ensures that each body ends with a newline.
        # Make sure that there is additionally a blank line between categories.
        if not body.endswith('\n\n'):
            body += '\n'
        return title + '\n' + body

class ChangeLog:
    """An Mbed TLS changelog.

    A changelog file consists of some header text followed by one or
    more version sections. The version sections are in reverse
    chronological order. Each version section consists of a title and a body.

    The body of a version section consists of zero or more category
    subsections. Each category subsection consists of a title and a body.

    A changelog entry file has the same format as the body of a version section.

    A `ChangelogFormat` object defines the concrete syntax of the changelog.
    Entry files must have the same format as the changelog file.
    """

    # Only accept dotted version numbers (e.g. "3.1", not "3").
    # Refuse ".x" in a version number where x is a letter: this indicates
    # a version that is not yet released. Something like "3.1a" is accepted.
    _version_number_re = re.compile(r'[0-9]+\.[0-9A-Za-z.]+')
    _incomplete_version_number_re = re.compile(r'.*\.[A-Za-z]')
    _only_url_re = re.compile(r'^\s*\w+://\S+\s*$')
    _has_url_re = re.compile(r'.*://.*')

    def add_categories_from_text(self, filename, line_offset,
                                 text, allow_unknown_category):
        """Parse a version section or entry file.

        `filename` is only used in error messages. `line_offset` is the
        1-based line number, within `filename`, of the first line of `text`.
        If `allow_unknown_category` is false, a category that is not
        already known to this changelog is an error.

        Raise InputFormatError if the text is malformed, contains a
        disallowed category, or contains a line longer than
        MAX_LINE_LENGTH that does not consist solely of a URL.
        """
        try:
            categories = self.format.split_categories(text)
        except CategoryParseError as e:
            raise InputFormatError(filename, line_offset + e.line_offset,
                                   e.error_message) from e
        for category in categories:
            if not allow_unknown_category and \
               category.name not in self.categories:
                raise InputFormatError(filename,
                                       line_offset + category.title_line,
                                       'Unknown category: "{}"',
                                       category.name)

            body_split = category.body.splitlines()

            for line_number, line in enumerate(body_split, 1):
                if not self._only_url_re.match(line) and \
                   len(line) > MAX_LINE_LENGTH:
                    long_url_msg = '. URL exceeding length limit must be alone in its line.' \
                        if self._has_url_re.match(line) else ""
                    # body_line is 0-based and line_number is 1-based, so
                    # subtract 1 when rebasing onto the 1-based line_offset.
                    raise InputFormatError(filename,
                                           line_offset + category.body_line
                                           + line_number - 1,
                                           'Line is longer than allowed: '
                                           'Length {} (Max {}){}',
                                           len(line), MAX_LINE_LENGTH,
                                           long_url_msg)

            # Use .get() so that an unknown category, when allowed, is
            # appended after the known ones instead of raising KeyError.
            self.categories[category.name] = \
                self.categories.get(category.name, '') + category.body

    def __init__(self, input_stream, changelog_format):
        """Create a changelog object.

        Populate the changelog object from the content of the file
        input_stream.
        """
        self.format = changelog_format
        whole_file = input_stream.read()
        (self.header,
         self.top_version_title, top_version_body,
         self.trailer) = self.format.extract_top_version(whole_file)
        # Split the top version section into categories.
        self.categories = OrderedDict()
        for category in STANDARD_CATEGORIES:
            self.categories[category] = ''
        # 1-based line number of the first line of the top version body.
        offset = (self.header + self.top_version_title).count('\n') + 1
        self.add_categories_from_text(input_stream.name, offset,
                                      top_version_body, True)

    def add_file(self, input_stream):
        """Add changelog entries from a file.
        """
        self.add_categories_from_text(input_stream.name, 1,
                                      input_stream.read(), False)

    def write(self, filename):
        """Write the changelog to the specified file.
        """
        with open(filename, 'w', encoding='utf-8') as out:
            out.write(self.header)
            out.write(self.top_version_title)
            for title, body in self.categories.items():
                if not body:
                    # Do not emit a title for a category with no entries.
                    continue
                out.write(self.format.format_category(title, body))
            out.write(self.trailer)


@functools.total_ordering
class EntryFileSortKey:
    """This class defines an ordering on changelog entry files: older < newer.

    * Merged entry files are sorted according to their merge date (date of
      the merge commit that brought the commit that created the file into
      the target branch).
    * Committed but unmerged entry files are sorted according to the date
      of the commit that adds them.
    * Uncommitted entry files are sorted according to their modification time.

    This class assumes that the file is in a git working directory with
    the target branch checked out.
    """

    # Categories of files. A lower number is considered older.
    MERGED = 0
    COMMITTED = 1
    LOCAL = 2

    @staticmethod
    def creation_hash(filename):
        """Return the git commit id at which the given file was created.

        Return None if the file was never checked into git.
        """
        hashes = subprocess.check_output(['git', 'log', '--format=%H',
                                          '--follow',
                                          '--', filename])
        m = re.search('(.+)$', hashes.decode('ascii'))
        if not m:
            # The git output is empty. This means that the file was
            # never checked in.
            return None
        # The last commit in the log is the oldest one, which is when the
        # file was created.
        return m.group(0)

    @staticmethod
    def list_merges(some_hash, target, *options):
        """List merge commits from some_hash to target.

        Pass options to git to select which commits are included.
        Return a list of commit ids, empty if there is no such merge.
        """
        text = subprocess.check_output(['git', 'rev-list',
                                        '--merges', *options,
                                        '..'.join([some_hash, target])])
        # splitlines() yields [] for empty output, whereas split('\n')
        # would yield [''], i.e. a bogus empty commit id.
        return text.decode('ascii').splitlines()

    @classmethod
    def merge_hash(cls, some_hash):
        """Return the git commit id at which the given commit was merged.

        Return None if the given commit was never merged.
        """
        target = 'HEAD'
        # List the merges from some_hash to the target in two ways.
        # The ancestry list is the ones that are both descendants of
        # some_hash and ancestors of the target.
        ancestry = frozenset(cls.list_merges(some_hash, target,
                                             '--ancestry-path'))
        # The first_parents list only contains merges that are directly
        # on the target branch. We want it in reverse order (oldest first).
        first_parents = cls.list_merges(some_hash, target,
                                        '--first-parent', '--reverse')
        # Look for the oldest merge commit that's both on the direct path
        # and directly on the target branch. That's the place where some_hash
        # was merged on the target branch. See
        # https://stackoverflow.com/questions/8475448/find-merge-commit-which-include-a-specific-commit
        for commit in first_parents:
            if commit in ancestry:
                return commit
        return None

    @staticmethod
    def commit_timestamp(commit_id):
        """Return the timestamp of the given commit.

        The result is a naive `datetime.datetime` expressed in UTC.
        """
        text = subprocess.check_output(['git', 'show', '-s',
                                        '--format=%ct',
                                        commit_id])
        # Same value as the deprecated datetime.utcfromtimestamp().
        aware = datetime.datetime.fromtimestamp(int(text),
                                                datetime.timezone.utc)
        return aware.replace(tzinfo=None)

    @staticmethod
    def file_timestamp(filename):
        """Return the modification timestamp of the given file.

        The result is a naive `datetime.datetime` in local time. It is
        only ever compared with other file timestamps, because the
        category comes first in the sort key.
        """
        mtime = os.stat(filename).st_mtime
        return datetime.datetime.fromtimestamp(mtime)

    def __init__(self, filename):
        """Determine position of the file in the changelog entry order.

        This constructor returns an object that can be used with comparison
        operators, with `sort` and `sorted`, etc. Older entries are sorted
        before newer entries.
        """
        self.filename = filename
        creation_hash = self.creation_hash(filename)
        if not creation_hash:
            self.category = self.LOCAL
            self.datetime = self.file_timestamp(filename)
            return
        merge_hash = self.merge_hash(creation_hash)
        if not merge_hash:
            self.category = self.COMMITTED
            self.datetime = self.commit_timestamp(creation_hash)
            return
        self.category = self.MERGED
        self.datetime = self.commit_timestamp(merge_hash)

    def sort_key(self):
        """Return a concrete sort key for this entry file sort key object.

        ``ts1 < ts2`` is implemented as ``ts1.sort_key() < ts2.sort_key()``.
        """
        return (self.category, self.datetime, self.filename)

    def __eq__(self, other):
        if not isinstance(other, EntryFileSortKey):
            return NotImplemented
        return self.sort_key() == other.sort_key()

    def __lt__(self, other):
        if not isinstance(other, EntryFileSortKey):
            return NotImplemented
        return self.sort_key() < other.sort_key()


def check_output(generated_output_file, main_input_file, merged_files):
    """Make sanity checks on the generated output.

    The intent of these sanity checks is to have reasonable confidence
    that no content has been lost.

    The sanity check is that every line that is present in an input file
    is also present in an output file. This is not perfect but good enough
    for now.

    Raise LostContent on the first input line missing from the output.
    """
    with open(generated_output_file, 'r', encoding='utf-8') as generated:
        generated_output = set(generated)
    with open(main_input_file, 'r', encoding='utf-8') as main_input:
        for line in main_input:
            if line not in generated_output:
                raise LostContent('original file', line)
    for merged_file in merged_files:
        with open(merged_file, 'r', encoding='utf-8') as merged:
            for line in merged:
                if line not in generated_output:
                    raise LostContent(merged_file, line)

def finish_output(changelog, output_file, input_file, merged_files):
    """Write the changelog to the output file.

    The input file and the list of merged files are used only for sanity
    checks on the output.
    """
    if os.path.exists(output_file) and not os.path.isfile(output_file):
        # The output is a non-regular file (e.g. pipe). Write to it directly.
        output_temp = output_file
    else:
        # The output is a regular file. Write to a temporary file,
        # then move it into place atomically.
        output_temp = output_file + '.tmp'
    changelog.write(output_temp)
    check_output(output_temp, input_file, merged_files)
    if output_temp != output_file:
        # os.replace overwrites an existing destination on every platform,
        # whereas os.rename fails on Windows if the destination exists.
        os.replace(output_temp, output_file)

def remove_merged_entries(files_to_remove):
    """Delete the given entry files."""
    for filename in files_to_remove:
        os.remove(filename)

def list_files_to_merge(options):
    """List the entry files to merge, oldest first.

    "Oldest" is defined by `EntryFileSortKey`.
    """
    files_to_merge = glob.glob(os.path.join(options.dir, '*.txt'))
    files_to_merge.sort(key=EntryFileSortKey)
    return files_to_merge

def merge_entries(options):
    """Merge changelog entries into the changelog file.

    Read the changelog file from options.input.
    Read entries to merge from the directory options.dir.
    Write the new changelog to options.output.
    Remove the merged entries if options.keep_entries is false.
    """
    with open(options.input, 'r', encoding='utf-8') as input_file:
        changelog = ChangeLog(input_file, TextChangelogFormat)
    files_to_merge = list_files_to_merge(options)
    if not files_to_merge:
        sys.stderr.write('There are no pending changelog entries.\n')
        return
    for filename in files_to_merge:
        with open(filename, 'r', encoding='utf-8') as input_file:
            changelog.add_file(input_file)
    finish_output(changelog, options.output, options.input, files_to_merge)
    if not options.keep_entries:
        remove_merged_entries(files_to_merge)

def show_file_timestamps(options):
    """List the files to merge and their timestamp.

    This is only intended for debugging purposes.
    """
    files = list_files_to_merge(options)
    for filename in files:
        ts = EntryFileSortKey(filename)
        print(ts.category, ts.datetime, filename)

def set_defaults(options):
    """Add default values for missing options."""
    output_file = getattr(options, 'output', None)
    if output_file is None:
        options.output = options.input
    if getattr(options, 'keep_entries', None) is None:
        # Keep the entries by default only when writing to a separate
        # output file, i.e. when the input changelog is left untouched.
        options.keep_entries = (output_file is not None)

def main():
    """Command line entry point."""
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('--dir', '-d', metavar='DIR',
                        default='ChangeLog.d',
                        help='Directory to read entries from'
                             ' (default: ChangeLog.d)')
    parser.add_argument('--input', '-i', metavar='FILE',
                        default='ChangeLog',
                        help='Existing changelog file to read from and augment'
                             ' (default: ChangeLog)')
    parser.add_argument('--keep-entries',
                        action='store_true', dest='keep_entries', default=None,
                        help='Keep the files containing entries'
                             ' (default: remove them if --output/-o is not specified)')
    parser.add_argument('--no-keep-entries',
                        action='store_false', dest='keep_entries',
                        help='Remove the files containing entries after they are merged'
                             ' (default: remove them if --output/-o is not specified)')
    parser.add_argument('--output', '-o', metavar='FILE',
                        help='Output changelog file'
                             ' (default: overwrite the input)')
    parser.add_argument('--list-files-only',
                        action='store_true',
                        help=('Only list the files that would be processed '
                              '(with some debugging information)'))
    options = parser.parse_args()
    set_defaults(options)
    if options.list_files_only:
        show_file_timestamps(options)
        return
    merge_entries(options)

if __name__ == '__main__':
    main()