| #!/usr/bin/env python3 |
| |
| """Assemble Mbed TLS change log entries into the change log file. |
| |
| Add changelog entries to the first level-2 section. |
| Create a new level-2 section for unreleased changes if needed. |
| Remove the input files unless --keep-entries is specified. |
| |
| In each level-3 section, entries are sorted in chronological order |
| (oldest first). From oldest to newest: |
| * Merged entry files are sorted according to their merge date (date of |
| the merge commit that brought the commit that created the file into |
| the target branch). |
| * Committed but unmerged entry files are sorted according to the date |
| of the commit that adds them. |
| * Uncommitted entry files are sorted according to their modification time. |
| |
| You must run this program from within a git working directory. |
| """ |
| |
| # Copyright The Mbed TLS Contributors |
| # SPDX-License-Identifier: Apache-2.0 |
| # |
| # Licensed under the Apache License, Version 2.0 (the "License"); you may |
| # not use this file except in compliance with the License. |
| # You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT |
| # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| |
| import argparse |
| from collections import OrderedDict, namedtuple |
| import datetime |
| import functools |
| import glob |
| import os |
| import re |
| import subprocess |
| import sys |
| |
| class InputFormatError(Exception): |
| def __init__(self, filename, line_number, message, *args, **kwargs): |
| message = '{}:{}: {}'.format(filename, line_number, |
| message.format(*args, **kwargs)) |
| super().__init__(message) |
| |
| class CategoryParseError(Exception): |
| def __init__(self, line_offset, error_message): |
| self.line_offset = line_offset |
| self.error_message = error_message |
| super().__init__('{}: {}'.format(line_offset, error_message)) |
| |
| class LostContent(Exception): |
| def __init__(self, filename, line): |
| message = ('Lost content from {}: "{}"'.format(filename, line)) |
| super().__init__(message) |
| |
| # The category names we use in the changelog. |
| # If you edit this, update ChangeLog.d/README.md. |
| STANDARD_CATEGORIES = ( |
| 'API changes', |
| 'Default behavior changes', |
| 'Requirement changes', |
| 'New deprecations', |
| 'Removals', |
| 'Features', |
| 'Security', |
| 'Bugfix', |
| 'Changes', |
| ) |
| |
| # The maximum line length for an entry |
| MAX_LINE_LENGTH = 80 |
| |
| CategoryContent = namedtuple('CategoryContent', [ |
| 'name', 'title_line', # Title text and line number of the title |
| 'body', 'body_line', # Body text and starting line number of the body |
| ]) |
| |
| class ChangelogFormat: |
| """Virtual class documenting how to write a changelog format class.""" |
| |
| @classmethod |
| def extract_top_version(cls, changelog_file_content): |
| """Split out the top version section. |
| |
| If the top version is already released, create a new top |
| version section for an unreleased version. |
| |
| Return ``(header, top_version_title, top_version_body, trailer)`` |
| where the "top version" is the existing top version section if it's |
| for unreleased changes, and a newly created section otherwise. |
| To assemble the changelog after modifying top_version_body, |
| concatenate the four pieces. |
| """ |
| raise NotImplementedError |
| |
| @classmethod |
| def version_title_text(cls, version_title): |
| """Return the text of a formatted version section title.""" |
| raise NotImplementedError |
| |
| @classmethod |
| def split_categories(cls, version_body): |
| """Split a changelog version section body into categories. |
| |
| Return a list of `CategoryContent` the name is category title |
| without any formatting. |
| """ |
| raise NotImplementedError |
| |
| @classmethod |
| def format_category(cls, title, body): |
| """Construct the text of a category section from its title and body.""" |
| raise NotImplementedError |
| |
| class TextChangelogFormat(ChangelogFormat): |
| """The traditional Mbed TLS changelog format.""" |
| |
| _unreleased_version_text = '= mbed TLS x.x.x branch released xxxx-xx-xx' |
| @classmethod |
| def is_released_version(cls, title): |
| # Look for an incomplete release date |
| return not re.search(r'[0-9x]{4}-[0-9x]{2}-[0-9x]?x', title) |
| |
| _top_version_re = re.compile(r'(?:\A|\n)(=[^\n]*\n+)(.*?\n)(?:=|$)', |
| re.DOTALL) |
| @classmethod |
| def extract_top_version(cls, changelog_file_content): |
| """A version section starts with a line starting with '='.""" |
| m = re.search(cls._top_version_re, changelog_file_content) |
| top_version_start = m.start(1) |
| top_version_end = m.end(2) |
| top_version_title = m.group(1) |
| top_version_body = m.group(2) |
| if cls.is_released_version(top_version_title): |
| top_version_end = top_version_start |
| top_version_title = cls._unreleased_version_text + '\n\n' |
| top_version_body = '' |
| return (changelog_file_content[:top_version_start], |
| top_version_title, top_version_body, |
| changelog_file_content[top_version_end:]) |
| |
| @classmethod |
| def version_title_text(cls, version_title): |
| return re.sub(r'\n.*', version_title, re.DOTALL) |
| |
| _category_title_re = re.compile(r'(^\w.*)\n+', re.MULTILINE) |
| @classmethod |
| def split_categories(cls, version_body): |
| """A category title is a line with the title in column 0.""" |
| if not version_body: |
| return [] |
| title_matches = list(re.finditer(cls._category_title_re, version_body)) |
| if not title_matches or title_matches[0].start() != 0: |
| # There is junk before the first category. |
| raise CategoryParseError(0, 'Junk found where category expected') |
| title_starts = [m.start(1) for m in title_matches] |
| body_starts = [m.end(0) for m in title_matches] |
| body_ends = title_starts[1:] + [len(version_body)] |
| bodies = [version_body[body_start:body_end].rstrip('\n') + '\n' |
| for (body_start, body_end) in zip(body_starts, body_ends)] |
| title_lines = [version_body[:pos].count('\n') for pos in title_starts] |
| body_lines = [version_body[:pos].count('\n') for pos in body_starts] |
| return [CategoryContent(title_match.group(1), title_line, |
| body, body_line) |
| for title_match, title_line, body, body_line |
| in zip(title_matches, title_lines, bodies, body_lines)] |
| |
| @classmethod |
| def format_category(cls, title, body): |
| # `split_categories` ensures that each body ends with a newline. |
| # Make sure that there is additionally a blank line between categories. |
| if not body.endswith('\n\n'): |
| body += '\n' |
| return title + '\n' + body |
| |
| class ChangeLog: |
| """An Mbed TLS changelog. |
| |
| A changelog file consists of some header text followed by one or |
| more version sections. The version sections are in reverse |
| chronological order. Each version section consists of a title and a body. |
| |
| The body of a version section consists of zero or more category |
| subsections. Each category subsection consists of a title and a body. |
| |
| A changelog entry file has the same format as the body of a version section. |
| |
| A `ChangelogFormat` object defines the concrete syntax of the changelog. |
| Entry files must have the same format as the changelog file. |
| """ |
| |
| # Only accept dotted version numbers (e.g. "3.1", not "3"). |
| # Refuse ".x" in a version number where x is a letter: this indicates |
| # a version that is not yet released. Something like "3.1a" is accepted. |
| _version_number_re = re.compile(r'[0-9]+\.[0-9A-Za-z.]+') |
| _incomplete_version_number_re = re.compile(r'.*\.[A-Za-z]') |
| _only_url_re = re.compile(r'^\s*\w+://\S+\s*$') |
| _has_url_re = re.compile(r'.*://.*') |
| |
| def add_categories_from_text(self, filename, line_offset, |
| text, allow_unknown_category): |
| """Parse a version section or entry file.""" |
| try: |
| categories = self.format.split_categories(text) |
| except CategoryParseError as e: |
| raise InputFormatError(filename, line_offset + e.line_offset, |
| e.error_message) |
| for category in categories: |
| if not allow_unknown_category and \ |
| category.name not in self.categories: |
| raise InputFormatError(filename, |
| line_offset + category.title_line, |
| 'Unknown category: "{}"', |
| category.name) |
| |
| body_split = category.body.splitlines() |
| |
| for line_number, line in enumerate(body_split, 1): |
| if not self._only_url_re.match(line) and \ |
| len(line) > MAX_LINE_LENGTH: |
| long_url_msg = '. URL exceeding length limit must be alone in its line.' \ |
| if self._has_url_re.match(line) else "" |
| raise InputFormatError(filename, |
| category.body_line + line_number, |
| 'Line is longer than allowed: ' |
| 'Length {} (Max {}){}', |
| len(line), MAX_LINE_LENGTH, |
| long_url_msg) |
| |
| self.categories[category.name] += category.body |
| |
| def __init__(self, input_stream, changelog_format): |
| """Create a changelog object. |
| |
| Populate the changelog object from the content of the file |
| input_stream. |
| """ |
| self.format = changelog_format |
| whole_file = input_stream.read() |
| (self.header, |
| self.top_version_title, top_version_body, |
| self.trailer) = self.format.extract_top_version(whole_file) |
| # Split the top version section into categories. |
| self.categories = OrderedDict() |
| for category in STANDARD_CATEGORIES: |
| self.categories[category] = '' |
| offset = (self.header + self.top_version_title).count('\n') + 1 |
| self.add_categories_from_text(input_stream.name, offset, |
| top_version_body, True) |
| |
| def add_file(self, input_stream): |
| """Add changelog entries from a file. |
| """ |
| self.add_categories_from_text(input_stream.name, 1, |
| input_stream.read(), False) |
| |
| def write(self, filename): |
| """Write the changelog to the specified file. |
| """ |
| with open(filename, 'w', encoding='utf-8') as out: |
| out.write(self.header) |
| out.write(self.top_version_title) |
| for title, body in self.categories.items(): |
| if not body: |
| continue |
| out.write(self.format.format_category(title, body)) |
| out.write(self.trailer) |
| |
| |
| @functools.total_ordering |
| class EntryFileSortKey: |
| """This classes defines an ordering on changelog entry files: older < newer. |
| |
| * Merged entry files are sorted according to their merge date (date of |
| the merge commit that brought the commit that created the file into |
| the target branch). |
| * Committed but unmerged entry files are sorted according to the date |
| of the commit that adds them. |
| * Uncommitted entry files are sorted according to their modification time. |
| |
| This class assumes that the file is in a git working directory with |
| the target branch checked out. |
| """ |
| |
| # Categories of files. A lower number is considered older. |
| MERGED = 0 |
| COMMITTED = 1 |
| LOCAL = 2 |
| |
| @staticmethod |
| def creation_hash(filename): |
| """Return the git commit id at which the given file was created. |
| |
| Return None if the file was never checked into git. |
| """ |
| hashes = subprocess.check_output(['git', 'log', '--format=%H', |
| '--follow', |
| '--', filename]) |
| m = re.search('(.+)$', hashes.decode('ascii')) |
| if not m: |
| # The git output is empty. This means that the file was |
| # never checked in. |
| return None |
| # The last commit in the log is the oldest one, which is when the |
| # file was created. |
| return m.group(0) |
| |
| @staticmethod |
| def list_merges(some_hash, target, *options): |
| """List merge commits from some_hash to target. |
| |
| Pass options to git to select which commits are included. |
| """ |
| text = subprocess.check_output(['git', 'rev-list', |
| '--merges', *options, |
| '..'.join([some_hash, target])]) |
| return text.decode('ascii').rstrip('\n').split('\n') |
| |
| @classmethod |
| def merge_hash(cls, some_hash): |
| """Return the git commit id at which the given commit was merged. |
| |
| Return None if the given commit was never merged. |
| """ |
| target = 'HEAD' |
| # List the merges from some_hash to the target in two ways. |
| # The ancestry list is the ones that are both descendants of |
| # some_hash and ancestors of the target. |
| ancestry = frozenset(cls.list_merges(some_hash, target, |
| '--ancestry-path')) |
| # The first_parents list only contains merges that are directly |
| # on the target branch. We want it in reverse order (oldest first). |
| first_parents = cls.list_merges(some_hash, target, |
| '--first-parent', '--reverse') |
| # Look for the oldest merge commit that's both on the direct path |
| # and directly on the target branch. That's the place where some_hash |
| # was merged on the target branch. See |
| # https://stackoverflow.com/questions/8475448/find-merge-commit-which-include-a-specific-commit |
| for commit in first_parents: |
| if commit in ancestry: |
| return commit |
| return None |
| |
| @staticmethod |
| def commit_timestamp(commit_id): |
| """Return the timestamp of the given commit.""" |
| text = subprocess.check_output(['git', 'show', '-s', |
| '--format=%ct', |
| commit_id]) |
| return datetime.datetime.utcfromtimestamp(int(text)) |
| |
| @staticmethod |
| def file_timestamp(filename): |
| """Return the modification timestamp of the given file.""" |
| mtime = os.stat(filename).st_mtime |
| return datetime.datetime.fromtimestamp(mtime) |
| |
| def __init__(self, filename): |
| """Determine position of the file in the changelog entry order. |
| |
| This constructor returns an object that can be used with comparison |
| operators, with `sort` and `sorted`, etc. Older entries are sorted |
| before newer entries. |
| """ |
| self.filename = filename |
| creation_hash = self.creation_hash(filename) |
| if not creation_hash: |
| self.category = self.LOCAL |
| self.datetime = self.file_timestamp(filename) |
| return |
| merge_hash = self.merge_hash(creation_hash) |
| if not merge_hash: |
| self.category = self.COMMITTED |
| self.datetime = self.commit_timestamp(creation_hash) |
| return |
| self.category = self.MERGED |
| self.datetime = self.commit_timestamp(merge_hash) |
| |
| def sort_key(self): |
| """"Return a concrete sort key for this entry file sort key object. |
| |
| ``ts1 < ts2`` is implemented as ``ts1.sort_key() < ts2.sort_key()``. |
| """ |
| return (self.category, self.datetime, self.filename) |
| |
| def __eq__(self, other): |
| return self.sort_key() == other.sort_key() |
| |
| def __lt__(self, other): |
| return self.sort_key() < other.sort_key() |
| |
| |
| def check_output(generated_output_file, main_input_file, merged_files): |
| """Make sanity checks on the generated output. |
| |
| The intent of these sanity checks is to have reasonable confidence |
| that no content has been lost. |
| |
| The sanity check is that every line that is present in an input file |
| is also present in an output file. This is not perfect but good enough |
| for now. |
| """ |
| generated_output = set(open(generated_output_file, 'r', encoding='utf-8')) |
| for line in open(main_input_file, 'r', encoding='utf-8'): |
| if line not in generated_output: |
| raise LostContent('original file', line) |
| for merged_file in merged_files: |
| for line in open(merged_file, 'r', encoding='utf-8'): |
| if line not in generated_output: |
| raise LostContent(merged_file, line) |
| |
| def finish_output(changelog, output_file, input_file, merged_files): |
| """Write the changelog to the output file. |
| |
| The input file and the list of merged files are used only for sanity |
| checks on the output. |
| """ |
| if os.path.exists(output_file) and not os.path.isfile(output_file): |
| # The output is a non-regular file (e.g. pipe). Write to it directly. |
| output_temp = output_file |
| else: |
| # The output is a regular file. Write to a temporary file, |
| # then move it into place atomically. |
| output_temp = output_file + '.tmp' |
| changelog.write(output_temp) |
| check_output(output_temp, input_file, merged_files) |
| if output_temp != output_file: |
| os.rename(output_temp, output_file) |
| |
| def remove_merged_entries(files_to_remove): |
| for filename in files_to_remove: |
| os.remove(filename) |
| |
| def list_files_to_merge(options): |
| """List the entry files to merge, oldest first. |
| |
| "Oldest" is defined by `EntryFileSortKey`. |
| """ |
| files_to_merge = glob.glob(os.path.join(options.dir, '*.txt')) |
| files_to_merge.sort(key=EntryFileSortKey) |
| return files_to_merge |
| |
| def merge_entries(options): |
| """Merge changelog entries into the changelog file. |
| |
| Read the changelog file from options.input. |
| Read entries to merge from the directory options.dir. |
| Write the new changelog to options.output. |
| Remove the merged entries if options.keep_entries is false. |
| """ |
| with open(options.input, 'r', encoding='utf-8') as input_file: |
| changelog = ChangeLog(input_file, TextChangelogFormat) |
| files_to_merge = list_files_to_merge(options) |
| if not files_to_merge: |
| sys.stderr.write('There are no pending changelog entries.\n') |
| return |
| for filename in files_to_merge: |
| with open(filename, 'r', encoding='utf-8') as input_file: |
| changelog.add_file(input_file) |
| finish_output(changelog, options.output, options.input, files_to_merge) |
| if not options.keep_entries: |
| remove_merged_entries(files_to_merge) |
| |
| def show_file_timestamps(options): |
| """List the files to merge and their timestamp. |
| |
| This is only intended for debugging purposes. |
| """ |
| files = list_files_to_merge(options) |
| for filename in files: |
| ts = EntryFileSortKey(filename) |
| print(ts.category, ts.datetime, filename) |
| |
| def set_defaults(options): |
| """Add default values for missing options.""" |
| output_file = getattr(options, 'output', None) |
| if output_file is None: |
| options.output = options.input |
| if getattr(options, 'keep_entries', None) is None: |
| options.keep_entries = (output_file is not None) |
| |
| def main(): |
| """Command line entry point.""" |
| parser = argparse.ArgumentParser(description=__doc__) |
| parser.add_argument('--dir', '-d', metavar='DIR', |
| default='ChangeLog.d', |
| help='Directory to read entries from' |
| ' (default: ChangeLog.d)') |
| parser.add_argument('--input', '-i', metavar='FILE', |
| default='ChangeLog', |
| help='Existing changelog file to read from and augment' |
| ' (default: ChangeLog)') |
| parser.add_argument('--keep-entries', |
| action='store_true', dest='keep_entries', default=None, |
| help='Keep the files containing entries' |
| ' (default: remove them if --output/-o is not specified)') |
| parser.add_argument('--no-keep-entries', |
| action='store_false', dest='keep_entries', |
| help='Remove the files containing entries after they are merged' |
| ' (default: remove them if --output/-o is not specified)') |
| parser.add_argument('--output', '-o', metavar='FILE', |
| help='Output changelog file' |
| ' (default: overwrite the input)') |
| parser.add_argument('--list-files-only', |
| action='store_true', |
| help=('Only list the files that would be processed ' |
| '(with some debugging information)')) |
| options = parser.parse_args() |
| set_defaults(options) |
| if options.list_files_only: |
| show_file_timestamps(options) |
| return |
| merge_entries(options) |
| |
| if __name__ == '__main__': |
| main() |