Gilles Peskine | 40b3f41 | 2019-10-13 21:44:25 +0200 | [diff] [blame] | 1 | #!/usr/bin/env python3 |
| 2 | |
"""Assemble Mbed TLS change log entries into the change log file.

Add changelog entries to the first level-2 section.
Create a new level-2 section for unreleased changes if needed.
Remove the input files unless --keep-entries is specified.

In each level-3 section, entries are sorted in chronological order
(oldest first). From oldest to newest:
* Merged entry files are sorted according to their merge date (date of
  the merge commit that brought the commit that created the file into
  the target branch).
* Committed but unmerged entry files are sorted according to the date
  of the commit that adds them.
* Uncommitted entry files are sorted according to their modification time.

You must run this program from within a git working directory.
"""
| 20 | |
# Copyright The Mbed TLS Contributors
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
Gilles Peskine | 40b3f41 | 2019-10-13 21:44:25 +0200 | [diff] [blame] | 35 | |
| 36 | import argparse |
Gilles Peskine | 6e97c43 | 2020-03-27 19:05:18 +0100 | [diff] [blame] | 37 | from collections import OrderedDict, namedtuple |
Gilles Peskine | 8f46bbf | 2020-03-25 16:34:43 +0100 | [diff] [blame] | 38 | import datetime |
| 39 | import functools |
Gilles Peskine | 40b3f41 | 2019-10-13 21:44:25 +0200 | [diff] [blame] | 40 | import glob |
| 41 | import os |
| 42 | import re |
Gilles Peskine | 8f46bbf | 2020-03-25 16:34:43 +0100 | [diff] [blame] | 43 | import subprocess |
Gilles Peskine | 40b3f41 | 2019-10-13 21:44:25 +0200 | [diff] [blame] | 44 | import sys |
| 45 | |
class InputFormatError(Exception):
    """Report a format problem at a given line of an input file.

    The exception text is ``FILENAME:LINE_NUMBER: MESSAGE`` where MESSAGE
    is ``message`` expanded with `str.format` using the remaining
    positional and keyword arguments.
    """

    def __init__(self, filename, line_number, message, *args, **kwargs):
        detail = message.format(*args, **kwargs)
        full_message = '{}:{}: {}'.format(filename, line_number, detail)
        super().__init__(full_message)
Gilles Peskine | 40b3f41 | 2019-10-13 21:44:25 +0200 | [diff] [blame] | 51 | |
class CategoryParseError(Exception):
    """Report a problem found while splitting text into categories.

    ``line_offset`` and ``error_message`` are kept as attributes so that
    the caller can build a message that includes the file name.
    """

    def __init__(self, line_offset, error_message):
        super().__init__('{}: {}'.format(line_offset, error_message))
        self.line_offset = line_offset
        self.error_message = error_message
| 57 | |
class LostContent(Exception):
    """Report an input line that is missing from the generated output."""

    def __init__(self, filename, line):
        super().__init__('Lost content from {}: "{}"'.format(filename, line))
| 62 | |
# The names of the categories that may appear in a changelog version
# section.
# If you edit this, update ChangeLog.d/README.md.
STANDARD_CATEGORIES = (
    b'API changes',
    b'Default behavior changes',
    b'Requirement changes',
    b'New deprecations',
    b'Removals',
    b'Features',
    b'Security',
    b'Bugfix',
    b'Changes',
)

# The maximum line length for an entry
MAX_LINE_LENGTH = 80

# One category subsection:
# * name, title_line: title text and line number of the title;
# * body, body_line: body text and starting line number of the body.
CategoryContent = namedtuple(
    'CategoryContent',
    'name title_line body body_line',
)
| 84 | |
class ChangelogFormat:
    """Virtual class documenting how to write a changelog format class."""

    @classmethod
    def extract_top_version(cls, changelog_file_content):
        """Split out the top version section.

        If the top version is already released, create a new top
        version section for an unreleased version.

        Return ``(header, top_version_title, top_version_body, trailer)``
        where the "top version" is the existing top version section if it's
        for unreleased changes, and a newly created section otherwise.
        To assemble the changelog after modifying top_version_body,
        concatenate the four pieces.
        """
        raise NotImplementedError

    @classmethod
    def version_title_text(cls, version_title):
        """Return the text of a formatted version section title."""
        raise NotImplementedError

    @classmethod
    def split_categories(cls, version_body):
        """Split a changelog version section body into categories.

        Return a list of `CategoryContent`. In each element, the name is
        the category title without any formatting.
        """
        raise NotImplementedError

    @classmethod
    def format_category(cls, title, body):
        """Construct the text of a category section from its title and body."""
        raise NotImplementedError


class TextChangelogFormat(ChangelogFormat):
    """The traditional Mbed TLS changelog format."""

    _unreleased_version_text = b'= mbed TLS x.x.x branch released xxxx-xx-xx'

    @classmethod
    def is_released_version(cls, title):
        """Return true unless ``title`` contains an incomplete release date.

        A date whose last character is the placeholder ``x`` (for example
        ``xxxx-xx-xx``) marks a version that has not been released yet.
        """
        # Look for an incomplete release date
        return not re.search(br'[0-9x]{4}-[0-9x]{2}-[0-9x]?x', title)

    # Group 1: the title line (starting with '=') and the blank lines
    # that follow it. Group 2: the section body, up to the next line
    # that starts with '=' or the end of the content.
    _top_version_re = re.compile(br'(?:\A|\n)(=[^\n]*\n+)(.*?\n)(?:=|$)',
                                 re.DOTALL)

    @classmethod
    def extract_top_version(cls, changelog_file_content):
        """A version section starts with a line starting with '='.

        Return ``(header, top_version_title, top_version_body, trailer)``.
        If the first version section is a released version, a fresh
        unreleased title with an empty body is inserted in front of it and
        the existing section becomes the start of the trailer.
        """
        # NOTE: the content must contain at least one version section;
        # otherwise the search finds nothing and the lookups on `m` fail.
        m = cls._top_version_re.search(changelog_file_content)
        top_version_start = m.start(1)
        top_version_end = m.end(2)
        top_version_title = m.group(1)
        top_version_body = m.group(2)
        if cls.is_released_version(top_version_title):
            # Leave the released section untouched in the trailer.
            top_version_end = top_version_start
            top_version_title = cls._unreleased_version_text + b'\n\n'
            top_version_body = b''
        return (changelog_file_content[:top_version_start],
                top_version_title, top_version_body,
                changelog_file_content[top_version_end:])

    @classmethod
    def version_title_text(cls, version_title):
        """Return the first line of ``version_title`` without its newline.

        Everything from the first newline onwards is removed.
        """
        # re.sub takes (pattern, repl, string); the flags must be passed
        # by keyword because the fourth positional parameter is `count`.
        return re.sub(br'\n.*', b'', version_title, flags=re.DOTALL)

    _category_title_re = re.compile(br'(^\w.*)\n+', re.MULTILINE)

    @classmethod
    def split_categories(cls, version_body):
        """A category title is a line with the title in column 0.

        Return a list of `CategoryContent`. Line numbers in the result are
        counted from 0 at the start of ``version_body``. Each body ends
        with exactly one newline.

        Raise `CategoryParseError` if a non-empty ``version_body`` does not
        start with a category title.
        """
        if not version_body:
            return []
        title_matches = list(re.finditer(cls._category_title_re, version_body))
        if not title_matches or title_matches[0].start() != 0:
            # There is junk before the first category.
            raise CategoryParseError(0, 'Junk found where category expected')
        title_starts = [m.start(1) for m in title_matches]
        body_starts = [m.end(0) for m in title_matches]
        # Each body runs until the next title, or the end of the text.
        body_ends = title_starts[1:] + [len(version_body)]
        bodies = [version_body[body_start:body_end].rstrip(b'\n') + b'\n'
                  for (body_start, body_end) in zip(body_starts, body_ends)]
        title_lines = [version_body[:pos].count(b'\n') for pos in title_starts]
        body_lines = [version_body[:pos].count(b'\n') for pos in body_starts]
        return [CategoryContent(title_match.group(1), title_line,
                                body, body_line)
                for title_match, title_line, body, body_line
                in zip(title_matches, title_lines, bodies, body_lines)]

    @classmethod
    def format_category(cls, title, body):
        """Return the title line followed by the body and a blank line."""
        # `split_categories` ensures that each body ends with a newline.
        # Make sure that there is additionally a blank line between categories.
        if not body.endswith(b'\n\n'):
            body += b'\n'
        return title + b'\n' + body
| 182 | |
class ChangeLog:
    """An Mbed TLS changelog.

    A changelog file consists of some header text followed by one or
    more version sections. The version sections are in reverse
    chronological order. Each version section consists of a title and a body.

    The body of a version section consists of zero or more category
    subsections. Each category subsection consists of a title and a body.

    A changelog entry file has the same format as the body of a version section.

    A `ChangelogFormat` object defines the concrete syntax of the changelog.
    Entry files must have the same format as the changelog file.
    """

    # Only accept dotted version numbers (e.g. "3.1", not "3").
    # Refuse ".x" in a version number where x is a letter: this indicates
    # a version that is not yet released. Something like "3.1a" is accepted.
    _version_number_re = re.compile(br'[0-9]+\.[0-9A-Za-z.]+')
    _incomplete_version_number_re = re.compile(br'.*\.[A-Za-z]')

    def add_categories_from_text(self, filename, line_offset,
                                 text, allow_unknown_category):
        """Parse a version section or entry file.

        Append the body of each category found in ``text`` to the
        corresponding entry of ``self.categories``.

        ``filename`` is only used in error messages. ``line_offset`` is
        the line number, in that file, of the first line of ``text``.
        If ``allow_unknown_category`` is false, a category title that is
        not already a key of ``self.categories`` is an error; otherwise
        such a category is added after the existing ones.

        Raise `InputFormatError` if the text cannot be split into
        categories, if a category is not allowed, or if a body line is
        longer than `MAX_LINE_LENGTH`.
        """
        try:
            categories = self.format.split_categories(text)
        except CategoryParseError as e:
            raise InputFormatError(filename, line_offset + e.line_offset,
                                   e.error_message) from e
        for category in categories:
            if not allow_unknown_category and \
               category.name not in self.categories:
                raise InputFormatError(filename,
                                       line_offset + category.title_line,
                                       'Unknown category: "{}"',
                                       category.name.decode('utf8'))

            body_split = category.body.splitlines()
            for line_number, line in enumerate(body_split, 1):
                if len(line) > MAX_LINE_LENGTH:
                    # body_line counts from 0 within the text and
                    # line_number counts from 1 within the body, hence
                    # the -1 when converting to a line number in the file.
                    raise InputFormatError(filename,
                                           line_offset + category.body_line
                                           + line_number - 1,
                                           'Line is longer than allowed: Length {} (Max {})',
                                           len(line), MAX_LINE_LENGTH)

            # Use a default so that an allowed unknown category gets
            # created instead of raising KeyError.
            self.categories[category.name] = \
                self.categories.get(category.name, b'') + category.body

    def __init__(self, input_stream, changelog_format):
        """Create a changelog object.

        Populate the changelog object from the content of the file
        input_stream.

        ``input_stream`` must provide ``read()`` returning the whole
        content and a ``name`` attribute (used in error messages).
        ``changelog_format`` is the `ChangelogFormat` to parse and write
        with.
        """
        self.format = changelog_format
        whole_file = input_stream.read()
        (self.header,
         self.top_version_title, top_version_body,
         self.trailer) = self.format.extract_top_version(whole_file)
        # Split the top version section into categories.
        self.categories = OrderedDict()
        for category in STANDARD_CATEGORIES:
            self.categories[category] = b''
        # Line number of the first line of the top version body.
        offset = (self.header + self.top_version_title).count(b'\n') + 1
        self.add_categories_from_text(input_stream.name, offset,
                                      top_version_body, True)

    def add_file(self, input_stream):
        """Add changelog entries from a file.

        Only categories that are already known are accepted.
        """
        self.add_categories_from_text(input_stream.name, 1,
                                      input_stream.read(), False)

    def write(self, filename):
        """Write the changelog to the specified file.

        Categories with an empty body are omitted.
        """
        with open(filename, 'wb') as out:
            out.write(self.header)
            out.write(self.top_version_title)
            for title, body in self.categories.items():
                if not body:
                    continue
                out.write(self.format.format_category(title, body))
            out.write(self.trailer)
Gilles Peskine | 40b3f41 | 2019-10-13 21:44:25 +0200 | [diff] [blame] | 267 | |
Gilles Peskine | 8f46bbf | 2020-03-25 16:34:43 +0100 | [diff] [blame] | 268 | |
@functools.total_ordering
class EntryFileSortKey:
    """This class defines an ordering on changelog entry files: older < newer.

    * Merged entry files are sorted according to their merge date (date of
      the merge commit that brought the commit that created the file into
      the target branch).
    * Committed but unmerged entry files are sorted according to the date
      of the commit that adds them.
    * Uncommitted entry files are sorted according to their modification time.

    This class assumes that the file is in a git working directory with
    the target branch checked out.
    """

    # Categories of files. A lower number is considered older.
    MERGED = 0
    COMMITTED = 1
    LOCAL = 2

    @staticmethod
    def creation_hash(filename):
        """Return the git commit id at which the given file was created.

        Return None if the file was never checked into git.
        """
        hashes = subprocess.check_output(['git', 'log', '--format=%H',
                                          '--follow',
                                          '--', filename])
        # '.' does not match a newline, so this picks the last line.
        m = re.search(b'(.+)$', hashes)
        if not m:
            # The git output is empty. This means that the file was
            # never checked in.
            return None
        # The last commit in the log is the oldest one, which is when the
        # file was created.
        return m.group(0)

    @staticmethod
    def list_merges(some_hash, target, *options):
        """List merge commits from some_hash to target.

        Pass options to git to select which commits are included.

        Return a list of commit ids (one per line of git output). The
        list is empty if git prints nothing.
        """
        text = subprocess.check_output(['git', 'rev-list',
                                        '--merges', *options,
                                        b'..'.join([some_hash, target])])
        # splitlines() yields [] for empty output, whereas split(b'\n')
        # would yield a bogus empty commit id.
        return text.splitlines()

    @classmethod
    def merge_hash(cls, some_hash):
        """Return the git commit id at which the given commit was merged.

        Return None if the given commit was never merged.
        """
        target = b'HEAD'
        # List the merges from some_hash to the target in two ways.
        # The ancestry list is the ones that are both descendants of
        # some_hash and ancestors of the target.
        ancestry = frozenset(cls.list_merges(some_hash, target,
                                             '--ancestry-path'))
        # The first_parents list only contains merges that are directly
        # on the target branch. We want it in reverse order (oldest first).
        first_parents = cls.list_merges(some_hash, target,
                                        '--first-parent', '--reverse')
        # Look for the oldest merge commit that's both on the direct path
        # and directly on the target branch. That's the place where some_hash
        # was merged on the target branch. See
        # https://stackoverflow.com/questions/8475448/find-merge-commit-which-include-a-specific-commit
        for commit in first_parents:
            if commit in ancestry:
                return commit
        return None

    @staticmethod
    def commit_timestamp(commit_id):
        """Return the timestamp of the given commit.

        The result is a naive `datetime.datetime` expressed in UTC.
        """
        text = subprocess.check_output(['git', 'show', '-s',
                                        '--format=%ct',
                                        commit_id])
        # Same value as the deprecated datetime.utcfromtimestamp().
        aware = datetime.datetime.fromtimestamp(int(text),
                                                datetime.timezone.utc)
        return aware.replace(tzinfo=None)

    @staticmethod
    def file_timestamp(filename):
        """Return the modification timestamp of the given file.

        The result is a naive `datetime.datetime` in local time.
        """
        mtime = os.stat(filename).st_mtime
        return datetime.datetime.fromtimestamp(mtime)

    def __init__(self, filename):
        """Determine position of the file in the changelog entry order.

        This constructor returns an object that can be used with comparison
        operators, with `sort` and `sorted`, etc. Older entries are sorted
        before newer entries.
        """
        self.filename = filename
        creation_hash = self.creation_hash(filename)
        if not creation_hash:
            self.category = self.LOCAL
            self.datetime = self.file_timestamp(filename)
            return
        merge_hash = self.merge_hash(creation_hash)
        if not merge_hash:
            self.category = self.COMMITTED
            self.datetime = self.commit_timestamp(creation_hash)
            return
        self.category = self.MERGED
        self.datetime = self.commit_timestamp(merge_hash)

    def sort_key(self):
        """Return a concrete sort key for this entry file sort key object.

        ``ts1 < ts2`` is implemented as ``ts1.sort_key() < ts2.sort_key()``.
        """
        return (self.category, self.datetime, self.filename)

    def __eq__(self, other):
        if not isinstance(other, EntryFileSortKey):
            # Let Python fall back to its default handling.
            return NotImplemented
        return self.sort_key() == other.sort_key()

    def __lt__(self, other):
        if not isinstance(other, EntryFileSortKey):
            return NotImplemented
        return self.sort_key() < other.sort_key()
| 390 | |
| 391 | |
def check_output(generated_output_file, main_input_file, merged_files):
    """Make sanity checks on the generated output.

    The intent of these sanity checks is to have reasonable confidence
    that no content has been lost.

    The sanity check is that every line that is present in an input file
    is also present in an output file. This is not perfect but good enough
    for now.

    ``generated_output_file`` and ``main_input_file`` are file names;
    ``merged_files`` is an iterable of file names.

    Raise `LostContent` for the first input line that is missing from the
    generated output.
    """
    # Every file is opened in a `with` block so that it is closed promptly,
    # even when a missing line makes this function raise.
    with open(generated_output_file, 'rb') as output:
        generated_output = set(output)
    with open(main_input_file, 'rb') as main_input:
        for line in main_input:
            if line not in generated_output:
                raise LostContent('original file', line)
    for merged_file in merged_files:
        with open(merged_file, 'rb') as entry_input:
            for line in entry_input:
                if line not in generated_output:
                    raise LostContent(merged_file, line)
| 410 | |
def finish_output(changelog, output_file, input_file, merged_files):
    """Write the changelog to the output file.

    The input file and the list of merged files are used only for sanity
    checks on the output.

    ``changelog`` must provide a ``write(filename)`` method. The sanity
    checks run on the written content before it is moved into place.
    """
    if os.path.exists(output_file) and not os.path.isfile(output_file):
        # The output is a non-regular file (e.g. pipe). Write to it directly.
        output_temp = output_file
    else:
        # The output is a regular file. Write to a temporary file,
        # then move it into place atomically.
        output_temp = output_file + '.tmp'
    changelog.write(output_temp)
    check_output(output_temp, input_file, merged_files)
    if output_temp != output_file:
        # os.replace overwrites an existing destination on every platform,
        # whereas os.rename fails on Windows if the destination exists
        # (which is the normal case when updating the changelog in place).
        os.replace(output_temp, output_file)
| 428 | |
def remove_merged_entries(files_to_remove):
    """Delete each of the given files."""
    for entry_path in files_to_remove:
        os.remove(entry_path)
| 432 | |
def list_files_to_merge(options):
    """Return the entry files to merge as a list, oldest first.

    The entry files are the ``*.txt`` files in the directory
    ``options.dir``. "Oldest" is defined by `EntryFileSortKey`.
    """
    pattern = os.path.join(options.dir, '*.txt')
    return sorted(glob.glob(pattern), key=EntryFileSortKey)
| 441 | |
def merge_entries(options):
    """Merge changelog entries into the changelog file.

    Read the changelog file from options.input.
    Read entries to merge from the directory options.dir.
    Write the new changelog to options.output.
    Remove the merged entries if options.keep_entries is false.

    If there are no entry files, print a notice on standard error and
    leave everything untouched.
    """
    with open(options.input, 'rb') as changelog_stream:
        changelog = ChangeLog(changelog_stream, TextChangelogFormat)
    entry_files = list_files_to_merge(options)
    if not entry_files:
        sys.stderr.write('There are no pending changelog entries.\n')
        return
    for entry_path in entry_files:
        with open(entry_path, 'rb') as entry_stream:
            changelog.add_file(entry_stream)
    finish_output(changelog, options.output, options.input, entry_files)
    if options.keep_entries:
        return
    remove_merged_entries(entry_files)
Gilles Peskine | 40b3f41 | 2019-10-13 21:44:25 +0200 | [diff] [blame] | 462 | |
def show_file_timestamps(options):
    """Print each file to merge with its sort category and timestamp.

    This is only intended for debugging purposes.
    """
    for filename in list_files_to_merge(options):
        key = EntryFileSortKey(filename)
        print(key.category, key.datetime, filename)
| 472 | |
def set_defaults(options):
    """Fill in the options that were not given explicitly.

    * ``output`` defaults to ``input``.
    * ``keep_entries`` defaults to true if an output was given explicitly
      and to false otherwise.
    """
    output_was_given = getattr(options, 'output', None) is not None
    if not output_was_given:
        options.output = options.input
    keep_entries = getattr(options, 'keep_entries', None)
    if keep_entries is None:
        options.keep_entries = output_was_given
| 480 | |
def main():
    """Command line entry point.

    Parse the command line, fill in the default options, then either
    list the entry files (``--list-files-only``) or merge them.
    """
    arg_parser = argparse.ArgumentParser(description=__doc__)
    add = arg_parser.add_argument
    add('--dir', '-d', metavar='DIR',
        default='ChangeLog.d',
        help='Directory to read entries from'
             ' (default: ChangeLog.d)')
    add('--input', '-i', metavar='FILE',
        default='ChangeLog',
        help='Existing changelog file to read from and augment'
             ' (default: ChangeLog)')
    # --keep-entries and --no-keep-entries share one destination whose
    # default None means "decide in set_defaults".
    add('--keep-entries',
        action='store_true', dest='keep_entries', default=None,
        help='Keep the files containing entries'
             ' (default: remove them if --output/-o is not specified)')
    add('--no-keep-entries',
        action='store_false', dest='keep_entries',
        help='Remove the files containing entries after they are merged'
             ' (default: remove them if --output/-o is not specified)')
    add('--output', '-o', metavar='FILE',
        help='Output changelog file'
             ' (default: overwrite the input)')
    add('--list-files-only',
        action='store_true',
        help=('Only list the files that would be processed '
              '(with some debugging information)'))
    options = arg_parser.parse_args()
    set_defaults(options)
    if options.list_files_only:
        show_file_timestamps(options)
    else:
        merge_entries(options)


if __name__ == '__main__':
    main()