blob: 6eeea61243fc262afdc62b59869cdccfdf64184f [file] [log] [blame]
Gilles Peskine40b3f412019-10-13 21:44:25 +02001#!/usr/bin/env python3
2
3"""Assemble Mbed Crypto change log entries into the change log file.
Gilles Peskinea2607962020-01-28 19:58:17 +01004
Add changelog entries to the first level-2 section.
Create a new level-2 section for unreleased changes if needed.
Remove the input files after merging them, unless --keep-entries is
specified or the output goes to a separate file (--output) and
--no-keep-entries is not specified.

In each level-3 section, entries are sorted in chronological order
(oldest first). From oldest to newest:
* Merged entry files are sorted according to their merge date (date of
  the merge commit that brought the commit that created the file into
  the target branch).
* Committed but unmerged entry files are sorted according to the date
  of the commit that adds them.
* Uncommitted entry files are sorted according to their modification time.

You must run this program from within a git working directory.
Gilles Peskine40b3f412019-10-13 21:44:25 +020019"""
20
21# Copyright (C) 2019, Arm Limited, All Rights Reserved
22# SPDX-License-Identifier: Apache-2.0
23#
24# Licensed under the Apache License, Version 2.0 (the "License"); you may
25# not use this file except in compliance with the License.
26# You may obtain a copy of the License at
27#
28# http://www.apache.org/licenses/LICENSE-2.0
29#
30# Unless required by applicable law or agreed to in writing, software
31# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
32# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
33# See the License for the specific language governing permissions and
34# limitations under the License.
35#
36# This file is part of Mbed Crypto (https://tls.mbed.org)
37
38import argparse
Gilles Peskined8b6c772020-01-28 18:57:47 +010039from collections import OrderedDict
Gilles Peskine8f46bbf2020-03-25 16:34:43 +010040import datetime
41import functools
Gilles Peskine40b3f412019-10-13 21:44:25 +020042import glob
43import os
44import re
Gilles Peskine8f46bbf2020-03-25 16:34:43 +010045import subprocess
Gilles Peskine40b3f412019-10-13 21:44:25 +020046import sys
47
class InputFormatError(Exception):
    """An input file does not have the expected format.

    The exception text has the form ``FILENAME:LINE_NUMBER: MESSAGE`` where
    MESSAGE is the result of ``message.format(*args, **kwargs)``.
    """

    def __init__(self, filename, line_number, message, *args, **kwargs):
        # Expand the caller's template first, then prefix it with the
        # location in the conventional "file:line:" style.
        detail = message.format(*args, **kwargs)
        located_message = '{}:{}: {}'.format(filename, line_number, detail)
        super().__init__(located_message)
Gilles Peskine40b3f412019-10-13 21:44:25 +020053
class LostContent(Exception):
    """A line from an input file is missing from the generated output.

    filename identifies the input the line came from and line is the
    offending line. Lines read in binary mode are shown in their repr()
    form (``b'...'``) so that invisible differences such as a missing
    line terminator remain visible in the message.
    """

    def __init__(self, filename, line):
        # Formatting a bytes object with '{}' goes through str(bytes), which
        # emits BytesWarning under "python -b" (an error under "-bb") and
        # would mask the real problem. repr() yields the same text without
        # the implicit conversion.
        if isinstance(line, (bytes, bytearray)):
            shown = repr(line)
        else:
            shown = line
        message = 'Lost content from {}: "{}"'.format(filename, shown)
        super().__init__(message)
58
# Titles of the level-3 sections that are always recognized, in the order in
# which they are written to the changelog. Titles are bytes because the
# changelog and the entry files are processed in binary mode.
STANDARD_SECTIONS = (
    b'Interface changes',
    b'Default behavior changes',
    b'Requirement changes',
    b'New deprecations',
    b'Removals',
    b'New features',
    b'Security',
    b'Bug fixes',
    b'Performance improvements',
    b'Other changes',
)
71
class ChangeLog:
    """An Mbed Crypto changelog.

    A changelog is a file in Markdown format. Each level 2 section title
    starts a version, and versions are sorted in reverse chronological
    order. Lines with a level 2 section title must start with '##'.

    Within a version, there are multiple sections, each devoted to a kind
    of change: bug fix, feature request, etc. Section titles should match
    entries in STANDARD_SECTIONS exactly.

    Within each section, each separate change should be on a line starting
    with a '*' bullet. There may be blank lines surrounding titles, but
    there should not be any blank line inside a section.

    All content is handled as bytes: lines are expected to be bytes
    objects, as obtained from a file opened in binary mode.
    """

    # Matches the (possibly empty) run of '#' characters that starts a line.
    _title_re = re.compile(br'#*')

    def title_level(self, line):
        """Determine whether the line is a title.

        Return (level, content) where level is the Markdown section level
        (1 for '#', 2 for '##', etc.) and content is the section title
        without leading or trailing whitespace. For a non-title line,
        the level is 0.
        """
        # The pattern can match the empty string, so match() never fails.
        level = self._title_re.match(line).end()
        return level, line[level:].strip()

    # Only accept dotted version numbers (e.g. "3.1", not "3").
    # Refuse ".x" in a version number where x is a letter: this indicates
    # a version that is not yet released. Something like "3.1a" is accepted.
    _version_number_re = re.compile(br'[0-9]+\.[0-9A-Za-z.]+')
    _incomplete_version_number_re = re.compile(br'.*\.[A-Za-z]')

    def section_is_released_version(self, title):
        """Whether this section is for a released version.

        True if the given level-2 section title indicates that this section
        contains released changes, otherwise False.
        """
        # Assume that a released version has a numerical version number
        # that follows a particular pattern. These criteria may be revised
        # as needed in future versions of this script.
        version_number = self._version_number_re.search(title)
        if version_number is None:
            return False
        incomplete = self._incomplete_version_number_re.search(
            version_number.group(0))
        return not incomplete

    def unreleased_version_title(self):
        """The title to use if creating a new section for an unreleased version."""
        # pylint: disable=no-self-use; this method may be overridden
        return b'Unreleased changes'

    def __init__(self, input_stream):
        """Create a changelog object.

        Populate the changelog object from the content of the file
        input_stream. This is typically a file opened for reading, but
        can be any generator returning the lines to read.
        """
        # Content before the level-2 section where the new entries are to be
        # added.
        self.header = []
        # Content of the level-3 sections where the new entries are to be
        # added, keyed by section title. The standard sections come first,
        # in their canonical order; other titles found in the file follow.
        self.section_content = OrderedDict()
        for section in STANDARD_SECTIONS:
            self.section_content[section] = []
        # Content of level-2 sections for already-released versions.
        self.trailer = []
        self.read_main_file(input_stream)

    def read_main_file(self, input_stream):
        """Populate the changelog object from the content of the file.

        This method is only intended to be called as part of the constructor
        of the class and may not act sensibly on an object that is already
        partially populated.
        """
        # Parse the first level-2 section, containing changelog entries
        # for unreleased changes.
        # If we'll be expanding this section, everything before the first
        # level-3 section title ("###...") following the first level-2
        # section title ("##...") is passed through as the header
        # and everything after the second level-2 section title is passed
        # through as the trailer. Inside the first level-2 section,
        # split out the level-3 sections.
        # If we'll be creating a new version, the header is everything
        # before the point where we want to add the level-2 section
        # for this version, and the trailer is what follows.
        level_2_seen = 0
        current_section = None
        for line in input_stream:
            level, content = self.title_level(line)
            if level == 2:
                level_2_seen += 1
                if level_2_seen == 1:
                    if self.section_is_released_version(content):
                        # The first version in the file is already released:
                        # start a new section for unreleased changes above
                        # it and treat the rest of the file as the trailer.
                        self.header.append(b'## ' +
                                           self.unreleased_version_title() +
                                           b'\n\n')
                        level_2_seen = 2
            elif level == 3 and level_2_seen == 1:
                current_section = content
                self.section_content.setdefault(content, [])
            if level_2_seen == 1 and current_section is not None:
                # Inside a level-3 section of the unreleased version: keep
                # the entries but drop the title line (it is regenerated by
                # write()) and blank lines.
                if level != 3 and line.strip():
                    self.section_content[current_section].append(line)
            elif level_2_seen <= 1:
                self.header.append(line)
            else:
                self.trailer.append(line)

    def add_file(self, input_stream):
        """Add changelog entries from a file.

        Read lines from input_stream, which is typically a file opened
        for reading. These lines must contain a series of level 3
        Markdown sections with recognized titles. The corresponding
        content is injected into the respective sections in the changelog.
        The section titles must be either one of the hard-coded values
        in STANDARD_SECTIONS in assemble_changelog.py or already present
        in ChangeLog.md. Section titles must match byte-for-byte except that
        leading or trailing whitespace is ignored.

        Blank lines are ignored. Raise InputFormatError if a section title
        is not recognized, if content appears before the first section
        title, or if a title of a level other than 3 is encountered.
        """
        # Not every iterable of lines has a name (e.g. io.BytesIO); fall
        # back to a placeholder that is only used in error messages.
        filename = getattr(input_stream, 'name', '<input>')
        current_section = None
        for line_number, line in enumerate(input_stream, 1):
            if not line.strip():
                continue
            level, content = self.title_level(line)
            if level == 3:
                current_section = content
                if current_section not in self.section_content:
                    # repr() of a bytes object is b'...'; dropping the
                    # leading 'b' shows the title in plain quotes. Unlike
                    # str(), repr() does not trigger BytesWarning under
                    # "python -b".
                    raise InputFormatError(filename, line_number,
                                           'Section {} is not recognized',
                                           repr(current_section)[1:])
            elif level == 0:
                if current_section is None:
                    raise InputFormatError(filename, line_number,
                                           'Missing section title at the beginning of the file')
                self.section_content[current_section].append(line)
            else:
                raise InputFormatError(filename, line_number,
                                       'Only level 3 headers (###) are permitted')

    def write(self, filename):
        """Write the changelog to the specified file.

        The output consists of the header, then each non-empty level-3
        section (title, blank line, entries, blank line), then the trailer.
        """
        with open(filename, 'wb') as out:
            for line in self.header:
                out.write(line)
            for section, lines in self.section_content.items():
                # Sections without any entry are omitted altogether.
                if not lines:
                    continue
                out.write(b'### ' + section + b'\n\n')
                for line in lines:
                    out.write(line)
                out.write(b'\n')
            for line in self.trailer:
                out.write(line)
235
Gilles Peskine8f46bbf2020-03-25 16:34:43 +0100236
@functools.total_ordering
class EntryFileSortKey:
    """This class defines an ordering on changelog entry files: older < newer.

    * Merged entry files are sorted according to their merge date (date of
      the merge commit that brought the commit that created the file into
      the target branch).
    * Committed but unmerged entry files are sorted according to the date
      of the commit that adds them.
    * Uncommitted entry files are sorted according to their modification time.

    This class assumes that the file is in a git working directory with
    the target branch checked out.
    """

    # Categories of files. A lower number is considered older.
    MERGED = 0
    COMMITTED = 1
    LOCAL = 2

    @staticmethod
    def creation_hash(filename):
        """Return the git commit id at which the given file was created.

        Return None if the file was never checked into git.
        """
        hashes = subprocess.check_output(['git', 'log', '--format=%H', '--', filename])
        # '.' does not match a newline and '$' matches just before a trailing
        # newline, so this picks the last line of the output.
        m = re.search(b'(.+)$', hashes)
        if not m:
            # The git output is empty. This means that the file was
            # never checked in.
            return None
        # The last commit in the log is the oldest one, which is when the
        # file was created.
        return m.group(0)

    @staticmethod
    def list_merges(some_hash, target, *options):
        """List merge commits from some_hash to target.

        Pass options to git to select which commits are included.
        Return a list of commit ids (bytes), empty if git lists no commit.
        """
        text = subprocess.check_output(['git', 'rev-list',
                                        '--merges', *options,
                                        b'..'.join([some_hash, target])])
        # split() without argument returns [] for empty output, whereas
        # splitting on b'\n' would yield a bogus empty commit id ([b'']).
        return text.split()

    @classmethod
    def merge_hash(cls, some_hash):
        """Return the git commit id at which the given commit was merged.

        Return None if the given commit was never merged.
        """
        target = b'HEAD'
        # List the merges from some_hash to the target in two ways.
        # The ancestry list is the ones that are both descendants of
        # some_hash and ancestors of the target.
        ancestry = frozenset(cls.list_merges(some_hash, target,
                                             '--ancestry-path'))
        # The first_parents list only contains merges that are directly
        # on the target branch. We want it in reverse order (oldest first).
        first_parents = cls.list_merges(some_hash, target,
                                        '--first-parent', '--reverse')
        # Look for the oldest merge commit that's both on the direct path
        # and directly on the target branch. That's the place where some_hash
        # was merged on the target branch. See
        # https://stackoverflow.com/questions/8475448/find-merge-commit-which-include-a-specific-commit
        for commit in first_parents:
            if commit in ancestry:
                return commit
        return None

    @staticmethod
    def commit_timestamp(commit_id):
        """Return the timestamp of the given commit.

        The result is a naive datetime object expressed in UTC.
        """
        text = subprocess.check_output(['git', 'show', '-s',
                                        '--format=%ct',
                                        commit_id])
        # Same value as the deprecated datetime.utcfromtimestamp().
        aware = datetime.datetime.fromtimestamp(int(text),
                                                datetime.timezone.utc)
        return aware.replace(tzinfo=None)

    @staticmethod
    def file_timestamp(filename):
        """Return the modification timestamp of the given file.

        The result is a naive datetime object expressed in local time. It is
        only ever compared with timestamps of other uncommitted files since
        the category takes precedence in the sort key.
        """
        mtime = os.stat(filename).st_mtime
        return datetime.datetime.fromtimestamp(mtime)

    def __init__(self, filename):
        """Determine position of the file in the changelog entry order.

        This constructor returns an object that can be used with comparison
        operators, with `sort` and `sorted`, etc. Older entries are sorted
        before newer entries.
        """
        self.filename = filename
        creation_hash = self.creation_hash(filename)
        if not creation_hash:
            self.category = self.LOCAL
            self.datetime = self.file_timestamp(filename)
            return
        merge_hash = self.merge_hash(creation_hash)
        if not merge_hash:
            self.category = self.COMMITTED
            self.datetime = self.commit_timestamp(creation_hash)
            return
        self.category = self.MERGED
        self.datetime = self.commit_timestamp(merge_hash)

    def sort_key(self):
        """Return a concrete sort key for this entry file sort key object.

        ``ts1 < ts2`` is implemented as ``ts1.sort_key() < ts2.sort_key()``.
        The file name acts as a tie-breaker to make the order deterministic.
        """
        return (self.category, self.datetime, self.filename)

    def __eq__(self, other):
        # Let Python fall back to its default handling for foreign types
        # rather than fail with AttributeError.
        if not isinstance(other, EntryFileSortKey):
            return NotImplemented
        return self.sort_key() == other.sort_key()

    def __lt__(self, other):
        if not isinstance(other, EntryFileSortKey):
            return NotImplemented
        return self.sort_key() < other.sort_key()

    def __hash__(self):
        # Defining __eq__ disables the inherited __hash__; keep instances
        # hashable, consistently with __eq__.
        return hash(self.sort_key())
356
357
def check_output(generated_output_file, main_input_file, merged_files):
    """Make sanity checks on the generated output.

    The intent of these sanity checks is to have reasonable confidence
    that no content has been lost.

    The sanity check is that every line that is present in an input file
    is also present in an output file. This is not perfect but good enough
    for now.

    generated_output_file and main_input_file are file names; merged_files
    is an iterable of file names. All files are read in binary mode and
    lines are compared byte for byte, including the line terminator.
    Raise LostContent for the first input line that is missing from the
    output.
    """
    # Use context managers so that every file is closed promptly, including
    # when LostContent is raised.
    with open(generated_output_file, 'rb') as generated:
        generated_output = set(generated)
    with open(main_input_file, 'rb') as main_input:
        for line in main_input:
            if line not in generated_output:
                raise LostContent('original file', line)
    for merged_file in merged_files:
        with open(merged_file, 'rb') as entries:
            for line in entries:
                if line not in generated_output:
                    raise LostContent(merged_file, line)
376
def finish_output(changelog, output_file, input_file, merged_files):
    """Write the changelog to the output file.

    The input file and the list of merged files are used only for sanity
    checks on the output.

    If the sanity checks fail, the exception from check_output propagates
    and a regular output file is left untouched: only the temporary file
    next to it has been written at that point.
    """
    if os.path.exists(output_file) and not os.path.isfile(output_file):
        # The output is a non-regular file (e.g. pipe). Write to it directly.
        output_temp = output_file
    else:
        # The output is a regular file. Write to a temporary file,
        # then move it into place atomically.
        output_temp = output_file + '.tmp'
    changelog.write(output_temp)
    check_output(output_temp, input_file, merged_files)
    if output_temp != output_file:
        # os.replace overwrites an existing destination on all platforms,
        # whereas os.rename fails on Windows if the destination exists
        # (which is the normal case when updating the changelog in place).
        os.replace(output_temp, output_file)
394
def remove_merged_entries(files_to_remove):
    """Delete each of the files named in files_to_remove."""
    for entry_path in files_to_remove:
        os.remove(entry_path)
398
def list_files_to_merge(options):
    """Return the list of entry files to merge, sorted from oldest to newest.

    The entry files are the ``*.md`` files in the directory options.dir.
    The meaning of "oldest" is the one defined by `EntryFileSortKey`.
    """
    pattern = os.path.join(options.dir, '*.md')
    return sorted(glob.glob(pattern), key=EntryFileSortKey)
407
def merge_entries(options):
    """Merge changelog entries into the changelog file.

    Read the changelog file from options.input.
    Read entries to merge from the directory options.dir.
    Write the new changelog to options.output.
    Remove the merged entries if options.keep_entries is false.

    If there is no entry file, print a message on standard error and
    leave everything untouched.
    """
    with open(options.input, 'rb') as main_file:
        changelog = ChangeLog(main_file)
    entry_files = list_files_to_merge(options)
    if not entry_files:
        sys.stderr.write('There are no pending changelog entries.\n')
        return
    for entry_path in entry_files:
        with open(entry_path, 'rb') as entry_file:
            changelog.add_file(entry_file)
    finish_output(changelog, options.output, options.input, entry_files)
    if options.keep_entries:
        return
    remove_merged_entries(entry_files)
Gilles Peskine40b3f412019-10-13 21:44:25 +0200428
def show_file_timestamps(options):
    """Print the files to merge together with their sort information.

    One line is printed per file, in merge order: the category, the
    timestamp and the file name. This is only intended for debugging
    purposes.
    """
    for filename in list_files_to_merge(options):
        key = EntryFileSortKey(filename)
        print(key.category, key.datetime, filename)
438
def set_defaults(options):
    """Fill in the options that were not given explicitly.

    * options.output defaults to options.input (update in place).
    * options.keep_entries defaults to true if an output file was given
      explicitly and to false otherwise.
    """
    output_given = getattr(options, 'output', None) is not None
    if not output_given:
        options.output = options.input
    if getattr(options, 'keep_entries', None) is None:
        options.keep_entries = output_given
446
def _build_argument_parser():
    """Create the command line parser for this script."""
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        '--dir', '-d', metavar='DIR', default='ChangeLog.d',
        help='Directory to read entries from'
             ' (default: ChangeLog.d)')
    parser.add_argument(
        '--input', '-i', metavar='FILE', default='ChangeLog.md',
        help='Existing changelog file to read from and augment'
             ' (default: ChangeLog.md)')
    # --keep-entries and --no-keep-entries share one destination; None means
    # that neither was given, in which case set_defaults() decides.
    parser.add_argument(
        '--keep-entries',
        action='store_true', dest='keep_entries', default=None,
        help='Keep the files containing entries'
             ' (default: remove them if --output/-o is not specified)')
    parser.add_argument(
        '--no-keep-entries',
        action='store_false', dest='keep_entries',
        help='Remove the files containing entries after they are merged'
             ' (default: remove them if --output/-o is not specified)')
    parser.add_argument(
        '--output', '-o', metavar='FILE',
        help='Output changelog file'
             ' (default: overwrite the input)')
    parser.add_argument(
        '--list-files-only',
        action='store_true',
        help='Only list the files that would be processed (with some debugging information)')
    return parser


def main():
    """Command line entry point.

    Parse the command line, then either list the entry files
    (--list-files-only) or merge them into the changelog.
    """
    options = _build_argument_parser().parse_args()
    set_defaults(options)
    if options.list_files_only:
        show_file_timestamps(options)
    else:
        merge_entries(options)


if __name__ == '__main__':
    main()