#!/usr/bin/env python3

"""Assemble Mbed TLS change log entries into the change log file.

Add changelog entries to the first level-2 section.
Create a new level-2 section for unreleased changes if needed.
Remove the input files unless --keep-entries is specified.

In each level-3 section, entries are sorted in chronological order
(oldest first). From oldest to newest:
* Merged entry files are sorted according to their merge date (date of
  the merge commit that brought the commit that created the file into
  the target branch).
* Committed but unmerged entry files are sorted according to the date
  of the commit that adds them.
* Uncommitted entry files are sorted according to their modification time.

You must run this program from within a git working directory.
"""

# Copyright (C) 2019, Arm Limited, All Rights Reserved
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# This file is part of Mbed TLS (https://tls.mbed.org)

import argparse
from collections import OrderedDict
import datetime
import functools
import glob
import os
import re
import subprocess
import sys

class InputFormatError(Exception):
    """Error in the format of an input file, reported with its location.

    The exception text is ``FILENAME:LINE_NUMBER: MESSAGE`` where MESSAGE
    is ``message`` expanded with ``str.format`` using the remaining
    positional and keyword arguments.
    """

    def __init__(self, filename, line_number, message, *args, **kwargs):
        message = '{}:{}: {}'.format(filename, line_number,
                                     message.format(*args, **kwargs))
        super().__init__(message)

class LostContent(Exception):
    """A line of an input file is missing from the generated output.

    ``filename`` names the input the line came from and ``line`` is the
    missing line itself; both are embedded in the exception text.
    """

    def __init__(self, filename, line):
        message = ('Lost content from {}: "{}"'.format(filename, line))
        super().__init__(message)

# Level-3 section titles that are always accepted in entry files.
# ChangeLog seeds its section table from this tuple, in this order, and
# writes the non-empty sections back in that same order.
STANDARD_SECTIONS = (
    b'Interface changes',
    b'Default behavior changes',
    b'Requirement changes',
    b'New deprecations',
    b'Removals',
    b'New features',
    b'Security',
    b'Bug fixes',
    b'Performance improvements',
    b'Other changes',
)

class ChangeLog:
    """An Mbed TLS changelog.

    A changelog is a file in Markdown format. Each level 2 section title
    starts a version, and versions are sorted in reverse chronological
    order. Lines with a level 2 section title must start with '##'.

    Within a version, there are multiple sections, each devoted to a kind
    of change: bug fix, feature request, etc. Section titles should match
    entries in STANDARD_SECTIONS exactly.

    Within each section, each separate change should be on a line starting
    with a '*' bullet. There may be blank lines surrounding titles, but
    there should not be any blank line inside a section.

    All content is handled as bytes: lines are read from and written to
    files opened in binary mode.
    """

    # Matches the (possibly empty) run of '#' at the start of a line, so
    # the match always succeeds and its end offset is the title level.
    _title_re = re.compile(br'#*')

    def title_level(self, line):
        """Determine whether the line is a title.

        Return (level, content) where level is the Markdown section level
        (1 for '#', 2 for '##', etc.) and content is the section title
        without leading or trailing whitespace. For a non-title line,
        the level is 0.
        """
        level = self._title_re.match(line).end()
        return level, line[level:].strip()

    # Only accept dotted version numbers (e.g. "3.1", not "3").
    # Refuse ".x" in a version number where x is a letter: this indicates
    # a version that is not yet released. Something like "3.1a" is accepted.
    _version_number_re = re.compile(br'[0-9]+\.[0-9A-Za-z.]+')
    _incomplete_version_number_re = re.compile(br'.*\.[A-Za-z]')

    def section_is_released_version(self, title):
        """Whether this section is for a released version.

        True if the given level-2 section title indicates that this section
        contains released changes, otherwise False.
        """
        # Assume that a released version has a numerical version number
        # that follows a particular pattern. These criteria may be revised
        # as needed in future versions of this script.
        version_number = self._version_number_re.search(title)
        if version_number is None:
            return False
        incomplete = self._incomplete_version_number_re.search(
            version_number.group(0))
        return not incomplete

    def unreleased_version_title(self):
        """The title to use if creating a new section for an unreleased version."""
        # pylint: disable=no-self-use; this method may be overridden
        return b'Unreleased changes'

    def __init__(self, input_stream):
        """Create a changelog object.

        Populate the changelog object from the content of the file
        input_stream. This is typically a file opened for reading, but
        can be any generator returning the lines to read.
        """
        # Content before the level-2 section where the new entries are to be
        # added.
        self.header = []
        # Content of the level-3 sections of where the new entries are to
        # be added.
        self.section_content = OrderedDict()
        for section in STANDARD_SECTIONS:
            self.section_content[section] = []
        # Content of level-2 sections for already-released versions.
        self.trailer = []
        self.read_main_file(input_stream)

    def read_main_file(self, input_stream):
        """Populate the changelog object from the content of the file.

        This method is only intended to be called as part of the constructor
        of the class and may not act sensibly on an object that is already
        partially populated.
        """
        # Parse the first level-2 section, containing changelog entries
        # for unreleased changes.
        # If we'll be expanding this section, everything before the first
        # level-3 section title ("###...") following the first level-2
        # section title ("##...") is passed through as the header
        # and everything after the second level-2 section title is passed
        # through as the trailer. Inside the first level-2 section,
        # split out the level-3 sections.
        # If we'll be creating a new version, the header is everything
        # before the point where we want to add the level-2 section
        # for this version, and the trailer is what follows.
        level_2_seen = 0
        current_section = None
        for line in input_stream:
            level, content = self.title_level(line)
            if level == 2:
                level_2_seen += 1
                if level_2_seen == 1:
                    if self.section_is_released_version(content):
                        # The topmost version is already released: start a
                        # new unreleased section above it, and treat this
                        # line as the beginning of the trailer.
                        self.header.append(b'## ' +
                                           self.unreleased_version_title() +
                                           b'\n\n')
                        level_2_seen = 2
            elif level == 3 and level_2_seen == 1:
                current_section = content
                self.section_content.setdefault(content, [])
            if level_2_seen == 1 and current_section is not None:
                # Inside a level-3 section of the unreleased version.
                # Title lines and blank lines are dropped here; write()
                # regenerates them.
                if level != 3 and line.strip():
                    self.section_content[current_section].append(line)
            elif level_2_seen <= 1:
                self.header.append(line)
            else:
                self.trailer.append(line)

    def add_file(self, input_stream):
        """Add changelog entries from a file.

        Read lines from input_stream, which is typically a file opened
        for reading. These lines must contain a series of level 3
        Markdown sections with recognized titles. The corresponding
        content is injected into the respective sections in the changelog.
        The section titles must be either one of the hard-coded values
        in STANDARD_SECTIONS in assemble_changelog.py or already present
        in ChangeLog.md. Section titles must match byte-for-byte except that
        leading or trailing whitespace is ignored.

        input_stream must have a ``name`` attribute; it is used in error
        messages. Raise InputFormatError if the content is malformed.
        """
        filename = input_stream.name
        current_section = None
        for line_number, line in enumerate(input_stream, 1):
            if not line.strip():
                continue
            level, content = self.title_level(line)
            if level == 3:
                current_section = content
                if current_section not in self.section_content:
                    # repr() of bytes is "b'...'"; drop the leading "b".
                    # repr() rather than str() so that no BytesWarning is
                    # triggered when Python runs with -b or -bb.
                    raise InputFormatError(filename, line_number,
                                           'Section {} is not recognized',
                                           repr(current_section)[1:])
            elif level == 0:
                if current_section is None:
                    raise InputFormatError(
                        filename, line_number,
                        'Missing section title at the beginning of the file')
                self.section_content[current_section].append(line)
            else:
                raise InputFormatError(
                    filename, line_number,
                    'Only level 3 headers (###) are permitted')

    def write(self, filename):
        """Write the changelog to the specified file.

        The header comes first, then each non-empty level-3 section with
        its title followed by a blank line, its lines and another blank
        line, then the trailer.
        """
        with open(filename, 'wb') as out:
            for line in self.header:
                out.write(line)
            for section, lines in self.section_content.items():
                if not lines:
                    continue
                out.write(b'### ' + section + b'\n\n')
                for line in lines:
                    out.write(line)
                out.write(b'\n')
            for line in self.trailer:
                out.write(line)


@functools.total_ordering
class EntryFileSortKey:
    """This class defines an ordering on changelog entry files: older < newer.

    * Merged entry files are sorted according to their merge date (date of
      the merge commit that brought the commit that created the file into
      the target branch).
    * Committed but unmerged entry files are sorted according to the date
      of the commit that adds them.
    * Uncommitted entry files are sorted according to their modification time.

    This class assumes that the file is in a git working directory with
    the target branch checked out.
    """

    # Categories of files. A lower number is considered older.
    MERGED = 0
    COMMITTED = 1
    LOCAL = 2

    @staticmethod
    def creation_hash(filename):
        """Return the git commit id at which the given file was created.

        Return None if the file was never checked into git.
        """
        hashes = subprocess.check_output(['git', 'log', '--format=%H',
                                          '--follow',
                                          '--', filename])
        # Without re.MULTILINE, '$' only matches at the very end of the
        # output (or just before a final newline), so this picks the last
        # non-empty line.
        m = re.search(b'(.+)$', hashes)
        if not m:
            # The git output is empty. This means that the file was
            # never checked in.
            return None
        # The last commit in the log is the oldest one, which is when the
        # file was created.
        return m.group(0)

    @staticmethod
    def list_merges(some_hash, target, *options):
        """List merge commits from some_hash to target.

        Pass options to git to select which commits are included.

        Return a list of commit ids as bytes objects, in the order printed
        by git. The list is empty if git prints nothing.
        """
        text = subprocess.check_output(['git', 'rev-list',
                                        '--merges', *options,
                                        b'..'.join([some_hash, target])])
        # split() without argument yields [] for empty output, where
        # rstrip(b'\n').split(b'\n') would yield [b''] and make an empty
        # string look like a commit id.
        return text.split()

    @classmethod
    def merge_hash(cls, some_hash):
        """Return the git commit id at which the given commit was merged.

        Return None if the given commit was never merged.
        """
        target = b'HEAD'
        # List the merges from some_hash to the target in two ways.
        # The ancestry list is the ones that are both descendants of
        # some_hash and ancestors of the target.
        ancestry = frozenset(cls.list_merges(some_hash, target,
                                             '--ancestry-path'))
        # The first_parents list only contains merges that are directly
        # on the target branch. We want it in reverse order (oldest first).
        first_parents = cls.list_merges(some_hash, target,
                                        '--first-parent', '--reverse')
        # Look for the oldest merge commit that's both on the direct path
        # and directly on the target branch. That's the place where some_hash
        # was merged on the target branch. See
        # https://stackoverflow.com/questions/8475448/find-merge-commit-which-include-a-specific-commit
        for commit in first_parents:
            if commit in ancestry:
                return commit
        return None

    @staticmethod
    def commit_timestamp(commit_id):
        """Return the timestamp of the given commit.

        The result is a timezone-aware datetime in UTC, built from the
        value that git prints for the ``%ct`` format.
        """
        text = subprocess.check_output(['git', 'show', '-s',
                                        '--format=%ct',
                                        commit_id])
        return datetime.datetime.fromtimestamp(int(text),
                                               datetime.timezone.utc)

    @staticmethod
    def file_timestamp(filename):
        """Return the modification timestamp of the given file.

        The result is a timezone-aware datetime in UTC, like the result
        of commit_timestamp().
        """
        mtime = os.stat(filename).st_mtime
        return datetime.datetime.fromtimestamp(mtime, datetime.timezone.utc)

    def __init__(self, filename):
        """Determine position of the file in the changelog entry order.

        This constructor returns an object that can be used with comparison
        operators, with `sort` and `sorted`, etc. Older entries are sorted
        before newer entries.

        This runs git commands, so it must be called from within a git
        working directory.
        """
        self.filename = filename
        creation_hash = self.creation_hash(filename)
        if not creation_hash:
            self.category = self.LOCAL
            self.datetime = self.file_timestamp(filename)
            return
        merge_hash = self.merge_hash(creation_hash)
        if not merge_hash:
            self.category = self.COMMITTED
            self.datetime = self.commit_timestamp(creation_hash)
            return
        self.category = self.MERGED
        self.datetime = self.commit_timestamp(merge_hash)

    def sort_key(self):
        """Return a concrete sort key for this entry file sort key object.

        ``ts1 < ts2`` is implemented as ``ts1.sort_key() < ts2.sort_key()``.
        """
        return (self.category, self.datetime, self.filename)

    def __eq__(self, other):
        if not isinstance(other, EntryFileSortKey):
            # Let Python fall back to its default handling instead of
            # failing with AttributeError on other.sort_key.
            return NotImplemented
        return self.sort_key() == other.sort_key()

    def __lt__(self, other):
        if not isinstance(other, EntryFileSortKey):
            return NotImplemented
        return self.sort_key() < other.sort_key()

    def __hash__(self):
        # Defining __eq__ removes the inherited __hash__; restore one that
        # is consistent with equality.
        return hash(self.sort_key())


def check_output(generated_output_file, main_input_file, merged_files):
    """Make sanity checks on the generated output.

    The intent of these sanity checks is to have reasonable confidence
    that no content has been lost.

    The sanity check is that every line that is present in an input file
    is also present in an output file. This is not perfect but good enough
    for now.

    Raise LostContent for the first input line that is not found in the
    generated output. All files are read in binary mode and closed before
    returning.
    """
    with open(generated_output_file, 'rb') as output_stream:
        generated_output = set(output_stream)
    with open(main_input_file, 'rb') as input_stream:
        for line in input_stream:
            if line not in generated_output:
                raise LostContent('original file', line)
    for merged_file in merged_files:
        with open(merged_file, 'rb') as entry_stream:
            for line in entry_stream:
                if line not in generated_output:
                    raise LostContent(merged_file, line)

def finish_output(changelog, output_file, input_file, merged_files):
    """Write the changelog to the output file.

    The input file and the list of merged files are used only for sanity
    checks on the output.

    If the sanity checks fail, the exception from check_output propagates
    and the output is not moved into place.
    """
    if os.path.exists(output_file) and not os.path.isfile(output_file):
        # The output is a non-regular file (e.g. pipe). Write to it directly.
        output_temp = output_file
    else:
        # The output is a regular file. Write to a temporary file,
        # then move it into place atomically.
        output_temp = output_file + '.tmp'
    changelog.write(output_temp)
    check_output(output_temp, input_file, merged_files)
    if output_temp != output_file:
        # os.replace overwrites an existing destination on every platform;
        # os.rename fails on Windows when the destination exists.
        os.replace(output_temp, output_file)

def remove_merged_entries(files_to_remove):
    """Delete each of the given files.

    An error from os.remove (e.g. for a missing file) propagates to the
    caller; files later in the sequence are then left in place.
    """
    for filename in files_to_remove:
        os.remove(filename)

def list_files_to_merge(options):
    """List the entry files to merge, oldest first.

    "Oldest" is defined by `EntryFileSortKey`.

    The entry files are the ``*.md`` files directly inside the directory
    ``options.dir``.
    """
    files_to_merge = glob.glob(os.path.join(options.dir, '*.md'))
    files_to_merge.sort(key=EntryFileSortKey)
    return files_to_merge

def merge_entries(options):
    """Merge changelog entries into the changelog file.

    Read the changelog file from options.input.
    Read entries to merge from the directory options.dir.
    Write the new changelog to options.output.
    Remove the merged entries if options.keep_entries is false.

    If there are no entry files, print a message on standard error and
    return without writing anything.
    """
    with open(options.input, 'rb') as input_file:
        changelog = ChangeLog(input_file)
    files_to_merge = list_files_to_merge(options)
    if not files_to_merge:
        sys.stderr.write('There are no pending changelog entries.\n')
        return
    for filename in files_to_merge:
        with open(filename, 'rb') as input_file:
            changelog.add_file(input_file)
    finish_output(changelog, options.output, options.input, files_to_merge)
    # Only reached if the output was written and passed the sanity checks.
    if not options.keep_entries:
        remove_merged_entries(files_to_merge)

def show_file_timestamps(options):
    """List the files to merge and their timestamp.

    This is only intended for debugging purposes.

    Print one line per file, in merge order, with the file's category
    number, its timestamp and its name.
    """
    files = list_files_to_merge(options)
    for filename in files:
        ts = EntryFileSortKey(filename)
        print(ts.category, ts.datetime, filename)

def set_defaults(options):
    """Add default values for missing options.

    * ``options.output`` defaults to ``options.input``.
    * ``options.keep_entries`` defaults to true if an output file was
      given explicitly, false otherwise.

    The options object is modified in place.
    """
    output_file = getattr(options, 'output', None)
    if output_file is None:
        options.output = options.input
    if getattr(options, 'keep_entries', None) is None:
        # Decide from the output as originally given, before it was
        # defaulted above.
        options.keep_entries = (output_file is not None)

def main():
    """Command line entry point.

    Parse the command line, fill in defaults, then either list the entry
    files (--list-files-only) or merge them into the changelog.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('--dir', '-d', metavar='DIR',
                        default='ChangeLog.d',
                        help='Directory to read entries from'
                             ' (default: ChangeLog.d)')
    parser.add_argument('--input', '-i', metavar='FILE',
                        default='ChangeLog.md',
                        help='Existing changelog file to read from and augment'
                             ' (default: ChangeLog.md)')
    # --keep-entries and --no-keep-entries share one destination whose
    # default is None, so that set_defaults can tell "not specified"
    # apart from an explicit choice.
    parser.add_argument('--keep-entries',
                        action='store_true', dest='keep_entries', default=None,
                        help='Keep the files containing entries'
                             ' (default: remove them if --output/-o is not specified)')
    parser.add_argument('--no-keep-entries',
                        action='store_false', dest='keep_entries',
                        help='Remove the files containing entries after they are merged'
                             ' (default: remove them if --output/-o is not specified)')
    parser.add_argument('--output', '-o', metavar='FILE',
                        help='Output changelog file'
                             ' (default: overwrite the input)')
    parser.add_argument('--list-files-only',
                        action='store_true',
                        help=('Only list the files that would be processed'
                              ' (with some debugging information)'))
    options = parser.parse_args()
    set_defaults(options)
    if options.list_files_only:
        show_file_timestamps(options)
        return
    merge_entries(options)

# Run the command line entry point only when executed as a script.
if __name__ == '__main__':
    main()