| """Collect macro definitions from header files. |
| """ |
| |
| # Copyright The Mbed TLS Contributors |
| # SPDX-License-Identifier: Apache-2.0 |
| # |
| # Licensed under the Apache License, Version 2.0 (the "License"); you may |
| # not use this file except in compliance with the License. |
| # You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT |
| # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| |
import itertools
import re
from typing import Dict, IO, Iterable, Iterator, List, Optional, Pattern, Set, Tuple, Union
| |
| |
class ReadFileLineException(Exception):
    """Exception that records a file name and a position within that file.

    The exception message has the form ``in FILENAME at LINE_NUMBER``.
    Both values are also stored as the attributes `filename` and
    `line_number`.
    """

    def __init__(self, filename: str, line_number: Union[int, str]) -> None:
        """Build the message from the file name and the line indication.

        * filename: name of the file being processed.
        * line_number: a line number, or a string describing the position.
        """
        self.filename = filename
        self.line_number = line_number
        super().__init__('in {} at {}'.format(filename, line_number))
| |
| |
class read_file_lines:
    # Dear Pylint, conventionally, a context manager class name is lowercase.
    # pylint: disable=invalid-name,too-few-public-methods
    """Context manager to read a text file line by line.

    ```
    with read_file_lines(filename) as lines:
        for line in lines:
            process(line)
    ```
    is equivalent to
    ```
    with open(filename, 'r') as input_file:
        for line in input_file:
            process(line)
    ```
    except that if process(line) raises an exception, then the read_file_lines
    snippet annotates the exception with the file name and line number.

    The recorded line number is the index produced by `enumerate`, so the
    first line of the file is line 0. Before the first line is read the
    position is the string 'entry'; once the file is exhausted it is 'exit'.

    If `binary` is true, the file is opened in binary mode and the lines
    are bytes objects rather than strings.
    """
    def __init__(self, filename: str, binary: bool = False) -> None:
        self.filename = filename
        # The open file, kept so that __exit__ can close it.
        self.file = None #type: Optional[IO]
        self.line_number = 'entry' #type: Union[int, str]
        self.generator = None #type: Optional[Iterable[Tuple[int, str]]]
        self.binary = binary
    def __enter__(self) -> 'read_file_lines':
        """Open the file and prepare to enumerate its lines."""
        self.file = open(self.filename, 'rb' if self.binary else 'r')
        self.generator = enumerate(self.file)
        return self
    def __iter__(self) -> Iterator[str]:
        """Yield the lines of the file, tracking the current position."""
        assert self.generator is not None
        for line_number, content in self.generator:
            self.line_number = line_number
            yield content
        self.line_number = 'exit'
    def __exit__(self, exc_type, exc_value, exc_traceback) -> None:
        """Close the file, then annotate any exception raised in the body.

        Raises ReadFileLineException, chained from the original exception,
        if the body of the `with` statement raised.
        """
        # Close the file first so that it is released on both the normal
        # and the exceptional path.
        if self.file is not None:
            self.file.close()
        if exc_type is not None:
            raise ReadFileLineException(self.filename, self.line_number) \
                from exc_value
| |
| |
class PSAMacroEnumerator:
    """Information about constructors of various PSA Crypto types.

    This includes macro names as well as information about their arguments
    when applicable.

    This class only provides ways to enumerate expressions that evaluate to
    values of the covered types. Derived classes are expected to populate
    the set of known constructors of each kind, as well as populate
    `self.arguments_for` for arguments that are not of a kind that is
    enumerated here.
    """
    #pylint: disable=too-many-instance-attributes

    def __init__(self) -> None:
        """Set up an empty set of known constructor macros.
        """
        self.statuses = set() #type: Set[str]
        self.lifetimes = set() #type: Set[str]
        self.locations = set() #type: Set[str]
        self.persistence_levels = set() #type: Set[str]
        self.algorithms = set() #type: Set[str]
        self.ecc_curves = set() #type: Set[str]
        self.dh_groups = set() #type: Set[str]
        self.key_types = set() #type: Set[str]
        self.key_usage_flags = set() #type: Set[str]
        self.hash_algorithms = set() #type: Set[str]
        self.mac_algorithms = set() #type: Set[str]
        self.ka_algorithms = set() #type: Set[str]
        self.kdf_algorithms = set() #type: Set[str]
        self.pake_algorithms = set() #type: Set[str]
        self.aead_algorithms = set() #type: Set[str]
        # macro name -> list of argument names
        self.argspecs = {} #type: Dict[str, List[str]]
        # argument name -> list of values
        self.arguments_for = {
            'mac_length': [],
            'min_mac_length': [],
            'tag_length': [],
            'min_tag_length': [],
        } #type: Dict[str, List[str]]
        # Whether to include intermediate macros in enumerations. Intermediate
        # macros serve as category headers and are not valid values of their
        # type. See `is_internal_name`.
        # Always false in this class, may be set to true in derived classes.
        self.include_intermediate = False

    def is_internal_name(self, name: str) -> bool:
        """Whether this is an internal macro. Internal macros will be skipped.

        Names ending in ``_FLAG`` or ``_MASK`` are always internal. Names
        ending in ``_BASE`` or ``_NONE``, and names containing
        ``_CATEGORY_``, are internal unless `self.include_intermediate`
        is true.
        """
        if not self.include_intermediate:
            if name.endswith(('_BASE', '_NONE')):
                return True
            if '_CATEGORY_' in name:
                return True
        return name.endswith(('_FLAG', '_MASK'))

    def gather_arguments(self) -> None:
        """Populate the list of values for macro arguments.

        Call this after parsing all the inputs.
        """
        self.arguments_for['hash_alg'] = sorted(self.hash_algorithms)
        self.arguments_for['mac_alg'] = sorted(self.mac_algorithms)
        self.arguments_for['ka_alg'] = sorted(self.ka_algorithms)
        self.arguments_for['kdf_alg'] = sorted(self.kdf_algorithms)
        self.arguments_for['aead_alg'] = sorted(self.aead_algorithms)
        self.arguments_for['curve'] = sorted(self.ecc_curves)
        self.arguments_for['group'] = sorted(self.dh_groups)
        self.arguments_for['persistence'] = sorted(self.persistence_levels)
        self.arguments_for['location'] = sorted(self.locations)
        self.arguments_for['lifetime'] = sorted(self.lifetimes)

    @staticmethod
    def _format_arguments(name: str, arguments: Iterable[str]) -> str:
        """Format a macro call with arguments.

        The resulting format is consistent with
        `InputsForTest.normalize_argument`.
        """
        return name + '(' + ', '.join(arguments) + ')'

    _argument_split_re = re.compile(r' *, *')
    @classmethod
    def _argument_split(cls, arguments: str) -> List[str]:
        """Split a comma-separated argument list, dropping spaces around commas."""
        return cls._argument_split_re.split(arguments)

    def distribute_arguments(self, name: str) -> Iterator[str]:
        """Generate macro calls with each tested argument set.

        If name is a macro without arguments, just yield "name".
        If name is a macro with arguments, yield a series of
        "name(arg1,...,argN)" where each argument takes each possible
        value at least once.

        The first expression uses the first known value of every argument.
        Then each argument in turn goes through its remaining values while
        all the other arguments stay at their first value.

        Any error while building the expressions (for example an argument
        name with no entry in `self.arguments_for`, or an argument with an
        empty list of values) is re-raised as an `Exception` that names the
        macro, chained from the original error.
        """
        try:
            if name not in self.argspecs:
                yield name
                return
            argspec = self.argspecs[name]
            if argspec == []:
                yield name + '()'
                return
            argument_lists = [self.arguments_for[arg] for arg in argspec]
            arguments = [values[0] for values in argument_lists]
            yield self._format_arguments(name, arguments)
            for i, values in enumerate(argument_lists):
                for value in values[1:]:
                    arguments[i] = value
                    yield self._format_arguments(name, arguments)
                # Restore this argument's own first value before varying
                # the next argument.
                arguments[i] = values[0]
        except Exception as e:
            # Only catch Exception: GeneratorExit (raised at a yield when the
            # generator is closed early) and KeyboardInterrupt must propagate
            # unchanged.
            raise Exception('distribute_arguments({})'.format(name)) from e

    def distribute_arguments_without_duplicates(
            self, seen: Set[str], name: str
    ) -> Iterator[str]:
        """Same as `distribute_arguments`, but don't repeat seen results.

        Each yielded expression is added to `seen`.
        """
        for result in self.distribute_arguments(name):
            if result not in seen:
                seen.add(result)
                yield result

    def generate_expressions(self, names: Iterable[str]) -> Iterator[str]:
        """Generate expressions covering values constructed from the given names.

        `names` can be any iterable collection of macro names.

        For example:
        * ``generate_expressions(['PSA_ALG_CMAC', 'PSA_ALG_HMAC'])``
          generates ``'PSA_ALG_CMAC'`` as well as ``'PSA_ALG_HMAC(h)'`` for
          every known hash algorithm ``h``.
        * ``macros.generate_expressions(macros.key_types)`` generates all
          key types.
        """
        seen = set() #type: Set[str]
        # Iterate over `names` immediately, so that later changes to the
        # collection do not affect the result. The per-name generators
        # themselves remain lazy.
        generators = [
            self.distribute_arguments_without_duplicates(seen, name)
            for name in names
        ]
        return itertools.chain.from_iterable(generators)
| |
| |
class PSAMacroCollector(PSAMacroEnumerator):
    """Collect PSA crypto macro definitions from C header files.
    """

    def __init__(self, include_intermediate: bool = False) -> None:
        """Set up an object to collect PSA macro definitions.

        Call the read_file method of the constructed object on each header file.

        * include_intermediate: if true, include intermediate macros such as
          PSA_XXX_BASE that do not designate semantic values.
        """
        super().__init__()
        self.include_intermediate = include_intermediate
        # Constructor macro name -> name of the corresponding test macro
        # (the constructor name with "IS_" inserted after its type prefix).
        self.key_types_from_curve = {} #type: Dict[str, str]
        self.key_types_from_group = {} #type: Dict[str, str]
        self.algorithms_from_hash = {} #type: Dict[str, str]

    def record_algorithm_subtype(self, name: str, expansion: str) -> None:
        """Record the subtype of an algorithm constructor.

        Given a ``PSA_ALG_xxx`` macro name and its expansion, if the algorithm
        is of a subtype that is tracked in its own set, add it to the relevant
        set.
        """
        # This code is very ad hoc and fragile. It should be replaced by
        # something more robust.
        # NOTE(review): re.match anchors at the start of `name`, and
        # read_line only passes names that start with 'PSA_ALG_', so the
        # first two branches cannot match from that call path. They are
        # left unchanged to preserve the current classification.
        if re.match(r'MAC(?:_|\Z)', name):
            self.mac_algorithms.add(name)
        elif re.match(r'KDF(?:_|\Z)', name):
            self.kdf_algorithms.add(name)
        elif re.search(r'0x020000[0-9A-Fa-f]{2}', expansion):
            self.hash_algorithms.add(name)
        elif re.search(r'0x03[0-9A-Fa-f]{6}', expansion):
            self.mac_algorithms.add(name)
        elif re.search(r'0x05[0-9A-Fa-f]{6}', expansion):
            self.aead_algorithms.add(name)
        elif re.search(r'0x09[0-9A-Fa-f]{2}0000', expansion):
            self.ka_algorithms.add(name)
        elif re.search(r'0x08[0-9A-Fa-f]{6}', expansion):
            self.kdf_algorithms.add(name)

    # "#define" followed by a macro name with either no parameters
    # or a single parameter and a non-empty expansion.
    # Grab the macro name in group 1, the parameter name if any in group 2
    # and the expansion in group 3.
    _define_directive_re = re.compile(r'\s*#\s*define\s+(\w+)' +
                                      r'(?:\s+|\((\w+)\)\s*)' +
                                      r'(.+)')
    _deprecated_definition_re = re.compile(r'\s*MBEDTLS_DEPRECATED')

    def read_line(self, line: str) -> None:
        """Parse a C header line and record the PSA identifier it defines if any.

        Lines that are not a "#define" of a macro with at most one parameter
        and a non-empty expansion (up to non-significant whitespace) are
        skipped. Among the matching lines, only macros whose name has one
        of the prefixes tested below are recorded in a set or a dictionary.

        If the macro has a parameter, its name is recorded in
        `self.argspecs` before any filtering takes place.
        """
        # pylint: disable=too-many-branches
        m = self._define_directive_re.match(line)
        if not m:
            return
        name, parameter, expansion = m.groups()
        # Replace C comments in the expansion by a space.
        expansion = re.sub(r'/\*.*?\*/|//.*', r' ', expansion)
        if parameter:
            self.argspecs[name] = [parameter]
        if self._deprecated_definition_re.match(expansion):
            # Skip deprecated values, which are assumed to be
            # backward compatibility aliases that share
            # numerical values with non-deprecated values.
            return
        if self.is_internal_name(name):
            # Macro only to build actual values
            return
        elif (name.startswith('PSA_ERROR_') or name == 'PSA_SUCCESS') \
           and not parameter:
            self.statuses.add(name)
        elif name.startswith('PSA_KEY_TYPE_') and not parameter:
            self.key_types.add(name)
        elif name.startswith('PSA_KEY_TYPE_') and parameter == 'curve':
            # 13 is the length of 'PSA_KEY_TYPE_'.
            self.key_types_from_curve[name] = name[:13] + 'IS_' + name[13:]
        elif name.startswith('PSA_KEY_TYPE_') and parameter == 'group':
            self.key_types_from_group[name] = name[:13] + 'IS_' + name[13:]
        elif name.startswith('PSA_ECC_FAMILY_') and not parameter:
            self.ecc_curves.add(name)
        elif name.startswith('PSA_DH_FAMILY_') and not parameter:
            self.dh_groups.add(name)
        elif name.startswith('PSA_ALG_') and not parameter:
            if name in ['PSA_ALG_ECDSA_BASE',
                        'PSA_ALG_RSA_PKCS1V15_SIGN_BASE']:
                # Ad hoc skipping of duplicate names for some numerical values
                return
            self.algorithms.add(name)
            self.record_algorithm_subtype(name, expansion)
        elif name.startswith('PSA_ALG_') and parameter == 'hash_alg':
            # 8 is the length of 'PSA_ALG_'.
            if name in ['PSA_ALG_DSA', 'PSA_ALG_ECDSA']:
                # A naming irregularity
                tester = name[:8] + 'IS_RANDOMIZED_' + name[8:]
            else:
                tester = name[:8] + 'IS_' + name[8:]
            self.algorithms_from_hash[name] = tester
        elif name.startswith('PSA_KEY_USAGE_') and not parameter:
            self.key_usage_flags.add(name)
        else:
            # Other macro without parameter
            return

    _nonascii_re = re.compile(rb'[^\x00-\x7f]+')
    _continued_line_re = re.compile(rb'\\\r?\n\Z')
    def read_file(self, header_file: Iterable[bytes]) -> None:
        """Parse the lines of a C header and record the PSA macros they define.

        * header_file: an iterable of lines as bytes objects, for example a
          file opened in binary mode.

        A line ending with a backslash followed by a newline is joined with
        the next line before parsing. Non-ASCII bytes are removed, then each
        logical line is decoded and passed to `read_line`.
        """
        # Use a single iterator for both the loop and the continuation
        # lookahead, so that any iterable (not only an iterator) works.
        lines = iter(header_file)
        for line in lines:
            m = self._continued_line_re.search(line)
            while m:
                cont = next(lines, None)
                if cont is None:
                    # The input ended in the middle of a continued line:
                    # parse what has been accumulated instead of letting
                    # StopIteration escape from this method.
                    line = line[:m.start(0)]
                    break
                line = line[:m.start(0)] + cont
                m = self._continued_line_re.search(line)
            line = self._nonascii_re.sub(rb'', line).decode('ascii')
            self.read_line(line)
| |
| |
class InputsForTest(PSAMacroEnumerator):
    # pylint: disable=too-many-instance-attributes
    """Accumulate information about macros to test.

    This includes macro names as well as information about their arguments
    when applicable.
    """

    def __init__(self) -> None:
        """Set up the lookup tables and the values of length arguments."""
        super().__init__()
        # Every PSA_xxx macro name seen by parse_header_line.
        self.all_declared = set() #type: Set[str]
        # Identifier prefixes
        self.table_by_prefix = {
            'ERROR': self.statuses,
            'ALG': self.algorithms,
            'ECC_CURVE': self.ecc_curves,
            'DH_GROUP': self.dh_groups,
            'KEY_LIFETIME': self.lifetimes,
            'KEY_LOCATION': self.locations,
            'KEY_PERSISTENCE': self.persistence_levels,
            'KEY_TYPE': self.key_types,
            'KEY_USAGE': self.key_usage_flags,
        } #type: Dict[str, Set[str]]
        # Test functions
        self.table_by_test_function = {
            # Any function ending in _algorithm also gets added to
            # self.algorithms.
            'key_type': [self.key_types],
            'block_cipher_key_type': [self.key_types],
            'stream_cipher_key_type': [self.key_types],
            'ecc_key_family': [self.ecc_curves],
            'ecc_key_types': [self.ecc_curves],
            'dh_key_family': [self.dh_groups],
            'dh_key_types': [self.dh_groups],
            'hash_algorithm': [self.hash_algorithms],
            'mac_algorithm': [self.mac_algorithms],
            'cipher_algorithm': [],
            'hmac_algorithm': [self.mac_algorithms],
            'aead_algorithm': [self.aead_algorithms],
            'key_derivation_algorithm': [self.kdf_algorithms],
            'key_agreement_algorithm': [self.ka_algorithms],
            'asymmetric_signature_algorithm': [],
            'asymmetric_signature_wildcard': [self.algorithms],
            'asymmetric_encryption_algorithm': [],
            'pake_algorithm': [self.pake_algorithms],
            'other_algorithm': [],
            'lifetime': [self.lifetimes],
        } #type: Dict[str, List[Set[str]]]
        # All the length arguments are tested with the same two values.
        for length_argument in ('mac_length', 'min_mac_length',
                                'tag_length', 'min_tag_length'):
            self.arguments_for[length_argument] += ['1', '63']

    def add_numerical_values(self) -> None:
        """Add numerical values that are not supported to the known identifiers."""
        # Sets of names per type
        unsupported_by_type = (
            (self.algorithms, '0xffffffff'),
            (self.ecc_curves, '0xff'),
            (self.dh_groups, '0xff'),
            (self.key_types, '0xffff'),
            (self.key_usage_flags, '0x80000000'),
        )
        for names, value in unsupported_by_type:
            names.add(value)

        # Hard-coded values for unknown algorithms
        #
        # These have to have values that are correct for their respective
        # PSA_ALG_IS_xxx macros, but are also not currently assigned and are
        # not likely to be assigned in the near future.
        unknown_algorithms = (
            (self.hash_algorithms, '0x020000fe'), # 0x020000ff is PSA_ALG_ANY_HASH
            (self.mac_algorithms, '0x03007fff'),
            (self.ka_algorithms, '0x09fc0000'),
            (self.kdf_algorithms, '0x080000ff'),
            (self.pake_algorithms, '0x0a0000ff'),
        )
        for names, value in unknown_algorithms:
            names.add(value)
        # For AEAD algorithms, the only variability is over the tag length,
        # and this only applies to known algorithms, so don't test an
        # unknown algorithm.

    def get_names(self, type_word: str) -> Set[str]:
        """Return the set of known names of values of the given type.

        Raise KeyError if `type_word` is not one of the supported types.
        """
        names_by_type = {
            'status': self.statuses,
            'algorithm': self.algorithms,
            'ecc_curve': self.ecc_curves,
            'dh_group': self.dh_groups,
            'key_type': self.key_types,
            'key_usage': self.key_usage_flags,
        }
        return names_by_type[type_word]

    # Regex for interesting header lines.
    # Groups: 1=macro name, 2=type, 3=argument list (optional).
    _header_line_re = \
        re.compile(r'#define +' +
                   r'(PSA_((?:(?:DH|ECC|KEY)_)?[A-Z]+)_\w+)' +
                   r'(?:\(([^\n()]*)\))?')
    # Regex of macro names to exclude.
    _excluded_name_re = re.compile(r'_(?:GET|IS|OF)_|_(?:BASE|FLAG|MASK)\Z')
    # Additional excluded macros.
    _excluded_names = {
        # Macros that provide an alternative way to build the same
        # algorithm as another macro.
        'PSA_ALG_AEAD_WITH_DEFAULT_LENGTH_TAG',
        'PSA_ALG_FULL_LENGTH_MAC',
        # Auxiliary macro whose name doesn't fit the usual patterns for
        # auxiliary macros.
        'PSA_ALG_AEAD_WITH_DEFAULT_LENGTH_TAG_CASE',
    }
    def parse_header_line(self, line: str) -> None:
        """Parse a C header line, looking for "#define PSA_xxx".

        Every matching macro name is recorded in `self.all_declared`.
        Unless the name is excluded or internal, it is also added to the
        set selected by its type prefix, and its argument names, if any,
        are recorded in `self.argspecs`.
        """
        found = self._header_line_re.match(line)
        if found is None:
            return
        name, type_prefix, argument_list = found.groups()
        self.all_declared.add(name)
        if self._excluded_name_re.search(name):
            return
        if name in self._excluded_names:
            return
        if self.is_internal_name(name):
            return
        dest = self.table_by_prefix.get(type_prefix)
        if dest is None:
            return
        dest.add(name)
        if argument_list:
            self.argspecs[name] = self._argument_split(argument_list)

    _nonascii_re = re.compile(rb'[^\x00-\x7f]+') #type: Pattern
    def parse_header(self, filename: str) -> None:
        """Parse a C header file, looking for "#define PSA_xxx".

        The file is read in binary mode and non-ASCII bytes are removed
        from each line before it is decoded and parsed.
        """
        with read_file_lines(filename, binary=True) as lines:
            for raw_line in lines:
                ascii_line = self._nonascii_re.sub(rb'', raw_line)
                self.parse_header_line(ascii_line.decode('ascii'))

    _macro_identifier_re = re.compile(r'[A-Z]\w+')
    def generate_undeclared_names(self, expr: str) -> Iterable[str]:
        """Yield the identifiers in `expr` that are not in `self.all_declared`."""
        for candidate in self._macro_identifier_re.findall(expr):
            if candidate in self.all_declared:
                continue
            yield candidate

    def accept_test_case_line(self, function: str, argument: str) -> bool:
        """Check that `argument` only uses declared names.

        Return True if it does, otherwise raise an Exception listing the
        undeclared names.
        """
        #pylint: disable=unused-argument
        undeclared = list(self.generate_undeclared_names(argument))
        if undeclared:
            raise Exception('Undeclared names in test case', undeclared)
        return True

    @staticmethod
    def normalize_argument(argument: str) -> str:
        """Normalize whitespace in the given C expression.

        The result uses the same whitespace as
        ` PSAMacroEnumerator.distribute_arguments`.
        """
        without_spaces = re.sub(r' +', r'', argument)
        return re.sub(r',', r', ', without_spaces)

    def add_test_case_line(self, function: str, argument: str) -> None:
        """Parse a test case data line, looking for algorithm metadata tests.

        Raise KeyError if `function` is not a key of
        `self.table_by_test_function`.
        """
        targets = []
        if function.endswith('_algorithm'):
            targets.append(self.algorithms)
            is_chained_key_agreement = (
                function == 'key_agreement_algorithm' and
                argument.startswith('PSA_ALG_KEY_AGREEMENT(')
            )
            if is_chained_key_agreement:
                # We only want *raw* key agreement algorithms as such, so
                # exclude ones that are already chained with a KDF.
                # Keep the expression as one to test as an algorithm.
                function = 'other_algorithm'
        targets.extend(self.table_by_test_function[function])
        if not self.accept_test_case_line(function, argument):
            return
        normalized = self.normalize_argument(argument)
        for target in targets:
            target.add(normalized)

    # Regex matching a *.data line containing a test function call and
    # its arguments. The actual definition is partly positional, but this
    # regex is good enough in practice.
    _test_case_line_re = re.compile(r'(?!depends_on:)(\w+):([^\n :][^:\n]*)')
    def parse_test_cases(self, filename: str) -> None:
        """Parse a test case file (*.data), looking for algorithm metadata tests."""
        with read_file_lines(filename) as lines:
            for line in lines:
                found = self._test_case_line_re.match(line)
                if found is None:
                    continue
                self.add_test_case_line(found.group(1), found.group(2))